aboutsummaryrefslogtreecommitdiffstats
path: root/lib/librte_member/rte_member.h
blob: 56dd3b3a9774b341fe6d45a521e5c63d718a4a0b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2017 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * @file
 *
 * RTE Membership Library
 *
 * The Membership Library is an extension and generalization of a traditional
 * filter (for example Bloom Filter and cuckoo filter) structure that has
 * multiple usages in a variety of workloads and applications. The library is
 * used to test if a key belongs to certain sets. Two types of such
 * "set-summary" structures are implemented: hash-table based (HT) and vector
 * bloom filter (vBF). For HT setsummary, two subtypes or modes are available,
 * cache and non-cache modes. The table below summarize some properties of
 * the different implementations.
 *
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 */

/**
 * <!--
 * +==========+=====================+================+=========================+
 * |   type   |      vbf            |     HT-cache   |     HT-non-cache        |
 * +==========+=====================+==========================================+
 * |structure |  bloom-filter array |  hash-table like without storing key     |
 * +----------+---------------------+------------------------------------------+
 * |set id    | limited by bf count |           [1, 0x7fff]                    |
 * |          | up to 32.           |                                          |
 * +----------+---------------------+------------------------------------------+
 * |usages &  | small set range,    | can delete,    | cache most recent keys, |
 * |properties| user-specified      | big set range, | have both false-positive|
 * |          | false-positive rate,| small false    | and false-negative      |
 * |          | no deletion support.| positive depend| depend on table size,   |
 * |          |                     | on table size, | automatic overwritten.  |
 * |          |                     | new key does   |                         |
 * |          |                     | not overwrite  |                         |
 * |          |                     | existing key.  |                         |
 * +----------+---------------------+----------------+-------------------------+
 * -->
 */

#ifndef _RTE_MEMBER_H_
#define _RTE_MEMBER_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>

#include <rte_common.h>
#include <rte_config.h>

/** The set ID type that stored internally in hash table based set summary. */
typedef uint16_t member_set_t;
/** Invalid set ID used to mean no match found. */
#define RTE_MEMBER_NO_MATCH 0
/** Maximum size of hash table that can be created. */
#define RTE_MEMBER_ENTRIES_MAX (1 << 30)
/** Maximum number of keys that can be searched as a bulk */
#define RTE_MEMBER_LOOKUP_BULK_MAX 64
/** Entry count per bucket in hash table based mode. */
#define RTE_MEMBER_BUCKET_ENTRIES 16
/** Maximum number of characters in setsum name. */
#define RTE_MEMBER_NAMESIZE 32

/** @internal Hash function used by membership library. */
#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32)
#include <rte_hash_crc.h>
#define MEMBER_HASH_FUNC       rte_hash_crc
#else
#include <rte_jhash.h>
#define MEMBER_HASH_FUNC       rte_jhash
#endif

extern int librte_member_logtype;

#define RTE_MEMBER_LOG(level, ...) \
	rte_log(RTE_LOG_ ## level, \
		librte_member_logtype, \
		RTE_FMT("%s(): " RTE_FMT_HEAD(__VA_ARGS__,), \
			__func__, \
			RTE_FMT_TAIL(__VA_ARGS__,)))

/** @internal setsummary structure. */
struct rte_member_setsum;

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Parameter struct used to create set summary
 */
struct rte_member_parameters;

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Define different set summary types
 */
enum rte_member_setsum_type {
	RTE_MEMBER_TYPE_HT = 0,  /**< Hash table based set summary. */
	RTE_MEMBER_TYPE_VBF,     /**< Vector of bloom filters. */
	RTE_MEMBER_NUM_TYPE
};

/** @internal compare function for different arch. */
enum rte_member_sig_compare_function {
	RTE_MEMBER_COMPARE_SCALAR = 0,
	RTE_MEMBER_COMPARE_AVX2,
	RTE_MEMBER_COMPARE_NUM
};

/** @internal setsummary structure. */
struct rte_member_setsum {
	enum rte_member_setsum_type type; /* Type of the set summary. */
	uint32_t key_len;		/* Length of key. */
	uint32_t prim_hash_seed;	/* Primary hash function seed. */
	uint32_t sec_hash_seed;		/* Secondary hash function seed. */

	/* Hash table based. */
	uint32_t bucket_cnt;		/* Number of buckets. */
	uint32_t bucket_mask;		/* Bit mask to get bucket index. */
	/* For runtime selecting AVX, scalar, etc for signature comparison. */
	enum rte_member_sig_compare_function sig_cmp_fn;
	uint8_t cache;			/* If it is cache mode for ht based. */

	/* Vector bloom filter. */
	uint32_t num_set;		/* Number of set (bf) in vbf. */
	uint32_t bits;			/* Number of bits in each bf. */
	uint32_t bit_mask;	/* Bit mask to get bit location in bf. */
	uint32_t num_hashes;	/* Number of hash values to index bf. */

	uint32_t mul_shift;  /* vbf internal variable used during bit test. */
	uint32_t div_shift;  /* vbf internal variable used during bit test. */

	void *table;	/* This is the handler of hash table or vBF array. */


	/* Second cache line should start here. */
	uint32_t socket_id;          /* NUMA Socket ID for memory. */
	char name[RTE_MEMBER_NAMESIZE]; /* Name of this set summary. */
} __rte_cache_aligned;

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Parameters used when create the set summary table. Currently user can
 * specify two types of setsummary: HT based and vBF. For HT based, user can
 * specify cache or non-cache mode. Here is a table to describe some differences
 *
 */
struct rte_member_parameters {
	const char *name;			/**< Name of the hash. */

	/**
	 * User to specify the type of the setsummary from one of
	 * rte_member_setsum_type.
	 *
	 * HT based setsummary is implemented like a hash table. User should use
	 * this type when there are many sets.
	 *
	 * vBF setsummary is a vector of bloom filters. It is used when number
	 * of sets is not big (less than 32 for current implementation).
	 */
	enum rte_member_setsum_type type;

	/**
	 * is_cache is only used for HT based setsummary.
	 *
	 * If it is HT based setsummary, user to specify the subtype or mode
	 * of the setsummary. It could be cache, or non-cache mode.
	 * Set is_cache to be 1 if to use as cache mode.
	 *
	 * For cache mode, keys can be evicted out of the HT setsummary. Keys
	 * with the same signature and map to the same bucket
	 * will overwrite each other in the setsummary table.
	 * This mode is useful for the case that the set-summary only
	 * needs to keep record of the recently inserted keys. Both
	 * false-negative and false-positive could happen.
	 *
	 * For non-cache mode, keys cannot be evicted out of the cache. So for
	 * this mode the setsummary will become full eventually. Keys with the
	 * same signature but map to the same bucket will still occupy multiple
	 * entries. This mode does not give false-negative result.
	 */
	uint8_t is_cache;

	/**
	 * For HT setsummary, num_keys equals to the number of entries of the
	 * table. When the number of keys inserted in the HT setsummary
	 * approaches this number, eviction could happen. For cache mode,
	 * keys could be evicted out of the table. For non-cache mode, keys will
	 * be evicted to other buckets like cuckoo hash. The table will also
	 * likely to become full before the number of inserted keys equal to the
	 * total number of entries.
	 *
	 * For vBF, num_keys equal to the expected number of keys that will
	 * be inserted into the vBF. The implementation assumes the keys are
	 * evenly distributed to each BF in vBF. This is used to calculate the
	 * number of bits we need for each BF. User does not specify the size of
	 * each BF directly because the optimal size depends on the num_keys
	 * and false positive rate.
	 */
	uint32_t num_keys;

	/**
	 * The length of key is used for hash calculation. Since key is not
	 * stored in set-summary, large key does not require more memory space.
	 */
	uint32_t key_len;

	/**
	 * num_set is only used for vBF, but not used for HT setsummary.
	 *
	 * num_set is equal to the number of BFs in vBF. For current
	 * implementation, it only supports 1,2,4,8,16,32 BFs in one vBF set
	 * summary. If other number of sets are needed, for example 5, the user
	 * should allocate the minimum available value that larger than 5,
	 * which is 8.
	 */
	uint32_t num_set;

	/**
	 * false_positive_rate is only used for vBF, but not used for HT
	 * setsummary.
	 *
	 * For vBF, false_positive_rate is the user-defined false positive rate
	 * given expected number of inserted keys (num_keys). It is used to
	 * calculate the total number of bits for each BF, and the number of
	 * hash values used during lookup and insertion. For details please
	 * refer to vBF implementation and membership library documentation.
	 *
	 * For HT, This parameter is not directly set by users.
	 * HT setsummary's false positive rate is in the order of:
	 * false_pos = (1/bucket_count)*(1/2^16), since we use 16-bit signature.
	 * This is because two keys needs to map to same bucket and same
	 * signature to have a collision (false positive). bucket_count is equal
	 * to number of entries (num_keys) divided by entry count per bucket
	 * (RTE_MEMBER_BUCKET_ENTRIES). Thus, the false_positive_rate is not
	 * directly set by users for HT mode.
	 */
	float false_positive_rate;

	/**
	 * We use two seeds to calculate two independent hashes for each key.
	 *
	 * For HT type, one hash is used as signature, and the other is used
	 * for bucket location.
	 * For vBF type, these two hashes and their combinations are used as
	 * hash locations to index the bit array.
	 */
	uint32_t prim_hash_seed;

	/**
	 * The secondary seed should be a different value from the primary seed.
	 */
	uint32_t sec_hash_seed;

	int socket_id;			/**< NUMA Socket ID for memory. */
};

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Find an existing set-summary and return a pointer to it.
 *
 * @param name
 *   Name of the set-summary.
 * @return
 *   Pointer to the set-summary or NULL if object not found
 *   with rte_errno set appropriately. Possible rte_errno values include:
 *    - ENOENT - value not available for return
 */
struct rte_member_setsum *
rte_member_find_existing(const char *name);

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Create set-summary (SS).
 *
 * @param params
 *   Parameters to initialize the setsummary.
 * @return
 *   Return the pointer to the setsummary.
 *   Return value is NULL if the creation failed.
 */
struct rte_member_setsum *
rte_member_create(const struct rte_member_parameters *params);

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Lookup key in set-summary (SS).
 * Single key lookup and return as soon as the first match found
 *
 * @param setsum
 *   Pointer of a setsummary.
 * @param key
 *   Pointer of the key to be looked up.
 * @param set_id
 *   Output the set id matches the key.
 * @return
 *   Return 1 for found a match and 0 for not found a match.
 */
int
rte_member_lookup(const struct rte_member_setsum *setsum, const void *key,
			member_set_t *set_id);

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Lookup bulk of keys in set-summary (SS).
 * Each key lookup returns as soon as the first match found
 *
 * @param setsum
 *   Pointer of a setsummary.
 * @param keys
 *   Pointer of the bulk of keys to be looked up.
 * @param num_keys
 *   Number of keys that will be lookup.
 * @param set_ids
 *   Output set ids for all the keys to this array.
 *   User should preallocate array that can contain all results, which size is
 *   the num_keys.
 * @return
 *   The number of keys that found a match.
 */
int
rte_member_lookup_bulk(const struct rte_member_setsum *setsum,
			const void **keys, uint32_t num_keys,
			member_set_t *set_ids);

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Lookup a key in set-summary (SS) for multiple matches.
 * The key lookup will find all matched entries (multiple match).
 * Note that for cache mode of HT, each key can have at most one match. This is
 * because keys with same signature that maps to same bucket will overwrite
 * each other. So multi-match lookup should be used for vBF and non-cache HT.
 *
 * @param setsum
 *   Pointer of a set-summary.
 * @param key
 *   Pointer of the key that to be looked up.
 * @param max_match_per_key
 *   User specified maximum number of matches for each key. The function returns
 *   as soon as this number of matches found for the key.
 * @param set_id
 *   Output set ids for all the matches of the key. User needs to preallocate
 *   the array that can contain max_match_per_key number of results.
 * @return
 *   The number of matches that found for the key.
 *   For cache mode HT set-summary, the number should be at most 1.
 */
int
rte_member_lookup_multi(const struct rte_member_setsum *setsum,
		const void *key, uint32_t max_match_per_key,
		member_set_t *set_id);

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Lookup a bulk of keys in set-summary (SS) for multiple matches each key.
 * Each key lookup will find all matched entries (multiple match).
 * Note that for cache mode HT, each key can have at most one match. So
 * multi-match function is mainly used for vBF and non-cache mode HT.
 *
 * @param setsum
 *   Pointer of a setsummary.
 * @param keys
 *   Pointer of the keys to be looked up.
 * @param num_keys
 *   The number of keys that will be lookup.
 * @param max_match_per_key
 *   The possible maximum number of matches for each key.
 * @param match_count
 *   Output the number of matches for each key in an array.
 * @param set_ids
 *   Return set ids for all the matches of all keys. Users pass in a
 *   preallocated 2D array with first dimension as key index and second
 *   dimension as match index. For example set_ids[bulk_size][max_match_per_key]
 * @return
 *   The number of keys that found one or more matches in the set-summary.
 */
int
rte_member_lookup_multi_bulk(const struct rte_member_setsum *setsum,
		const void **keys, uint32_t num_keys,
		uint32_t max_match_per_key,
		uint32_t *match_count,
		member_set_t *set_ids);

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Insert key into set-summary (SS).
 *
 * @param setsum
 *   Pointer of a set-summary.
 * @param key
 *   Pointer of the key to be added.
 * @param set_id
 *   The set id associated with the key that needs to be added. Different mode
 *   supports different set_id ranges. 0 cannot be used as set_id since
 *   RTE_MEMBER_NO_MATCH by default is set as 0.
 *   For HT mode, the set_id has range as [1, 0x7FFF], MSB is reserved.
 *   For vBF mode the set id is limited by the num_set parameter when create
 *   the set-summary.
 * @return
 *   HT (cache mode) and vBF should never fail unless the set_id is not in the
 *   valid range. In such case -EINVAL is returned.
 *   For HT (non-cache mode) it could fail with -ENOSPC error code when table is
 *   full.
 *   For success it returns different values for different modes to provide
 *   extra information for users.
 *   Return 0 for HT (cache mode) if the add does not cause
 *   eviction, return 1 otherwise. Return 0 for non-cache mode if success,
 *   -ENOSPC for full, and 1 if cuckoo eviction happens.
 *   Always returns 0 for vBF mode.
 */
int
rte_member_add(const struct rte_member_setsum *setsum, const void *key,
			member_set_t set_id);

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * De-allocate memory used by set-summary.
 *
 * @param setsum
 *   Pointer to the set summary.
 */
void
rte_member_free(struct rte_member_setsum *setsum);

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Reset the set-summary tables. E.g. reset bits to be 0 in BF,
 * reset set_id in each entry to be RTE_MEMBER_NO_MATCH in HT based SS.
 *
 * @param setsum
 *   Pointer to the set-summary.
 */
void
rte_member_reset(const struct rte_member_setsum *setsum);

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Delete items from the set-summary. Note that vBF does not support deletion
 * in current implementation. For vBF, error code of -EINVAL will be returned.
 *
 * @param setsum
 *   Pointer to the set-summary.
 * @param key
 *   Pointer of the key to be deleted.
 * @param set_id
 *   For HT mode, we need both key and its corresponding set_id to
 *   properly delete the key. Without set_id, we may delete other keys with the
 *   same signature.
 * @return
 *   If no entry found to delete, an error code of -ENOENT could be returned.
 */
int
rte_member_delete(const struct rte_member_setsum *setsum, const void *key,
			member_set_t set_id);

#ifdef __cplusplus
}
#endif

#endif /* _RTE_MEMBER_H_ */