4 * Copyright(c) 2017 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 * RTE Membership Library
39 * The Membership Library is an extension and generalization of a traditional
40 * filter (for example Bloom Filter and cuckoo filter) structure that has
41 * multiple usages in a variety of workloads and applications. The library is
42 * used to test if a key belongs to certain sets. Two types of such
43 * "set-summary" structures are implemented: hash-table based (HT) and vector
44 * bloom filter (vBF). For HT setsummary, two subtypes or modes are available,
45 * cache and non-cache modes. The table below summarize some properties of
46 * the different implementations.
49 * @b EXPERIMENTAL: this API may change without prior notice
54 * +==========+=====================+================+=========================+
55 * | type | vbf | HT-cache | HT-non-cache |
56 * +==========+=====================+==========================================+
57 * |structure | bloom-filter array | hash-table like without storing key |
58 * +----------+---------------------+------------------------------------------+
59 * |set id | limited by bf count | [1, 0x7fff] |
61 * +----------+---------------------+------------------------------------------+
62 * |usages & | small set range, | can delete, | cache most recent keys, |
63 * |properties| user-specified | big set range, | have both false-positive|
64 * | | false-positive rate,| small false | and false-negative |
65 * | | no deletion support.| positive depend| depend on table size, |
66 * | | | on table size, | automatic overwritten. |
67 * | | | new key does | |
68 * | | | not overwrite | |
69 * | | | existing key. | |
70 * +----------+---------------------+----------------+-------------------------+
74 #ifndef _RTE_MEMBER_H_
75 #define _RTE_MEMBER_H_
83 #include <rte_common.h>
84 #include <rte_config.h>
86 /** The set ID type that stored internally in hash table based set summary. */
87 typedef uint16_t member_set_t;
88 /** Invalid set ID used to mean no match found. */
89 #define RTE_MEMBER_NO_MATCH 0
90 /** Maximum size of hash table that can be created. */
91 #define RTE_MEMBER_ENTRIES_MAX (1 << 30)
92 /** Maximum number of keys that can be searched as a bulk */
93 #define RTE_MEMBER_LOOKUP_BULK_MAX 64
94 /** Entry count per bucket in hash table based mode. */
95 #define RTE_MEMBER_BUCKET_ENTRIES 16
96 /** Maximum number of characters in setsum name. */
97 #define RTE_MEMBER_NAMESIZE 32
99 /** @internal Hash function used by membership library. */
100 #if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32)
101 #include <rte_hash_crc.h>
102 #define MEMBER_HASH_FUNC rte_hash_crc
104 #include <rte_jhash.h>
105 #define MEMBER_HASH_FUNC rte_jhash
108 extern int librte_member_logtype;
110 #define RTE_MEMBER_LOG(level, ...) \
111 rte_log(RTE_LOG_ ## level, \
112 librte_member_logtype, \
113 RTE_FMT("%s(): " RTE_FMT_HEAD(__VA_ARGS__,), \
115 RTE_FMT_TAIL(__VA_ARGS__,)))
117 /** @internal setsummary structure. */
118 struct rte_member_setsum;
122 * @b EXPERIMENTAL: this API may change without prior notice
124 * Parameter struct used to create set summary
126 struct rte_member_parameters;
130 * @b EXPERIMENTAL: this API may change without prior notice
132 * Define different set summary types
134 enum rte_member_setsum_type {
135 RTE_MEMBER_TYPE_HT = 0, /**< Hash table based set summary. */
136 RTE_MEMBER_TYPE_VBF, /**< Vector of bloom filters. */
140 /** @internal compare function for different arch. */
141 enum rte_member_sig_compare_function {
142 RTE_MEMBER_COMPARE_SCALAR = 0,
143 RTE_MEMBER_COMPARE_AVX2,
144 RTE_MEMBER_COMPARE_NUM
147 /** @internal setsummary structure. */
148 struct rte_member_setsum {
149 enum rte_member_setsum_type type; /* Type of the set summary. */
150 uint32_t key_len; /* Length of key. */
151 uint32_t prim_hash_seed; /* Primary hash function seed. */
152 uint32_t sec_hash_seed; /* Secondary hash function seed. */
154 /* Hash table based. */
155 uint32_t bucket_cnt; /* Number of buckets. */
156 uint32_t bucket_mask; /* Bit mask to get bucket index. */
157 /* For runtime selecting AVX, scalar, etc for signature comparison. */
158 enum rte_member_sig_compare_function sig_cmp_fn;
159 uint8_t cache; /* If it is cache mode for ht based. */
161 /* Vector bloom filter. */
162 uint32_t num_set; /* Number of set (bf) in vbf. */
163 uint32_t bits; /* Number of bits in each bf. */
164 uint32_t bit_mask; /* Bit mask to get bit location in bf. */
165 uint32_t num_hashes; /* Number of hash values to index bf. */
167 uint32_t mul_shift; /* vbf internal variable used during bit test. */
168 uint32_t div_shift; /* vbf internal variable used during bit test. */
170 void *table; /* This is the handler of hash table or vBF array. */
173 /* Second cache line should start here. */
174 uint32_t socket_id; /* NUMA Socket ID for memory. */
175 char name[RTE_MEMBER_NAMESIZE]; /* Name of this set summary. */
176 } __rte_cache_aligned;
180 * @b EXPERIMENTAL: this API may change without prior notice
182 * Parameters used when create the set summary table. Currently user can
183 * specify two types of setsummary: HT based and vBF. For HT based, user can
184 * specify cache or non-cache mode. Here is a table to describe some differences
187 struct rte_member_parameters {
188 const char *name; /**< Name of the hash. */
191 * User to specify the type of the setsummary from one of
192 * rte_member_setsum_type.
194 * HT based setsummary is implemented like a hash table. User should use
195 * this type when there are many sets.
197 * vBF setsummary is a vector of bloom filters. It is used when number
198 * of sets is not big (less than 32 for current implementation).
200 enum rte_member_setsum_type type;
203 * is_cache is only used for HT based setsummary.
205 * If it is HT based setsummary, user to specify the subtype or mode
206 * of the setsummary. It could be cache, or non-cache mode.
207 * Set is_cache to be 1 if to use as cache mode.
209 * For cache mode, keys can be evicted out of the HT setsummary. Keys
210 * with the same signature and map to the same bucket
211 * will overwrite each other in the setsummary table.
212 * This mode is useful for the case that the set-summary only
213 * needs to keep record of the recently inserted keys. Both
214 * false-negative and false-positive could happen.
216 * For non-cache mode, keys cannot be evicted out of the cache. So for
217 * this mode the setsummary will become full eventually. Keys with the
218 * same signature but map to the same bucket will still occupy multiple
219 * entries. This mode does not give false-negative result.
224 * For HT setsummary, num_keys equals to the number of entries of the
225 * table. When the number of keys inserted in the HT setsummary
226 * approaches this number, eviction could happen. For cache mode,
227 * keys could be evicted out of the table. For non-cache mode, keys will
228 * be evicted to other buckets like cuckoo hash. The table will also
229 * likely to become full before the number of inserted keys equal to the
230 * total number of entries.
232 * For vBF, num_keys equal to the expected number of keys that will
233 * be inserted into the vBF. The implementation assumes the keys are
234 * evenly distributed to each BF in vBF. This is used to calculate the
235 * number of bits we need for each BF. User does not specify the size of
236 * each BF directly because the optimal size depends on the num_keys
237 * and false positive rate.
242 * The length of key is used for hash calculation. Since key is not
243 * stored in set-summary, large key does not require more memory space.
248 * num_set is only used for vBF, but not used for HT setsummary.
250 * num_set is equal to the number of BFs in vBF. For current
251 * implementation, it only supports 1,2,4,8,16,32 BFs in one vBF set
252 * summary. If other number of sets are needed, for example 5, the user
253 * should allocate the minimum available value that larger than 5,
259 * false_positive_rate is only used for vBF, but not used for HT
262 * For vBF, false_positive_rate is the user-defined false positive rate
263 * given expected number of inserted keys (num_keys). It is used to
264 * calculate the total number of bits for each BF, and the number of
265 * hash values used during lookup and insertion. For details please
266 * refer to vBF implementation and membership library documentation.
268 * For HT, This parameter is not directly set by users.
269 * HT setsummary's false positive rate is in the order of:
270 * false_pos = (1/bucket_count)*(1/2^16), since we use 16-bit signature.
271 * This is because two keys needs to map to same bucket and same
272 * signature to have a collision (false positive). bucket_count is equal
273 * to number of entries (num_keys) divided by entry count per bucket
274 * (RTE_MEMBER_BUCKET_ENTRIES). Thus, the false_positive_rate is not
275 * directly set by users for HT mode.
277 float false_positive_rate;
280 * We use two seeds to calculate two independent hashes for each key.
282 * For HT type, one hash is used as signature, and the other is used
283 * for bucket location.
284 * For vBF type, these two hashes and their combinations are used as
285 * hash locations to index the bit array.
287 uint32_t prim_hash_seed;
290 * The secondary seed should be a different value from the primary seed.
292 uint32_t sec_hash_seed;
294 int socket_id; /**< NUMA Socket ID for memory. */
299 * @b EXPERIMENTAL: this API may change without prior notice
301 * Find an existing set-summary and return a pointer to it.
304 * Name of the set-summary.
306 * Pointer to the set-summary or NULL if object not found
307 * with rte_errno set appropriately. Possible rte_errno values include:
308 * - ENOENT - value not available for return
310 struct rte_member_setsum *
311 rte_member_find_existing(const char *name);
315 * @b EXPERIMENTAL: this API may change without prior notice
317 * Create set-summary (SS).
320 * Parameters to initialize the setsummary.
322 * Return the pointer to the setsummary.
323 * Return value is NULL if the creation failed.
325 struct rte_member_setsum *
326 rte_member_create(const struct rte_member_parameters *params);
330 * @b EXPERIMENTAL: this API may change without prior notice
332 * Lookup key in set-summary (SS).
333 * Single key lookup and return as soon as the first match found
336 * Pointer of a setsummary.
338 * Pointer of the key to be looked up.
340 * Output the set id matches the key.
342 * Return 1 for found a match and 0 for not found a match.
345 rte_member_lookup(const struct rte_member_setsum *setsum, const void *key,
346 member_set_t *set_id);
350 * @b EXPERIMENTAL: this API may change without prior notice
352 * Lookup bulk of keys in set-summary (SS).
353 * Each key lookup returns as soon as the first match found
356 * Pointer of a setsummary.
358 * Pointer of the bulk of keys to be looked up.
360 * Number of keys that will be lookup.
362 * Output set ids for all the keys to this array.
363 * User should preallocate array that can contain all results, which size is
366 * The number of keys that found a match.
369 rte_member_lookup_bulk(const struct rte_member_setsum *setsum,
370 const void **keys, uint32_t num_keys,
371 member_set_t *set_ids);
375 * @b EXPERIMENTAL: this API may change without prior notice
377 * Lookup a key in set-summary (SS) for multiple matches.
378 * The key lookup will find all matched entries (multiple match).
379 * Note that for cache mode of HT, each key can have at most one match. This is
380 * because keys with same signature that maps to same bucket will overwrite
381 * each other. So multi-match lookup should be used for vBF and non-cache HT.
384 * Pointer of a set-summary.
386 * Pointer of the key that to be looked up.
387 * @param max_match_per_key
388 * User specified maximum number of matches for each key. The function returns
389 * as soon as this number of matches found for the key.
391 * Output set ids for all the matches of the key. User needs to preallocate
392 * the array that can contain max_match_per_key number of results.
394 * The number of matches that found for the key.
395 * For cache mode HT set-summary, the number should be at most 1.
398 rte_member_lookup_multi(const struct rte_member_setsum *setsum,
399 const void *key, uint32_t max_match_per_key,
400 member_set_t *set_id);
404 * @b EXPERIMENTAL: this API may change without prior notice
406 * Lookup a bulk of keys in set-summary (SS) for multiple matches each key.
407 * Each key lookup will find all matched entries (multiple match).
408 * Note that for cache mode HT, each key can have at most one match. So
409 * multi-match function is mainly used for vBF and non-cache mode HT.
412 * Pointer of a setsummary.
414 * Pointer of the keys to be looked up.
416 * The number of keys that will be lookup.
417 * @param max_match_per_key
418 * The possible maximum number of matches for each key.
420 * Output the number of matches for each key in an array.
422 * Return set ids for all the matches of all keys. Users pass in a
423 * preallocated 2D array with first dimension as key index and second
424 * dimension as match index. For example set_ids[bulk_size][max_match_per_key]
426 * The number of keys that found one or more matches in the set-summary.
429 rte_member_lookup_multi_bulk(const struct rte_member_setsum *setsum,
430 const void **keys, uint32_t num_keys,
431 uint32_t max_match_per_key,
432 uint32_t *match_count,
433 member_set_t *set_ids);
437 * @b EXPERIMENTAL: this API may change without prior notice
439 * Insert key into set-summary (SS).
442 * Pointer of a set-summary.
444 * Pointer of the key to be added.
446 * The set id associated with the key that needs to be added. Different mode
447 * supports different set_id ranges. 0 cannot be used as set_id since
448 * RTE_MEMBER_NO_MATCH by default is set as 0.
449 * For HT mode, the set_id has range as [1, 0x7FFF], MSB is reserved.
450 * For vBF mode the set id is limited by the num_set parameter when create
453 * HT (cache mode) and vBF should never fail unless the set_id is not in the
454 * valid range. In such case -EINVAL is returned.
455 * For HT (non-cache mode) it could fail with -ENOSPC error code when table is
457 * For success it returns different values for different modes to provide
458 * extra information for users.
459 * Return 0 for HT (cache mode) if the add does not cause
460 * eviction, return 1 otherwise. Return 0 for non-cache mode if success,
461 * -ENOSPC for full, and 1 if cuckoo eviction happens.
462 * Always returns 0 for vBF mode.
465 rte_member_add(const struct rte_member_setsum *setsum, const void *key,
466 member_set_t set_id);
470 * @b EXPERIMENTAL: this API may change without prior notice
472 * De-allocate memory used by set-summary.
475 * Pointer to the set summary.
478 rte_member_free(struct rte_member_setsum *setsum);
482 * @b EXPERIMENTAL: this API may change without prior notice
484 * Reset the set-summary tables. E.g. reset bits to be 0 in BF,
485 * reset set_id in each entry to be RTE_MEMBER_NO_MATCH in HT based SS.
488 * Pointer to the set-summary.
491 rte_member_reset(const struct rte_member_setsum *setsum);
495 * @b EXPERIMENTAL: this API may change without prior notice
497 * Delete items from the set-summary. Note that vBF does not support deletion
498 * in current implementation. For vBF, error code of -EINVAL will be returned.
501 * Pointer to the set-summary.
503 * Pointer of the key to be deleted.
505 * For HT mode, we need both key and its corresponding set_id to
506 * properly delete the key. Without set_id, we may delete other keys with the
509 * If no entry found to delete, an error code of -ENOENT could be returned.
512 rte_member_delete(const struct rte_member_setsum *setsum, const void *key,
513 member_set_t set_id);
519 #endif /* _RTE_MEMBER_H_ */