New upstream version 18.02
[deb_dpdk.git] / lib / librte_member / rte_member_ht.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4
5 #include <rte_errno.h>
6 #include <rte_malloc.h>
7 #include <rte_prefetch.h>
8 #include <rte_random.h>
9 #include <rte_log.h>
10
11 #include "rte_member.h"
12 #include "rte_member_ht.h"
13
14 #if defined(RTE_ARCH_X86)
15 #include "rte_member_x86.h"
16 #endif
17
18 /* Search bucket for entry with tmp_sig and update set_id */
19 static inline int
20 update_entry_search(uint32_t bucket_id, member_sig_t tmp_sig,
21                 struct member_ht_bucket *buckets,
22                 member_set_t set_id)
23 {
24         uint32_t i;
25
26         for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
27                 if (buckets[bucket_id].sigs[i] == tmp_sig) {
28                         buckets[bucket_id].sets[i] = set_id;
29                         return 1;
30                 }
31         }
32         return 0;
33 }
34
35 static inline int
36 search_bucket_single(uint32_t bucket_id, member_sig_t tmp_sig,
37                 struct member_ht_bucket *buckets,
38                 member_set_t *set_id)
39 {
40         uint32_t iter;
41
42         for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
43                 if (tmp_sig == buckets[bucket_id].sigs[iter] &&
44                                 buckets[bucket_id].sets[iter] !=
45                                 RTE_MEMBER_NO_MATCH) {
46                         *set_id = buckets[bucket_id].sets[iter];
47                         return 1;
48                 }
49         }
50         return 0;
51 }
52
53 static inline void
54 search_bucket_multi(uint32_t bucket_id, member_sig_t tmp_sig,
55                 struct member_ht_bucket *buckets,
56                 uint32_t *counter,
57                 uint32_t matches_per_key,
58                 member_set_t *set_id)
59 {
60         uint32_t iter;
61
62         for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
63                 if (tmp_sig == buckets[bucket_id].sigs[iter] &&
64                                 buckets[bucket_id].sets[iter] !=
65                                 RTE_MEMBER_NO_MATCH) {
66                         set_id[*counter] = buckets[bucket_id].sets[iter];
67                         (*counter)++;
68                         if (*counter >= matches_per_key)
69                                 return;
70                 }
71         }
72 }
73
74 int
75 rte_member_create_ht(struct rte_member_setsum *ss,
76                 const struct rte_member_parameters *params)
77 {
78         uint32_t i, j;
79         uint32_t size_bucket_t;
80         uint32_t num_entries = rte_align32pow2(params->num_keys);
81
82         if ((num_entries > RTE_MEMBER_ENTRIES_MAX) ||
83                         !rte_is_power_of_2(RTE_MEMBER_BUCKET_ENTRIES) ||
84                         num_entries < RTE_MEMBER_BUCKET_ENTRIES) {
85                 rte_errno = EINVAL;
86                 RTE_MEMBER_LOG(ERR,
87                         "Membership HT create with invalid parameters\n");
88                 return -EINVAL;
89         }
90
91         uint32_t num_buckets = num_entries / RTE_MEMBER_BUCKET_ENTRIES;
92
93         size_bucket_t = sizeof(struct member_ht_bucket);
94
95         struct member_ht_bucket *buckets = rte_zmalloc_socket(NULL,
96                         num_buckets * size_bucket_t,
97                         RTE_CACHE_LINE_SIZE, ss->socket_id);
98
99         if (buckets == NULL) {
100                 RTE_MEMBER_LOG(ERR, "memory allocation failed for HT "
101                                                 "setsummary\n");
102                 return -ENOMEM;
103         }
104
105         ss->table = buckets;
106         ss->bucket_cnt = num_buckets;
107         ss->bucket_mask = num_buckets - 1;
108         ss->cache = params->is_cache;
109
110         for (i = 0; i < num_buckets; i++) {
111                 for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
112                         buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
113         }
114 #if defined(RTE_ARCH_X86)
115         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
116                         RTE_MEMBER_BUCKET_ENTRIES == 16)
117                 ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2;
118         else
119 #endif
120                 ss->sig_cmp_fn = RTE_MEMBER_COMPARE_SCALAR;
121
122         RTE_MEMBER_LOG(DEBUG, "Hash table based filter created, "
123                         "the table has %u entries, %u buckets\n",
124                         num_entries, num_buckets);
125         return 0;
126 }
127
128 static inline void
129 get_buckets_index(const struct rte_member_setsum *ss, const void *key,
130                 uint32_t *prim_bkt, uint32_t *sec_bkt, member_sig_t *sig)
131 {
132         uint32_t first_hash = MEMBER_HASH_FUNC(key, ss->key_len,
133                                                 ss->prim_hash_seed);
134         uint32_t sec_hash = MEMBER_HASH_FUNC(&first_hash, sizeof(uint32_t),
135                                                 ss->sec_hash_seed);
136         /*
137          * We use the first hash value for the signature, and the second hash
138          * value to derive the primary and secondary bucket locations.
139          *
140          * For non-cache mode, we use the lower bits for the primary bucket
141          * location. Then we xor primary bucket location and the signature
142          * to get the secondary bucket location. This is called "partial-key
143          * cuckoo hashing" proposed by B. Fan, et al's paper
144          * "Cuckoo Filter: Practically Better Than Bloom". The benefit to use
145          * xor is that one could derive the alternative bucket location
146          * by only using the current bucket location and the signature. This is
147          * generally required by non-cache mode's eviction and deletion
148          * process without the need to store alternative hash value nor the full
149          * key.
150          *
151          * For cache mode, we use the lower bits for the primary bucket
152          * location and the higher bits for the secondary bucket location. In
153          * cache mode, keys are simply overwritten if bucket is full. We do not
154          * use xor since lower/higher bits are more independent hash values thus
155          * should provide slightly better table load.
156          */
157         *sig = first_hash;
158         if (ss->cache) {
159                 *prim_bkt = sec_hash & ss->bucket_mask;
160                 *sec_bkt =  (sec_hash >> 16) & ss->bucket_mask;
161         } else {
162                 *prim_bkt = sec_hash & ss->bucket_mask;
163                 *sec_bkt =  (*prim_bkt ^ *sig) & ss->bucket_mask;
164         }
165 }
166
167 int
168 rte_member_lookup_ht(const struct rte_member_setsum *ss,
169                 const void *key, member_set_t *set_id)
170 {
171         uint32_t prim_bucket, sec_bucket;
172         member_sig_t tmp_sig;
173         struct member_ht_bucket *buckets = ss->table;
174
175         *set_id = RTE_MEMBER_NO_MATCH;
176         get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
177
178         switch (ss->sig_cmp_fn) {
179 #if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
180         case RTE_MEMBER_COMPARE_AVX2:
181                 if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets,
182                                 set_id) ||
183                                 search_bucket_single_avx(sec_bucket, tmp_sig,
184                                         buckets, set_id))
185                         return 1;
186                 break;
187 #endif
188         default:
189                 if (search_bucket_single(prim_bucket, tmp_sig, buckets,
190                                 set_id) ||
191                                 search_bucket_single(sec_bucket, tmp_sig,
192                                         buckets, set_id))
193                         return 1;
194         }
195
196         return 0;
197 }
198
199 uint32_t
200 rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss,
201                 const void **keys, uint32_t num_keys, member_set_t *set_id)
202 {
203         uint32_t i;
204         uint32_t num_matches = 0;
205         struct member_ht_bucket *buckets = ss->table;
206         member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
207         uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
208         uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
209
210         for (i = 0; i < num_keys; i++) {
211                 get_buckets_index(ss, keys[i], &prim_buckets[i],
212                                 &sec_buckets[i], &tmp_sig[i]);
213                 rte_prefetch0(&buckets[prim_buckets[i]]);
214                 rte_prefetch0(&buckets[sec_buckets[i]]);
215         }
216
217         for (i = 0; i < num_keys; i++) {
218                 switch (ss->sig_cmp_fn) {
219 #if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
220                 case RTE_MEMBER_COMPARE_AVX2:
221                         if (search_bucket_single_avx(prim_buckets[i],
222                                         tmp_sig[i], buckets, &set_id[i]) ||
223                                 search_bucket_single_avx(sec_buckets[i],
224                                         tmp_sig[i], buckets, &set_id[i]))
225                                 num_matches++;
226                         else
227                                 set_id[i] = RTE_MEMBER_NO_MATCH;
228                         break;
229 #endif
230                 default:
231                         if (search_bucket_single(prim_buckets[i], tmp_sig[i],
232                                         buckets, &set_id[i]) ||
233                                         search_bucket_single(sec_buckets[i],
234                                         tmp_sig[i], buckets, &set_id[i]))
235                                 num_matches++;
236                         else
237                                 set_id[i] = RTE_MEMBER_NO_MATCH;
238                 }
239         }
240         return num_matches;
241 }
242
243 uint32_t
244 rte_member_lookup_multi_ht(const struct rte_member_setsum *ss,
245                 const void *key, uint32_t match_per_key,
246                 member_set_t *set_id)
247 {
248         uint32_t num_matches = 0;
249         uint32_t prim_bucket, sec_bucket;
250         member_sig_t tmp_sig;
251         struct member_ht_bucket *buckets = ss->table;
252
253         get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
254
255         switch (ss->sig_cmp_fn) {
256 #if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
257         case RTE_MEMBER_COMPARE_AVX2:
258                 search_bucket_multi_avx(prim_bucket, tmp_sig, buckets,
259                         &num_matches, match_per_key, set_id);
260                 if (num_matches < match_per_key)
261                         search_bucket_multi_avx(sec_bucket, tmp_sig,
262                                 buckets, &num_matches, match_per_key, set_id);
263                 return num_matches;
264 #endif
265         default:
266                 search_bucket_multi(prim_bucket, tmp_sig, buckets, &num_matches,
267                                  match_per_key, set_id);
268                 if (num_matches < match_per_key)
269                         search_bucket_multi(sec_bucket, tmp_sig,
270                                 buckets, &num_matches, match_per_key, set_id);
271                 return num_matches;
272         }
273 }
274
275 uint32_t
276 rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss,
277                 const void **keys, uint32_t num_keys, uint32_t match_per_key,
278                 uint32_t *match_count,
279                 member_set_t *set_ids)
280 {
281         uint32_t i;
282         uint32_t num_matches = 0;
283         struct member_ht_bucket *buckets = ss->table;
284         uint32_t match_cnt_tmp;
285         member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
286         uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
287         uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
288
289         for (i = 0; i < num_keys; i++) {
290                 get_buckets_index(ss, keys[i], &prim_buckets[i],
291                                 &sec_buckets[i], &tmp_sig[i]);
292                 rte_prefetch0(&buckets[prim_buckets[i]]);
293                 rte_prefetch0(&buckets[sec_buckets[i]]);
294         }
295         for (i = 0; i < num_keys; i++) {
296                 match_cnt_tmp = 0;
297
298                 switch (ss->sig_cmp_fn) {
299 #if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
300                 case RTE_MEMBER_COMPARE_AVX2:
301                         search_bucket_multi_avx(prim_buckets[i], tmp_sig[i],
302                                 buckets, &match_cnt_tmp, match_per_key,
303                                 &set_ids[i*match_per_key]);
304                         if (match_cnt_tmp < match_per_key)
305                                 search_bucket_multi_avx(sec_buckets[i],
306                                         tmp_sig[i], buckets, &match_cnt_tmp,
307                                         match_per_key,
308                                         &set_ids[i*match_per_key]);
309                         match_count[i] = match_cnt_tmp;
310                         if (match_cnt_tmp != 0)
311                                 num_matches++;
312                         break;
313 #endif
314                 default:
315                         search_bucket_multi(prim_buckets[i], tmp_sig[i],
316                                 buckets, &match_cnt_tmp, match_per_key,
317                                 &set_ids[i*match_per_key]);
318                         if (match_cnt_tmp < match_per_key)
319                                 search_bucket_multi(sec_buckets[i], tmp_sig[i],
320                                         buckets, &match_cnt_tmp, match_per_key,
321                                         &set_ids[i*match_per_key]);
322                         match_count[i] = match_cnt_tmp;
323                         if (match_cnt_tmp != 0)
324                                 num_matches++;
325                 }
326         }
327         return num_matches;
328 }
329
330 static inline int
331 try_insert(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
332                 member_sig_t sig, member_set_t set_id)
333 {
334         int i;
335         /* If not full then insert into one slot */
336         for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
337                 if (buckets[prim].sets[i] == RTE_MEMBER_NO_MATCH) {
338                         buckets[prim].sigs[i] = sig;
339                         buckets[prim].sets[i] = set_id;
340                         return 0;
341                 }
342         }
343         /* If prim failed, we need to access second bucket */
344         for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
345                 if (buckets[sec].sets[i] == RTE_MEMBER_NO_MATCH) {
346                         buckets[sec].sigs[i] = sig;
347                         buckets[sec].sets[i] = set_id;
348                         return 0;
349                 }
350         }
351         return -1;
352 }
353
354 static inline int
355 try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
356                 member_sig_t sig, member_set_t set_id,
357                 enum rte_member_sig_compare_function cmp_fn)
358 {
359         switch (cmp_fn) {
360 #if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
361         case RTE_MEMBER_COMPARE_AVX2:
362                 if (update_entry_search_avx(prim, sig, buckets, set_id) ||
363                                 update_entry_search_avx(sec, sig, buckets,
364                                         set_id))
365                         return 0;
366                 break;
367 #endif
368         default:
369                 if (update_entry_search(prim, sig, buckets, set_id) ||
370                                 update_entry_search(sec, sig, buckets,
371                                         set_id))
372                         return 0;
373         }
374         return -1;
375 }
376
377 static inline int
378 evict_from_bucket(void)
379 {
380         /* For now, we randomly pick one entry to evict */
381         return rte_rand() & (RTE_MEMBER_BUCKET_ENTRIES - 1);
382 }
383
384 /*
385  * This function is similar to the cuckoo hash make_space function in hash
386  * library
387  */
388 static inline int
389 make_space_bucket(const struct rte_member_setsum *ss, uint32_t bkt_idx,
390                         unsigned int *nr_pushes)
391 {
392         unsigned int i, j;
393         int ret;
394         struct member_ht_bucket *buckets = ss->table;
395         uint32_t next_bucket_idx;
396         struct member_ht_bucket *next_bkt[RTE_MEMBER_BUCKET_ENTRIES];
397         struct member_ht_bucket *bkt = &buckets[bkt_idx];
398         /* MSB is set to indicate if an entry has been already pushed */
399         member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);
400
401         /*
402          * Push existing item (search for bucket with space in
403          * alternative locations) to its alternative location
404          */
405         for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
406                 /* Search for space in alternative locations */
407                 next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
408                 next_bkt[i] = &buckets[next_bucket_idx];
409                 for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) {
410                         if (next_bkt[i]->sets[j] == RTE_MEMBER_NO_MATCH)
411                                 break;
412                 }
413
414                 if (j != RTE_MEMBER_BUCKET_ENTRIES)
415                         break;
416         }
417
418         /* Alternative location has spare room (end of recursive function) */
419         if (i != RTE_MEMBER_BUCKET_ENTRIES) {
420                 next_bkt[i]->sigs[j] = bkt->sigs[i];
421                 next_bkt[i]->sets[j] = bkt->sets[i];
422                 return i;
423         }
424
425         /* Pick entry that has not been pushed yet */
426         for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++)
427                 if ((bkt->sets[i] & flag_mask) == 0)
428                         break;
429
430         /* All entries have been pushed, so entry cannot be added */
431         if (i == RTE_MEMBER_BUCKET_ENTRIES ||
432                         ++(*nr_pushes) > RTE_MEMBER_MAX_PUSHES)
433                 return -ENOSPC;
434
435         next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
436         /* Set flag to indicate that this entry is going to be pushed */
437         bkt->sets[i] |= flag_mask;
438
439         /* Need room in alternative bucket to insert the pushed entry */
440         ret = make_space_bucket(ss, next_bucket_idx, nr_pushes);
441         /*
442          * After recursive function.
443          * Clear flags and insert the pushed entry
444          * in its alternative location if successful,
445          * or return error
446          */
447         bkt->sets[i] &= ~flag_mask;
448         if (ret >= 0) {
449                 next_bkt[i]->sigs[ret] = bkt->sigs[i];
450                 next_bkt[i]->sets[ret] = bkt->sets[i];
451                 return i;
452         } else
453                 return ret;
454 }
455
456 int
457 rte_member_add_ht(const struct rte_member_setsum *ss,
458                 const void *key, member_set_t set_id)
459 {
460         int ret;
461         unsigned int nr_pushes = 0;
462         uint32_t prim_bucket, sec_bucket;
463         member_sig_t tmp_sig;
464         struct member_ht_bucket *buckets = ss->table;
465         member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);
466
467         if (set_id == RTE_MEMBER_NO_MATCH || (set_id & flag_mask) != 0)
468                 return -EINVAL;
469
470         get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
471
472         /*
473          * If it is cache based setsummary, we try overwriting (updating)
474          * existing entry with the same signature first. In cache mode, we allow
475          * false negatives and only cache the most recent keys.
476          *
477          * For non-cache mode, we do not update existing entry with the same
478          * signature. This is because if two keys with same signature update
479          * each other, false negative may happen, which is not the expected
480          * behavior for non-cache setsummary.
481          */
482         if (ss->cache) {
483                 ret = try_update(buckets, prim_bucket, sec_bucket, tmp_sig,
484                                         set_id, ss->sig_cmp_fn);
485                 if (ret != -1)
486                         return ret;
487         }
488         /* If not full then insert into one slot */
489         ret = try_insert(buckets, prim_bucket, sec_bucket, tmp_sig, set_id);
490         if (ret != -1)
491                 return ret;
492
493         /* Random pick prim or sec for recursive displacement */
494         uint32_t select_bucket = (tmp_sig && 1U) ? prim_bucket : sec_bucket;
495         if (ss->cache) {
496                 ret = evict_from_bucket();
497                 buckets[select_bucket].sigs[ret] = tmp_sig;
498                 buckets[select_bucket].sets[ret] = set_id;
499                 return 1;
500         }
501
502         ret = make_space_bucket(ss, select_bucket, &nr_pushes);
503         if (ret >= 0) {
504                 buckets[select_bucket].sigs[ret] = tmp_sig;
505                 buckets[select_bucket].sets[ret] = set_id;
506                 ret = 1;
507         }
508
509         return ret;
510 }
511
512 void
513 rte_member_free_ht(struct rte_member_setsum *ss)
514 {
515         rte_free(ss->table);
516 }
517
518 int
519 rte_member_delete_ht(const struct rte_member_setsum *ss, const void *key,
520                 member_set_t set_id)
521 {
522         int i;
523         uint32_t prim_bucket, sec_bucket;
524         member_sig_t tmp_sig;
525         struct member_ht_bucket *buckets = ss->table;
526
527         get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
528
529         for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
530                 if (tmp_sig == buckets[prim_bucket].sigs[i] &&
531                                 set_id == buckets[prim_bucket].sets[i]) {
532                         buckets[prim_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
533                         return 0;
534                 }
535         }
536
537         for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
538                 if (tmp_sig == buckets[sec_bucket].sigs[i] &&
539                                 set_id == buckets[sec_bucket].sets[i]) {
540                         buckets[sec_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
541                         return 0;
542                 }
543         }
544         return -ENOENT;
545 }
546
547 void
548 rte_member_reset_ht(const struct rte_member_setsum *ss)
549 {
550         uint32_t i, j;
551         struct member_ht_bucket *buckets = ss->table;
552
553         for (i = 0; i < ss->bucket_cnt; i++) {
554                 for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
555                         buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
556         }
557 }