1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation
3  * Copyright(c) 2018 Arm Limited
4  */
5
6 #include <string.h>
7 #include <stdint.h>
8 #include <errno.h>
9 #include <stdio.h>
10 #include <stdarg.h>
11 #include <sys/queue.h>
12
13 #include <rte_common.h>
14 #include <rte_memory.h>         /* for definition of RTE_CACHE_LINE_SIZE */
15 #include <rte_log.h>
16 #include <rte_memcpy.h>
17 #include <rte_prefetch.h>
18 #include <rte_branch_prediction.h>
19 #include <rte_malloc.h>
20 #include <rte_eal.h>
21 #include <rte_eal_memconfig.h>
22 #include <rte_per_lcore.h>
23 #include <rte_errno.h>
24 #include <rte_string_fns.h>
25 #include <rte_cpuflags.h>
26 #include <rte_rwlock.h>
27 #include <rte_spinlock.h>
28 #include <rte_ring.h>
29 #include <rte_compat.h>
30
31 #include "rte_hash.h"
32 #include "rte_cuckoo_hash.h"
33
34 #define FOR_EACH_BUCKET(CURRENT_BKT, START_BUCKET)                            \
35         for (CURRENT_BKT = START_BUCKET;                                      \
36                 CURRENT_BKT != NULL;                                          \
37                 CURRENT_BKT = CURRENT_BKT->next)
38
39 TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
40
41 static struct rte_tailq_elem rte_hash_tailq = {
42         .name = "RTE_HASH",
43 };
44 EAL_REGISTER_TAILQ(rte_hash_tailq)
45
46 struct rte_hash *
47 rte_hash_find_existing(const char *name)
48 {
49         struct rte_hash *h = NULL;
50         struct rte_tailq_entry *te;
51         struct rte_hash_list *hash_list;
52
53         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
54
55         rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
56         TAILQ_FOREACH(te, hash_list, next) {
57                 h = (struct rte_hash *) te->data;
58                 if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
59                         break;
60         }
61         rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
62
63         if (te == NULL) {
64                 rte_errno = ENOENT;
65                 return NULL;
66         }
67         return h;
68 }
69
70 static inline struct rte_hash_bucket *
71 rte_hash_get_last_bkt(struct rte_hash_bucket *lst_bkt)
72 {
73         while (lst_bkt->next != NULL)
74                 lst_bkt = lst_bkt->next;
75         return lst_bkt;
76 }
77
78 void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
79 {
80         h->cmp_jump_table_idx = KEY_CUSTOM;
81         h->rte_hash_custom_cmp_eq = func;
82 }
83
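/*
 * Usage sketch (illustrative, not part of this file): installing a custom key
 * comparator with rte_hash_set_cmp_func(). The callback follows memcmp()
 * semantics, i.e. it must return 0 when the keys match. The names 'my_cmp'
 * and 'handle' are hypothetical.
 *
 *	static int
 *	my_cmp(const void *key1, const void *key2, size_t key_len)
 *	{
 *		return memcmp(key1, key2, key_len);
 *	}
 *
 *	rte_hash_set_cmp_func(handle, my_cmp);
 */
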
84 static inline int
85 rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h)
86 {
87         if (h->cmp_jump_table_idx == KEY_CUSTOM)
88                 return h->rte_hash_custom_cmp_eq(key1, key2, h->key_len);
89         else
90                 return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len);
91 }
92
93 /*
94  * We use the higher 16 bits of the hash as the signature value stored
95  * in the table, and the lower bits for the primary bucket
96  * location. We then XOR the primary bucket location with the signature
97  * to get the secondary bucket location. This is the same scheme as
98  * proposed in Bin Fan, et al's paper
99  * "MemC3: Compact and Concurrent MemCache with Dumber Caching and
100  * Smarter Hashing". The benefit of using
101  * XOR is that the alternative bucket location can be derived
102  * from only the current bucket location and the signature.
103  */
104 static inline uint16_t
105 get_short_sig(const hash_sig_t hash)
106 {
107         return hash >> 16;
108 }
109
110 static inline uint32_t
111 get_prim_bucket_index(const struct rte_hash *h, const hash_sig_t hash)
112 {
113         return hash & h->bucket_bitmask;
114 }
115
116 static inline uint32_t
117 get_alt_bucket_index(const struct rte_hash *h,
118                         uint32_t cur_bkt_idx, uint16_t sig)
119 {
120         return (cur_bkt_idx ^ sig) & h->bucket_bitmask;
121 }
122
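/*
 * Illustration (not part of the library code): because the alternative bucket
 * index is derived by XOR-ing the current bucket index with the signature,
 * the mapping is symmetric, so the same helper recovers the primary bucket
 * from the secondary one. Given a table 'h' and a full hash value 'hash':
 *
 *	uint32_t prim = get_prim_bucket_index(h, hash);
 *	uint16_t sig = get_short_sig(hash);
 *	uint32_t sec = get_alt_bucket_index(h, prim, sig);
 *
 * and get_alt_bucket_index(h, sec, sig) yields prim again.
 */
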
123 struct rte_hash *
124 rte_hash_create(const struct rte_hash_parameters *params)
125 {
126         struct rte_hash *h = NULL;
127         struct rte_tailq_entry *te = NULL;
128         struct rte_hash_list *hash_list;
129         struct rte_ring *r = NULL;
130         struct rte_ring *r_ext = NULL;
131         char hash_name[RTE_HASH_NAMESIZE];
132         void *k = NULL;
133         void *buckets = NULL;
134         void *buckets_ext = NULL;
135         char ring_name[RTE_RING_NAMESIZE];
136         char ext_ring_name[RTE_RING_NAMESIZE];
137         unsigned num_key_slots;
138         unsigned i;
139         unsigned int hw_trans_mem_support = 0, use_local_cache = 0;
140         unsigned int ext_table_support = 0;
141         unsigned int readwrite_concur_support = 0;
142         unsigned int writer_takes_lock = 0;
143         unsigned int no_free_on_del = 0;
144         uint32_t *tbl_chng_cnt = NULL;
145         unsigned int readwrite_concur_lf_support = 0;
146
147         rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
148
149         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
150
151         if (params == NULL) {
152                 RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
153                 return NULL;
154         }
155
156         /* Check for valid parameters */
157         if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
158                         (params->entries < RTE_HASH_BUCKET_ENTRIES) ||
159                         (params->key_len == 0)) {
160                 rte_errno = EINVAL;
161                 RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
162                 return NULL;
163         }
164
165         /* Validate correct usage of extra options */
166         if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) &&
167             (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF)) {
168                 rte_errno = EINVAL;
169                 RTE_LOG(ERR, HASH, "rte_hash_create: choose rw concurrency or "
170                         "rw concurrency lock free\n");
171                 return NULL;
172         }
173
174         if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) &&
175             (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)) {
176                 rte_errno = EINVAL;
177                 RTE_LOG(ERR, HASH, "rte_hash_create: extendable bucket "
178                         "feature not supported with rw concurrency "
179                         "lock free\n");
180                 return NULL;
181         }
182
183         /* Check extra flags field to check extra options. */
184         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
185                 hw_trans_mem_support = 1;
186
187         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
188                 use_local_cache = 1;
189                 writer_takes_lock = 1;
190         }
191
192         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) {
193                 readwrite_concur_support = 1;
194                 writer_takes_lock = 1;
195         }
196
197         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)
198                 ext_table_support = 1;
199
200         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL)
201                 no_free_on_del = 1;
202
203         if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) {
204                 readwrite_concur_lf_support = 1;
205                 /* Enable not freeing internal memory/index on delete */
206                 no_free_on_del = 1;
207         }
208
209         /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
210         if (use_local_cache)
211                 /*
212                  * Increase number of slots by total number of indices
213                  * that can be stored in the lcore caches
214                  * except for the first cache
215                  */
216                 num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
217                                         (LCORE_CACHE_SIZE - 1) + 1;
218         else
219                 num_key_slots = params->entries + 1;
220
221         snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
222         /* Create ring (Dummy slot index is not enqueued) */
223         r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
224                         params->socket_id, 0);
225         if (r == NULL) {
226                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
227                 goto err;
228         }
229
230         const uint32_t num_buckets = rte_align32pow2(params->entries) /
231                                                 RTE_HASH_BUCKET_ENTRIES;
232
233         /* Create ring for extendable buckets. */
234         if (ext_table_support) {
235                 snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",
236                                                                 params->name);
237                 r_ext = rte_ring_create(ext_ring_name,
238                                 rte_align32pow2(num_buckets + 1),
239                                 params->socket_id, 0);
240
241                 if (r_ext == NULL) {
242                         RTE_LOG(ERR, HASH, "ext buckets memory allocation "
243                                                                 "failed\n");
244                         goto err;
245                 }
246         }
247
248         snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
249
250         rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
251
252         /* guarantee there is no existing entry with this name; this is
253          * normally already caught by the ring creation above */
254         TAILQ_FOREACH(te, hash_list, next) {
255                 h = (struct rte_hash *) te->data;
256                 if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
257                         break;
258         }
259         h = NULL;
260         if (te != NULL) {
261                 rte_errno = EEXIST;
262                 te = NULL;
263                 goto err_unlock;
264         }
265
266         te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
267         if (te == NULL) {
268                 RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");
269                 goto err_unlock;
270         }
271
272         h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),
273                                         RTE_CACHE_LINE_SIZE, params->socket_id);
274
275         if (h == NULL) {
276                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
277                 goto err_unlock;
278         }
279
280         buckets = rte_zmalloc_socket(NULL,
281                                 num_buckets * sizeof(struct rte_hash_bucket),
282                                 RTE_CACHE_LINE_SIZE, params->socket_id);
283
284         if (buckets == NULL) {
285                 RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");
286                 goto err_unlock;
287         }
288
289         /* Allocate same number of extendable buckets */
290         if (ext_table_support) {
291                 buckets_ext = rte_zmalloc_socket(NULL,
292                                 num_buckets * sizeof(struct rte_hash_bucket),
293                                 RTE_CACHE_LINE_SIZE, params->socket_id);
294                 if (buckets_ext == NULL) {
295                         RTE_LOG(ERR, HASH, "ext buckets memory allocation "
296                                                         "failed\n");
297                         goto err_unlock;
298                 }
299                 /* Populate the ext bkt ring. We reserve 0, similar to the
300                  * key-data slot, in case we later want to use the bucket
301                  * index for the linked list, where 0 would mean NULL
302                  * for the next bucket.
303                  */
304                 for (i = 1; i <= num_buckets; i++)
305                         rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i));
306         }
307
308         const uint32_t key_entry_size =
309                 RTE_ALIGN(sizeof(struct rte_hash_key) + params->key_len,
310                           KEY_ALIGNMENT);
311         const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
312
313         k = rte_zmalloc_socket(NULL, key_tbl_size,
314                         RTE_CACHE_LINE_SIZE, params->socket_id);
315
316         if (k == NULL) {
317                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
318                 goto err_unlock;
319         }
320
321         tbl_chng_cnt = rte_zmalloc_socket(NULL, sizeof(uint32_t),
322                         RTE_CACHE_LINE_SIZE, params->socket_id);
323
324         if (tbl_chng_cnt == NULL) {
325                 RTE_LOG(ERR, HASH, "memory allocation failed\n");
326                 goto err_unlock;
327         }
328
329 /*
330  * On x86 or ARM64, select an appropriate compare function,
331  * which may use architecture-specific intrinsics; otherwise use memcmp
332  */
333 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
334         /* Select function to compare keys */
335         switch (params->key_len) {
336         case 16:
337                 h->cmp_jump_table_idx = KEY_16_BYTES;
338                 break;
339         case 32:
340                 h->cmp_jump_table_idx = KEY_32_BYTES;
341                 break;
342         case 48:
343                 h->cmp_jump_table_idx = KEY_48_BYTES;
344                 break;
345         case 64:
346                 h->cmp_jump_table_idx = KEY_64_BYTES;
347                 break;
348         case 80:
349                 h->cmp_jump_table_idx = KEY_80_BYTES;
350                 break;
351         case 96:
352                 h->cmp_jump_table_idx = KEY_96_BYTES;
353                 break;
354         case 112:
355                 h->cmp_jump_table_idx = KEY_112_BYTES;
356                 break;
357         case 128:
358                 h->cmp_jump_table_idx = KEY_128_BYTES;
359                 break;
360         default:
361                 /* If key length is not a multiple of 16, use generic memcmp */
362                 h->cmp_jump_table_idx = KEY_OTHER_BYTES;
363         }
364 #else
365         h->cmp_jump_table_idx = KEY_OTHER_BYTES;
366 #endif
367
368         if (use_local_cache) {
369                 h->local_free_slots = rte_zmalloc_socket(NULL,
370                                 sizeof(struct lcore_cache) * RTE_MAX_LCORE,
371                                 RTE_CACHE_LINE_SIZE, params->socket_id);
372         }
373
374         /* Default hash function */
375 #if defined(RTE_ARCH_X86)
376         default_hash_func = (rte_hash_function)rte_hash_crc;
377 #elif defined(RTE_ARCH_ARM64)
378         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
379                 default_hash_func = (rte_hash_function)rte_hash_crc;
380 #endif
381         /* Setup hash context */
382         snprintf(h->name, sizeof(h->name), "%s", params->name);
383         h->entries = params->entries;
384         h->key_len = params->key_len;
385         h->key_entry_size = key_entry_size;
386         h->hash_func_init_val = params->hash_func_init_val;
387
388         h->num_buckets = num_buckets;
389         h->bucket_bitmask = h->num_buckets - 1;
390         h->buckets = buckets;
391         h->buckets_ext = buckets_ext;
392         h->free_ext_bkts = r_ext;
393         h->hash_func = (params->hash_func == NULL) ?
394                 default_hash_func : params->hash_func;
395         h->key_store = k;
396         h->free_slots = r;
397         h->tbl_chng_cnt = tbl_chng_cnt;
398         *h->tbl_chng_cnt = 0;
399         h->hw_trans_mem_support = hw_trans_mem_support;
400         h->use_local_cache = use_local_cache;
401         h->readwrite_concur_support = readwrite_concur_support;
402         h->ext_table_support = ext_table_support;
403         h->writer_takes_lock = writer_takes_lock;
404         h->no_free_on_del = no_free_on_del;
405         h->readwrite_concur_lf_support = readwrite_concur_lf_support;
406
407 #if defined(RTE_ARCH_X86)
408         if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
409                 h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
410         else
411 #endif
412                 h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
413
414         /* Writer threads need to take the lock when:
415          * 1) RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is enabled OR
416          * 2) RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD is enabled
417          */
418         if (h->writer_takes_lock) {
419                 h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
420                                                 RTE_CACHE_LINE_SIZE);
421                 if (h->readwrite_lock == NULL)
422                         goto err_unlock;
423
424                 rte_rwlock_init(h->readwrite_lock);
425         }
426
427         /* Populate free slots ring. Entry zero is reserved for key misses. */
428         for (i = 1; i < num_key_slots; i++)
429                 rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
430
431         te->data = (void *) h;
432         TAILQ_INSERT_TAIL(hash_list, te, next);
433         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
434
435         return h;
436 err_unlock:
437         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
438 err:
439         rte_ring_free(r);
440         rte_ring_free(r_ext);
441         rte_free(te);
442         rte_free(h);
443         rte_free(buckets);
444         rte_free(buckets_ext);
445         rte_free(k);
446         rte_free(tbl_chng_cnt);
447         return NULL;
448 }
449
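/*
 * Usage sketch (illustrative, not part of this file): a minimal table
 * creation with reader/writer concurrency. The names 'example_params' and
 * 'handle' are hypothetical, and error handling is reduced to a panic.
 *
 *	struct rte_hash_parameters example_params = {
 *		.name = "example_ht",
 *		.entries = 1024,
 *		.key_len = 16,
 *		.hash_func = rte_jhash,
 *		.hash_func_init_val = 0,
 *		.socket_id = rte_socket_id(),
 *		.extra_flag = RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY,
 *	};
 *	struct rte_hash *handle = rte_hash_create(&example_params);
 *	if (handle == NULL)
 *		rte_panic("hash creation failed, rte_errno=%d\n", rte_errno);
 */
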
450 void
451 rte_hash_free(struct rte_hash *h)
452 {
453         struct rte_tailq_entry *te;
454         struct rte_hash_list *hash_list;
455
456         if (h == NULL)
457                 return;
458
459         hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
460
461         rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
462
463         /* find out tailq entry */
464         TAILQ_FOREACH(te, hash_list, next) {
465                 if (te->data == (void *) h)
466                         break;
467         }
468
469         if (te == NULL) {
470                 rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
471                 return;
472         }
473
474         TAILQ_REMOVE(hash_list, te, next);
475
476         rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
477
478         if (h->use_local_cache)
479                 rte_free(h->local_free_slots);
480         if (h->writer_takes_lock)
481                 rte_free(h->readwrite_lock);
482         rte_ring_free(h->free_slots);
483         rte_ring_free(h->free_ext_bkts);
484         rte_free(h->key_store);
485         rte_free(h->buckets);
486         rte_free(h->buckets_ext);
487         rte_free(h->tbl_chng_cnt);
488         rte_free(h);
489         rte_free(te);
490 }
491
492 hash_sig_t
493 rte_hash_hash(const struct rte_hash *h, const void *key)
494 {
495         /* calc hash result by key */
496         return h->hash_func(key, h->key_len, h->hash_func_init_val);
497 }
498
499 int32_t
500 rte_hash_count(const struct rte_hash *h)
501 {
502         uint32_t tot_ring_cnt, cached_cnt = 0;
503         uint32_t i, ret;
504
505         if (h == NULL)
506                 return -EINVAL;
507
508         if (h->use_local_cache) {
509                 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
510                                         (LCORE_CACHE_SIZE - 1);
511                 for (i = 0; i < RTE_MAX_LCORE; i++)
512                         cached_cnt += h->local_free_slots[i].len;
513
514                 ret = tot_ring_cnt - rte_ring_count(h->free_slots) -
515                                                                 cached_cnt;
516         } else {
517                 tot_ring_cnt = h->entries;
518                 ret = tot_ring_cnt - rte_ring_count(h->free_slots);
519         }
520         return ret;
521 }
522
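/*
 * Usage sketch (illustrative, not part of this file): monitoring table
 * occupancy with rte_hash_count(). 'handle' is hypothetical.
 *
 *	int32_t used = rte_hash_count(handle);
 *	if (used >= 0)
 *		printf("%d keys currently stored\n", used);
 */
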
523 /* Read write locks implemented using rte_rwlock */
524 static inline void
525 __hash_rw_writer_lock(const struct rte_hash *h)
526 {
527         if (h->writer_takes_lock && h->hw_trans_mem_support)
528                 rte_rwlock_write_lock_tm(h->readwrite_lock);
529         else if (h->writer_takes_lock)
530                 rte_rwlock_write_lock(h->readwrite_lock);
531 }
532
533 static inline void
534 __hash_rw_reader_lock(const struct rte_hash *h)
535 {
536         if (h->readwrite_concur_support && h->hw_trans_mem_support)
537                 rte_rwlock_read_lock_tm(h->readwrite_lock);
538         else if (h->readwrite_concur_support)
539                 rte_rwlock_read_lock(h->readwrite_lock);
540 }
541
542 static inline void
543 __hash_rw_writer_unlock(const struct rte_hash *h)
544 {
545         if (h->writer_takes_lock && h->hw_trans_mem_support)
546                 rte_rwlock_write_unlock_tm(h->readwrite_lock);
547         else if (h->writer_takes_lock)
548                 rte_rwlock_write_unlock(h->readwrite_lock);
549 }
550
551 static inline void
552 __hash_rw_reader_unlock(const struct rte_hash *h)
553 {
554         if (h->readwrite_concur_support && h->hw_trans_mem_support)
555                 rte_rwlock_read_unlock_tm(h->readwrite_lock);
556         else if (h->readwrite_concur_support)
557                 rte_rwlock_read_unlock(h->readwrite_lock);
558 }
559
560 void
561 rte_hash_reset(struct rte_hash *h)
562 {
563         void *ptr;
564         uint32_t tot_ring_cnt, i;
565
566         if (h == NULL)
567                 return;
568
569         __hash_rw_writer_lock(h);
570         memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
571         memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
572         *h->tbl_chng_cnt = 0;
573
574         /* clear the free ring */
575         while (rte_ring_dequeue(h->free_slots, &ptr) == 0)
576                 continue;
577
578         /* clear free extendable bucket ring and memory */
579         if (h->ext_table_support) {
580                 memset(h->buckets_ext, 0, h->num_buckets *
581                                                 sizeof(struct rte_hash_bucket));
582                 while (rte_ring_dequeue(h->free_ext_bkts, &ptr) == 0)
583                         continue;
584         }
585
586         /* Repopulate the free slots ring. Entry zero is reserved for key misses */
587         if (h->use_local_cache)
588                 tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
589                                         (LCORE_CACHE_SIZE - 1);
590         else
591                 tot_ring_cnt = h->entries;
592
593         for (i = 1; i < tot_ring_cnt + 1; i++)
594                 rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
595
596         /* Repopulate the free ext bkt ring. */
597         if (h->ext_table_support) {
598                 for (i = 1; i <= h->num_buckets; i++)
599                         rte_ring_sp_enqueue(h->free_ext_bkts,
600                                                 (void *)((uintptr_t) i));
601         }
602
603         if (h->use_local_cache) {
604                 /* Reset local caches per lcore */
605                 for (i = 0; i < RTE_MAX_LCORE; i++)
606                         h->local_free_slots[i].len = 0;
607         }
608         __hash_rw_writer_unlock(h);
609 }
610
611 /*
612  * Function called to enqueue an index back into the cache/ring,
613  * as the slot has not been used and can be reused in the
614  * next addition attempt.
615  */
616 static inline void
617 enqueue_slot_back(const struct rte_hash *h,
618                 struct lcore_cache *cached_free_slots,
619                 void *slot_id)
620 {
621         if (h->use_local_cache) {
622                 cached_free_slots->objs[cached_free_slots->len] = slot_id;
623                 cached_free_slots->len++;
624         } else
625                 rte_ring_sp_enqueue(h->free_slots, slot_id);
626 }
627
628 /* Search for a key in the bucket and update its data.
629  * The writer holds the lock before calling this.
630  */
631 static inline int32_t
632 search_and_update(const struct rte_hash *h, void *data, const void *key,
633         struct rte_hash_bucket *bkt, uint16_t sig)
634 {
635         int i;
636         struct rte_hash_key *k, *keys = h->key_store;
637
638         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
639                 if (bkt->sig_current[i] == sig) {
640                         k = (struct rte_hash_key *) ((char *)keys +
641                                         bkt->key_idx[i] * h->key_entry_size);
642                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
643                                 /* 'pdata' acts as the synchronization point
644                                  * when an existing hash entry is updated.
645                                  * Key is not updated in this case.
646                                  */
647                                 __atomic_store_n(&k->pdata,
648                                         data,
649                                         __ATOMIC_RELEASE);
650                                 /*
651                                  * Return index where key is stored,
652                                  * subtracting the first dummy index
653                                  */
654                                 return bkt->key_idx[i] - 1;
655                         }
656                 }
657         }
658         return -1;
659 }
660
661 /* Only tries to insert into one bucket (@prim_bkt) without trying to push
662  * entries around.
663  * Returns 1 if a matching key already exists, 0 on success, and -1 if no
664  * empty entry is available.
665  */
666 static inline int32_t
667 rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
668                 struct rte_hash_bucket *prim_bkt,
669                 struct rte_hash_bucket *sec_bkt,
670                 const struct rte_hash_key *key, void *data,
671                 uint16_t sig, uint32_t new_idx,
672                 int32_t *ret_val)
673 {
674         unsigned int i;
675         struct rte_hash_bucket *cur_bkt;
676         int32_t ret;
677
678         __hash_rw_writer_lock(h);
679         /* Check if the key was inserted after the last check but before this
680          * protected region, to avoid inserting duplicate keys.
681          */
682         ret = search_and_update(h, data, key, prim_bkt, sig);
683         if (ret != -1) {
684                 __hash_rw_writer_unlock(h);
685                 *ret_val = ret;
686                 return 1;
687         }
688
689         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
690                 ret = search_and_update(h, data, key, cur_bkt, sig);
691                 if (ret != -1) {
692                         __hash_rw_writer_unlock(h);
693                         *ret_val = ret;
694                         return 1;
695                 }
696         }
697
698         /* Insert new entry if there is room in the primary
699          * bucket.
700          */
701         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
702                 /* Check if slot is available */
703                 if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
704                         prim_bkt->sig_current[i] = sig;
705                         /* Key can be of arbitrary length, so it is
706                          * not possible to store it atomically.
707                          * Hence the new key element's memory stores
708                          * (key as well as data) should be complete
709                          * before it is referenced.
710                          */
711                         __atomic_store_n(&prim_bkt->key_idx[i],
712                                          new_idx,
713                                          __ATOMIC_RELEASE);
714                         break;
715                 }
716         }
717         __hash_rw_writer_unlock(h);
718
719         if (i != RTE_HASH_BUCKET_ENTRIES)
720                 return 0;
721
722         /* no empty entry */
723         return -1;
724 }
725
726 /* Shift entries along the provided cuckoo path (@leaf and @leaf_slot) and
727  * fill the path head with the new entry (sig, alt_hash, new_idx).
728  * Returns 1 if a matching key was found, -1 if the cuckoo path was
729  * invalidated and the insert failed, and 0 on success.
730  */
731 static inline int
732 rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
733                         struct rte_hash_bucket *bkt,
734                         struct rte_hash_bucket *alt_bkt,
735                         const struct rte_hash_key *key, void *data,
736                         struct queue_node *leaf, uint32_t leaf_slot,
737                         uint16_t sig, uint32_t new_idx,
738                         int32_t *ret_val)
739 {
740         uint32_t prev_alt_bkt_idx;
741         struct rte_hash_bucket *cur_bkt;
742         struct queue_node *prev_node, *curr_node = leaf;
743         struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
744         uint32_t prev_slot, curr_slot = leaf_slot;
745         int32_t ret;
746
747         __hash_rw_writer_lock(h);
748
749         /* In case the empty slot was taken before entering the protected region */
750         if (curr_bkt->key_idx[curr_slot] != EMPTY_SLOT) {
751                 __hash_rw_writer_unlock(h);
752                 return -1;
753         }
754
755         /* Check if key was inserted after last check but before this
756          * protected region.
757          */
758         ret = search_and_update(h, data, key, bkt, sig);
759         if (ret != -1) {
760                 __hash_rw_writer_unlock(h);
761                 *ret_val = ret;
762                 return 1;
763         }
764
765         FOR_EACH_BUCKET(cur_bkt, alt_bkt) {
766                 ret = search_and_update(h, data, key, cur_bkt, sig);
767                 if (ret != -1) {
768                         __hash_rw_writer_unlock(h);
769                         *ret_val = ret;
770                         return 1;
771                 }
772         }
773
774         while (likely(curr_node->prev != NULL)) {
775                 prev_node = curr_node->prev;
776                 prev_bkt = prev_node->bkt;
777                 prev_slot = curr_node->prev_slot;
778
779                 prev_alt_bkt_idx = get_alt_bucket_index(h,
780                                         prev_node->cur_bkt_idx,
781                                         prev_bkt->sig_current[prev_slot]);
782
783                 if (unlikely(&h->buckets[prev_alt_bkt_idx]
784                                 != curr_bkt)) {
785                         /* revert it to empty, otherwise the key would be duplicated */
786                         __atomic_store_n(&curr_bkt->key_idx[curr_slot],
787                                 EMPTY_SLOT,
788                                 __ATOMIC_RELEASE);
789                         __hash_rw_writer_unlock(h);
790                         return -1;
791                 }
792
793                 if (h->readwrite_concur_lf_support) {
794                         /* Inform the previous move. The current move need
795                          * not be informed now as the current bucket entry
796                          * is present in both primary and secondary.
797                          * Since there is one writer, load acquires on
798                          * tbl_chng_cnt are not required.
799                          */
800                         __atomic_store_n(h->tbl_chng_cnt,
801                                          *h->tbl_chng_cnt + 1,
802                                          __ATOMIC_RELEASE);
803                         /* The stores to sig_alt and sig_current should not
804                          * move above the store to tbl_chng_cnt.
805                          */
806                         __atomic_thread_fence(__ATOMIC_RELEASE);
807                 }
808
809                 /* Need to swap the current/alt sig to allow a later
810                  * cuckoo insert to move elements back to their
811                  * primary buckets if available
812                  */
813                 curr_bkt->sig_current[curr_slot] =
814                         prev_bkt->sig_current[prev_slot];
815                 /* Release the updated bucket entry */
816                 __atomic_store_n(&curr_bkt->key_idx[curr_slot],
817                         prev_bkt->key_idx[prev_slot],
818                         __ATOMIC_RELEASE);
819
820                 curr_slot = prev_slot;
821                 curr_node = prev_node;
822                 curr_bkt = curr_node->bkt;
823         }
824
825         if (h->readwrite_concur_lf_support) {
826                 /* Inform the previous move. The current move need
827                  * not be informed now as the current bucket entry
828                  * is present in both primary and secondary.
829                  * Since there is one writer, load acquires on
830                  * tbl_chng_cnt are not required.
831                  */
832                 __atomic_store_n(h->tbl_chng_cnt,
833                                  *h->tbl_chng_cnt + 1,
834                                  __ATOMIC_RELEASE);
835                 /* The stores to sig_alt and sig_current should not
836                  * move above the store to tbl_chng_cnt.
837                  */
838                 __atomic_thread_fence(__ATOMIC_RELEASE);
839         }
840
841         curr_bkt->sig_current[curr_slot] = sig;
842         /* Release the new bucket entry */
843         __atomic_store_n(&curr_bkt->key_idx[curr_slot],
844                          new_idx,
845                          __ATOMIC_RELEASE);
846
847         __hash_rw_writer_unlock(h);
848
849         return 0;
850
851 }
852
853 /*
854  * Make space for a new key, using a BFS cuckoo search and multi-writer
855  * safe cuckoo displacement.
856  */
857 static inline int
858 rte_hash_cuckoo_make_space_mw(const struct rte_hash *h,
859                         struct rte_hash_bucket *bkt,
860                         struct rte_hash_bucket *sec_bkt,
861                         const struct rte_hash_key *key, void *data,
862                         uint16_t sig, uint32_t bucket_idx,
863                         uint32_t new_idx, int32_t *ret_val)
864 {
865         unsigned int i;
866         struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
867         struct queue_node *tail, *head;
868         struct rte_hash_bucket *curr_bkt, *alt_bkt;
869         uint32_t cur_idx, alt_idx;
870
871         tail = queue;
872         head = queue + 1;
873         tail->bkt = bkt;
874         tail->prev = NULL;
875         tail->prev_slot = -1;
876         tail->cur_bkt_idx = bucket_idx;
877
878         /* Cuckoo bfs Search */
879         while (likely(tail != head && head <
880                                         queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
881                                         RTE_HASH_BUCKET_ENTRIES)) {
882                 curr_bkt = tail->bkt;
883                 cur_idx = tail->cur_bkt_idx;
884                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
885                         if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
886                                 int32_t ret = rte_hash_cuckoo_move_insert_mw(h,
887                                                 bkt, sec_bkt, key, data,
888                                                 tail, i, sig,
889                                                 new_idx, ret_val);
890                                 if (likely(ret != -1))
891                                         return ret;
892                         }
893
894                         /* Enqueue new node and keep prev node info */
895                         alt_idx = get_alt_bucket_index(h, cur_idx,
896                                                 curr_bkt->sig_current[i]);
897                         alt_bkt = &(h->buckets[alt_idx]);
898                         head->bkt = alt_bkt;
899                         head->cur_bkt_idx = alt_idx;
900                         head->prev = tail;
901                         head->prev_slot = i;
902                         head++;
903                 }
904                 tail++;
905         }
906
907         return -ENOSPC;
908 }
909
910 static inline int32_t
911 __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
912                                                 hash_sig_t sig, void *data)
913 {
914         uint16_t short_sig;
915         uint32_t prim_bucket_idx, sec_bucket_idx;
916         struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt;
917         struct rte_hash_key *new_k, *keys = h->key_store;
918         void *slot_id = NULL;
919         void *ext_bkt_id = NULL;
920         uint32_t new_idx, bkt_id;
921         int ret;
922         unsigned n_slots;
923         unsigned lcore_id;
924         unsigned int i;
925         struct lcore_cache *cached_free_slots = NULL;
926         int32_t ret_val;
927         struct rte_hash_bucket *last;
928
929         short_sig = get_short_sig(sig);
930         prim_bucket_idx = get_prim_bucket_index(h, sig);
931         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
932         prim_bkt = &h->buckets[prim_bucket_idx];
933         sec_bkt = &h->buckets[sec_bucket_idx];
934         rte_prefetch0(prim_bkt);
935         rte_prefetch0(sec_bkt);
936
937         /* Check if key is already inserted in primary location */
938         __hash_rw_writer_lock(h);
939         ret = search_and_update(h, data, key, prim_bkt, short_sig);
940         if (ret != -1) {
941                 __hash_rw_writer_unlock(h);
942                 return ret;
943         }
944
945         /* Check if key is already inserted in secondary location */
946         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
947                 ret = search_and_update(h, data, key, cur_bkt, short_sig);
948                 if (ret != -1) {
949                         __hash_rw_writer_unlock(h);
950                         return ret;
951                 }
952         }
953
954         __hash_rw_writer_unlock(h);
955
956         /* Did not find a match, so get a new slot for storing the new key */
957         if (h->use_local_cache) {
958                 lcore_id = rte_lcore_id();
959                 cached_free_slots = &h->local_free_slots[lcore_id];
960                 /* Try to get a free slot from the local cache */
961                 if (cached_free_slots->len == 0) {
962                         /* Need to get another burst of free slots from global ring */
963                         n_slots = rte_ring_mc_dequeue_burst(h->free_slots,
964                                         cached_free_slots->objs,
965                                         LCORE_CACHE_SIZE, NULL);
966                         if (n_slots == 0) {
967                                 return -ENOSPC;
968                         }
969
970                         cached_free_slots->len += n_slots;
971                 }
972
973                 /* Get a free slot from the local cache */
974                 cached_free_slots->len--;
975                 slot_id = cached_free_slots->objs[cached_free_slots->len];
976         } else {
977                 if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) {
978                         return -ENOSPC;
979                 }
980         }
981
982         new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
983         new_idx = (uint32_t)((uintptr_t) slot_id);
984         /* Copy key */
985         rte_memcpy(new_k->key, key, h->key_len);
986         /* Key can be of arbitrary length, so it is not possible to store
987          * it atomically. Hence the new key element's memory stores
988          * (key as well as data) should be complete before it is referenced.
989          * 'pdata' acts as the synchronization point when an existing hash
990          * entry is updated.
991          */
992         __atomic_store_n(&new_k->pdata,
993                 data,
994                 __ATOMIC_RELEASE);
995
996         /* Find an empty slot and insert */
997         ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
998                                         short_sig, new_idx, &ret_val);
999         if (ret == 0)
1000                 return new_idx - 1;
1001         else if (ret == 1) {
1002                 enqueue_slot_back(h, cached_free_slots, slot_id);
1003                 return ret_val;
1004         }
1005
1006         /* Primary bucket full, need to make space for new entry */
1007         ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data,
1008                                 short_sig, prim_bucket_idx, new_idx, &ret_val);
1009         if (ret == 0)
1010                 return new_idx - 1;
1011         else if (ret == 1) {
1012                 enqueue_slot_back(h, cached_free_slots, slot_id);
1013                 return ret_val;
1014         }
1015
1016         /* Also search secondary bucket to get better occupancy */
1017         ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data,
1018                                 short_sig, sec_bucket_idx, new_idx, &ret_val);
1019
1020         if (ret == 0)
1021                 return new_idx - 1;
1022         else if (ret == 1) {
1023                 enqueue_slot_back(h, cached_free_slots, slot_id);
1024                 return ret_val;
1025         }
1026
1027         /* if ext table not enabled, we failed the insertion */
1028         if (!h->ext_table_support) {
1029                 enqueue_slot_back(h, cached_free_slots, slot_id);
1030                 return ret;
1031         }
1032
1033         /* Now we need to go through the extendable buckets. The lock is
1034          * needed to protect all extendable bucket operations.
1035          */
1036         __hash_rw_writer_lock(h);
1037         /* Check for duplicates again, since the key could have been inserted before the lock was taken */
1038         ret = search_and_update(h, data, key, prim_bkt, short_sig);
1039         if (ret != -1) {
1040                 enqueue_slot_back(h, cached_free_slots, slot_id);
1041                 goto failure;
1042         }
1043
1044         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1045                 ret = search_and_update(h, data, key, cur_bkt, short_sig);
1046                 if (ret != -1) {
1047                         enqueue_slot_back(h, cached_free_slots, slot_id);
1048                         goto failure;
1049                 }
1050         }
1051
1052         /* Search sec and ext buckets to find an empty entry to insert. */
1053         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1054                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1055                         /* Check if slot is available */
1056                         if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) {
1057                                 cur_bkt->sig_current[i] = short_sig;
1058                                 cur_bkt->key_idx[i] = new_idx;
1059                                 __hash_rw_writer_unlock(h);
1060                                 return new_idx - 1;
1061                         }
1062                 }
1063         }
1064
1065         /* Failed to get an empty entry from extendable buckets. Link a new
1066          * extendable bucket. We first get a free bucket from ring.
1067          */
1068         if (rte_ring_sc_dequeue(h->free_ext_bkts, &ext_bkt_id) != 0) {
1069                 ret = -ENOSPC;
1070                 goto failure;
1071         }
1072
1073         bkt_id = (uint32_t)((uintptr_t)ext_bkt_id) - 1;
1074         /* Use the first location of the new bucket */
1075         (h->buckets_ext[bkt_id]).sig_current[0] = short_sig;
1076         (h->buckets_ext[bkt_id]).key_idx[0] = new_idx;
1077         /* Link the new bucket to sec bucket linked list */
1078         last = rte_hash_get_last_bkt(sec_bkt);
1079         last->next = &h->buckets_ext[bkt_id];
1080         __hash_rw_writer_unlock(h);
1081         return new_idx - 1;
1082
1083 failure:
1084         __hash_rw_writer_unlock(h);
1085         return ret;
1086
1087 }
1088
1089 int32_t
1090 rte_hash_add_key_with_hash(const struct rte_hash *h,
1091                         const void *key, hash_sig_t sig)
1092 {
1093         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1094         return __rte_hash_add_key_with_hash(h, key, sig, 0);
1095 }
1096
1097 int32_t
1098 rte_hash_add_key(const struct rte_hash *h, const void *key)
1099 {
1100         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1101         return __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), 0);
1102 }
1103
1104 int
1105 rte_hash_add_key_with_hash_data(const struct rte_hash *h,
1106                         const void *key, hash_sig_t sig, void *data)
1107 {
1108         int ret;
1109
1110         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1111         ret = __rte_hash_add_key_with_hash(h, key, sig, data);
1112         if (ret >= 0)
1113                 return 0;
1114         else
1115                 return ret;
1116 }
1117
1118 int
1119 rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
1120 {
1121         int ret;
1122
1123         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1124
1125         ret = __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), data);
1126         if (ret >= 0)
1127                 return 0;
1128         else
1129                 return ret;
1130 }
1131
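/*
 * Usage sketch (illustrative, not part of this file): storing and retrieving
 * a user pointer for a key, assuming 'handle' was created with key_len == 16.
 * The names 'handle' and 'some_object' are hypothetical.
 *
 *	uint8_t key[16] = {0};
 *	void *data = NULL;
 *
 *	if (rte_hash_add_key_data(handle, key, some_object) < 0)
 *		printf("add failed\n");
 *	else if (rte_hash_lookup_data(handle, key, &data) >= 0)
 *		printf("lookup returned %p\n", data);
 */
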
1132 /* Search one bucket to find the match key */
1133 static inline int32_t
1134 search_one_bucket(const struct rte_hash *h, const void *key, uint16_t sig,
1135                         void **data, const struct rte_hash_bucket *bkt)
1136 {
1137         int i;
1138         uint32_t key_idx;
1139         void *pdata;
1140         struct rte_hash_key *k, *keys = h->key_store;
1141
1142         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1143                 key_idx = __atomic_load_n(&bkt->key_idx[i],
1144                                           __ATOMIC_ACQUIRE);
1145                 if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) {
1146                         k = (struct rte_hash_key *) ((char *)keys +
1147                                         key_idx * h->key_entry_size);
1148                         pdata = __atomic_load_n(&k->pdata,
1149                                         __ATOMIC_ACQUIRE);
1150
1151                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1152                                 if (data != NULL)
1153                                         *data = pdata;
1154                                 /*
1155                                  * Return index where key is stored,
1156                                  * subtracting the first dummy index
1157                                  */
1158                                 return key_idx - 1;
1159                         }
1160                 }
1161         }
1162         return -1;
1163 }
1164
1165 static inline int32_t
1166 __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
1167                                         hash_sig_t sig, void **data)
1168 {
1169         uint32_t prim_bucket_idx, sec_bucket_idx;
1170         struct rte_hash_bucket *bkt, *cur_bkt;
1171         uint32_t cnt_b, cnt_a;
1172         int ret;
1173         uint16_t short_sig;
1174
1175         short_sig = get_short_sig(sig);
1176         prim_bucket_idx = get_prim_bucket_index(h, sig);
1177         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1178
1179         __hash_rw_reader_lock(h);
1180
1181         do {
1182                 /* Load the table change counter before the lookup
1183                  * starts. Acquire semantics will make sure that
1184                  * loads in search_one_bucket are not hoisted.
1185                  */
1186                 cnt_b = __atomic_load_n(h->tbl_chng_cnt,
1187                                 __ATOMIC_ACQUIRE);
1188
1189                 /* Check if key is in primary location */
1190                 bkt = &h->buckets[prim_bucket_idx];
1191                 ret = search_one_bucket(h, key, short_sig, data, bkt);
1192                 if (ret != -1) {
1193                         __hash_rw_reader_unlock(h);
1194                         return ret;
1195                 }
1196                 /* Calculate secondary hash */
1197                 bkt = &h->buckets[sec_bucket_idx];
1198
1199                 /* Check if key is in secondary location */
1200                 FOR_EACH_BUCKET(cur_bkt, bkt) {
1201                         ret = search_one_bucket(h, key, short_sig,
1202                                                 data, cur_bkt);
1203                         if (ret != -1) {
1204                                 __hash_rw_reader_unlock(h);
1205                                 return ret;
1206                         }
1207                 }
1208
1209                 /* The loads of sig_current in search_one_bucket
1210                  * should not move below the load from tbl_chng_cnt.
1211                  */
1212                 __atomic_thread_fence(__ATOMIC_ACQUIRE);
1213                 /* Re-read the table change counter to check if the
1214                  * table has changed during search. If yes, re-do
1215                  * the search.
1216                  * This load should not get hoisted. The load
1217                  * acquires on cnt_b, key index in primary bucket
1218                  * and key index in secondary bucket will make sure
1219                  * that it does not get hoisted.
1220                  */
1221                 cnt_a = __atomic_load_n(h->tbl_chng_cnt,
1222                                         __ATOMIC_ACQUIRE);
1223         } while (cnt_b != cnt_a);
1224
1225         __hash_rw_reader_unlock(h);
1226
1227         return -ENOENT;
1228 }
1229
1230 int32_t
1231 rte_hash_lookup_with_hash(const struct rte_hash *h,
1232                         const void *key, hash_sig_t sig)
1233 {
1234         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1235         return __rte_hash_lookup_with_hash(h, key, sig, NULL);
1236 }
1237
1238 int32_t
1239 rte_hash_lookup(const struct rte_hash *h, const void *key)
1240 {
1241         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1242         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), NULL);
1243 }
1244
1245 int
1246 rte_hash_lookup_with_hash_data(const struct rte_hash *h,
1247                         const void *key, hash_sig_t sig, void **data)
1248 {
1249         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1250         return __rte_hash_lookup_with_hash(h, key, sig, data);
1251 }
1252
1253 int
1254 rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data)
1255 {
1256         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1257         return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data);
1258 }
1259
1260 static inline void
1261 remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
1262 {
1263         unsigned lcore_id, n_slots;
1264         struct lcore_cache *cached_free_slots;
1265
1266         if (h->use_local_cache) {
1267                 lcore_id = rte_lcore_id();
1268                 cached_free_slots = &h->local_free_slots[lcore_id];
1269                 /* Cache full, need to free it. */
1270                 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1271                         /* Need to enqueue the free slots in global ring. */
1272                         n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
1273                                                 cached_free_slots->objs,
1274                                                 LCORE_CACHE_SIZE, NULL);
1275                         cached_free_slots->len -= n_slots;
1276                 }
1277                 /* Put index of new free slot in cache. */
1278                 cached_free_slots->objs[cached_free_slots->len] =
1279                                 (void *)((uintptr_t)bkt->key_idx[i]);
1280                 cached_free_slots->len++;
1281         } else {
1282                 rte_ring_sp_enqueue(h->free_slots,
1283                                 (void *)((uintptr_t)bkt->key_idx[i]));
1284         }
1285 }
1286
1287 /* Compact the linked list by moving the key from the last entry in the
1288  * linked list to the empty slot.
1289  */
1290 static inline void
1291 __rte_hash_compact_ll(struct rte_hash_bucket *cur_bkt, int pos) {
1292         int i;
1293         struct rte_hash_bucket *last_bkt;
1294
1295         if (!cur_bkt->next)
1296                 return;
1297
1298         last_bkt = rte_hash_get_last_bkt(cur_bkt);
1299
1300         for (i = RTE_HASH_BUCKET_ENTRIES - 1; i >= 0; i--) {
1301                 if (last_bkt->key_idx[i] != EMPTY_SLOT) {
1302                         cur_bkt->key_idx[pos] = last_bkt->key_idx[i];
1303                         cur_bkt->sig_current[pos] = last_bkt->sig_current[i];
1304                         last_bkt->sig_current[i] = NULL_SIGNATURE;
1305                         last_bkt->key_idx[i] = EMPTY_SLOT;
1306                         return;
1307                 }
1308         }
1309 }
1310
1311 /* Search one bucket and remove the matched key.
1312  * Writer is expected to hold the lock while calling this
1313  * function.
1314  */
1315 static inline int32_t
1316 search_and_remove(const struct rte_hash *h, const void *key,
1317                         struct rte_hash_bucket *bkt, uint16_t sig, int *pos)
1318 {
1319         struct rte_hash_key *k, *keys = h->key_store;
1320         unsigned int i;
1321         uint32_t key_idx;
1322
1323         /* Check if key is in bucket */
1324         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1325                 key_idx = __atomic_load_n(&bkt->key_idx[i],
1326                                           __ATOMIC_ACQUIRE);
1327                 if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) {
1328                         k = (struct rte_hash_key *) ((char *)keys +
1329                                         key_idx * h->key_entry_size);
1330                         if (rte_hash_cmp_eq(key, k->key, h) == 0) {
1331                                 bkt->sig_current[i] = NULL_SIGNATURE;
1332                                 /* Free the key store index if
1333                                  * no_free_on_del is disabled.
1334                                  */
1335                                 if (!h->no_free_on_del)
1336                                         remove_entry(h, bkt, i);
1337
1338                                 __atomic_store_n(&bkt->key_idx[i],
1339                                                  EMPTY_SLOT,
1340                                                  __ATOMIC_RELEASE);
1341
1342                                 *pos = i;
1343                                 /*
1344                                  * Return index where key is stored,
1345                                  * subtracting the first dummy index
1346                                  */
1347                                 return key_idx - 1;
1348                         }
1349                 }
1350         }
1351         return -1;
1352 }
1353
1354 static inline int32_t
1355 __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
1356                                                 hash_sig_t sig)
1357 {
1358         uint32_t prim_bucket_idx, sec_bucket_idx;
1359         struct rte_hash_bucket *prim_bkt, *sec_bkt, *prev_bkt, *last_bkt;
1360         struct rte_hash_bucket *cur_bkt;
1361         int pos;
1362         int32_t ret, i;
1363         uint16_t short_sig;
1364
1365         short_sig = get_short_sig(sig);
1366         prim_bucket_idx = get_prim_bucket_index(h, sig);
1367         sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
1368         prim_bkt = &h->buckets[prim_bucket_idx];
1369
1370         __hash_rw_writer_lock(h);
1371         /* look for key in primary bucket */
1372         ret = search_and_remove(h, key, prim_bkt, short_sig, &pos);
1373         if (ret != -1) {
1374                 __rte_hash_compact_ll(prim_bkt, pos);
1375                 last_bkt = prim_bkt->next;
1376                 prev_bkt = prim_bkt;
1377                 goto return_bkt;
1378         }
1379
1380         /* Calculate secondary hash */
1381         sec_bkt = &h->buckets[sec_bucket_idx];
1382
1383         FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
1384                 ret = search_and_remove(h, key, cur_bkt, short_sig, &pos);
1385                 if (ret != -1) {
1386                         __rte_hash_compact_ll(cur_bkt, pos);
1387                         last_bkt = sec_bkt->next;
1388                         prev_bkt = sec_bkt;
1389                         goto return_bkt;
1390                 }
1391         }
1392
1393         __hash_rw_writer_unlock(h);
1394         return -ENOENT;
1395
1396 /* Walk to the last extendable bucket and recycle it if it is empty */
1397 return_bkt:
1398         if (!last_bkt) {
1399                 __hash_rw_writer_unlock(h);
1400                 return ret;
1401         }
1402         while (last_bkt->next) {
1403                 prev_bkt = last_bkt;
1404                 last_bkt = last_bkt->next;
1405         }
1406
1407         for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1408                 if (last_bkt->key_idx[i] != EMPTY_SLOT)
1409                         break;
1410         }
1411         /* Found an empty last bucket; unlink it and return it to the free ring */
1412         if (i == RTE_HASH_BUCKET_ENTRIES) {
1413                 prev_bkt->next = last_bkt->next = NULL;
1414                 uint32_t index = last_bkt - h->buckets_ext + 1;
1415                 rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
1416         }
1417
1418         __hash_rw_writer_unlock(h);
1419         return ret;
1420 }
1421
1422 int32_t
1423 rte_hash_del_key_with_hash(const struct rte_hash *h,
1424                         const void *key, hash_sig_t sig)
1425 {
1426         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1427         return __rte_hash_del_key_with_hash(h, key, sig);
1428 }
1429
1430 int32_t
1431 rte_hash_del_key(const struct rte_hash *h, const void *key)
1432 {
1433         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1434         return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key));
1435 }
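
/*
 * Illustrative sketch (editor's addition, not part of the library): when the
 * caller has already computed the hash for a key, or will reuse it for other
 * calls, the *_with_hash variant avoids hashing the key a second time. The
 * wrapper name below is hypothetical.
 */
static __rte_unused int32_t
example_del_with_precomputed_hash(const struct rte_hash *h, const void *key)
{
        /* Compute the hash once; it could also come from an earlier call */
        hash_sig_t sig = rte_hash_hash(h, key);

        /* Returns the position of the deleted key, or -ENOENT if not found */
        return rte_hash_del_key_with_hash(h, key, sig);
}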
1436
1437 int
1438 rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
1439                                void **key)
1440 {
1441         RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
1442
1443         struct rte_hash_key *k, *keys = h->key_store;
1444         k = (struct rte_hash_key *) ((char *) keys + (position + 1) *
1445                                      h->key_entry_size);
1446         *key = k->key;
1447
1448         if (position !=
1449             __rte_hash_lookup_with_hash(h, *key, rte_hash_hash(h, *key),
1450                                         NULL)) {
1451                 return -ENOENT;
1452         }
1453
1454         return 0;
1455 }
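
/*
 * Illustrative sketch (editor's addition, not part of the library): mapping a
 * position returned by rte_hash_add_key()/rte_hash_lookup() back to a pointer
 * to the stored key. The function name is hypothetical.
 */
static __rte_unused const void *
example_stored_key_from_position(const struct rte_hash *h, int32_t position)
{
        void *stored_key = NULL;

        if (rte_hash_get_key_with_position(h, position, &stored_key) != 0)
                return NULL;

        return stored_key;
}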
1456
1457 int __rte_experimental
1458 rte_hash_free_key_with_position(const struct rte_hash *h,
1459                                 const int32_t position)
1460 {
1461         RETURN_IF_TRUE(((h == NULL) || (position == EMPTY_SLOT)), -EINVAL);
1462
1463         unsigned int lcore_id, n_slots;
1464         struct lcore_cache *cached_free_slots;
1465         const int32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES;
1466
1467         /* Out of bounds */
1468         if (position >= total_entries)
1469                 return -EINVAL;
1470
1471         if (h->use_local_cache) {
1472                 lcore_id = rte_lcore_id();
1473                 cached_free_slots = &h->local_free_slots[lcore_id];
1474                 /* Local cache full, flush a burst of slots to the global ring. */
1475                 if (cached_free_slots->len == LCORE_CACHE_SIZE) {
1476                         /* Need to enqueue the free slots in global ring. */
1477                         n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
1478                                                 cached_free_slots->objs,
1479                                                 LCORE_CACHE_SIZE, NULL);
1480                         cached_free_slots->len -= n_slots;
1481                 }
1482                 /* Put index of new free slot in cache. */
1483                 cached_free_slots->objs[cached_free_slots->len] =
1484                                         (void *)((uintptr_t)position);
1485                 cached_free_slots->len++;
1486         } else {
1487                 rte_ring_sp_enqueue(h->free_slots,
1488                                 (void *)((uintptr_t)position));
1489         }
1490
1491         return 0;
1492 }
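
/*
 * Illustrative sketch (editor's addition, not part of the library): with the
 * RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL extra flag, deleting a key only unlinks
 * it from its bucket; the key-store slot must be returned explicitly once the
 * application knows that no reader still references it (for example after an
 * RCU-like grace period managed by the caller). The function name is
 * hypothetical.
 */
static __rte_unused int
example_deferred_free(const struct rte_hash *h, const void *key)
{
        int32_t pos = rte_hash_del_key(h, key);

        if (pos < 0)
                return pos;

        /* ... wait here until all readers have quiesced (caller's scheme) ... */

        return rte_hash_free_key_with_position(h, pos);
}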
1493
1494 static inline void
1495 compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
1496                         const struct rte_hash_bucket *prim_bkt,
1497                         const struct rte_hash_bucket *sec_bkt,
1498                         uint16_t sig,
1499                         enum rte_hash_sig_compare_function sig_cmp_fn)
1500 {
1501         unsigned int i;
1502
1503         /* In the match masks, the first bit of every two-bit pair indicates a match */
1504         switch (sig_cmp_fn) {
1505 #ifdef RTE_MACHINE_CPUFLAG_SSE2
1506         case RTE_HASH_COMPARE_SSE:
1507                 /* Compare all signatures in the bucket */
1508                 *prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1509                                 _mm_load_si128(
1510                                         (__m128i const *)prim_bkt->sig_current),
1511                                 _mm_set1_epi16(sig)));
1512                 /* Compare all signatures in the bucket */
1513                 *sec_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
1514                                 _mm_load_si128(
1515                                         (__m128i const *)sec_bkt->sig_current),
1516                                 _mm_set1_epi16(sig)));
1517                 break;
1518 #endif
1519         default:
1520                 for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
1521                         *prim_hash_matches |=
1522                                 ((sig == prim_bkt->sig_current[i]) << (i << 1));
1523                         *sec_hash_matches |=
1524                                 ((sig == sec_bkt->sig_current[i]) << (i << 1));
1525                 }
1526         }
1527 }
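
/*
 * Illustrative sketch (editor's addition, not part of the library): because
 * only the first bit of every two-bit pair in the masks written by
 * compare_signatures() carries a match, the index of the first matching
 * entry is the position of the lowest set bit divided by two, which is how
 * the lookup path below decodes the masks. The helper name is hypothetical.
 */
static __rte_unused unsigned int
example_first_match_entry(uint32_t hitmask)
{
        /* Caller must ensure hitmask != 0 */
        return __builtin_ctzl(hitmask) >> 1;
}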
1528
1529 #define PREFETCH_OFFSET 4
1530 static inline void
1531 __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
1532                         int32_t num_keys, int32_t *positions,
1533                         uint64_t *hit_mask, void *data[])
1534 {
1535         uint64_t hits = 0;
1536         int32_t i;
1537         int32_t ret;
1538         uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
1539         uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
1540         uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
1541         uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
1542         const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1543         const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
1544         uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1545         uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
1546         struct rte_hash_bucket *cur_bkt, *next_bkt;
1547         void *pdata[RTE_HASH_LOOKUP_BULK_MAX];
1548         uint32_t cnt_b, cnt_a;
1549
1550         /* Prefetch first keys */
1551         for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
1552                 rte_prefetch0(keys[i]);
1553
1554         /*
1555          * Prefetch the rest of the keys, calculate their primary and
1556          * secondary buckets, and prefetch those buckets
1557          */
1558         for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) {
1559                 rte_prefetch0(keys[i + PREFETCH_OFFSET]);
1560
1561                 prim_hash[i] = rte_hash_hash(h, keys[i]);
1562
1563                 sig[i] = get_short_sig(prim_hash[i]);
1564                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1565                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1566
1567                 primary_bkt[i] = &h->buckets[prim_index[i]];
1568                 secondary_bkt[i] = &h->buckets[sec_index[i]];
1569
1570                 rte_prefetch0(primary_bkt[i]);
1571                 rte_prefetch0(secondary_bkt[i]);
1572         }
1573
1574         /* Calculate and prefetch the buckets for the remaining keys */
1575         for (; i < num_keys; i++) {
1576                 prim_hash[i] = rte_hash_hash(h, keys[i]);
1577
1578                 sig[i] = get_short_sig(prim_hash[i]);
1579                 prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
1580                 sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
1581
1582                 primary_bkt[i] = &h->buckets[prim_index[i]];
1583                 secondary_bkt[i] = &h->buckets[sec_index[i]];
1584
1585                 rte_prefetch0(primary_bkt[i]);
1586                 rte_prefetch0(secondary_bkt[i]);
1587         }
1588
1589         __hash_rw_reader_lock(h);
1590         do {
1591                 /* Load the table change counter before the lookup
1592                  * starts. Acquire semantics will make sure that
1593                  * loads in compare_signatures are not hoisted.
1594                  */
1595                 cnt_b = __atomic_load_n(h->tbl_chng_cnt,
1596                                         __ATOMIC_ACQUIRE);
1597
1598                 /* Compare signatures and prefetch key slot of first hit */
1599                 for (i = 0; i < num_keys; i++) {
1600                         compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
1601                                 primary_bkt[i], secondary_bkt[i],
1602                                 sig[i], h->sig_cmp_fn);
1603
1604                         if (prim_hitmask[i]) {
1605                                 uint32_t first_hit =
1606                                                 __builtin_ctzl(prim_hitmask[i])
1607                                                 >> 1;
1608                                 uint32_t key_idx =
1609                                         primary_bkt[i]->key_idx[first_hit];
1610                                 const struct rte_hash_key *key_slot =
1611                                         (const struct rte_hash_key *)(
1612                                         (const char *)h->key_store +
1613                                         key_idx * h->key_entry_size);
1614                                 rte_prefetch0(key_slot);
1615                                 continue;
1616                         }
1617
1618                         if (sec_hitmask[i]) {
1619                                 uint32_t first_hit =
1620                                                 __builtin_ctzl(sec_hitmask[i])
1621                                                 >> 1;
1622                                 uint32_t key_idx =
1623                                         secondary_bkt[i]->key_idx[first_hit];
1624                                 const struct rte_hash_key *key_slot =
1625                                         (const struct rte_hash_key *)(
1626                                         (const char *)h->key_store +
1627                                         key_idx * h->key_entry_size);
1628                                 rte_prefetch0(key_slot);
1629                         }
1630                 }
1631
1632                 /* Compare keys, checking hits in the primary bucket first */
1633                 for (i = 0; i < num_keys; i++) {
1634                         positions[i] = -ENOENT;
1635                         while (prim_hitmask[i]) {
1636                                 uint32_t hit_index =
1637                                                 __builtin_ctzl(prim_hitmask[i])
1638                                                 >> 1;
1639                                 uint32_t key_idx =
1640                                 __atomic_load_n(
1641                                         &primary_bkt[i]->key_idx[hit_index],
1642                                         __ATOMIC_ACQUIRE);
1643                                 const struct rte_hash_key *key_slot =
1644                                         (const struct rte_hash_key *)(
1645                                         (const char *)h->key_store +
1646                                         key_idx * h->key_entry_size);
1647
1648                                 if (key_idx != EMPTY_SLOT)
1649                                         pdata[i] = __atomic_load_n(
1650                                                         &key_slot->pdata,
1651                                                         __ATOMIC_ACQUIRE);
1652                                 /*
1653                                  * If the key index is 0, do not compare the
1654                                  * key, as index 0 is the dummy slot
1655                                  */
1656                                 if (!!key_idx &
1657                                         !rte_hash_cmp_eq(
1658                                                 key_slot->key, keys[i], h)) {
1659                                         if (data != NULL)
1660                                                 data[i] = pdata[i];
1661
1662                                         hits |= 1ULL << i;
1663                                         positions[i] = key_idx - 1;
1664                                         goto next_key;
1665                                 }
1666                                 prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
1667                         }
1668
1669                         while (sec_hitmask[i]) {
1670                                 uint32_t hit_index =
1671                                                 __builtin_ctzl(sec_hitmask[i])
1672                                                 >> 1;
1673                                 uint32_t key_idx =
1674                                 __atomic_load_n(
1675                                         &secondary_bkt[i]->key_idx[hit_index],
1676                                         __ATOMIC_ACQUIRE);
1677                                 const struct rte_hash_key *key_slot =
1678                                         (const struct rte_hash_key *)(
1679                                         (const char *)h->key_store +
1680                                         key_idx * h->key_entry_size);
1681
1682                                 if (key_idx != EMPTY_SLOT)
1683                                         pdata[i] = __atomic_load_n(
1684                                                         &key_slot->pdata,
1685                                                         __ATOMIC_ACQUIRE);
1686                                 /*
1687                                  * If the key index is 0, do not compare the
1688                                  * key, as index 0 is the dummy slot
1689                                  */
1690
1691                                 if (!!key_idx &
1692                                         !rte_hash_cmp_eq(
1693                                                 key_slot->key, keys[i], h)) {
1694                                         if (data != NULL)
1695                                                 data[i] = pdata[i];
1696
1697                                         hits |= 1ULL << i;
1698                                         positions[i] = key_idx - 1;
1699                                         goto next_key;
1700                                 }
1701                                 sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
1702                         }
1703 next_key:
1704                         continue;
1705                 }
1706
1707                 /* The loads of sig_current in compare_signatures
1708                  * should not move below the load from tbl_chng_cnt.
1709                  */
1710                 __atomic_thread_fence(__ATOMIC_ACQUIRE);
1711                 /* Re-read the table change counter to check if the
1712                  * table has changed during the search. If it has,
1713                  * redo the search.
1714                  * This load must not be hoisted. The acquire
1715                  * loads of cnt_b and of the primary and secondary
1716                  * key indexes make sure that it does not get
1717                  * hoisted.
1718                  */
1719                 cnt_a = __atomic_load_n(h->tbl_chng_cnt,
1720                                         __ATOMIC_ACQUIRE);
1721         } while (cnt_b != cnt_a);
1722
1723         /* All keys found, no need to search the extendable buckets */
1724         if ((hits == ((1ULL << num_keys) - 1)) || !h->ext_table_support) {
1725                 if (hit_mask != NULL)
1726                         *hit_mask = hits;
1727                 __hash_rw_reader_unlock(h);
1728                 return;
1729         }
1730
1731         /* Check the extendable buckets for the keys not found yet */
1732         for (i = 0; i < num_keys; i++) {
1733                 if ((hits & (1ULL << i)) != 0)
1734                         continue;
1735                 next_bkt = secondary_bkt[i]->next;
1736                 FOR_EACH_BUCKET(cur_bkt, next_bkt) {
1737                         if (data != NULL)
1738                                 ret = search_one_bucket(h, keys[i],
1739                                                 sig[i], &data[i], cur_bkt);
1740                         else
1741                                 ret = search_one_bucket(h, keys[i],
1742                                                 sig[i], NULL, cur_bkt);
1743                         if (ret != -1) {
1744                                 positions[i] = ret;
1745                                 hits |= 1ULL << i;
1746                                 break;
1747                         }
1748                 }
1749         }
1750
1751         __hash_rw_reader_unlock(h);
1752
1753         if (hit_mask != NULL)
1754                 *hit_mask = hits;
1755 }
1756
1757 int
1758 rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
1759                       uint32_t num_keys, int32_t *positions)
1760 {
1761         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
1762                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
1763                         (positions == NULL)), -EINVAL);
1764
1765         __rte_hash_lookup_bulk(h, keys, num_keys, positions, NULL, NULL);
1766         return 0;
1767 }
1768
1769 int
1770 rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
1771                       uint32_t num_keys, uint64_t *hit_mask, void *data[])
1772 {
1773         RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
1774                         (num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
1775                         (hit_mask == NULL)), -EINVAL);
1776
1777         int32_t positions[num_keys];
1778
1779         __rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask, data);
1780
1781         /* Return number of hits */
1782         return __builtin_popcountl(*hit_mask);
1783 }
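
/*
 * Illustrative sketch (editor's addition, not part of the library): a typical
 * bulk lookup drains the hit mask bit by bit; data[i] is only meaningful for
 * keys whose bit is set in the mask. The function name is hypothetical and n
 * must not exceed RTE_HASH_LOOKUP_BULK_MAX.
 */
static __rte_unused int
example_lookup_burst(const struct rte_hash *h, const void **keys,
                        uint32_t n, void *data[])
{
        uint64_t hit_mask = 0;
        uint32_t i;
        int nb_hits;

        nb_hits = rte_hash_lookup_bulk_data(h, keys, n, &hit_mask, data);

        for (i = 0; i < n; i++) {
                if ((hit_mask & (1ULL << i)) == 0) {
                        /* keys[i] was not found; data[i] is undefined */
                        data[i] = NULL;
                }
        }

        return nb_hits;
}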
1784
1785 int32_t
1786 rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next)
1787 {
1788         uint32_t bucket_idx, idx, position;
1789         struct rte_hash_key *next_key;
1790
1791         RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL);
1792
1793         const uint32_t total_entries_main = h->num_buckets *
1794                                                         RTE_HASH_BUCKET_ENTRIES;
1795         const uint32_t total_entries = total_entries_main << 1;
1796
1797         /* Past the entries of the main table, continue with the ext table */
1798         if (*next >= total_entries_main)
1799                 goto extend_table;
1800
1801         /* Calculate bucket and index of current iterator */
1802         bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
1803         idx = *next % RTE_HASH_BUCKET_ENTRIES;
1804
1805         /* If current position is empty, go to the next one */
1806         while ((position = __atomic_load_n(&h->buckets[bucket_idx].key_idx[idx],
1807                                         __ATOMIC_ACQUIRE)) == EMPTY_SLOT) {
1808                 (*next)++;
1809                 /* End of table */
1810                 if (*next == total_entries_main)
1811                         goto extend_table;
1812                 bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
1813                 idx = *next % RTE_HASH_BUCKET_ENTRIES;
1814         }
1815
1816         __hash_rw_reader_lock(h);
1817         next_key = (struct rte_hash_key *) ((char *)h->key_store +
1818                                 position * h->key_entry_size);
1819         /* Return key and data */
1820         *key = next_key->key;
1821         *data = next_key->pdata;
1822
1823         __hash_rw_reader_unlock(h);
1824
1825         /* Increment iterator */
1826         (*next)++;
1827
1828         return position - 1;
1829
1830 /* Begin iterating over the extendable buckets */
1831 extend_table:
1832         /* Out of the total range, or the extendable table feature is not enabled */
1833         if (*next >= total_entries || !h->ext_table_support)
1834                 return -ENOENT;
1835
1836         bucket_idx = (*next - total_entries_main) / RTE_HASH_BUCKET_ENTRIES;
1837         idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
1838
1839         while ((position = h->buckets_ext[bucket_idx].key_idx[idx]) == EMPTY_SLOT) {
1840                 (*next)++;
1841                 if (*next == total_entries)
1842                         return -ENOENT;
1843                 bucket_idx = (*next - total_entries_main) /
1844                                                 RTE_HASH_BUCKET_ENTRIES;
1845                 idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
1846         }
1847         __hash_rw_reader_lock(h);
1848         next_key = (struct rte_hash_key *) ((char *)h->key_store +
1849                                 position * h->key_entry_size);
1850         /* Return key and data */
1851         *key = next_key->key;
1852         *data = next_key->pdata;
1853
1854         __hash_rw_reader_unlock(h);
1855
1856         /* Increment iterator */
1857         (*next)++;
1858         return position - 1;
1859 }
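
/*
 * Illustrative sketch (editor's addition, not part of the library): walking
 * every live entry in the table, including the extendable buckets when that
 * feature is enabled. The iterator state must start at 0 and must not be
 * modified by the caller between calls. The function name is hypothetical.
 */
static __rte_unused uint32_t
example_count_entries(const struct rte_hash *h)
{
        const void *key;
        void *data;
        uint32_t next = 0;
        uint32_t count = 0;

        while (rte_hash_iterate(h, &key, &data, &next) >= 0)
                count++;

        return count;
}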