/* deb_dpdk.git: lib/librte_table/rte_table_hash_key8.c (new upstream version 18.02) */
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <string.h>
#include <stdio.h>

#include <rte_common.h>
#include <rte_mbuf.h>
#include <rte_memory.h>
#include <rte_malloc.h>
#include <rte_log.h>

#include "rte_table_hash.h"
#include "rte_lru.h"

#define KEY_SIZE						8

#define KEYS_PER_BUCKET						4

#ifdef RTE_TABLE_STATS_COLLECT

#define RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(table, val) \
	table->stats.n_pkts_in += val
#define RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(table, val) \
	table->stats.n_pkts_lookup_miss += val

#else

#define RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(table, val)
#define RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(table, val)

#endif

struct rte_bucket_4_8 {
	/* Cache line 0 */
	uint64_t signature;
	uint64_t lru_list;
	struct rte_bucket_4_8 *next;
	uint64_t next_valid;

	uint64_t key[4];

	/* Cache line 1 */
	uint8_t data[0];
};
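/*
 * Layout notes (editor's summary, not part of the original source):
 * - signature is used as a 4-bit occupancy bitmap: bit i is set when key
 *   slot i holds a valid key (see the 1/2/4/8 masks used below).
 * - lru_list packs the LRU ordering of the 4 key slots into a single 64-bit
 *   word (see rte_lru.h); the LRU variant seeds it with 0x0000000100020003
 *   at creation so the slots start in a defined order.
 * - next/next_valid chain extension buckets in the "ext" variant; the LRU
 *   variant never extends a bucket.
 * - data[] holds 4 entries of entry_size bytes each, starting on the next
 *   cache line (the 64-byte header above is enforced at table creation).
 */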

struct rte_table_hash {
	struct rte_table_stats stats;

	/* Input parameters */
	uint32_t n_buckets;
	uint32_t key_size;
	uint32_t entry_size;
	uint32_t bucket_size;
	uint32_t key_offset;
	uint64_t key_mask;
	rte_table_hash_op_hash f_hash;
	uint64_t seed;

	/* Extendible buckets */
	uint32_t n_buckets_ext;
	uint32_t stack_pos;
	uint32_t *stack;

	/* Lookup table */
	uint8_t memory[0] __rte_cache_aligned;
};
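/*
 * Layout of memory[] (editor's summary, not part of the original source):
 * the first n_buckets buckets are indexed directly by the hash value. The
 * "ext" variant places n_buckets_ext extension buckets immediately after
 * them, followed by the stack[] array of free extension-bucket indices (see
 * rte_table_hash_create_key8_ext() below). The LRU variant allocates only
 * the first region.
 */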

static int
keycmp(void *a, void *b, void *b_mask)
{
	uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask;

	return a64[0] != (b64[0] & b_mask64[0]);
}

static void
keycpy(void *dst, void *src, void *src_mask)
{
	uint64_t *dst64 = dst, *src64 = src, *src_mask64 = src_mask;

	dst64[0] = src64[0] & src_mask64[0];
}
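/*
 * Editor's note: keycmp() returns 0 on match and non-zero on mismatch, i.e.
 * memcmp()-like semantics. Only the caller-supplied key (b) is masked here,
 * because keys stored in a bucket were already masked by keycpy() when they
 * were added, so comparing the stored word against the masked lookup word is
 * sufficient.
 */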

static int
check_params_create(struct rte_table_hash_params *params)
{
	/* name */
	if (params->name == NULL) {
		RTE_LOG(ERR, TABLE, "%s: name invalid value\n", __func__);
		return -EINVAL;
	}

	/* key_size */
	if (params->key_size != KEY_SIZE) {
		RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
		return -EINVAL;
	}

	/* n_keys */
	if (params->n_keys == 0) {
		RTE_LOG(ERR, TABLE, "%s: n_keys is zero\n", __func__);
		return -EINVAL;
	}

	/* n_buckets */
	if ((params->n_buckets == 0) ||
		(!rte_is_power_of_2(params->n_buckets))) {
		RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
		return -EINVAL;
	}

	/* f_hash */
	if (params->f_hash == NULL) {
		RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
			__func__);
		return -EINVAL;
	}

	return 0;
}

static void *
rte_table_hash_create_key8_lru(void *params, int socket_id, uint32_t entry_size)
{
	struct rte_table_hash_params *p = params;
	struct rte_table_hash *f;
	uint64_t bucket_size, total_size;
	uint32_t n_buckets, i;

	/* Check input parameters */
	if ((check_params_create(p) != 0) ||
		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
		((sizeof(struct rte_bucket_4_8) % 64) != 0))
		return NULL;

	/*
	 * Table dimensioning
	 *
	 * Objective: Pick the number of buckets (n_buckets) so that there is a
	 * chance to store n_keys keys in the table.
	 *
	 * Note: Since the buckets do not get extended, it is not possible to
	 * guarantee that n_keys keys can be stored in the table at any time. In the
	 * worst case scenario when all the n_keys fall into the same bucket, only
	 * a maximum of KEYS_PER_BUCKET keys will be stored in the table. This case
	 * defeats the purpose of the hash table. It indicates unsuitable f_hash or
	 * n_keys to n_buckets ratio.
	 *
	 * MIN(n_buckets) = (n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET
	 */
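	/*
	 * Illustrative example (editor's addition): with n_keys = 1000 and
	 * KEYS_PER_BUCKET = 4, MIN(n_buckets) = (1000 + 3) / 4 = 250, which
	 * rte_align32pow2() rounds up to 256; the larger of this value and the
	 * user-supplied p->n_buckets is used.
	 */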
	n_buckets = rte_align32pow2(
		(p->n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET);
	n_buckets = RTE_MAX(n_buckets, p->n_buckets);

	/* Memory allocation */
	bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_8) +
		KEYS_PER_BUCKET * entry_size);
	total_size = sizeof(struct rte_table_hash) + n_buckets * bucket_size;

	if (total_size > SIZE_MAX) {
		RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes"
			" for hash table %s\n",
			__func__, total_size, p->name);
		return NULL;
	}

	f = rte_zmalloc_socket(p->name,
		(size_t)total_size,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (f == NULL) {
		RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes"
			" for hash table %s\n",
			__func__, total_size, p->name);
		return NULL;
	}

	RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
		"is %" PRIu64 " bytes\n",
		__func__, p->name, total_size);

	/* Memory initialization */
	f->n_buckets = n_buckets;
	f->key_size = KEY_SIZE;
	f->entry_size = entry_size;
	f->bucket_size = bucket_size;
	f->key_offset = p->key_offset;
	f->f_hash = p->f_hash;
	f->seed = p->seed;

	if (p->key_mask != NULL)
		f->key_mask = ((uint64_t *)p->key_mask)[0];
	else
		f->key_mask = 0xFFFFFFFFFFFFFFFFLLU;

	for (i = 0; i < n_buckets; i++) {
		struct rte_bucket_4_8 *bucket;

		bucket = (struct rte_bucket_4_8 *) &f->memory[i *
			f->bucket_size];
		bucket->lru_list = 0x0000000100020003LLU;
	}

	return f;
}

static int
rte_table_hash_free_key8_lru(void *table)
{
	struct rte_table_hash *f = table;

	/* Check input parameters */
	if (f == NULL) {
		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
		return -EINVAL;
	}

	rte_free(f);
	return 0;
}

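/*
 * Editor's note: the LRU add handler below has three outcomes. If the key is
 * already present, its entry is overwritten in place; if the bucket has a
 * free slot, the key is stored there; and if the bucket is full, the least
 * recently used slot is silently evicted and reused. All three cases return
 * 0, with *key_found distinguishing the update case (1) from the insert
 * cases (0).
 */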
static int
rte_table_hash_entry_add_key8_lru(
	void *table,
	void *key,
	void *entry,
	int *key_found,
	void **entry_ptr)
{
	struct rte_table_hash *f = table;
	struct rte_bucket_4_8 *bucket;
	uint64_t signature, mask, pos;
	uint32_t bucket_index, i;

	signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket = (struct rte_bucket_4_8 *)
		&f->memory[bucket_index * f->bucket_size];

	/* Key is present in the bucket */
	for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
		uint64_t bucket_signature = bucket->signature;
		uint64_t *bucket_key = &bucket->key[i];

		if ((bucket_signature & mask) &&
			(keycmp(bucket_key, key, &f->key_mask) == 0)) {
			uint8_t *bucket_data = &bucket->data[i * f->entry_size];

			memcpy(bucket_data, entry, f->entry_size);
			lru_update(bucket, i);
			*key_found = 1;
			*entry_ptr = (void *) bucket_data;
			return 0;
		}
	}

	/* Key is not present in the bucket */
	for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
		uint64_t bucket_signature = bucket->signature;

		if ((bucket_signature & mask) == 0) {
			uint8_t *bucket_data = &bucket->data[i * f->entry_size];

			bucket->signature |= mask;
			keycpy(&bucket->key[i], key, &f->key_mask);
			memcpy(bucket_data, entry, f->entry_size);
			lru_update(bucket, i);
			*key_found = 0;
			*entry_ptr = (void *) bucket_data;

			return 0;
		}
	}

	/* Bucket full: replace LRU entry */
	pos = lru_pos(bucket);
	keycpy(&bucket->key[pos], key, &f->key_mask);
	memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
	lru_update(bucket, pos);
	*key_found = 0;
	*entry_ptr = (void *) &bucket->data[pos * f->entry_size];

	return 0;
}

static int
rte_table_hash_entry_delete_key8_lru(
	void *table,
	void *key,
	int *key_found,
	void *entry)
{
	struct rte_table_hash *f = table;
	struct rte_bucket_4_8 *bucket;
	uint64_t signature, mask;
	uint32_t bucket_index, i;

	signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket = (struct rte_bucket_4_8 *)
		&f->memory[bucket_index * f->bucket_size];

	/* Key is present in the bucket */
	for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
		uint64_t bucket_signature = bucket->signature;
		uint64_t *bucket_key = &bucket->key[i];

		if ((bucket_signature & mask) &&
			(keycmp(bucket_key, key, &f->key_mask) == 0)) {
			uint8_t *bucket_data = &bucket->data[i * f->entry_size];

			bucket->signature &= ~mask;
			*key_found = 1;
			if (entry)
				memcpy(entry, bucket_data, f->entry_size);

			return 0;
		}
	}

	/* Key is not present in the bucket */
	*key_found = 0;
	return 0;
}

static void *
rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size)
{
	struct rte_table_hash_params *p = params;
	struct rte_table_hash *f;
	uint64_t bucket_size, stack_size, total_size;
	uint32_t n_buckets_ext, i;

	/* Check input parameters */
	if ((check_params_create(p) != 0) ||
		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
		((sizeof(struct rte_bucket_4_8) % 64) != 0))
		return NULL;

	/*
	 * Table dimensioning
	 *
	 * Objective: Pick the number of bucket extensions (n_buckets_ext) so that
	 * it is guaranteed that n_keys keys can be stored in the table at any time.
	 *
	 * The worst case scenario takes place when all the n_keys keys fall into
	 * the same bucket. Actually, due to the KEYS_PER_BUCKET scheme, the worst
	 * case takes place when (n_keys - KEYS_PER_BUCKET + 1) keys fall into the
	 * same bucket, while the remaining (KEYS_PER_BUCKET - 1) keys each fall
	 * into a different bucket. This case defeats the purpose of the hash table.
	 * It indicates unsuitable f_hash or n_keys to n_buckets ratio.
	 *
	 * n_buckets_ext = n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1
	 */
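	/*
	 * Illustrative example (editor's addition): with n_keys = 1000 and
	 * KEYS_PER_BUCKET = 4, n_buckets_ext = 1000 / 4 + 3 = 253 extension
	 * buckets are reserved, enough to hold every key even in the worst-case
	 * distribution described above.
	 */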
	n_buckets_ext = p->n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1;

	/* Memory allocation */
	bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_8) +
		KEYS_PER_BUCKET * entry_size);
	stack_size = RTE_CACHE_LINE_ROUNDUP(n_buckets_ext * sizeof(uint32_t));
	total_size = sizeof(struct rte_table_hash) +
		(p->n_buckets + n_buckets_ext) * bucket_size + stack_size;

	if (total_size > SIZE_MAX) {
		RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
			"for hash table %s\n",
			__func__, total_size, p->name);
		return NULL;
	}

	f = rte_zmalloc_socket(p->name,
		(size_t)total_size,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (f == NULL) {
		RTE_LOG(ERR, TABLE,
			"%s: Cannot allocate %" PRIu64 " bytes "
			"for hash table %s\n",
			__func__, total_size, p->name);
		return NULL;
	}
	RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
		"is %" PRIu64 " bytes\n",
		__func__, p->name, total_size);

	/* Memory initialization */
	f->n_buckets = p->n_buckets;
	f->key_size = KEY_SIZE;
	f->entry_size = entry_size;
	f->bucket_size = bucket_size;
	f->key_offset = p->key_offset;
	f->f_hash = p->f_hash;
	f->seed = p->seed;

	f->n_buckets_ext = n_buckets_ext;
	f->stack_pos = n_buckets_ext;
	f->stack = (uint32_t *)
		&f->memory[(p->n_buckets + n_buckets_ext) * f->bucket_size];

	if (p->key_mask != NULL)
		f->key_mask = ((uint64_t *)p->key_mask)[0];
	else
		f->key_mask = 0xFFFFFFFFFFFFFFFFLLU;

	for (i = 0; i < n_buckets_ext; i++)
		f->stack[i] = i;

	return f;
}

static int
rte_table_hash_free_key8_ext(void *table)
{
	struct rte_table_hash *f = table;

	/* Check input parameters */
	if (f == NULL) {
		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
		return -EINVAL;
	}

	rte_free(f);
	return 0;
}

static int
rte_table_hash_entry_add_key8_ext(
	void *table,
	void *key,
	void *entry,
	int *key_found,
	void **entry_ptr)
{
	struct rte_table_hash *f = table;
	struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev;
	uint64_t signature;
	uint32_t bucket_index, i;

	signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket0 = (struct rte_bucket_4_8 *)
		&f->memory[bucket_index * f->bucket_size];

	/* Key is present in the bucket */
	for (bucket = bucket0; bucket != NULL; bucket = bucket->next) {
		uint64_t mask;

		for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
			uint64_t bucket_signature = bucket->signature;
			uint64_t *bucket_key = &bucket->key[i];

			if ((bucket_signature & mask) &&
				(keycmp(bucket_key, key, &f->key_mask) == 0)) {
				uint8_t *bucket_data = &bucket->data[i *
					f->entry_size];

				memcpy(bucket_data, entry, f->entry_size);
				*key_found = 1;
				*entry_ptr = (void *) bucket_data;
				return 0;
			}
		}
	}

	/* Key is not present in the bucket */
	for (bucket_prev = NULL, bucket = bucket0;
		bucket != NULL; bucket_prev = bucket, bucket = bucket->next) {
		uint64_t mask;

		for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
			uint64_t bucket_signature = bucket->signature;

			if ((bucket_signature & mask) == 0) {
				uint8_t *bucket_data = &bucket->data[i *
					f->entry_size];

				bucket->signature |= mask;
				keycpy(&bucket->key[i], key, &f->key_mask);
				memcpy(bucket_data, entry, f->entry_size);
				*key_found = 0;
				*entry_ptr = (void *) bucket_data;

				return 0;
			}
		}
	}

	/* Bucket full: extend bucket */
	if (f->stack_pos > 0) {
		bucket_index = f->stack[--f->stack_pos];

		bucket = (struct rte_bucket_4_8 *) &f->memory[(f->n_buckets +
			bucket_index) * f->bucket_size];
		bucket_prev->next = bucket;
		bucket_prev->next_valid = 1;

		bucket->signature = 1;
		keycpy(&bucket->key[0], key, &f->key_mask);
		memcpy(&bucket->data[0], entry, f->entry_size);
		*key_found = 0;
		*entry_ptr = (void *) &bucket->data[0];
		return 0;
	}

	return -ENOSPC;
}

static int
rte_table_hash_entry_delete_key8_ext(
	void *table,
	void *key,
	int *key_found,
	void *entry)
{
	struct rte_table_hash *f = table;
	struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev;
	uint64_t signature;
	uint32_t bucket_index, i;

	signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket0 = (struct rte_bucket_4_8 *)
		&f->memory[bucket_index * f->bucket_size];

	/* Key is present in the bucket */
	for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
		bucket_prev = bucket, bucket = bucket->next) {
		uint64_t mask;

		for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
			uint64_t bucket_signature = bucket->signature;
			uint64_t *bucket_key = &bucket->key[i];

			if ((bucket_signature & mask) &&
				(keycmp(bucket_key, key, &f->key_mask) == 0)) {
				uint8_t *bucket_data = &bucket->data[i *
					f->entry_size];

				bucket->signature &= ~mask;
				*key_found = 1;
				if (entry)
					memcpy(entry, bucket_data,
						f->entry_size);

				if ((bucket->signature == 0) &&
				    (bucket_prev != NULL)) {
					bucket_prev->next = bucket->next;
					bucket_prev->next_valid =
						bucket->next_valid;

					memset(bucket, 0,
						sizeof(struct rte_bucket_4_8));
					bucket_index = (((uint8_t *)bucket -
						(uint8_t *)f->memory)/f->bucket_size) - f->n_buckets;
					f->stack[f->stack_pos++] = bucket_index;
				}

				return 0;
			}
		}
	}

	/* Key is not present in the bucket */
	*key_found = 0;
	return 0;
}

#define lookup_key8_cmp(key_in, bucket, pos, f)			\
{								\
	uint64_t xor[4], signature, k;				\
								\
	signature = ~bucket->signature;				\
								\
	k = key_in[0] & f->key_mask;				\
	xor[0] = (k ^ bucket->key[0]) | (signature & 1);	\
	xor[1] = (k ^ bucket->key[1]) | (signature & 2);	\
	xor[2] = (k ^ bucket->key[2]) | (signature & 4);	\
	xor[3] = (k ^ bucket->key[3]) | (signature & 8);	\
								\
	pos = 4;						\
	if (xor[0] == 0)					\
		pos = 0;					\
	if (xor[1] == 0)					\
		pos = 1;					\
	if (xor[2] == 0)					\
		pos = 2;					\
	if (xor[3] == 0)					\
		pos = 3;					\
}
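/*
 * Editor's note on lookup_key8_cmp(): the bucket signature is inverted so
 * that, for an empty slot i, (signature & (1 << i)) is non-zero and forces
 * xor[i] to be non-zero as well; xor[i] can therefore only be zero when slot
 * i is both valid and an exact (masked) match. pos resolves to the matching
 * slot, or stays at 4 when no slot matches; the callers then test
 * (bucket->signature >> pos) & 1, and since bit 4 of the signature is never
 * set, the pos == 4 case is reported as a miss.
 */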

#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask, f)	\
{								\
	uint64_t pkt_mask;					\
	uint32_t key_offset = f->key_offset;			\
								\
	pkt0_index = __builtin_ctzll(pkts_mask);		\
	pkt_mask = 1LLU << pkt0_index;				\
	pkts_mask &= ~pkt_mask;					\
								\
	mbuf0 = pkts[pkt0_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));	\
}

#define lookup1_stage1(mbuf1, bucket1, f)			\
{								\
	uint64_t *key;						\
	uint64_t signature;					\
	uint32_t bucket_index;					\
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\
	signature = f->f_hash(key, &f->key_mask, KEY_SIZE, f->seed);	\
	bucket_index = signature & (f->n_buckets - 1);		\
	bucket1 = (struct rte_bucket_4_8 *)			\
		&f->memory[bucket_index * f->bucket_size];	\
	rte_prefetch0(bucket1);					\
}

#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2,		\
	pkts_mask_out, entries, f)				\
{								\
	void *a;						\
	uint64_t pkt_mask;					\
	uint64_t *key;						\
	uint32_t pos;						\
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
	lookup_key8_cmp(key, bucket2, pos, f);			\
								\
	pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	a = (void *) &bucket2->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt2_index] = a;				\
	lru_update(bucket2, pos);				\
}

#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out,\
	entries, buckets_mask, buckets, keys, f)		\
{								\
	struct rte_bucket_4_8 *bucket_next;			\
	void *a;						\
	uint64_t pkt_mask, bucket_mask;				\
	uint64_t *key;						\
	uint32_t pos;						\
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
	lookup_key8_cmp(key, bucket2, pos, f);			\
								\
	pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	a = (void *) &bucket2->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt2_index] = a;				\
								\
	bucket_mask = (~pkt_mask) & (bucket2->next_valid << pkt2_index);\
	buckets_mask |= bucket_mask;				\
	bucket_next = bucket2->next;				\
	buckets[pkt2_index] = bucket_next;			\
	keys[pkt2_index] = key;					\
}

#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, entries,\
	buckets_mask, f)					\
{								\
	struct rte_bucket_4_8 *bucket, *bucket_next;		\
	void *a;						\
	uint64_t pkt_mask, bucket_mask;				\
	uint64_t *key;						\
	uint32_t pos;						\
								\
	bucket = buckets[pkt_index];				\
	key = keys[pkt_index];					\
	lookup_key8_cmp(key, bucket, pos, f);			\
								\
	pkt_mask = ((bucket->signature >> pos) & 1LLU) << pkt_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	a = (void *) &bucket->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt_index] = a;					\
								\
	bucket_mask = (~pkt_mask) & (bucket->next_valid << pkt_index);\
	buckets_mask |= bucket_mask;				\
	bucket_next = bucket->next;				\
	rte_prefetch0(bucket_next);				\
	buckets[pkt_index] = bucket_next;			\
	keys[pkt_index] = key;					\
}

#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\
	pkts, pkts_mask, f)					\
{								\
	uint64_t pkt00_mask, pkt01_mask;			\
	uint32_t key_offset = f->key_offset;			\
								\
	pkt00_index = __builtin_ctzll(pkts_mask);		\
	pkt00_mask = 1LLU << pkt00_index;			\
	pkts_mask &= ~pkt00_mask;				\
								\
	mbuf00 = pkts[pkt00_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
								\
	pkt01_index = __builtin_ctzll(pkts_mask);		\
	pkt01_mask = 1LLU << pkt01_index;			\
	pkts_mask &= ~pkt01_mask;				\
								\
	mbuf01 = pkts[pkt01_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
}

#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\
	mbuf00, mbuf01, pkts, pkts_mask, f)			\
{								\
	uint64_t pkt00_mask, pkt01_mask;			\
	uint32_t key_offset = f->key_offset;			\
								\
	pkt00_index = __builtin_ctzll(pkts_mask);		\
	pkt00_mask = 1LLU << pkt00_index;			\
	pkts_mask &= ~pkt00_mask;				\
								\
	mbuf00 = pkts[pkt00_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
								\
	pkt01_index = __builtin_ctzll(pkts_mask);		\
	if (pkts_mask == 0)					\
		pkt01_index = pkt00_index;			\
								\
	pkt01_mask = 1LLU << pkt01_index;			\
	pkts_mask &= ~pkt01_mask;				\
								\
	mbuf01 = pkts[pkt01_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
}
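/*
 * Editor's note: the "_with_odd_support" variant handles an odd number of
 * remaining packets. When pkts_mask is already empty after taking the first
 * packet, the second slot is overridden with the first packet's index, so the
 * same packet is fed through both pipeline slots; processing it twice is
 * harmless because the same hit bit and entry pointer are produced both
 * times.
 */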

#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)	\
{								\
	uint64_t *key10, *key11;				\
	uint64_t signature10, signature11;			\
	uint32_t bucket10_index, bucket11_index;		\
	rte_table_hash_op_hash f_hash = f->f_hash;		\
	uint64_t seed = f->seed;				\
	uint32_t key_offset = f->key_offset;			\
								\
	key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, key_offset);\
	key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, key_offset);\
								\
	signature10 = f_hash(key10, &f->key_mask, KEY_SIZE, seed);	\
	bucket10_index = signature10 & (f->n_buckets - 1);	\
	bucket10 = (struct rte_bucket_4_8 *)			\
		&f->memory[bucket10_index * f->bucket_size];	\
	rte_prefetch0(bucket10);				\
								\
	signature11 = f_hash(key11, &f->key_mask, KEY_SIZE, seed);	\
	bucket11_index = signature11 & (f->n_buckets - 1);	\
	bucket11 = (struct rte_bucket_4_8 *)			\
		&f->memory[bucket11_index * f->bucket_size];	\
	rte_prefetch0(bucket11);				\
}

#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\
	bucket20, bucket21, pkts_mask_out, entries, f)		\
{								\
	void *a20, *a21;					\
	uint64_t pkt20_mask, pkt21_mask;			\
	uint64_t *key20, *key21;				\
	uint32_t pos20, pos21;					\
								\
	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
								\
	lookup_key8_cmp(key20, bucket20, pos20, f);		\
	lookup_key8_cmp(key21, bucket21, pos21, f);		\
								\
	pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\
	pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\
	pkts_mask_out |= pkt20_mask | pkt21_mask;		\
								\
	a20 = (void *) &bucket20->data[pos20 * f->entry_size];	\
	a21 = (void *) &bucket21->data[pos21 * f->entry_size];	\
	rte_prefetch0(a20);					\
	rte_prefetch0(a21);					\
	entries[pkt20_index] = a20;				\
	entries[pkt21_index] = a21;				\
	lru_update(bucket20, pos20);				\
	lru_update(bucket21, pos21);				\
}

#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \
	bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f)\
{								\
	struct rte_bucket_4_8 *bucket20_next, *bucket21_next;	\
	void *a20, *a21;					\
	uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\
	uint64_t *key20, *key21;				\
	uint32_t pos20, pos21;					\
								\
	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
								\
	lookup_key8_cmp(key20, bucket20, pos20, f);		\
	lookup_key8_cmp(key21, bucket21, pos21, f);		\
								\
	pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\
	pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\
	pkts_mask_out |= pkt20_mask | pkt21_mask;		\
								\
	a20 = (void *) &bucket20->data[pos20 * f->entry_size];	\
	a21 = (void *) &bucket21->data[pos21 * f->entry_size];	\
	rte_prefetch0(a20);					\
	rte_prefetch0(a21);					\
	entries[pkt20_index] = a20;				\
	entries[pkt21_index] = a21;				\
								\
	bucket20_mask = (~pkt20_mask) & (bucket20->next_valid << pkt20_index);\
	bucket21_mask = (~pkt21_mask) & (bucket21->next_valid << pkt21_index);\
	buckets_mask |= bucket20_mask | bucket21_mask;		\
	bucket20_next = bucket20->next;				\
	bucket21_next = bucket21->next;				\
	buckets[pkt20_index] = bucket20_next;			\
	buckets[pkt21_index] = bucket21_next;			\
	keys[pkt20_index] = key20;				\
	keys[pkt21_index] = key21;				\
}
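/*
 * Editor's overview of the lookup pipeline (not part of the original source):
 * stage 0 picks the next packet(s) from pkts_mask and prefetches the key area
 * of the mbuf metadata, stage 1 hashes the key and prefetches the target
 * bucket, and stage 2 compares the key against the bucket and records the hit
 * bit and entry pointer. The lookup functions below process two packets per
 * stage and keep three stages in flight, so that the prefetch issued for one
 * pair has time to complete before that pair reaches the next stage; bursts
 * of fewer than 5 packets fall back to the single-packet macros. In the "ext"
 * variant, packets whose first bucket misses but has a valid chained bucket
 * are collected in buckets_mask and reprocessed by lookup_grinder().
 */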

static int
rte_table_hash_lookup_key8_lru(
	void *table,
	struct rte_mbuf **pkts,
	uint64_t pkts_mask,
	uint64_t *lookup_hit_mask,
	void **entries)
{
	struct rte_table_hash *f = (struct rte_table_hash *) table;
	struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
	uint32_t pkt00_index, pkt01_index, pkt10_index;
	uint32_t pkt11_index, pkt20_index, pkt21_index;
	uint64_t pkts_mask_out = 0;

	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
	RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);

	/* Cannot run the pipeline with less than 5 packets */
	if (__builtin_popcountll(pkts_mask) < 5) {
		for ( ; pkts_mask; ) {
			struct rte_bucket_4_8 *bucket;
			struct rte_mbuf *mbuf;
			uint32_t pkt_index;

			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
			lookup1_stage1(mbuf, bucket, f);
			lookup1_stage2_lru(pkt_index, mbuf, bucket,
				pkts_mask_out, entries, f);
		}

		*lookup_hit_mask = pkts_mask_out;
		RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
		return 0;
	}

	/*
	 * Pipeline fill
	 *
	 */
	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline feed */
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/*
	 * Pipeline run
	 *
	 */
	for ( ; pkts_mask; ) {
		/* Pipeline feed */
		bucket20 = bucket10;
		bucket21 = bucket11;
		mbuf20 = mbuf10;
		mbuf21 = mbuf11;
		mbuf10 = mbuf00;
		mbuf11 = mbuf01;
		pkt20_index = pkt10_index;
		pkt21_index = pkt11_index;
		pkt10_index = pkt00_index;
		pkt11_index = pkt01_index;

		/* Pipeline stage 0 */
		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
			mbuf00, mbuf01, pkts, pkts_mask, f);

		/* Pipeline stage 1 */
		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

		/* Pipeline stage 2 */
		lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
			bucket20, bucket21, pkts_mask_out, entries, f);
	}

	/*
	 * Pipeline flush
	 *
	 */
	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/* Pipeline stage 2 */
	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries, f);

	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;

	/* Pipeline stage 2 */
	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries, f);

	*lookup_hit_mask = pkts_mask_out;
	RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
	return 0;
} /* lookup LRU */

static int
rte_table_hash_lookup_key8_ext(
	void *table,
	struct rte_mbuf **pkts,
	uint64_t pkts_mask,
	uint64_t *lookup_hit_mask,
	void **entries)
{
	struct rte_table_hash *f = (struct rte_table_hash *) table;
	struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
	uint32_t pkt00_index, pkt01_index, pkt10_index;
	uint32_t pkt11_index, pkt20_index, pkt21_index;
	uint64_t pkts_mask_out = 0, buckets_mask = 0;
	struct rte_bucket_4_8 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
	uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];

	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
	RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);

	/* Cannot run the pipeline with less than 5 packets */
	if (__builtin_popcountll(pkts_mask) < 5) {
		for ( ; pkts_mask; ) {
			struct rte_bucket_4_8 *bucket;
			struct rte_mbuf *mbuf;
			uint32_t pkt_index;

			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
			lookup1_stage1(mbuf, bucket, f);
			lookup1_stage2_ext(pkt_index, mbuf, bucket,
				pkts_mask_out, entries, buckets_mask,
				buckets, keys, f);
		}

		goto grind_next_buckets;
	}

	/*
	 * Pipeline fill
	 *
	 */
	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline feed */
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/*
	 * Pipeline run
	 *
	 */
	for ( ; pkts_mask; ) {
		/* Pipeline feed */
		bucket20 = bucket10;
		bucket21 = bucket11;
		mbuf20 = mbuf10;
		mbuf21 = mbuf11;
		mbuf10 = mbuf00;
		mbuf11 = mbuf01;
		pkt20_index = pkt10_index;
		pkt21_index = pkt11_index;
		pkt10_index = pkt00_index;
		pkt11_index = pkt01_index;

		/* Pipeline stage 0 */
		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
			mbuf00, mbuf01, pkts, pkts_mask, f);

		/* Pipeline stage 1 */
		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

		/* Pipeline stage 2 */
		lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
			bucket20, bucket21, pkts_mask_out, entries,
			buckets_mask, buckets, keys, f);
	}

	/*
	 * Pipeline flush
	 *
	 */
	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/* Pipeline stage 2 */
	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries,
		buckets_mask, buckets, keys, f);

	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;

	/* Pipeline stage 2 */
	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries,
		buckets_mask, buckets, keys, f);

grind_next_buckets:
	/* Grind next buckets */
	for ( ; buckets_mask; ) {
		uint64_t buckets_mask_next = 0;

		for ( ; buckets_mask; ) {
			uint64_t pkt_mask;
			uint32_t pkt_index;

			pkt_index = __builtin_ctzll(buckets_mask);
			pkt_mask = 1LLU << pkt_index;
			buckets_mask &= ~pkt_mask;

			lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
				entries, buckets_mask_next, f);
		}

		buckets_mask = buckets_mask_next;
	}

	*lookup_hit_mask = pkts_mask_out;
	RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
	return 0;
} /* lookup EXT */

static int
rte_table_hash_key8_stats_read(void *table, struct rte_table_stats *stats, int clear)
{
	struct rte_table_hash *t = table;

	if (stats != NULL)
		memcpy(stats, &t->stats, sizeof(t->stats));

	if (clear)
		memset(&t->stats, 0, sizeof(t->stats));

	return 0;
}

struct rte_table_ops rte_table_hash_key8_lru_ops = {
	.f_create = rte_table_hash_create_key8_lru,
	.f_free = rte_table_hash_free_key8_lru,
	.f_add = rte_table_hash_entry_add_key8_lru,
	.f_delete = rte_table_hash_entry_delete_key8_lru,
	.f_add_bulk = NULL,
	.f_delete_bulk = NULL,
	.f_lookup = rte_table_hash_lookup_key8_lru,
	.f_stats = rte_table_hash_key8_stats_read,
};

struct rte_table_ops rte_table_hash_key8_ext_ops = {
	.f_create = rte_table_hash_create_key8_ext,
	.f_free = rte_table_hash_free_key8_ext,
	.f_add = rte_table_hash_entry_add_key8_ext,
	.f_delete = rte_table_hash_entry_delete_key8_ext,
	.f_add_bulk = NULL,
	.f_delete_bulk = NULL,
	.f_lookup = rte_table_hash_lookup_key8_ext,
	.f_stats = rte_table_hash_key8_stats_read,
};
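
/*
 * Editor's usage sketch (illustrative, not part of the original source; the
 * exact rte_table_hash_params layout should be checked against
 * rte_table_hash.h for this DPDK release, and the names example_hash,
 * APP_METADATA_OFFSET and struct my_entry are hypothetical). A table is
 * normally used through the ops structure, either directly or via
 * librte_pipeline:
 *
 *	static uint64_t
 *	example_hash(void *key, void *key_mask, uint32_t key_size, uint64_t seed)
 *	{
 *		uint64_t k = *(uint64_t *)key & *(uint64_t *)key_mask;
 *
 *		return (k * 2654435761LLU) ^ seed;	// toy hash, for illustration only
 *	}
 *
 *	struct rte_table_hash_params params = {
 *		.name = "flow_table",
 *		.key_size = 8,
 *		.key_offset = APP_METADATA_OFFSET,	// where the 8-byte key lives in mbuf metadata
 *		.key_mask = NULL,			// NULL selects the all-ones mask
 *		.n_keys = 1 << 16,
 *		.n_buckets = 1 << 14,			// must be a power of 2
 *		.f_hash = example_hash,
 *		.seed = 0,
 *	};
 *
 *	void *t = rte_table_hash_key8_lru_ops.f_create(&params, rte_socket_id(),
 *		sizeof(struct my_entry));
 *	int key_found;
 *	void *entry_ptr;
 *
 *	rte_table_hash_key8_lru_ops.f_add(t, &key, &my_entry, &key_found, &entry_ptr);
 *	rte_table_hash_key8_lru_ops.f_lookup(t, pkts, pkts_mask, &hit_mask, entries);
 */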