New upstream version 17.11-rc3
[deb_dpdk.git] / lib / librte_table / rte_table_hash_key8.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <string.h>
34 #include <stdio.h>
35
36 #include <rte_common.h>
37 #include <rte_mbuf.h>
38 #include <rte_memory.h>
39 #include <rte_malloc.h>
40 #include <rte_log.h>
41
42 #include "rte_table_hash.h"
43 #include "rte_lru.h"
44
/* Fixed key size handled by this table variant: one 64-bit word. */
#define KEY_SIZE						8

/* Number of key slots per bucket (and per bucket extension). */
#define KEYS_PER_BUCKET					4

#ifdef RTE_TABLE_STATS_COLLECT

/* Per-table packet counters, compiled in only when stats collection
 * is enabled at build time; otherwise these expand to nothing. */
#define RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(table, val) \
	table->stats.n_pkts_in += val
#define RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(table, val) \
	table->stats.n_pkts_lookup_miss += val

#else

#define RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(table, val)
#define RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(table, val)

#endif
62
/*
 * Hash bucket holding up to 4 eight-byte keys.
 * Sized so the header + keys fill exactly one 64-byte cache line;
 * the entry data area starts on the next cache line.
 */
struct rte_bucket_4_8 {
	/* Cache line 0 */
	uint64_t signature;  /* Bit i set => key slot i is valid (bits 0..3). */
	uint64_t lru_list;   /* Packed LRU ordering of the 4 slots. */
	struct rte_bucket_4_8 *next; /* Next bucket in extension chain. */
	uint64_t next_valid;         /* Non-zero when *next* is valid. */

	uint64_t key[4];     /* One 8-byte key per slot. */

	/* Cache line 1: per-slot entry data, entry_size bytes per slot.
	 * Zero-length array is the project-wide trailing-data idiom. */
	uint8_t data[0];
};
75
76 struct rte_table_hash {
77         struct rte_table_stats stats;
78
79         /* Input parameters */
80         uint32_t n_buckets;
81         uint32_t key_size;
82         uint32_t entry_size;
83         uint32_t bucket_size;
84         uint32_t key_offset;
85         uint64_t key_mask;
86         rte_table_hash_op_hash f_hash;
87         uint64_t seed;
88
89         /* Extendible buckets */
90         uint32_t n_buckets_ext;
91         uint32_t stack_pos;
92         uint32_t *stack;
93
94         /* Lookup table */
95         uint8_t memory[0] __rte_cache_aligned;
96 };
97
/*
 * Compare stored key *a* against lookup key *b* under mask *b_mask*.
 * All pointers must reference at least 8 readable bytes.
 *
 * Returns 0 when equal (stored key matches masked lookup key),
 * non-zero otherwise.
 */
static int
keycmp(void *a, void *b, void *b_mask)
{
	uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask;

	return a64[0] != (b64[0] & b_mask64[0]);
}
105
/*
 * Store the masked lookup key: dst[0] = src[0] & src_mask[0].
 * Keys are stored pre-masked so keycmp() needs to mask only
 * the lookup-side key.
 */
static void
keycpy(void *dst, void *src, void *src_mask)
{
	uint64_t *dst64 = dst, *src64 = src, *src_mask64 = src_mask;

	dst64[0] = src64[0] & src_mask64[0];
}
113
114 static int
115 check_params_create(struct rte_table_hash_params *params)
116 {
117         /* name */
118         if (params->name == NULL) {
119                 RTE_LOG(ERR, TABLE, "%s: name invalid value\n", __func__);
120                 return -EINVAL;
121         }
122
123         /* key_size */
124         if (params->key_size != KEY_SIZE) {
125                 RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
126                 return -EINVAL;
127         }
128
129         /* n_keys */
130         if (params->n_keys == 0) {
131                 RTE_LOG(ERR, TABLE, "%s: n_keys is zero\n", __func__);
132                 return -EINVAL;
133         }
134
135         /* n_buckets */
136         if ((params->n_buckets == 0) ||
137                 (!rte_is_power_of_2(params->n_buckets))) {
138                 RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
139                 return -EINVAL;
140         }
141
142         /* f_hash */
143         if (params->f_hash == NULL) {
144                 RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
145                         __func__);
146                 return -EINVAL;
147         }
148
149         return 0;
150 }
151
152 static void *
153 rte_table_hash_create_key8_lru(void *params, int socket_id, uint32_t entry_size)
154 {
155         struct rte_table_hash_params *p = params;
156         struct rte_table_hash *f;
157         uint64_t bucket_size, total_size;
158         uint32_t n_buckets, i;
159
160         /* Check input parameters */
161         if ((check_params_create(p) != 0) ||
162                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
163                 ((sizeof(struct rte_bucket_4_8) % 64) != 0))
164                 return NULL;
165
166         /*
167          * Table dimensioning
168          *
169          * Objective: Pick the number of buckets (n_buckets) so that there a chance
170          * to store n_keys keys in the table.
171          *
172          * Note: Since the buckets do not get extended, it is not possible to
173          * guarantee that n_keys keys can be stored in the table at any time. In the
174          * worst case scenario when all the n_keys fall into the same bucket, only
175          * a maximum of KEYS_PER_BUCKET keys will be stored in the table. This case
176          * defeats the purpose of the hash table. It indicates unsuitable f_hash or
177          * n_keys to n_buckets ratio.
178          *
179          * MIN(n_buckets) = (n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET
180          */
181         n_buckets = rte_align32pow2(
182                 (p->n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET);
183         n_buckets = RTE_MAX(n_buckets, p->n_buckets);
184
185         /* Memory allocation */
186         bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_8) +
187                 KEYS_PER_BUCKET * entry_size);
188         total_size = sizeof(struct rte_table_hash) + n_buckets * bucket_size;
189
190         if (total_size > SIZE_MAX) {
191                 RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes"
192                         " for hash table %s\n",
193                         __func__, total_size, p->name);
194                 return NULL;
195         }
196
197         f = rte_zmalloc_socket(p->name,
198                 (size_t)total_size,
199                 RTE_CACHE_LINE_SIZE,
200                 socket_id);
201         if (f == NULL) {
202                 RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes"
203                         " for hash table %s\n",
204                         __func__, total_size, p->name);
205                 return NULL;
206         }
207
208         RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
209                 "is %" PRIu64 " bytes\n",
210                 __func__, p->name, total_size);
211
212         /* Memory initialization */
213         f->n_buckets = n_buckets;
214         f->key_size = KEY_SIZE;
215         f->entry_size = entry_size;
216         f->bucket_size = bucket_size;
217         f->key_offset = p->key_offset;
218         f->f_hash = p->f_hash;
219         f->seed = p->seed;
220
221         if (p->key_mask != NULL)
222                 f->key_mask = ((uint64_t *)p->key_mask)[0];
223         else
224                 f->key_mask = 0xFFFFFFFFFFFFFFFFLLU;
225
226         for (i = 0; i < n_buckets; i++) {
227                 struct rte_bucket_4_8 *bucket;
228
229                 bucket = (struct rte_bucket_4_8 *) &f->memory[i *
230                         f->bucket_size];
231                 bucket->lru_list = 0x0000000100020003LLU;
232         }
233
234         return f;
235 }
236
237 static int
238 rte_table_hash_free_key8_lru(void *table)
239 {
240         struct rte_table_hash *f = table;
241
242         /* Check input parameters */
243         if (f == NULL) {
244                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
245                 return -EINVAL;
246         }
247
248         rte_free(f);
249         return 0;
250 }
251
252 static int
253 rte_table_hash_entry_add_key8_lru(
254         void *table,
255         void *key,
256         void *entry,
257         int *key_found,
258         void **entry_ptr)
259 {
260         struct rte_table_hash *f = table;
261         struct rte_bucket_4_8 *bucket;
262         uint64_t signature, mask, pos;
263         uint32_t bucket_index, i;
264
265         signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
266         bucket_index = signature & (f->n_buckets - 1);
267         bucket = (struct rte_bucket_4_8 *)
268                 &f->memory[bucket_index * f->bucket_size];
269
270         /* Key is present in the bucket */
271         for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
272                 uint64_t bucket_signature = bucket->signature;
273                 uint64_t *bucket_key = &bucket->key[i];
274
275                 if ((bucket_signature & mask) &&
276                         (keycmp(bucket_key, key, &f->key_mask) == 0)) {
277                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
278
279                         memcpy(bucket_data, entry, f->entry_size);
280                         lru_update(bucket, i);
281                         *key_found = 1;
282                         *entry_ptr = (void *) bucket_data;
283                         return 0;
284                 }
285         }
286
287         /* Key is not present in the bucket */
288         for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
289                 uint64_t bucket_signature = bucket->signature;
290
291                 if ((bucket_signature & mask) == 0) {
292                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
293
294                         bucket->signature |= mask;
295                         keycpy(&bucket->key[i], key, &f->key_mask);
296                         memcpy(bucket_data, entry, f->entry_size);
297                         lru_update(bucket, i);
298                         *key_found = 0;
299                         *entry_ptr = (void *) bucket_data;
300
301                         return 0;
302                 }
303         }
304
305         /* Bucket full: replace LRU entry */
306         pos = lru_pos(bucket);
307         keycpy(&bucket->key[pos], key, &f->key_mask);
308         memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
309         lru_update(bucket, pos);
310         *key_found = 0;
311         *entry_ptr = (void *) &bucket->data[pos * f->entry_size];
312
313         return 0;
314 }
315
316 static int
317 rte_table_hash_entry_delete_key8_lru(
318         void *table,
319         void *key,
320         int *key_found,
321         void *entry)
322 {
323         struct rte_table_hash *f = table;
324         struct rte_bucket_4_8 *bucket;
325         uint64_t signature, mask;
326         uint32_t bucket_index, i;
327
328         signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
329         bucket_index = signature & (f->n_buckets - 1);
330         bucket = (struct rte_bucket_4_8 *)
331                 &f->memory[bucket_index * f->bucket_size];
332
333         /* Key is present in the bucket */
334         for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
335                 uint64_t bucket_signature = bucket->signature;
336                 uint64_t *bucket_key = &bucket->key[i];
337
338                 if ((bucket_signature & mask) &&
339                         (keycmp(bucket_key, key, &f->key_mask) == 0)) {
340                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
341
342                         bucket->signature &= ~mask;
343                         *key_found = 1;
344                         if (entry)
345                                 memcpy(entry, bucket_data, f->entry_size);
346
347                         return 0;
348                 }
349         }
350
351         /* Key is not present in the bucket */
352         *key_found = 0;
353         return 0;
354 }
355
356 static void *
357 rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size)
358 {
359         struct rte_table_hash_params *p = params;
360         struct rte_table_hash *f;
361         uint64_t bucket_size, stack_size, total_size;
362         uint32_t n_buckets_ext, i;
363
364         /* Check input parameters */
365         if ((check_params_create(p) != 0) ||
366                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
367                 ((sizeof(struct rte_bucket_4_8) % 64) != 0))
368                 return NULL;
369
370         /*
371          * Table dimensioning
372          *
373          * Objective: Pick the number of bucket extensions (n_buckets_ext) so that
374          * it is guaranteed that n_keys keys can be stored in the table at any time.
375          *
376          * The worst case scenario takes place when all the n_keys keys fall into
377          * the same bucket. Actually, due to the KEYS_PER_BUCKET scheme, the worst
378          * case takes place when (n_keys - KEYS_PER_BUCKET + 1) keys fall into the
379          * same bucket, while the remaining (KEYS_PER_BUCKET - 1) keys each fall
380          * into a different bucket. This case defeats the purpose of the hash table.
381          * It indicates unsuitable f_hash or n_keys to n_buckets ratio.
382          *
383          * n_buckets_ext = n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1
384          */
385         n_buckets_ext = p->n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1;
386
387         /* Memory allocation */
388         bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_8) +
389                 KEYS_PER_BUCKET * entry_size);
390         stack_size = RTE_CACHE_LINE_ROUNDUP(n_buckets_ext * sizeof(uint32_t));
391         total_size = sizeof(struct rte_table_hash) +
392                 (p->n_buckets + n_buckets_ext) * bucket_size + stack_size;
393
394         if (total_size > SIZE_MAX) {
395                 RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
396                         "for hash table %s\n",
397                         __func__, total_size, p->name);
398                 return NULL;
399         }
400
401         f = rte_zmalloc_socket(p->name,
402                 (size_t)total_size,
403                 RTE_CACHE_LINE_SIZE,
404                 socket_id);
405         if (f == NULL) {
406                 RTE_LOG(ERR, TABLE,
407                         "%s: Cannot allocate %" PRIu64 " bytes "
408                         "for hash table %s\n",
409                         __func__, total_size, p->name);
410                 return NULL;
411         }
412         RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
413                 "is %" PRIu64 " bytes\n",
414                 __func__, p->name, total_size);
415
416         /* Memory initialization */
417         f->n_buckets = p->n_buckets;
418         f->key_size = KEY_SIZE;
419         f->entry_size = entry_size;
420         f->bucket_size = bucket_size;
421         f->key_offset = p->key_offset;
422         f->f_hash = p->f_hash;
423         f->seed = p->seed;
424
425         f->n_buckets_ext = n_buckets_ext;
426         f->stack_pos = n_buckets_ext;
427         f->stack = (uint32_t *)
428                 &f->memory[(p->n_buckets + n_buckets_ext) * f->bucket_size];
429
430         if (p->key_mask != NULL)
431                 f->key_mask = ((uint64_t *)p->key_mask)[0];
432         else
433                 f->key_mask = 0xFFFFFFFFFFFFFFFFLLU;
434
435         for (i = 0; i < n_buckets_ext; i++)
436                 f->stack[i] = i;
437
438         return f;
439 }
440
441 static int
442 rte_table_hash_free_key8_ext(void *table)
443 {
444         struct rte_table_hash *f = table;
445
446         /* Check input parameters */
447         if (f == NULL) {
448                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
449                 return -EINVAL;
450         }
451
452         rte_free(f);
453         return 0;
454 }
455
456 static int
457 rte_table_hash_entry_add_key8_ext(
458         void *table,
459         void *key,
460         void *entry,
461         int *key_found,
462         void **entry_ptr)
463 {
464         struct rte_table_hash *f = table;
465         struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev;
466         uint64_t signature;
467         uint32_t bucket_index, i;
468
469         signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
470         bucket_index = signature & (f->n_buckets - 1);
471         bucket0 = (struct rte_bucket_4_8 *)
472                 &f->memory[bucket_index * f->bucket_size];
473
474         /* Key is present in the bucket */
475         for (bucket = bucket0; bucket != NULL; bucket = bucket->next) {
476                 uint64_t mask;
477
478                 for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
479                         uint64_t bucket_signature = bucket->signature;
480                         uint64_t *bucket_key = &bucket->key[i];
481
482                         if ((bucket_signature & mask) &&
483                                 (keycmp(bucket_key, key, &f->key_mask) == 0)) {
484                                 uint8_t *bucket_data = &bucket->data[i *
485                                         f->entry_size];
486
487                                 memcpy(bucket_data, entry, f->entry_size);
488                                 *key_found = 1;
489                                 *entry_ptr = (void *) bucket_data;
490                                 return 0;
491                         }
492                 }
493         }
494
495         /* Key is not present in the bucket */
496         for (bucket_prev = NULL, bucket = bucket0;
497                 bucket != NULL; bucket_prev = bucket, bucket = bucket->next) {
498                 uint64_t mask;
499
500                 for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
501                         uint64_t bucket_signature = bucket->signature;
502
503                         if ((bucket_signature & mask) == 0) {
504                                 uint8_t *bucket_data = &bucket->data[i *
505                                         f->entry_size];
506
507                                 bucket->signature |= mask;
508                                 keycpy(&bucket->key[i], key, &f->key_mask);
509                                 memcpy(bucket_data, entry, f->entry_size);
510                                 *key_found = 0;
511                                 *entry_ptr = (void *) bucket_data;
512
513                                 return 0;
514                         }
515                 }
516         }
517
518         /* Bucket full: extend bucket */
519         if (f->stack_pos > 0) {
520                 bucket_index = f->stack[--f->stack_pos];
521
522                 bucket = (struct rte_bucket_4_8 *) &f->memory[(f->n_buckets +
523                         bucket_index) * f->bucket_size];
524                 bucket_prev->next = bucket;
525                 bucket_prev->next_valid = 1;
526
527                 bucket->signature = 1;
528                 keycpy(&bucket->key[0], key, &f->key_mask);
529                 memcpy(&bucket->data[0], entry, f->entry_size);
530                 *key_found = 0;
531                 *entry_ptr = (void *) &bucket->data[0];
532                 return 0;
533         }
534
535         return -ENOSPC;
536 }
537
538 static int
539 rte_table_hash_entry_delete_key8_ext(
540         void *table,
541         void *key,
542         int *key_found,
543         void *entry)
544 {
545         struct rte_table_hash *f = table;
546         struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev;
547         uint64_t signature;
548         uint32_t bucket_index, i;
549
550         signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
551         bucket_index = signature & (f->n_buckets - 1);
552         bucket0 = (struct rte_bucket_4_8 *)
553                 &f->memory[bucket_index * f->bucket_size];
554
555         /* Key is present in the bucket */
556         for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
557                 bucket_prev = bucket, bucket = bucket->next) {
558                 uint64_t mask;
559
560                 for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
561                         uint64_t bucket_signature = bucket->signature;
562                         uint64_t *bucket_key = &bucket->key[i];
563
564                         if ((bucket_signature & mask) &&
565                                 (keycmp(bucket_key, key, &f->key_mask) == 0)) {
566                                 uint8_t *bucket_data = &bucket->data[i *
567                                         f->entry_size];
568
569                                 bucket->signature &= ~mask;
570                                 *key_found = 1;
571                                 if (entry)
572                                         memcpy(entry, bucket_data,
573                                                 f->entry_size);
574
575                                 if ((bucket->signature == 0) &&
576                                     (bucket_prev != NULL)) {
577                                         bucket_prev->next = bucket->next;
578                                         bucket_prev->next_valid =
579                                                 bucket->next_valid;
580
581                                         memset(bucket, 0,
582                                                 sizeof(struct rte_bucket_4_8));
583                                         bucket_index = (((uint8_t *)bucket -
584                                                 (uint8_t *)f->memory)/f->bucket_size) - f->n_buckets;
585                                         f->stack[f->stack_pos++] = bucket_index;
586                                 }
587
588                                 return 0;
589                         }
590                 }
591         }
592
593         /* Key is not present in the bucket */
594         *key_found = 0;
595         return 0;
596 }
597
/*
 * Match key_in against the 4 keys of *bucket*.
 * Sets pos to the matching slot index (0-3), or 4 on no match.
 * Invalid slots are rejected by OR-ing in their (inverted) signature
 * bit, which forces a non-zero xor word.
 */
#define lookup_key8_cmp(key_in, bucket, pos, f)			\
{								\
	uint64_t xor[4], signature, k;				\
								\
	signature = ~bucket->signature;				\
								\
	k = key_in[0] & f->key_mask;				\
	xor[0] = (k ^ bucket->key[0]) | (signature & 1);	\
	xor[1] = (k ^ bucket->key[1]) | (signature & 2);	\
	xor[2] = (k ^ bucket->key[2]) | (signature & 4);	\
	xor[3] = (k ^ bucket->key[3]) | (signature & 8);	\
								\
	pos = 4;						\
	if (xor[0] == 0)					\
		pos = 0;					\
	if (xor[1] == 0)					\
		pos = 1;					\
	if (xor[2] == 0)					\
		pos = 2;					\
	if (xor[3] == 0)					\
		pos = 3;					\
}
620
/*
 * Pipeline stage 0 (single packet): pop the lowest set bit of
 * pkts_mask as the packet index and prefetch its key metadata.
 */
#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask, f)	\
{								\
	uint64_t pkt_mask;					\
	uint32_t key_offset = f->key_offset;			\
								\
	pkt0_index = __builtin_ctzll(pkts_mask);		\
	pkt_mask = 1LLU << pkt0_index;				\
	pkts_mask &= ~pkt_mask;					\
								\
	mbuf0 = pkts[pkt0_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));	\
}
633
/*
 * Pipeline stage 1 (single packet): hash the key, locate the primary
 * bucket and prefetch it.
 */
#define lookup1_stage1(mbuf1, bucket1, f)			\
{								\
	uint64_t *key;						\
	uint64_t signature;					\
	uint32_t bucket_index;					\
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\
	signature = f->f_hash(key, &f->key_mask, KEY_SIZE, f->seed);	\
	bucket_index = signature & (f->n_buckets - 1);		\
	bucket1 = (struct rte_bucket_4_8 *)			\
		&f->memory[bucket_index * f->bucket_size];	\
	rte_prefetch0(bucket1);					\
}
647
/*
 * Pipeline stage 2, LRU variant (single packet): match the key in its
 * bucket, record the hit bit and entry pointer, touch the LRU list.
 */
#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2,		\
	pkts_mask_out, entries, f)				\
{								\
	void *a;						\
	uint64_t pkt_mask;					\
	uint64_t *key;						\
	uint32_t pos;						\
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
	lookup_key8_cmp(key, bucket2, pos, f);			\
								\
	pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	a = (void *) &bucket2->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt2_index] = a;				\
	lru_update(bucket2, pos);				\
}
667
/*
 * Pipeline stage 2, ext variant (single packet): as the LRU variant,
 * but on a miss with a valid next bucket, queue the packet for the
 * bucket-chain grinder via buckets_mask/buckets/keys.
 */
#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out,\
	entries, buckets_mask, buckets, keys, f)		\
{								\
	struct rte_bucket_4_8 *bucket_next;			\
	void *a;						\
	uint64_t pkt_mask, bucket_mask;				\
	uint64_t *key;						\
	uint32_t pos;						\
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
	lookup_key8_cmp(key, bucket2, pos, f);			\
								\
	pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	a = (void *) &bucket2->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt2_index] = a;				\
								\
	bucket_mask = (~pkt_mask) & (bucket2->next_valid << pkt2_index);\
	buckets_mask |= bucket_mask;				\
	bucket_next = bucket2->next;				\
	buckets[pkt2_index] = bucket_next;			\
	keys[pkt2_index] = key;					\
}
693
/*
 * Walk one link of a bucket extension chain for a pending packet:
 * match the key in the current bucket, record hit/entry, and requeue
 * the packet (with the next bucket prefetched) if the chain goes on.
 */
#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, entries,\
	buckets_mask, f)					\
{								\
	struct rte_bucket_4_8 *bucket, *bucket_next;		\
	void *a;						\
	uint64_t pkt_mask, bucket_mask;				\
	uint64_t *key;						\
	uint32_t pos;						\
								\
	bucket = buckets[pkt_index];				\
	key = keys[pkt_index];					\
	lookup_key8_cmp(key, bucket, pos, f);			\
								\
	pkt_mask = ((bucket->signature >> pos) & 1LLU) << pkt_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	a = (void *) &bucket->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt_index] = a;					\
								\
	bucket_mask = (~pkt_mask) & (bucket->next_valid << pkt_index);\
	buckets_mask |= bucket_mask;				\
	bucket_next = bucket->next;				\
	rte_prefetch0(bucket_next);				\
	buckets[pkt_index] = bucket_next;			\
	keys[pkt_index] = key;					\
}
721
/*
 * Pipeline stage 0 (two packets): pop two packet indices from
 * pkts_mask and prefetch both packets' key metadata. Requires at
 * least two bits set in pkts_mask.
 */
#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\
	pkts, pkts_mask, f)					\
{								\
	uint64_t pkt00_mask, pkt01_mask;			\
	uint32_t key_offset = f->key_offset;			\
								\
	pkt00_index = __builtin_ctzll(pkts_mask);		\
	pkt00_mask = 1LLU << pkt00_index;			\
	pkts_mask &= ~pkt00_mask;				\
								\
	mbuf00 = pkts[pkt00_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
								\
	pkt01_index = __builtin_ctzll(pkts_mask);		\
	pkt01_mask = 1LLU << pkt01_index;			\
	pkts_mask &= ~pkt01_mask;				\
								\
	mbuf01 = pkts[pkt01_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
}
742
/*
 * Same as lookup2_stage0, but tolerant of an odd packet count: if only one
 * packet was left in pkts_mask, the second slot is filled by duplicating the
 * first packet (pkt01_index = pkt00_index), so the downstream stages always
 * process a pair. The duplicate lookup is harmless — it recomputes the same
 * result and sets the same bit in the output mask.
 */
#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\
	mbuf00, mbuf01, pkts, pkts_mask, f)                     \
{                                                               \
	uint64_t pkt00_mask, pkt01_mask;                        \
	uint32_t key_offset = f->key_offset;            \
								\
	pkt00_index = __builtin_ctzll(pkts_mask);               \
	pkt00_mask = 1LLU << pkt00_index;                       \
	pkts_mask &= ~pkt00_mask;                               \
								\
	mbuf00 = pkts[pkt00_index];                             \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
								\
	pkt01_index = __builtin_ctzll(pkts_mask);               \
	if (pkts_mask == 0)                                     \
		pkt01_index = pkt00_index;                      \
								\
	pkt01_mask = 1LLU << pkt01_index;                       \
	pkts_mask &= ~pkt01_mask;                               \
								\
	mbuf01 = pkts[pkt01_index];                             \
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
}
766
/*
 * Pipeline stage 1 for two packets: hash each packet's 8-byte key (masked by
 * f->key_mask, seeded by f->seed), map the signature onto a bucket index with
 * the power-of-two modulo (f->n_buckets - 1), and prefetch both buckets so
 * stage 2 finds them in cache. bucket10/bucket11 are output lvalues.
 */
#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)\
{                                                               \
	uint64_t *key10, *key11;                                \
	uint64_t signature10, signature11;                      \
	uint32_t bucket10_index, bucket11_index;                \
	rte_table_hash_op_hash f_hash = f->f_hash;              \
	uint64_t seed = f->seed;                                \
	uint32_t key_offset = f->key_offset;                    \
								\
	key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, key_offset);\
	key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, key_offset);\
								\
	signature10 = f_hash(key10, &f->key_mask, KEY_SIZE, seed);      \
	bucket10_index = signature10 & (f->n_buckets - 1);      \
	bucket10 = (struct rte_bucket_4_8 *)                    \
		&f->memory[bucket10_index * f->bucket_size];    \
	rte_prefetch0(bucket10);                                \
								\
	signature11 = f_hash(key11, &f->key_mask, KEY_SIZE, seed);      \
	bucket11_index = signature11 & (f->n_buckets - 1);      \
	bucket11 = (struct rte_bucket_4_8 *)                    \
		&f->memory[bucket11_index * f->bucket_size];    \
	rte_prefetch0(bucket11);                                \
}
791
/*
 * Pipeline stage 2, LRU table variant, for two packets: compare each key
 * against the 4 slots of its bucket (lookup_key8_cmp yields the matching
 * slot position), fold the per-slot valid bit from bucket->signature into
 * pkts_mask_out at the packet's bit position, record the entry pointer, and
 * touch the LRU state for the accessed slot. On a miss the signature bit is
 * 0, so the output mask bit stays clear; the entry pointer written for a
 * miss is never consumed by callers (they test pkts_mask_out first).
 */
#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\
	bucket20, bucket21, pkts_mask_out, entries, f)          \
{                                                               \
	void *a20, *a21;                                        \
	uint64_t pkt20_mask, pkt21_mask;                        \
	uint64_t *key20, *key21;                                \
	uint32_t pos20, pos21;                                  \
								\
	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
								\
	lookup_key8_cmp(key20, bucket20, pos20, f);                     \
	lookup_key8_cmp(key21, bucket21, pos21, f);                     \
								\
	pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\
	pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\
	pkts_mask_out |= pkt20_mask | pkt21_mask;               \
								\
	a20 = (void *) &bucket20->data[pos20 * f->entry_size];  \
	a21 = (void *) &bucket21->data[pos21 * f->entry_size];  \
	rte_prefetch0(a20);                                     \
	rte_prefetch0(a21);                                     \
	entries[pkt20_index] = a20;                             \
	entries[pkt21_index] = a21;                             \
	lru_update(bucket20, pos20);                            \
	lru_update(bucket21, pos21);                            \
}
819
/*
 * Pipeline stage 2, extendable-bucket variant, for two packets: identical
 * hit handling to the LRU variant, but instead of LRU bookkeeping it
 * schedules bucket-chain traversal for misses. For each packet that missed
 * (pkt_mask bit clear) in a bucket that has a chained successor
 * (next_valid set), the packet's bit is added to buckets_mask and the next
 * bucket pointer and key pointer are parked in buckets[]/keys[] so the
 * "grind next buckets" loop can retry the lookup one level deeper.
 */
#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \
	bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f)\
{                                                               \
	struct rte_bucket_4_8 *bucket20_next, *bucket21_next;   \
	void *a20, *a21;                                        \
	uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\
	uint64_t *key20, *key21;                                \
	uint32_t pos20, pos21;                                  \
								\
	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
								\
	lookup_key8_cmp(key20, bucket20, pos20, f);                     \
	lookup_key8_cmp(key21, bucket21, pos21, f);                     \
								\
	pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\
	pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\
	pkts_mask_out |= pkt20_mask | pkt21_mask;               \
								\
	a20 = (void *) &bucket20->data[pos20 * f->entry_size];  \
	a21 = (void *) &bucket21->data[pos21 * f->entry_size];  \
	rte_prefetch0(a20);                                     \
	rte_prefetch0(a21);                                     \
	entries[pkt20_index] = a20;                             \
	entries[pkt21_index] = a21;                             \
								\
	bucket20_mask = (~pkt20_mask) & (bucket20->next_valid << pkt20_index);\
	bucket21_mask = (~pkt21_mask) & (bucket21->next_valid << pkt21_index);\
	buckets_mask |= bucket20_mask | bucket21_mask;          \
	bucket20_next = bucket20->next;                         \
	bucket21_next = bucket21->next;                         \
	buckets[pkt20_index] = bucket20_next;                   \
	buckets[pkt21_index] = bucket21_next;                   \
	keys[pkt20_index] = key20;                              \
	keys[pkt21_index] = key21;                              \
}
856
/**
 * Bulk lookup for the 8-byte-key LRU hash table.
 *
 * Processes the packets selected by @pkts_mask through a 3-stage software
 * pipeline (stage 0: pop + prefetch keys; stage 1: hash + prefetch buckets;
 * stage 2: compare + LRU update), two packets per stage, so the memory
 * latency of one pair is hidden behind the computation of the others.
 *
 * @param table            Table handle (struct rte_table_hash *).
 * @param pkts             Packet burst; indexed by bit position in pkts_mask.
 * @param pkts_mask        Bitmask of valid packets to look up.
 * @param lookup_hit_mask  Output: bit set for every packet whose key was found.
 * @param entries          Output: per-packet pointer to the matched table
 *                         entry; only valid where lookup_hit_mask has the
 *                         bit set.
 * @return Always 0.
 */
static int
rte_table_hash_lookup_key8_lru(
	void *table,
	struct rte_mbuf **pkts,
	uint64_t pkts_mask,
	uint64_t *lookup_hit_mask,
	void **entries)
{
	struct rte_table_hash *f = (struct rte_table_hash *) table;
	struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
	uint32_t pkt00_index, pkt01_index, pkt10_index;
	uint32_t pkt11_index, pkt20_index, pkt21_index;
	uint64_t pkts_mask_out = 0;

	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
	RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);

	/* Cannot run the pipeline with less than 5 packets */
	if (__builtin_popcountll(pkts_mask) < 5) {
		/* Fall back to the single-packet (non-pipelined) path. */
		for ( ; pkts_mask; ) {
			struct rte_bucket_4_8 *bucket;
			struct rte_mbuf *mbuf;
			uint32_t pkt_index;

			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
			lookup1_stage1(mbuf, bucket, f);
			lookup1_stage2_lru(pkt_index, mbuf, bucket,
				pkts_mask_out, entries, f);
		}

		*lookup_hit_mask = pkts_mask_out;
		RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
		return 0;
	}

	/*
	 * Pipeline fill
	 *
	 */
	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline feed */
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/*
	 * Pipeline run
	 *
	 */
	/* Steady state: all three stages active, each on a different pair. */
	for ( ; pkts_mask; ) {
		/* Pipeline feed */
		bucket20 = bucket10;
		bucket21 = bucket11;
		mbuf20 = mbuf10;
		mbuf21 = mbuf11;
		mbuf10 = mbuf00;
		mbuf11 = mbuf01;
		pkt20_index = pkt10_index;
		pkt21_index = pkt11_index;
		pkt10_index = pkt00_index;
		pkt11_index = pkt01_index;

		/* Pipeline stage 0 */
		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
			mbuf00, mbuf01, pkts, pkts_mask, f);

		/* Pipeline stage 1 */
		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

		/* Pipeline stage 2 */
		lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
			bucket20, bucket21, pkts_mask_out, entries, f);
	}

	/*
	 * Pipeline flush
	 *
	 */
	/* Drain the two pairs still in flight once pkts_mask is empty. */
	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/* Pipeline stage 2 */
	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries, f);

	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;

	/* Pipeline stage 2 */
	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries, f);

	*lookup_hit_mask = pkts_mask_out;
	RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
	return 0;
} /* lookup LRU */
982
/**
 * Bulk lookup for the 8-byte-key extendable-bucket hash table.
 *
 * Same 3-stage, 2-packets-per-stage pipeline as the LRU variant, except
 * that packets which miss in their first bucket but have a chained bucket
 * (next_valid) are collected in buckets_mask/buckets[]/keys[] and retried
 * in the "grind next buckets" loop, one chain level per outer iteration,
 * until every chain is exhausted.
 *
 * @param table            Table handle (struct rte_table_hash *).
 * @param pkts             Packet burst; indexed by bit position in pkts_mask.
 * @param pkts_mask        Bitmask of valid packets to look up.
 * @param lookup_hit_mask  Output: bit set for every packet whose key was found.
 * @param entries          Output: per-packet matched entry pointer; only
 *                         valid where lookup_hit_mask has the bit set.
 * @return Always 0.
 */
static int
rte_table_hash_lookup_key8_ext(
	void *table,
	struct rte_mbuf **pkts,
	uint64_t pkts_mask,
	uint64_t *lookup_hit_mask,
	void **entries)
{
	struct rte_table_hash *f = (struct rte_table_hash *) table;
	struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
	uint32_t pkt00_index, pkt01_index, pkt10_index;
	uint32_t pkt11_index, pkt20_index, pkt21_index;
	uint64_t pkts_mask_out = 0, buckets_mask = 0;
	struct rte_bucket_4_8 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
	uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];

	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
	RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);

	/* Cannot run the pipeline with less than 5 packets */
	if (__builtin_popcountll(pkts_mask) < 5) {
		/* Single-packet path; chained buckets still go to the grinder. */
		for ( ; pkts_mask; ) {
			struct rte_bucket_4_8 *bucket;
			struct rte_mbuf *mbuf;
			uint32_t pkt_index;

			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
			lookup1_stage1(mbuf, bucket, f);
			lookup1_stage2_ext(pkt_index, mbuf, bucket,
				pkts_mask_out, entries, buckets_mask,
				buckets, keys, f);
		}

		goto grind_next_buckets;
	}

	/*
	 * Pipeline fill
	 *
	 */
	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline feed */
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/*
	 * Pipeline run
	 *
	 */
	/* Steady state: all three stages active, each on a different pair. */
	for ( ; pkts_mask; ) {
		/* Pipeline feed */
		bucket20 = bucket10;
		bucket21 = bucket11;
		mbuf20 = mbuf10;
		mbuf21 = mbuf11;
		mbuf10 = mbuf00;
		mbuf11 = mbuf01;
		pkt20_index = pkt10_index;
		pkt21_index = pkt11_index;
		pkt10_index = pkt00_index;
		pkt11_index = pkt01_index;

		/* Pipeline stage 0 */
		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
			mbuf00, mbuf01, pkts, pkts_mask, f);

		/* Pipeline stage 1 */
		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

		/* Pipeline stage 2 */
		lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
			bucket20, bucket21, pkts_mask_out, entries,
			buckets_mask, buckets, keys, f);
	}

	/*
	 * Pipeline flush
	 *
	 */
	/* Drain the two pairs still in flight once pkts_mask is empty. */
	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/* Pipeline stage 2 */
	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries,
		buckets_mask, buckets, keys, f);

	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;

	/* Pipeline stage 2 */
	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries,
		buckets_mask, buckets, keys, f);

grind_next_buckets:
	/* Grind next buckets */
	/* Walk bucket chains breadth-first: each outer pass resolves one
	 * additional chain level for every still-unresolved packet. */
	for ( ; buckets_mask; ) {
		uint64_t buckets_mask_next = 0;

		for ( ; buckets_mask; ) {
			uint64_t pkt_mask;
			uint32_t pkt_index;

			pkt_index = __builtin_ctzll(buckets_mask);
			pkt_mask = 1LLU << pkt_index;
			buckets_mask &= ~pkt_mask;

			lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
				entries, buckets_mask_next, f);
		}

		buckets_mask = buckets_mask_next;
	}

	*lookup_hit_mask = pkts_mask_out;
	RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
	return 0;
} /* lookup EXT */
1132
1133 static int
1134 rte_table_hash_key8_stats_read(void *table, struct rte_table_stats *stats, int clear)
1135 {
1136         struct rte_table_hash *t = table;
1137
1138         if (stats != NULL)
1139                 memcpy(stats, &t->stats, sizeof(t->stats));
1140
1141         if (clear)
1142                 memset(&t->stats, 0, sizeof(t->stats));
1143
1144         return 0;
1145 }
1146
/* Operations vtable exported for the 8-byte-key hash table, LRU variant.
 * Bulk add/delete are not implemented (NULL). */
struct rte_table_ops rte_table_hash_key8_lru_ops = {
	.f_create = rte_table_hash_create_key8_lru,
	.f_free = rte_table_hash_free_key8_lru,
	.f_add = rte_table_hash_entry_add_key8_lru,
	.f_delete = rte_table_hash_entry_delete_key8_lru,
	.f_add_bulk = NULL,
	.f_delete_bulk = NULL,
	.f_lookup = rte_table_hash_lookup_key8_lru,
	.f_stats = rte_table_hash_key8_stats_read,
};
1157
/* Operations vtable exported for the 8-byte-key hash table, extendable-bucket
 * variant. Bulk add/delete are not implemented (NULL); stats handler is
 * shared with the LRU variant. */
struct rte_table_ops rte_table_hash_key8_ext_ops = {
	.f_create = rte_table_hash_create_key8_ext,
	.f_free = rte_table_hash_free_key8_ext,
	.f_add = rte_table_hash_entry_add_key8_ext,
	.f_delete = rte_table_hash_entry_delete_key8_ext,
	.f_add_bulk = NULL,
	.f_delete_bulk = NULL,
	.f_lookup = rte_table_hash_lookup_key8_ext,
	.f_stats = rte_table_hash_key8_stats_read,
};