New upstream version 18.02
[deb_dpdk.git] / lib / librte_table / rte_table_hash_key16.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <string.h>
5 #include <stdio.h>
6
7 #include <rte_common.h>
8 #include <rte_mbuf.h>
9 #include <rte_memory.h>
10 #include <rte_malloc.h>
11 #include <rte_log.h>
12
13 #include "rte_table_hash.h"
14 #include "rte_lru.h"
15
16 #define KEY_SIZE                                                16
17
18 #define KEYS_PER_BUCKET                                 4
19
20 #define RTE_BUCKET_ENTRY_VALID                                          0x1LLU
21
22 #ifdef RTE_TABLE_STATS_COLLECT
23
24 #define RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(table, val) \
25         table->stats.n_pkts_in += val
26 #define RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(table, val) \
27         table->stats.n_pkts_lookup_miss += val
28
29 #else
30
31 #define RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(table, val)
32 #define RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(table, val)
33
34 #endif
35
36 struct rte_bucket_4_16 {
37         /* Cache line 0 */
38         uint64_t signature[4 + 1];
39         uint64_t lru_list;
40         struct rte_bucket_4_16 *next;
41         uint64_t next_valid;
42
43         /* Cache line 1 */
44         uint64_t key[4][2];
45
46         /* Cache line 2 */
47         uint8_t data[0];
48 };
49
50 struct rte_table_hash {
51         struct rte_table_stats stats;
52
53         /* Input parameters */
54         uint32_t n_buckets;
55         uint32_t key_size;
56         uint32_t entry_size;
57         uint32_t bucket_size;
58         uint32_t key_offset;
59         uint64_t key_mask[2];
60         rte_table_hash_op_hash f_hash;
61         uint64_t seed;
62
63         /* Extendible buckets */
64         uint32_t n_buckets_ext;
65         uint32_t stack_pos;
66         uint32_t *stack;
67
68         /* Lookup table */
69         uint8_t memory[0] __rte_cache_aligned;
70 };
71
72 static int
73 keycmp(void *a, void *b, void *b_mask)
74 {
75         uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask;
76
77         return (a64[0] != (b64[0] & b_mask64[0])) ||
78                 (a64[1] != (b64[1] & b_mask64[1]));
79 }
80
81 static void
82 keycpy(void *dst, void *src, void *src_mask)
83 {
84         uint64_t *dst64 = dst, *src64 = src, *src_mask64 = src_mask;
85
86         dst64[0] = src64[0] & src_mask64[0];
87         dst64[1] = src64[1] & src_mask64[1];
88 }
89
90 static int
91 check_params_create(struct rte_table_hash_params *params)
92 {
93         /* name */
94         if (params->name == NULL) {
95                 RTE_LOG(ERR, TABLE, "%s: name invalid value\n", __func__);
96                 return -EINVAL;
97         }
98
99         /* key_size */
100         if (params->key_size != KEY_SIZE) {
101                 RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
102                 return -EINVAL;
103         }
104
105         /* n_keys */
106         if (params->n_keys == 0) {
107                 RTE_LOG(ERR, TABLE, "%s: n_keys is zero\n", __func__);
108                 return -EINVAL;
109         }
110
111         /* n_buckets */
112         if ((params->n_buckets == 0) ||
113                 (!rte_is_power_of_2(params->n_buckets))) {
114                 RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
115                 return -EINVAL;
116         }
117
118         /* f_hash */
119         if (params->f_hash == NULL) {
120                 RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
121                         __func__);
122                 return -EINVAL;
123         }
124
125         return 0;
126 }
127
128 static void *
129 rte_table_hash_create_key16_lru(void *params,
130                 int socket_id,
131                 uint32_t entry_size)
132 {
133         struct rte_table_hash_params *p = params;
134         struct rte_table_hash *f;
135         uint64_t bucket_size, total_size;
136         uint32_t n_buckets, i;
137
138         /* Check input parameters */
139         if ((check_params_create(p) != 0) ||
140                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
141                 ((sizeof(struct rte_bucket_4_16) % 64) != 0))
142                 return NULL;
143
144         /*
145          * Table dimensioning
146          *
147          * Objective: Pick the number of buckets (n_buckets) so that there a chance
148          * to store n_keys keys in the table.
149          *
150          * Note: Since the buckets do not get extended, it is not possible to
151          * guarantee that n_keys keys can be stored in the table at any time. In the
152          * worst case scenario when all the n_keys fall into the same bucket, only
153          * a maximum of KEYS_PER_BUCKET keys will be stored in the table. This case
154          * defeats the purpose of the hash table. It indicates unsuitable f_hash or
155          * n_keys to n_buckets ratio.
156          *
157          * MIN(n_buckets) = (n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET
158          */
159         n_buckets = rte_align32pow2(
160                 (p->n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET);
161         n_buckets = RTE_MAX(n_buckets, p->n_buckets);
162
163         /* Memory allocation */
164         bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_16) +
165                 KEYS_PER_BUCKET * entry_size);
166         total_size = sizeof(struct rte_table_hash) + n_buckets * bucket_size;
167
168         if (total_size > SIZE_MAX) {
169                 RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
170                 "for hash table %s\n",
171                 __func__, total_size, p->name);
172                 return NULL;
173         }
174
175         f = rte_zmalloc_socket(p->name,
176                 (size_t)total_size,
177                 RTE_CACHE_LINE_SIZE,
178                 socket_id);
179         if (f == NULL) {
180                 RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
181                 "for hash table %s\n",
182                 __func__, total_size, p->name);
183                 return NULL;
184         }
185         RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
186                 "is %" PRIu64 " bytes\n",
187                 __func__, p->name, total_size);
188
189         /* Memory initialization */
190         f->n_buckets = n_buckets;
191         f->key_size = KEY_SIZE;
192         f->entry_size = entry_size;
193         f->bucket_size = bucket_size;
194         f->key_offset = p->key_offset;
195         f->f_hash = p->f_hash;
196         f->seed = p->seed;
197
198         if (p->key_mask != NULL) {
199                 f->key_mask[0] = ((uint64_t *)p->key_mask)[0];
200                 f->key_mask[1] = ((uint64_t *)p->key_mask)[1];
201         } else {
202                 f->key_mask[0] = 0xFFFFFFFFFFFFFFFFLLU;
203                 f->key_mask[1] = 0xFFFFFFFFFFFFFFFFLLU;
204         }
205
206         for (i = 0; i < n_buckets; i++) {
207                 struct rte_bucket_4_16 *bucket;
208
209                 bucket = (struct rte_bucket_4_16 *) &f->memory[i *
210                         f->bucket_size];
211                 lru_init(bucket);
212         }
213
214         return f;
215 }
216
217 static int
218 rte_table_hash_free_key16_lru(void *table)
219 {
220         struct rte_table_hash *f = table;
221
222         /* Check input parameters */
223         if (f == NULL) {
224                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
225                 return -EINVAL;
226         }
227
228         rte_free(f);
229         return 0;
230 }
231
232 static int
233 rte_table_hash_entry_add_key16_lru(
234         void *table,
235         void *key,
236         void *entry,
237         int *key_found,
238         void **entry_ptr)
239 {
240         struct rte_table_hash *f = table;
241         struct rte_bucket_4_16 *bucket;
242         uint64_t signature, pos;
243         uint32_t bucket_index, i;
244
245         signature = f->f_hash(key, f->key_mask, f->key_size, f->seed);
246         bucket_index = signature & (f->n_buckets - 1);
247         bucket = (struct rte_bucket_4_16 *)
248                 &f->memory[bucket_index * f->bucket_size];
249         signature |= RTE_BUCKET_ENTRY_VALID;
250
251         /* Key is present in the bucket */
252         for (i = 0; i < 4; i++) {
253                 uint64_t bucket_signature = bucket->signature[i];
254                 uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
255
256                 if ((bucket_signature == signature) &&
257                         (keycmp(bucket_key, key, f->key_mask) == 0)) {
258                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
259
260                         memcpy(bucket_data, entry, f->entry_size);
261                         lru_update(bucket, i);
262                         *key_found = 1;
263                         *entry_ptr = (void *) bucket_data;
264                         return 0;
265                 }
266         }
267
268         /* Key is not present in the bucket */
269         for (i = 0; i < 4; i++) {
270                 uint64_t bucket_signature = bucket->signature[i];
271                 uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
272
273                 if (bucket_signature == 0) {
274                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
275
276                         bucket->signature[i] = signature;
277                         keycpy(bucket_key, key, f->key_mask);
278                         memcpy(bucket_data, entry, f->entry_size);
279                         lru_update(bucket, i);
280                         *key_found = 0;
281                         *entry_ptr = (void *) bucket_data;
282
283                         return 0;
284                 }
285         }
286
287         /* Bucket full: replace LRU entry */
288         pos = lru_pos(bucket);
289         bucket->signature[pos] = signature;
290         keycpy(&bucket->key[pos], key, f->key_mask);
291         memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
292         lru_update(bucket, pos);
293         *key_found = 0;
294         *entry_ptr = (void *) &bucket->data[pos * f->entry_size];
295
296         return 0;
297 }
298
299 static int
300 rte_table_hash_entry_delete_key16_lru(
301         void *table,
302         void *key,
303         int *key_found,
304         void *entry)
305 {
306         struct rte_table_hash *f = table;
307         struct rte_bucket_4_16 *bucket;
308         uint64_t signature;
309         uint32_t bucket_index, i;
310
311         signature = f->f_hash(key, f->key_mask, f->key_size, f->seed);
312         bucket_index = signature & (f->n_buckets - 1);
313         bucket = (struct rte_bucket_4_16 *)
314                 &f->memory[bucket_index * f->bucket_size];
315         signature |= RTE_BUCKET_ENTRY_VALID;
316
317         /* Key is present in the bucket */
318         for (i = 0; i < 4; i++) {
319                 uint64_t bucket_signature = bucket->signature[i];
320                 uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
321
322                 if ((bucket_signature == signature) &&
323                         (keycmp(bucket_key, key, f->key_mask) == 0)) {
324                         uint8_t *bucket_data = &bucket->data[i * f->entry_size];
325
326                         bucket->signature[i] = 0;
327                         *key_found = 1;
328                         if (entry)
329                                 memcpy(entry, bucket_data, f->entry_size);
330                         return 0;
331                 }
332         }
333
334         /* Key is not present in the bucket */
335         *key_found = 0;
336         return 0;
337 }
338
339 static void *
340 rte_table_hash_create_key16_ext(void *params,
341                 int socket_id,
342                 uint32_t entry_size)
343 {
344         struct rte_table_hash_params *p = params;
345         struct rte_table_hash *f;
346         uint64_t bucket_size, stack_size, total_size;
347         uint32_t n_buckets_ext, i;
348
349         /* Check input parameters */
350         if ((check_params_create(p) != 0) ||
351                 ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
352                 ((sizeof(struct rte_bucket_4_16) % 64) != 0))
353                 return NULL;
354
355         /*
356          * Table dimensioning
357          *
358          * Objective: Pick the number of bucket extensions (n_buckets_ext) so that
359          * it is guaranteed that n_keys keys can be stored in the table at any time.
360          *
361          * The worst case scenario takes place when all the n_keys keys fall into
362          * the same bucket. Actually, due to the KEYS_PER_BUCKET scheme, the worst
363          * case takes place when (n_keys - KEYS_PER_BUCKET + 1) keys fall into the
364          * same bucket, while the remaining (KEYS_PER_BUCKET - 1) keys each fall
365          * into a different bucket. This case defeats the purpose of the hash table.
366          * It indicates unsuitable f_hash or n_keys to n_buckets ratio.
367          *
368          * n_buckets_ext = n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1
369          */
370         n_buckets_ext = p->n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1;
371
372         /* Memory allocation */
373         bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_16) +
374                 KEYS_PER_BUCKET * entry_size);
375         stack_size = RTE_CACHE_LINE_ROUNDUP(n_buckets_ext * sizeof(uint32_t));
376         total_size = sizeof(struct rte_table_hash) +
377                 (p->n_buckets + n_buckets_ext) * bucket_size + stack_size;
378         if (total_size > SIZE_MAX) {
379                 RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
380                         "for hash table %s\n",
381                         __func__, total_size, p->name);
382                 return NULL;
383         }
384
385         f = rte_zmalloc_socket(p->name,
386                 (size_t)total_size,
387                 RTE_CACHE_LINE_SIZE,
388                 socket_id);
389         if (f == NULL) {
390                 RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
391                         "for hash table %s\n",
392                         __func__, total_size, p->name);
393                 return NULL;
394         }
395         RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
396                 "is %" PRIu64 " bytes\n",
397                 __func__, p->name, total_size);
398
399         /* Memory initialization */
400         f->n_buckets = p->n_buckets;
401         f->key_size = KEY_SIZE;
402         f->entry_size = entry_size;
403         f->bucket_size = bucket_size;
404         f->key_offset = p->key_offset;
405         f->f_hash = p->f_hash;
406         f->seed = p->seed;
407
408         f->n_buckets_ext = n_buckets_ext;
409         f->stack_pos = n_buckets_ext;
410         f->stack = (uint32_t *)
411                 &f->memory[(p->n_buckets + n_buckets_ext) * f->bucket_size];
412
413         if (p->key_mask != NULL) {
414                 f->key_mask[0] = (((uint64_t *)p->key_mask)[0]);
415                 f->key_mask[1] = (((uint64_t *)p->key_mask)[1]);
416         } else {
417                 f->key_mask[0] = 0xFFFFFFFFFFFFFFFFLLU;
418                 f->key_mask[1] = 0xFFFFFFFFFFFFFFFFLLU;
419         }
420
421         for (i = 0; i < n_buckets_ext; i++)
422                 f->stack[i] = i;
423
424         return f;
425 }
426
427 static int
428 rte_table_hash_free_key16_ext(void *table)
429 {
430         struct rte_table_hash *f = table;
431
432         /* Check input parameters */
433         if (f == NULL) {
434                 RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
435                 return -EINVAL;
436         }
437
438         rte_free(f);
439         return 0;
440 }
441
442 static int
443 rte_table_hash_entry_add_key16_ext(
444         void *table,
445         void *key,
446         void *entry,
447         int *key_found,
448         void **entry_ptr)
449 {
450         struct rte_table_hash *f = table;
451         struct rte_bucket_4_16 *bucket0, *bucket, *bucket_prev;
452         uint64_t signature;
453         uint32_t bucket_index, i;
454
455         signature = f->f_hash(key, f->key_mask, f->key_size, f->seed);
456         bucket_index = signature & (f->n_buckets - 1);
457         bucket0 = (struct rte_bucket_4_16 *)
458                 &f->memory[bucket_index * f->bucket_size];
459         signature |= RTE_BUCKET_ENTRY_VALID;
460
461         /* Key is present in the bucket */
462         for (bucket = bucket0; bucket != NULL; bucket = bucket->next)
463                 for (i = 0; i < 4; i++) {
464                         uint64_t bucket_signature = bucket->signature[i];
465                         uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
466
467                         if ((bucket_signature == signature) &&
468                                 (keycmp(bucket_key, key, f->key_mask) == 0)) {
469                                 uint8_t *bucket_data = &bucket->data[i *
470                                         f->entry_size];
471
472                                 memcpy(bucket_data, entry, f->entry_size);
473                                 *key_found = 1;
474                                 *entry_ptr = (void *) bucket_data;
475                                 return 0;
476                         }
477                 }
478
479         /* Key is not present in the bucket */
480         for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
481                 bucket_prev = bucket, bucket = bucket->next)
482                 for (i = 0; i < 4; i++) {
483                         uint64_t bucket_signature = bucket->signature[i];
484                         uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
485
486                         if (bucket_signature == 0) {
487                                 uint8_t *bucket_data = &bucket->data[i *
488                                         f->entry_size];
489
490                                 bucket->signature[i] = signature;
491                                 keycpy(bucket_key, key, f->key_mask);
492                                 memcpy(bucket_data, entry, f->entry_size);
493                                 *key_found = 0;
494                                 *entry_ptr = (void *) bucket_data;
495
496                                 return 0;
497                         }
498                 }
499
500         /* Bucket full: extend bucket */
501         if (f->stack_pos > 0) {
502                 bucket_index = f->stack[--f->stack_pos];
503
504                 bucket = (struct rte_bucket_4_16 *) &f->memory[(f->n_buckets +
505                         bucket_index) * f->bucket_size];
506                 bucket_prev->next = bucket;
507                 bucket_prev->next_valid = 1;
508
509                 bucket->signature[0] = signature;
510                 keycpy(&bucket->key[0], key, f->key_mask);
511                 memcpy(&bucket->data[0], entry, f->entry_size);
512                 *key_found = 0;
513                 *entry_ptr = (void *) &bucket->data[0];
514                 return 0;
515         }
516
517         return -ENOSPC;
518 }
519
520 static int
521 rte_table_hash_entry_delete_key16_ext(
522         void *table,
523         void *key,
524         int *key_found,
525         void *entry)
526 {
527         struct rte_table_hash *f = table;
528         struct rte_bucket_4_16 *bucket0, *bucket, *bucket_prev;
529         uint64_t signature;
530         uint32_t bucket_index, i;
531
532         signature = f->f_hash(key, f->key_mask, f->key_size, f->seed);
533         bucket_index = signature & (f->n_buckets - 1);
534         bucket0 = (struct rte_bucket_4_16 *)
535                 &f->memory[bucket_index * f->bucket_size];
536         signature |= RTE_BUCKET_ENTRY_VALID;
537
538         /* Key is present in the bucket */
539         for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
540                 bucket_prev = bucket, bucket = bucket->next)
541                 for (i = 0; i < 4; i++) {
542                         uint64_t bucket_signature = bucket->signature[i];
543                         uint8_t *bucket_key = (uint8_t *) &bucket->key[i];
544
545                         if ((bucket_signature == signature) &&
546                                 (keycmp(bucket_key, key, f->key_mask) == 0)) {
547                                 uint8_t *bucket_data = &bucket->data[i *
548                                         f->entry_size];
549
550                                 bucket->signature[i] = 0;
551                                 *key_found = 1;
552                                 if (entry)
553                                         memcpy(entry, bucket_data, f->entry_size);
554
555                                 if ((bucket->signature[0] == 0) &&
556                                         (bucket->signature[1] == 0) &&
557                                         (bucket->signature[2] == 0) &&
558                                         (bucket->signature[3] == 0) &&
559                                         (bucket_prev != NULL)) {
560                                         bucket_prev->next = bucket->next;
561                                         bucket_prev->next_valid =
562                                                 bucket->next_valid;
563
564                                         memset(bucket, 0,
565                                                 sizeof(struct rte_bucket_4_16));
566                                         bucket_index = (((uint8_t *)bucket -
567                                                 (uint8_t *)f->memory)/f->bucket_size) - f->n_buckets;
568                                         f->stack[f->stack_pos++] = bucket_index;
569                                 }
570
571                                 return 0;
572                         }
573                 }
574
575         /* Key is not present in the bucket */
576         *key_found = 0;
577         return 0;
578 }
579
580 #define lookup_key16_cmp(key_in, bucket, pos, f)                        \
581 {                                                               \
582         uint64_t xor[4][2], or[4], signature[4], k[2];          \
583                                                                 \
584         k[0] = key_in[0] & f->key_mask[0];                              \
585         k[1] = key_in[1] & f->key_mask[1];                              \
586         signature[0] = (~bucket->signature[0]) & 1;             \
587         signature[1] = (~bucket->signature[1]) & 1;             \
588         signature[2] = (~bucket->signature[2]) & 1;             \
589         signature[3] = (~bucket->signature[3]) & 1;             \
590                                                                 \
591         xor[0][0] = k[0] ^ bucket->key[0][0];                   \
592         xor[0][1] = k[1] ^ bucket->key[0][1];                   \
593                                                                 \
594         xor[1][0] = k[0] ^ bucket->key[1][0];                   \
595         xor[1][1] = k[1] ^ bucket->key[1][1];                   \
596                                                                 \
597         xor[2][0] = k[0] ^ bucket->key[2][0];                   \
598         xor[2][1] = k[1] ^ bucket->key[2][1];                   \
599                                                                 \
600         xor[3][0] = k[0] ^ bucket->key[3][0];                   \
601         xor[3][1] = k[1] ^ bucket->key[3][1];                   \
602                                                                 \
603         or[0] = xor[0][0] | xor[0][1] | signature[0];           \
604         or[1] = xor[1][0] | xor[1][1] | signature[1];           \
605         or[2] = xor[2][0] | xor[2][1] | signature[2];           \
606         or[3] = xor[3][0] | xor[3][1] | signature[3];           \
607                                                                 \
608         pos = 4;                                                \
609         if (or[0] == 0)                                         \
610                 pos = 0;                                        \
611         if (or[1] == 0)                                         \
612                 pos = 1;                                        \
613         if (or[2] == 0)                                         \
614                 pos = 2;                                        \
615         if (or[3] == 0)                                         \
616                 pos = 3;                                        \
617 }
618
619 #define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask, f)   \
620 {                                                               \
621         uint64_t pkt_mask;                                      \
622         uint32_t key_offset = f->key_offset;\
623                                                                 \
624         pkt0_index = __builtin_ctzll(pkts_mask);                \
625         pkt_mask = 1LLU << pkt0_index;                          \
626         pkts_mask &= ~pkt_mask;                                 \
627                                                                 \
628         mbuf0 = pkts[pkt0_index];                               \
629         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));\
630 }
631
632 #define lookup1_stage1(mbuf1, bucket1, f)                       \
633 {                                                               \
634         uint64_t *key;                                          \
635         uint64_t signature = 0;                         \
636         uint32_t bucket_index;                          \
637                                                                 \
638         key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\
639         signature = f->f_hash(key, f->key_mask, KEY_SIZE, f->seed);     \
640                                                                 \
641         bucket_index = signature & (f->n_buckets - 1);          \
642         bucket1 = (struct rte_bucket_4_16 *)                    \
643                 &f->memory[bucket_index * f->bucket_size];      \
644         rte_prefetch0(bucket1);                                 \
645         rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\
646 }
647
648 #define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2,          \
649                 pkts_mask_out, entries, f)                      \
650 {                                                               \
651         void *a;                                                \
652         uint64_t pkt_mask;                                      \
653         uint64_t *key;                                          \
654         uint32_t pos;                                           \
655                                                                 \
656         key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
657         lookup_key16_cmp(key, bucket2, pos, f);                 \
658                                                                 \
659         pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\
660         pkts_mask_out |= pkt_mask;                              \
661                                                                 \
662         a = (void *) &bucket2->data[pos * f->entry_size];       \
663         rte_prefetch0(a);                                       \
664         entries[pkt2_index] = a;                                \
665         lru_update(bucket2, pos);                               \
666 }
667
668 #define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out, entries, \
669         buckets_mask, buckets, keys, f)                         \
670 {                                                               \
671         struct rte_bucket_4_16 *bucket_next;                    \
672         void *a;                                                \
673         uint64_t pkt_mask, bucket_mask;                         \
674         uint64_t *key;                                          \
675         uint32_t pos;                                           \
676                                                                 \
677         key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
678         lookup_key16_cmp(key, bucket2, pos, f);                 \
679                                                                 \
680         pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\
681         pkts_mask_out |= pkt_mask;                              \
682                                                                 \
683         a = (void *) &bucket2->data[pos * f->entry_size];       \
684         rte_prefetch0(a);                                       \
685         entries[pkt2_index] = a;                                \
686                                                                 \
687         bucket_mask = (~pkt_mask) & (bucket2->next_valid << pkt2_index);\
688         buckets_mask |= bucket_mask;                            \
689         bucket_next = bucket2->next;                            \
690         buckets[pkt2_index] = bucket_next;                      \
691         keys[pkt2_index] = key;                                 \
692 }
693
694 #define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, entries,\
695         buckets_mask, f)                                        \
696 {                                                               \
697         struct rte_bucket_4_16 *bucket, *bucket_next;           \
698         void *a;                                                \
699         uint64_t pkt_mask, bucket_mask;                         \
700         uint64_t *key;                                          \
701         uint32_t pos;                                           \
702                                                                 \
703         bucket = buckets[pkt_index];                            \
704         key = keys[pkt_index];                                  \
705         lookup_key16_cmp(key, bucket, pos, f);                  \
706                                                                 \
707         pkt_mask = (bucket->signature[pos] & 1LLU) << pkt_index;\
708         pkts_mask_out |= pkt_mask;                              \
709                                                                 \
710         a = (void *) &bucket->data[pos * f->entry_size];        \
711         rte_prefetch0(a);                                       \
712         entries[pkt_index] = a;                                 \
713                                                                 \
714         bucket_mask = (~pkt_mask) & (bucket->next_valid << pkt_index);\
715         buckets_mask |= bucket_mask;                            \
716         bucket_next = bucket->next;                             \
717         rte_prefetch0(bucket_next);                             \
718         rte_prefetch0((void *)(((uintptr_t) bucket_next) + RTE_CACHE_LINE_SIZE));\
719         buckets[pkt_index] = bucket_next;                       \
720         keys[pkt_index] = key;                                  \
721 }
722
723 #define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\
724                 pkts, pkts_mask, f)                             \
725 {                                                               \
726         uint64_t pkt00_mask, pkt01_mask;                        \
727         uint32_t key_offset = f->key_offset;            \
728                                                                 \
729         pkt00_index = __builtin_ctzll(pkts_mask);               \
730         pkt00_mask = 1LLU << pkt00_index;                       \
731         pkts_mask &= ~pkt00_mask;                               \
732                                                                 \
733         mbuf00 = pkts[pkt00_index];                             \
734         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
735                                                                 \
736         pkt01_index = __builtin_ctzll(pkts_mask);               \
737         pkt01_mask = 1LLU << pkt01_index;                       \
738         pkts_mask &= ~pkt01_mask;                               \
739                                                                 \
740         mbuf01 = pkts[pkt01_index];                             \
741         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
742 }
743
744 #define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\
745                 mbuf00, mbuf01, pkts, pkts_mask, f)             \
746 {                                                               \
747         uint64_t pkt00_mask, pkt01_mask;                        \
748         uint32_t key_offset = f->key_offset;            \
749                                                                 \
750         pkt00_index = __builtin_ctzll(pkts_mask);               \
751         pkt00_mask = 1LLU << pkt00_index;                       \
752         pkts_mask &= ~pkt00_mask;                               \
753                                                                 \
754         mbuf00 = pkts[pkt00_index];                             \
755         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset)); \
756                                                                 \
757         pkt01_index = __builtin_ctzll(pkts_mask);               \
758         if (pkts_mask == 0)                                     \
759                 pkt01_index = pkt00_index;                      \
760         pkt01_mask = 1LLU << pkt01_index;                       \
761         pkts_mask &= ~pkt01_mask;                               \
762                                                                 \
763         mbuf01 = pkts[pkt01_index];                             \
764         rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset)); \
765 }
766
767 #define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)   \
768 {                                                               \
769         uint64_t *key10, *key11;                                        \
770         uint64_t signature10, signature11;                      \
771         uint32_t bucket10_index, bucket11_index;        \
772                                                                 \
773         key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, f->key_offset);\
774         signature10 = f->f_hash(key10, f->key_mask,      KEY_SIZE, f->seed);\
775         bucket10_index = signature10 & (f->n_buckets - 1);      \
776         bucket10 = (struct rte_bucket_4_16 *)                           \
777                 &f->memory[bucket10_index * f->bucket_size];    \
778         rte_prefetch0(bucket10);                                \
779         rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\
780                                                                 \
781         key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, f->key_offset);\
782         signature11 = f->f_hash(key11, f->key_mask,      KEY_SIZE, f->seed);\
783         bucket11_index = signature11 & (f->n_buckets - 1);      \
784         bucket11 = (struct rte_bucket_4_16 *)                   \
785                 &f->memory[bucket11_index * f->bucket_size];    \
786         rte_prefetch0(bucket11);                                \
787         rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\
788 }
789
790 #define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\
791                 bucket20, bucket21, pkts_mask_out, entries, f)  \
792 {                                                               \
793         void *a20, *a21;                                        \
794         uint64_t pkt20_mask, pkt21_mask;                        \
795         uint64_t *key20, *key21;                                \
796         uint32_t pos20, pos21;                                  \
797                                                                 \
798         key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
799         key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
800                                                                 \
801         lookup_key16_cmp(key20, bucket20, pos20, f);            \
802         lookup_key16_cmp(key21, bucket21, pos21, f);            \
803                                                                 \
804         pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\
805         pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\
806         pkts_mask_out |= pkt20_mask | pkt21_mask;                       \
807                                                                 \
808         a20 = (void *) &bucket20->data[pos20 * f->entry_size];  \
809         a21 = (void *) &bucket21->data[pos21 * f->entry_size];  \
810         rte_prefetch0(a20);                                     \
811         rte_prefetch0(a21);                                     \
812         entries[pkt20_index] = a20;                             \
813         entries[pkt21_index] = a21;                             \
814         lru_update(bucket20, pos20);                            \
815         lru_update(bucket21, pos21);                            \
816 }
817
818 #define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \
819         bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f) \
820 {                                                               \
821         struct rte_bucket_4_16 *bucket20_next, *bucket21_next;  \
822         void *a20, *a21;                                        \
823         uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\
824         uint64_t *key20, *key21;                                \
825         uint32_t pos20, pos21;                                  \
826                                                                 \
827         key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
828         key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
829                                                                 \
830         lookup_key16_cmp(key20, bucket20, pos20, f);    \
831         lookup_key16_cmp(key21, bucket21, pos21, f);    \
832                                                                 \
833         pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\
834         pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\
835         pkts_mask_out |= pkt20_mask | pkt21_mask;               \
836                                                                 \
837         a20 = (void *) &bucket20->data[pos20 * f->entry_size];  \
838         a21 = (void *) &bucket21->data[pos21 * f->entry_size];  \
839         rte_prefetch0(a20);                                     \
840         rte_prefetch0(a21);                                     \
841         entries[pkt20_index] = a20;                             \
842         entries[pkt21_index] = a21;                             \
843                                                                 \
844         bucket20_mask = (~pkt20_mask) & (bucket20->next_valid << pkt20_index);\
845         bucket21_mask = (~pkt21_mask) & (bucket21->next_valid << pkt21_index);\
846         buckets_mask |= bucket20_mask | bucket21_mask;          \
847         bucket20_next = bucket20->next;                         \
848         bucket21_next = bucket21->next;                         \
849         buckets[pkt20_index] = bucket20_next;                   \
850         buckets[pkt21_index] = bucket21_next;                   \
851         keys[pkt20_index] = key20;                              \
852         keys[pkt21_index] = key21;                              \
853 }
854
855 static int
856 rte_table_hash_lookup_key16_lru(
857         void *table,
858         struct rte_mbuf **pkts,
859         uint64_t pkts_mask,
860         uint64_t *lookup_hit_mask,
861         void **entries)
862 {
863         struct rte_table_hash *f = (struct rte_table_hash *) table;
864         struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
865         struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
866         uint32_t pkt00_index, pkt01_index, pkt10_index;
867         uint32_t pkt11_index, pkt20_index, pkt21_index;
868         uint64_t pkts_mask_out = 0;
869
870         __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
871
872         RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);
873
874         /* Cannot run the pipeline with less than 5 packets */
875         if (__builtin_popcountll(pkts_mask) < 5) {
876                 for ( ; pkts_mask; ) {
877                         struct rte_bucket_4_16 *bucket;
878                         struct rte_mbuf *mbuf;
879                         uint32_t pkt_index;
880
881                         lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
882                         lookup1_stage1(mbuf, bucket, f);
883                         lookup1_stage2_lru(pkt_index, mbuf, bucket,
884                                 pkts_mask_out, entries, f);
885                 }
886
887                 *lookup_hit_mask = pkts_mask_out;
888                 RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
889                         __builtin_popcountll(pkts_mask_out));
890                 return 0;
891         }
892
893         /*
894          * Pipeline fill
895          *
896          */
897         /* Pipeline stage 0 */
898         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
899                 pkts_mask, f);
900
901         /* Pipeline feed */
902         mbuf10 = mbuf00;
903         mbuf11 = mbuf01;
904         pkt10_index = pkt00_index;
905         pkt11_index = pkt01_index;
906
907         /* Pipeline stage 0 */
908         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
909                 pkts_mask, f);
910
911         /* Pipeline stage 1 */
912         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
913
914         /*
915          * Pipeline run
916          *
917          */
918         for ( ; pkts_mask; ) {
919                 /* Pipeline feed */
920                 bucket20 = bucket10;
921                 bucket21 = bucket11;
922                 mbuf20 = mbuf10;
923                 mbuf21 = mbuf11;
924                 mbuf10 = mbuf00;
925                 mbuf11 = mbuf01;
926                 pkt20_index = pkt10_index;
927                 pkt21_index = pkt11_index;
928                 pkt10_index = pkt00_index;
929                 pkt11_index = pkt01_index;
930
931                 /* Pipeline stage 0 */
932                 lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
933                         mbuf00, mbuf01, pkts, pkts_mask, f);
934
935                 /* Pipeline stage 1 */
936                 lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
937
938                 /* Pipeline stage 2 */
939                 lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
940                         bucket20, bucket21, pkts_mask_out, entries, f);
941         }
942
943         /*
944          * Pipeline flush
945          *
946          */
947         /* Pipeline feed */
948         bucket20 = bucket10;
949         bucket21 = bucket11;
950         mbuf20 = mbuf10;
951         mbuf21 = mbuf11;
952         mbuf10 = mbuf00;
953         mbuf11 = mbuf01;
954         pkt20_index = pkt10_index;
955         pkt21_index = pkt11_index;
956         pkt10_index = pkt00_index;
957         pkt11_index = pkt01_index;
958
959         /* Pipeline stage 1 */
960         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
961
962         /* Pipeline stage 2 */
963         lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
964                 bucket20, bucket21, pkts_mask_out, entries, f);
965
966         /* Pipeline feed */
967         bucket20 = bucket10;
968         bucket21 = bucket11;
969         mbuf20 = mbuf10;
970         mbuf21 = mbuf11;
971         pkt20_index = pkt10_index;
972         pkt21_index = pkt11_index;
973
974         /* Pipeline stage 2 */
975         lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
976                 bucket20, bucket21, pkts_mask_out, entries, f);
977
978         *lookup_hit_mask = pkts_mask_out;
979         RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
980                 __builtin_popcountll(pkts_mask_out));
981         return 0;
982 } /* lookup LRU */
983
984 static int
985 rte_table_hash_lookup_key16_ext(
986         void *table,
987         struct rte_mbuf **pkts,
988         uint64_t pkts_mask,
989         uint64_t *lookup_hit_mask,
990         void **entries)
991 {
992         struct rte_table_hash *f = (struct rte_table_hash *) table;
993         struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
994         struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
995         uint32_t pkt00_index, pkt01_index, pkt10_index;
996         uint32_t pkt11_index, pkt20_index, pkt21_index;
997         uint64_t pkts_mask_out = 0, buckets_mask = 0;
998         struct rte_bucket_4_16 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
999         uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
1000
1001         __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
1002
1003         RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in);
1004
1005         /* Cannot run the pipeline with less than 5 packets */
1006         if (__builtin_popcountll(pkts_mask) < 5) {
1007                 for ( ; pkts_mask; ) {
1008                         struct rte_bucket_4_16 *bucket;
1009                         struct rte_mbuf *mbuf;
1010                         uint32_t pkt_index;
1011
1012                         lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
1013                         lookup1_stage1(mbuf, bucket, f);
1014                         lookup1_stage2_ext(pkt_index, mbuf, bucket,
1015                                 pkts_mask_out, entries, buckets_mask,
1016                                 buckets, keys, f);
1017                 }
1018
1019                 goto grind_next_buckets;
1020         }
1021
1022         /*
1023          * Pipeline fill
1024          *
1025          */
1026         /* Pipeline stage 0 */
1027         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
1028                 pkts_mask, f);
1029
1030         /* Pipeline feed */
1031         mbuf10 = mbuf00;
1032         mbuf11 = mbuf01;
1033         pkt10_index = pkt00_index;
1034         pkt11_index = pkt01_index;
1035
1036         /* Pipeline stage 0 */
1037         lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
1038                 pkts_mask, f);
1039
1040         /* Pipeline stage 1 */
1041         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1042
1043         /*
1044          * Pipeline run
1045          *
1046          */
1047         for ( ; pkts_mask; ) {
1048                 /* Pipeline feed */
1049                 bucket20 = bucket10;
1050                 bucket21 = bucket11;
1051                 mbuf20 = mbuf10;
1052                 mbuf21 = mbuf11;
1053                 mbuf10 = mbuf00;
1054                 mbuf11 = mbuf01;
1055                 pkt20_index = pkt10_index;
1056                 pkt21_index = pkt11_index;
1057                 pkt10_index = pkt00_index;
1058                 pkt11_index = pkt01_index;
1059
1060                 /* Pipeline stage 0 */
1061                 lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
1062                         mbuf00, mbuf01, pkts, pkts_mask, f);
1063
1064                 /* Pipeline stage 1 */
1065                 lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1066
1067                 /* Pipeline stage 2 */
1068                 lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1069                         bucket20, bucket21, pkts_mask_out, entries,
1070                         buckets_mask, buckets, keys, f);
1071         }
1072
1073         /*
1074          * Pipeline flush
1075          *
1076          */
1077         /* Pipeline feed */
1078         bucket20 = bucket10;
1079         bucket21 = bucket11;
1080         mbuf20 = mbuf10;
1081         mbuf21 = mbuf11;
1082         mbuf10 = mbuf00;
1083         mbuf11 = mbuf01;
1084         pkt20_index = pkt10_index;
1085         pkt21_index = pkt11_index;
1086         pkt10_index = pkt00_index;
1087         pkt11_index = pkt01_index;
1088
1089         /* Pipeline stage 1 */
1090         lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
1091
1092         /* Pipeline stage 2 */
1093         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1094                 bucket20, bucket21, pkts_mask_out, entries,
1095                 buckets_mask, buckets, keys, f);
1096
1097         /* Pipeline feed */
1098         bucket20 = bucket10;
1099         bucket21 = bucket11;
1100         mbuf20 = mbuf10;
1101         mbuf21 = mbuf11;
1102         pkt20_index = pkt10_index;
1103         pkt21_index = pkt11_index;
1104
1105         /* Pipeline stage 2 */
1106         lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
1107                 bucket20, bucket21, pkts_mask_out, entries,
1108                 buckets_mask, buckets, keys, f);
1109
1110 grind_next_buckets:
1111         /* Grind next buckets */
1112         for ( ; buckets_mask; ) {
1113                 uint64_t buckets_mask_next = 0;
1114
1115                 for ( ; buckets_mask; ) {
1116                         uint64_t pkt_mask;
1117                         uint32_t pkt_index;
1118
1119                         pkt_index = __builtin_ctzll(buckets_mask);
1120                         pkt_mask = 1LLU << pkt_index;
1121                         buckets_mask &= ~pkt_mask;
1122
1123                         lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
1124                                 entries, buckets_mask_next, f);
1125                 }
1126
1127                 buckets_mask = buckets_mask_next;
1128         }
1129
1130         *lookup_hit_mask = pkts_mask_out;
1131         RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
1132                 __builtin_popcountll(pkts_mask_out));
1133         return 0;
1134 } /* lookup EXT */
1135
1136 static int
1137 rte_table_hash_key16_stats_read(void *table, struct rte_table_stats *stats, int clear)
1138 {
1139         struct rte_table_hash *t = table;
1140
1141         if (stats != NULL)
1142                 memcpy(stats, &t->stats, sizeof(t->stats));
1143
1144         if (clear)
1145                 memset(&t->stats, 0, sizeof(t->stats));
1146
1147         return 0;
1148 }
1149
1150 struct rte_table_ops rte_table_hash_key16_lru_ops = {
1151         .f_create = rte_table_hash_create_key16_lru,
1152         .f_free = rte_table_hash_free_key16_lru,
1153         .f_add = rte_table_hash_entry_add_key16_lru,
1154         .f_delete = rte_table_hash_entry_delete_key16_lru,
1155         .f_add_bulk = NULL,
1156         .f_delete_bulk = NULL,
1157         .f_lookup = rte_table_hash_lookup_key16_lru,
1158         .f_stats = rte_table_hash_key16_stats_read,
1159 };
1160
1161 struct rte_table_ops rte_table_hash_key16_ext_ops = {
1162         .f_create = rte_table_hash_create_key16_ext,
1163         .f_free = rte_table_hash_free_key16_ext,
1164         .f_add = rte_table_hash_entry_add_key16_ext,
1165         .f_delete = rte_table_hash_entry_delete_key16_ext,
1166         .f_add_bulk = NULL,
1167         .f_delete_bulk = NULL,
1168         .f_lookup = rte_table_hash_lookup_key16_ext,
1169         .f_stats = rte_table_hash_key16_stats_read,
1170 };