add cuckoo hash
[vpp.git] / src / vppinfra / cuckoo_template.h
1 /*
2   Copyright (c) 2017 Cisco and/or its affiliates.
3
4   * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15 */
16
17 /*
18  * cuckoo hash implementation based on paper
19  * 'Algorithmic Improvements for Fast Concurrent Cuckoo Hashing'
20  * by Xiaozhou Li, David G. Andersen, Michael Kaminsky, Michael J. Freedman
21  * and their libcuckoo implementation (https://github.com/efficient/libcuckoo)
22  */
23
24 /*
25  * Note: to instantiate the template multiple times in a single file,
26  * #undef __included_cuckoo_template_h__...
27  */
28 #ifndef __included_cuckoo_template_h__
29 #define __included_cuckoo_template_h__
30
31 #include <vppinfra/heap.h>
32 #include <vppinfra/format.h>
33 #include <vppinfra/pool.h>
34 #include <vppinfra/lock.h>
35 #include <vppinfra/error.h>
36 #include <vppinfra/hash.h>
37 #include <vppinfra/cache.h>
38 #include <vppinfra/cuckoo_8_8.h>
39
40 #ifndef CLIB_CUCKOO_TYPE
41 #error CLIB_CUCKOO_TYPE not defined
42 #endif
43
44 #ifndef CLIB_CUCKOO_BFS_MAX_STEPS
45 #error CLIB_CUCKOO_BFS_MAX_STEPS not defined
46 #endif
47
48 #ifndef CLIB_CUCKOO_KVP_PER_BUCKET
49 #error CLIB_CUCKOO_KVP_PER_BUCKET not defined
50 #endif
51
52 #ifndef CLIB_CUCKOO_LOG2_KVP_PER_BUCKET
53 #error CLIB_CUCKOO_LOG2_KVP_PER_BUCKET not defined
54 #endif
55
56 #ifndef CLIB_CUCKOO_BFS_MAX_PATH_LENGTH
57 #error CLIB_CUCKOO_BFS_MAX_PATH_LENGTH not defined
58 #endif
59
60 STATIC_ASSERT (CLIB_CUCKOO_KVP_PER_BUCKET ==
61                (1 << CLIB_CUCKOO_LOG2_KVP_PER_BUCKET),
62                "CLIB_CUCKOO_KVP_PER_BUCKET != (1 << CLIB_CUCKOO_LOG2_KVP_PER_BUCKET");
63
64 #define _cv(a, b) a##b
65 #define __cv(a, b) _cv (a, b)
66 #define CV(a) __cv (a, CLIB_CUCKOO_TYPE)
67
68 #define _cvt(a, b) a##b##_t
69 #define __cvt(a, b) _cvt (a, b)
70 #define CVT(a) __cvt (a, CLIB_CUCKOO_TYPE)
71
72 typedef u64 clib_cuckoo_bucket_aux_t;
73
74 #define CLIB_CUCKOO_USE_COUNT_BIT_WIDTH (1 + CLIB_CUCKOO_LOG2_KVP_PER_BUCKET)
75
76 always_inline u64
77 clib_cuckoo_bucket_aux_get_version (clib_cuckoo_bucket_aux_t aux)
78 {
79   return aux >> (1 + CLIB_CUCKOO_USE_COUNT_BIT_WIDTH);
80 }
81
82 always_inline int
83 clib_cuckoo_bucket_aux_get_use_count (clib_cuckoo_bucket_aux_t aux)
84 {
85   u64 use_count_mask = (1 << CLIB_CUCKOO_USE_COUNT_BIT_WIDTH) - 1;
86   return (aux >> 1) & use_count_mask;
87 }
88
89 always_inline int
90 clib_cuckoo_bucket_aux_get_writer_flag (clib_cuckoo_bucket_aux_t aux)
91 {
92   return aux & 1;
93 }
94
95 always_inline clib_cuckoo_bucket_aux_t
96 clib_cuckoo_bucket_aux_pack (u64 version, int use_count, int writer_flag)
97 {
98   return (version << (1 + CLIB_CUCKOO_USE_COUNT_BIT_WIDTH)) +
99     (use_count << 1) + writer_flag;
100 }
101
102 always_inline clib_cuckoo_bucket_aux_t
103 clib_cuckoo_bucket_aux_set_version (clib_cuckoo_bucket_aux_t aux, u64 version)
104 {
105   int use_count = clib_cuckoo_bucket_aux_get_use_count (aux);
106   int writer_flag = clib_cuckoo_bucket_aux_get_writer_flag (aux);
107   return clib_cuckoo_bucket_aux_pack (version, use_count, writer_flag);
108 }
109
110 always_inline clib_cuckoo_bucket_aux_t
111 clib_cuckoo_bucket_aux_set_use_count (clib_cuckoo_bucket_aux_t aux,
112                                       int use_count)
113 {
114   u64 version = clib_cuckoo_bucket_aux_get_version (aux);
115   int writer_flag = clib_cuckoo_bucket_aux_get_writer_flag (aux);
116   return clib_cuckoo_bucket_aux_pack (version, use_count, writer_flag);
117 }
118
119 always_inline clib_cuckoo_bucket_aux_t
120 clib_cuckoo_bucket_aux_set_writer_flag (clib_cuckoo_bucket_aux_t aux,
121                                         int writer_flag)
122 {
123   u64 version = clib_cuckoo_bucket_aux_get_version (aux);
124   int use_count = clib_cuckoo_bucket_aux_get_use_count (aux);
125   return clib_cuckoo_bucket_aux_pack (version, use_count, writer_flag);
126 }
127
128 #define PATH_BITS_REQ \
129   (CLIB_CUCKOO_BFS_MAX_PATH_LENGTH * CLIB_CUCKOO_LOG2_KVP_PER_BUCKET)
130
131 #if PATH_BITS_REQ <= 8
132 typedef u8 path_data_t;
133 #elif PATH_BITS_REQ <= 16
134 typedef u16 path_data_t;
135 #elif PATH_BITS_REQ <= 32
136 typedef u32 path_data_t;
137 #elif PATH_BITS_REQ <= 64
138 typedef u64 path_data_t;
139 #else
140 #error no suitable datatype for path storage...
141 #endif
142
143 typedef struct
144 {
145   /** bucket where this path begins */
146   u64 start;
147   /** bucket at end of path */
148   u64 bucket;
149   /** length of the path */
150   u8 length;
151   /** holds compressed offsets in buckets along path */
152   path_data_t data;
153 } clib_cuckoo_path_t;
154
155 typedef struct
156 {
157   /** reduced hashes corresponding to elements */
158   u8 reduced_hashes[CLIB_CUCKOO_KVP_PER_BUCKET];
159
160   /** auxiliary data - version, writer flag and used count */
161   volatile clib_cuckoo_bucket_aux_t aux;
162
163   /** cuckoo elements in this bucket */
164     CVT (clib_cuckoo_kv) elts[CLIB_CUCKOO_KVP_PER_BUCKET];
165 } CVT (clib_cuckoo_bucket);
166
167 #define clib_cuckoo_bucket_foreach_idx(var) \
168   for (var = 0; var < CLIB_CUCKOO_KVP_PER_BUCKET; var++)
169
170 #if CLIB_CUCKOO_OPTIMIZE_UNROLL
171 #if CLIB_CUCKOO_KVP_PER_BUCKET == 2
172 #define clib_cuckoo_bucket_foreach_idx_unrolled(var, body) \
173   do                                                       \
174     {                                                      \
175       var = 0;                                             \
176       body;                                                \
177       var = 1;                                             \
178       body;                                                \
179     }                                                      \
180   while (0);
181 #elif CLIB_CUCKOO_KVP_PER_BUCKET == 4
182 #define clib_cuckoo_bucket_foreach_idx_unrolled(var, body) \
183   do                                                       \
184     {                                                      \
185       var = 0;                                             \
186       body;                                                \
187       var = 1;                                             \
188       body;                                                \
189       var = 2;                                             \
190       body;                                                \
191       var = 3;                                             \
192       body;                                                \
193     }                                                      \
194   while (0);
195 #elif CLIB_CUCKOO_KVP_PER_BUCKET == 8
196 #define clib_cuckoo_bucket_foreach_idx_unrolled(var, body) \
197   do                                                       \
198     {                                                      \
199       var = 0;                                             \
200       body;                                                \
201       var = 1;                                             \
202       body;                                                \
203       var = 2;                                             \
204       body;                                                \
205       var = 3;                                             \
206       body;                                                \
207       var = 4;                                             \
208       body;                                                \
209       var = 5;                                             \
210       body;                                                \
211       var = 6;                                             \
212       body;                                                \
213       var = 7;                                             \
214       body;                                                \
215     }                                                      \
216   while (0);
217 #else
218 #define clib_cuckoo_bucket_foreach_idx_unrolled(var, body) \
219   clib_cuckoo_bucket_foreach_idx (var)                     \
220   {                                                        \
221     body;                                                  \
222   }
223 #endif
224 #else /* CLIB_CUCKOO_OPTIMIZE_UNROLL */
225 #define clib_cuckoo_bucket_foreach_idx_unrolled(var, body) \
226   clib_cuckoo_bucket_foreach_idx (var)                     \
227   {                                                        \
228     body;                                                  \
229   }
230 #endif /* CLIB_CUCKOO_OPTIMIZE_UNROLL */
231
232 #define clib_cuckoo_bucket_foreach_elt_index(var, bucket) \
233   for (var = 0; var < CLIB_CUCKOO_KVP_PER_BUCKET; ++i)
234
235 #define clib_cuckoo_foreach_bucket(var, h, body)        \
236   do                                                    \
237     {                                                   \
238       CVT (clib_cuckoo_bucket) *__buckets = h->buckets; \
239       vec_foreach (var, __buckets)                      \
240       {                                                 \
241         body;                                           \
242       }                                                 \
243     }                                                   \
244   while (0)
245
246 typedef struct CV (clib_cuckoo)
247 {
248   /** vector of elements containing key-value pairs and auxiliary data */
249   CVT (clib_cuckoo_bucket) * volatile buckets;
250
251   /** garbage to be freed once its safe to do so .. */
252   CVT (clib_cuckoo_bucket) * *to_be_freed;
253
254   /** hash table name */
255   const char *name;
256
257   /** pool of cuckoo paths (reused when doing bfd search) */
258   clib_cuckoo_path_t *paths;
259
260   /**
261    * vector used as queue when doing cuckoo path searches - holds offsets
262    * in paths pool
263    */
264   uword *bfs_search_queue;
265
266   /**
267    * writer lock - whether this lock is taken or not has zero effect on
268    * readers
269    */
270   clib_spinlock_t writer_lock;
271
272   /** caller context passed to callback with garbage notification */
273   void *garbage_ctx;
274
275   /**
276    * garbage notify function - called when some garbage needs to be collected
277    * in main thread while other threads are stopped
278    */
279   void (*garbage_callback) (struct CV (clib_cuckoo) * h, void *garbage_ctx);
280
281 #if CLIB_CUCKOO_DEBUG_COUNTERS
282   u64 steps_exceeded;
283   u64 bfs_queue_emptied;
284   u64 fast_adds;
285   u64 slow_adds;
286   u64 rehashes;
287 #endif
288
289 } CVT (clib_cuckoo);
290
291 void CV (clib_cuckoo_init) (CVT (clib_cuckoo) * h, const char *name,
292                             u64 nbuckets,
293                             void (*garbage_callback) (CVT (clib_cuckoo) *,
294                                                       void *),
295                             void *garbage_ctx);
296
297 void CV (clib_cuckoo_garbage_collect) (CVT (clib_cuckoo) * h);
298
299 void CV (clib_cuckoo_free) (CVT (clib_cuckoo) * h);
300
301 int CV (clib_cuckoo_add_del) (CVT (clib_cuckoo) * h,
302                               CVT (clib_cuckoo_kv) * add_v, int is_add);
303 int CV (clib_cuckoo_search) (CVT (clib_cuckoo) * h,
304                              CVT (clib_cuckoo_kv) * search_v,
305                              CVT (clib_cuckoo_kv) * return_v);
306
307 void CV (clib_cuckoo_foreach_key_value_pair) (CVT (clib_cuckoo) * h,
308                                               void *callback, void *arg);
309
310 float CV (clib_cuckoo_calc_load) (CVT (clib_cuckoo) * h);
311
312 format_function_t CV (format_cuckoo);
313 format_function_t CV (format_cuckoo_kvp);
314
315 always_inline u8
316 clib_cuckoo_reduce_hash (u64 hash)
317 {
318   u32 v32 = ((u32) hash) ^ ((u32) (hash >> 32));
319   u16 v16 = ((u16) v32) ^ ((u16) (v32 >> 16));
320   u8 v8 = ((u8) v16) ^ ((u8) (v16 >> 8));
321   return v8;
322 }
323
324 always_inline u64
325 clib_cuckoo_get_other_bucket (u64 nbuckets, u64 bucket, u8 reduced_hash)
326 {
327   u64 mask = (nbuckets - 1);
328   return (bucket ^ ((reduced_hash + 1) * 0xc6a4a7935bd1e995)) & mask;
329 }
330
331 always_inline clib_cuckoo_lookup_info_t
332 CV (clib_cuckoo_calc_lookup) (CVT (clib_cuckoo_bucket) * buckets, u64 hash)
333 {
334   clib_cuckoo_lookup_info_t lookup;
335   u64 nbuckets = vec_len (buckets);
336   u64 mask = (nbuckets - 1);
337   lookup.bucket1 = hash & mask;
338 #if CLIB_CUCKOO_OPTIMIZE_PREFETCH
339   CLIB_PREFETCH (vec_elt_at_index (buckets, lookup.bucket1),
340                  sizeof (*buckets), LOAD);
341 #endif
342   u8 reduced_hash = clib_cuckoo_reduce_hash (hash);
343   lookup.bucket2 =
344     clib_cuckoo_get_other_bucket (nbuckets, lookup.bucket1, reduced_hash);
345 #if CLIB_CUCKOO_OPTIMIZE_PREFETCH
346   CLIB_PREFETCH (vec_elt_at_index (buckets, lookup.bucket2),
347                  sizeof (*buckets), LOAD);
348 #endif
349   lookup.reduced_hash = reduced_hash;
350   ASSERT (lookup.bucket1 < nbuckets);
351   ASSERT (lookup.bucket2 < nbuckets);
352   return lookup;
353 }
354
355 /**
356  * search for key within bucket
357  */
358 always_inline int CV (clib_cuckoo_bucket_search) (CVT (clib_cuckoo_bucket) *
359                                                   b,
360                                                   CVT (clib_cuckoo_kv) * kvp,
361                                                   u8 reduced_hash)
362 {
363   clib_cuckoo_bucket_aux_t bucket_aux;
364   u8 writer_flag;
365   do
366     {
367       bucket_aux = b->aux;
368       writer_flag = clib_cuckoo_bucket_aux_get_writer_flag (bucket_aux);
369     }
370   while (PREDICT_FALSE (writer_flag));  /* loop while writer flag is set */
371
372   int i;
373 #if CLIB_CUCKOO_OPTIMIZE_USE_COUNT_LIMITS_SEARCH
374   const int use_count = clib_cuckoo_bucket_aux_get_use_count (bucket_aux);
375 #endif
376   /* *INDENT-OFF* */
377   clib_cuckoo_bucket_foreach_idx_unrolled (i, {
378 #if CLIB_CUCKOO_OPTIMIZE_USE_COUNT_LIMITS_SEARCH
379     if (i > use_count)
380       {
381         break;
382       }
383 #endif
384     if (
385 #if CLIB_CUCKOO_OPTIMIZE_CMP_REDUCED_HASH
386         reduced_hash == b->reduced_hashes[i] &&
387 #endif
388         0 == memcmp (&kvp->key, &b->elts[i].key, sizeof (kvp->key)))
389       {
390         kvp->value = b->elts[i].value;
391         clib_cuckoo_bucket_aux_t bucket_aux2 = b->aux;
392         if (PREDICT_TRUE (clib_cuckoo_bucket_aux_get_version (bucket_aux) ==
393                           clib_cuckoo_bucket_aux_get_version (bucket_aux2)))
394           {
395             /* yay, fresh data */
396             return CLIB_CUCKOO_ERROR_SUCCESS;
397           }
398         else
399           {
400             /* oops, modification detected */
401             return CLIB_CUCKOO_ERROR_AGAIN;
402           }
403       }
404   });
405   /* *INDENT-ON* */
406   return CLIB_CUCKOO_ERROR_NOT_FOUND;
407 }
408
409 always_inline int CV (clib_cuckoo_search_inline) (CVT (clib_cuckoo) * h,
410                                                   CVT (clib_cuckoo_kv) * kvp)
411 {
412   clib_cuckoo_lookup_info_t lookup;
413   int rv;
414
415   u64 hash = CV (clib_cuckoo_hash) (kvp);
416   CVT (clib_cuckoo_bucket) * buckets;
417 again:
418   buckets = h->buckets;
419   lookup = CV (clib_cuckoo_calc_lookup) (buckets, hash);
420   do
421     {
422       rv =
423         CV (clib_cuckoo_bucket_search) (vec_elt_at_index
424                                         (buckets, lookup.bucket1), kvp,
425                                         lookup.reduced_hash);
426     }
427   while (PREDICT_FALSE (CLIB_CUCKOO_ERROR_AGAIN == rv));
428   if (CLIB_CUCKOO_ERROR_SUCCESS == rv)
429     {
430       return CLIB_CUCKOO_ERROR_SUCCESS;
431     }
432
433   rv =
434     CV (clib_cuckoo_bucket_search) (vec_elt_at_index
435                                     (buckets, lookup.bucket2), kvp,
436                                     lookup.reduced_hash);
437   if (PREDICT_FALSE (CLIB_CUCKOO_ERROR_AGAIN == rv))
438     {
439       /*
440        * change to 2nd bucket could bump the item to 1st bucket and the bucket
441        * indexes might not even be valid anymore - restart the search
442        */
443       goto again;
444     }
445   return rv;
446 }
447
448 #endif /* __included_cuckoo_template_h__ */
449
450 /** @endcond */
451
452 /*
453  * fd.io coding-style-patch-verification: ON
454  *
455  * Local Variables:
456  * eval: (c-set-style "gnu")
457  * End:
458  */