2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 Copyright (c) 2005 Eliot Dresselhaus
18 Permission is hereby granted, free of charge, to any person obtaining
19 a copy of this software and associated documentation files (the
20 "Software"), to deal in the Software without restriction, including
21 without limitation the rights to use, copy, modify, merge, publish,
22 distribute, sublicense, and/or sell copies of the Software, and to
23 permit persons to whom the Software is furnished to do so, subject to
24 the following conditions:
26 The above copyright notice and this permission notice shall be
27 included in all copies or substantial portions of the Software.
29 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
33 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
34 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
35 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38 /* This is all stolen from Bob Jenkins and reworked for clib. Thanks
39 once again Bob for the great work. */
42 ------------------------------------------------------------------------------
43 perfect.c: code to generate code for a hash for perfect hashing.
44 (c) Bob Jenkins, September 1996, December 1999
45 You may use this code in any way you wish, and it is free. No warranty.
46 I hereby place this in the public domain.
47 Source is http://burtleburtle.net/bob/c/perfect.c
49 This generates a minimal perfect hash function. That means, given a
50 set of n keys, this determines a hash function that maps each of
51 those keys into a value in 0..n-1 with no collisions.
53 The perfect hash function first uses a normal hash function on the key
54 to determine (a,b) such that the pair (a,b) is distinct for all
55 keys, then it computes a^scramble[tab[b]] to get the final perfect hash.
56 tab[] is an array of 1-byte values and scramble[] is a 256-term array of
57 2-byte or 4-byte values. If there are n keys, the length of tab[] is a
58 power of two between n/3 and n.
60 I found the idea of computing distinct (a,b) values in "Practical minimal
61 perfect hash functions for large databases", Fox, Heath, Chen, and Daoud,
62 Communications of the ACM, January 1992. They found the idea in Chichelli
63 (CACM Jan 1980). Beyond that, our methods differ.
65 The key is hashed to a pair (a,b) where a in 0..*alen*-1 and b in
66 0..*blen*-1. A fast hash function determines both a and b
67 simultaneously. Any decent hash function is likely to produce
68 hashes so that (a,b) is distinct for all pairs. I try the hash
69 using different values of *salt* until all pairs are distinct.
71 The final hash is (a XOR scramble[tab[b]]). *scramble* is a
72 predetermined mapping of 0..255 into 0..smax-1. *tab* is an
73 array that we fill in in such a way as to make the hash perfect.
75 First we fill in all values of *tab* that are used by more than one
76 key. We try all possible values for each position until one works.
78 This leaves m unmapped keys and m values that something could hash to.
79 If you treat unmapped keys as lefthand nodes and unused hash values
80 as righthand nodes, and draw a line connecting each key to each hash
81 value it could map to, you get a bipartite graph. We attempt to
82 find a perfect matching in this graph. If we succeed, we have
83 determined a perfect hash for the whole set of keys.
85 *scramble* is used because (a^tab[i]) clusters keys around *a*.
86 ------------------------------------------------------------------------------
89 #include <vppinfra/bitmap.h>
90 #include <vppinfra/format.h>
91 #include <vppinfra/phash.h>
92 #include <vppinfra/random.h>
95 init_keys_direct_u32 (phash_main_t * pm)
97 int n_keys_left, b_mask, a_shift;
101 seed = pm->hash_seed;
102 b_mask = (1 << pm->b_bits) - 1;
103 a_shift = BITS (seed) - pm->a_bits;
106 n_keys_left = vec_len (pm->keys);
108 while (n_keys_left >= 2)
115 x0 += (u32) k[0].key;
116 x1 += (u32) k[1].key;
118 hash_mix32 (x0, y0, z0);
119 hash_mix32 (x1, y1, z1);
121 k[0].b = z0 & b_mask;
122 k[1].b = z1 & b_mask;
123 k[0].a = z0 >> a_shift;
124 k[1].a = z1 >> a_shift;
125 if (PREDICT_FALSE (a_shift >= BITS (z0)))
132 if (n_keys_left >= 1)
139 hash_mix32 (x0, y0, z0);
141 k[0].b = z0 & b_mask;
142 k[0].a = z0 >> a_shift;
143 if (PREDICT_FALSE (a_shift >= BITS (z0)))
152 init_keys_direct_u64 (phash_main_t * pm)
154 int n_keys_left, b_mask, a_shift;
158 seed = pm->hash_seed;
159 b_mask = (1 << pm->b_bits) - 1;
160 a_shift = BITS (seed) - pm->a_bits;
163 n_keys_left = vec_len (pm->keys);
165 while (n_keys_left >= 2)
172 x0 += (u64) k[0].key;
173 x1 += (u64) k[1].key;
175 hash_mix64 (x0, y0, z0);
176 hash_mix64 (x1, y1, z1);
178 k[0].b = z0 & b_mask;
179 k[1].b = z1 & b_mask;
180 k[0].a = z0 >> a_shift;
181 k[1].a = z1 >> a_shift;
182 if (PREDICT_FALSE (a_shift >= BITS (z0)))
189 if (n_keys_left >= 1)
196 hash_mix64 (x0, y0, z0);
198 k[0].b = z0 & b_mask;
199 k[0].a = z0 >> a_shift;
200 if (PREDICT_FALSE (a_shift >= BITS (z0)))
209 init_keys_indirect_u32 (phash_main_t * pm)
211 int n_keys_left, b_mask, a_shift;
215 seed = pm->hash_seed;
216 b_mask = (1 << pm->b_bits) - 1;
217 a_shift = BITS (seed) - pm->a_bits;
220 n_keys_left = vec_len (pm->keys);
222 while (n_keys_left >= 2)
228 pm->key_seed2 (pm->private, k[0].key, k[1].key, &xyz);
239 hash_mix32 (x0, y0, z0);
240 hash_mix32 (x1, y1, z1);
242 k[0].b = z0 & b_mask;
243 k[1].b = z1 & b_mask;
244 k[0].a = z0 >> a_shift;
245 k[1].a = z1 >> a_shift;
246 if (PREDICT_FALSE (a_shift >= BITS (z0)))
253 if (n_keys_left >= 1)
258 pm->key_seed1 (pm->private, k[0].key, &xyz);
265 hash_mix32 (x0, y0, z0);
267 k[0].b = z0 & b_mask;
268 k[0].a = z0 >> a_shift;
269 if (PREDICT_FALSE (a_shift >= BITS (z0)))
278 init_keys_indirect_u64 (phash_main_t * pm)
280 int n_keys_left, b_mask, a_shift;
284 seed = pm->hash_seed;
285 b_mask = (1 << pm->b_bits) - 1;
286 a_shift = BITS (seed) - pm->a_bits;
289 n_keys_left = vec_len (pm->keys);
291 while (n_keys_left >= 2)
297 pm->key_seed2 (pm->private, k[0].key, k[1].key, &xyz);
308 hash_mix64 (x0, y0, z0);
309 hash_mix64 (x1, y1, z1);
311 k[0].b = z0 & b_mask;
312 k[1].b = z1 & b_mask;
313 k[0].a = z0 >> a_shift;
314 k[1].a = z1 >> a_shift;
315 if (PREDICT_FALSE (a_shift >= BITS (z0)))
322 if (n_keys_left >= 1)
327 pm->key_seed1 (pm->private, k[0].key, &xyz);
334 hash_mix64 (x0, y0, z0);
336 k[0].b = z0 & b_mask;
337 k[0].a = z0 >> a_shift;
338 if (PREDICT_FALSE (a_shift >= BITS (z0)))
347 * insert keys into table according to key->b
348 * check if the initial hash might work
351 init_tabb (phash_main_t * pm)
359 if (pm->flags & PHASH_FLAG_MIX64)
360 init_keys_indirect_u64 (pm);
362 init_keys_indirect_u32 (pm);
366 if (pm->flags & PHASH_FLAG_MIX64)
367 init_keys_direct_u64 (pm);
369 init_keys_direct_u32 (pm);
373 vec_resize (pm->tabb, 1 << pm->b_bits);
375 vec_foreach (tb, pm->tabb) phash_tabb_free (tb);
377 /* Two keys with the same (a,b) guarantees a collision */
379 vec_foreach (k, pm->keys)
383 tb = pm->tabb + k->b;
385 for (i = 0; i < vec_len (ki); i++)
387 l = pm->keys + ki[i];
390 /* Given keys are supposed to be unique. */
392 && pm->key_is_equal (pm->private, l->key, k->key))
393 clib_error ("duplicate keys");
399 vec_add1 (tb->keys, k - pm->keys);
403 return no_collisions;
406 /* Try to apply an augmenting list */
408 apply (phash_main_t * pm, u32 tail, u32 rollback)
412 phash_tabq_t *q_child, *q_parent;
413 u32 ki, i, hash, child, parent;
414 u32 stabb; /* scramble[tab[b]] */
419 /* Walk from child to parent until root is reached. */
420 for (child = tail - 1; child; child = parent)
422 q_child = &pm->tabq[child];
423 parent = q_child->parent_q;
424 q_parent = &pm->tabq[parent];
426 /* find parent's list of siblings */
427 ASSERT (q_parent->b_q < vec_len (pm->tabb));
428 pb = pm->tabb + q_parent->b_q;
430 /* erase old hash values */
431 stabb = pm->scramble[pb->val_b];
432 for (i = 0; i < vec_len (pb->keys); i++)
438 /* Erase hash for all of child's siblings. */
439 if (ki == pm->tabh[hash])
443 /* change pb->val_b, which will change the hashes of all parent siblings */
444 pb->val_b = rollback ? q_child->oldval_q : q_child->newval_q;
446 /* set new hash values */
447 stabb = pm->scramble[pb->val_b];
448 for (i = 0; i < vec_len (pb->keys); i++)
457 continue; /* root never had a hash */
459 else if (pm->tabh[hash] != ~0)
461 /* Very rare case: roll back any changes. */
462 apply (pm, tail, /* rollback changes */ 1);
476 -------------------------------------------------------------------------------
477 augment(): Add item to the mapping.
479 Construct a spanning tree of *b*s with *item* as root, where each
480 parent can have all its hashes changed (by some new val_b) with
481 at most one collision, and each child is the b of that collision.
483 I got this from Tarjan's "Data Structures and Network Algorithms". The
484 path from *item* to a *b* that can be remapped with no collision is
485 an "augmenting path". Change values of tab[b] along the path so that
486 the unmapped key gets mapped and the unused hash value gets used.
488 Assuming 1 key per b, if m out of n hash values are still unused,
489 you should expect the transitive closure to cover n/m nodes before
490 an unused node is found. Sum(i=1..n)(n/i) is about nlogn, so expect
491 this approach to take about nlogn time to map all single-key b's.
492 -------------------------------------------------------------------------------
494 high_water: a value higher than any now in tabb[].water_b.
497 augment (phash_main_t * pm, u32 b_root, u32 high_water)
499 u32 q; /* current position walking through the queue */
500 u32 tail; /* tail of the queue. 0 is the head of the queue. */
501 phash_tabb_t *tb_parent, *tb_child, *tb_hit;
502 phash_key_t *k_parent, *k_child;
503 u32 v, v_limit; /* possible value for myb->val_b */
507 1 << ((pm->flags & PHASH_FLAG_USE_SCRAMBLE) ? pm->s_bits : BITS (u8));
509 /* Initialize the root of the spanning tree. */
510 pm->tabq[0].b_q = b_root;
513 /* construct the spanning tree by walking the queue, add children to tail */
514 for (q = 0; q < tail; q++)
516 if ((pm->flags & PHASH_FLAG_FAST_MODE)
517 && !(pm->flags & PHASH_FLAG_MINIMAL) && q == 1)
518 break; /* don't do transitive closure */
520 tb_parent = pm->tabb + pm->tabq[q].b_q; /* the b for this node */
522 for (v = 0; v < v_limit; v++)
526 for (i = 0; i < vec_len (tb_parent->keys); i++)
528 ki = tb_parent->keys[i];
529 k_parent = pm->keys + ki;
531 hash = k_parent->a ^ pm->scramble[v];
532 if (hash >= pm->hash_max)
533 goto try_next_v; /* hash code out of bounds => we can't use this v */
539 k_child = pm->keys + ki;
540 tb_hit = pm->tabb + k_child->b;
544 /* Hit at most one child b. */
545 if (tb_child == tb_hit)
550 /* Remember this as child b. */
552 if (tb_hit->water_b == high_water)
553 goto try_next_v; /* already explored */
557 /* tb_parent with v has either one or zero collisions. */
559 /* add childb to the queue of reachable things */
561 tb_child->water_b = high_water;
562 pm->tabq[tail].b_q = tb_child ? tb_child - pm->tabb : ~0;
563 pm->tabq[tail].newval_q = v; /* how to make parent (myb) use this hash */
564 pm->tabq[tail].oldval_q = tb_parent->val_b; /* need this for rollback */
565 pm->tabq[tail].parent_q = q;
568 /* Found a v with no collisions? */
571 /* Try to apply the augmenting path. */
572 if (apply (pm, tail, /* rollback */ 0))
573 return 1; /* success, item was added to the perfect hash */
574 --tail; /* don't know how to handle such a child! */
585 static phash_tabb_t *sort_tabb;
588 phash_tabb_compare (void *a1, void *a2)
592 phash_tabb_t *tb1, *tb2;
594 tb1 = sort_tabb + b1[0];
595 tb2 = sort_tabb + b2[0];
597 return ((int) vec_len (tb2->keys) - (int) vec_len (tb1->keys));
600 /* find a mapping that makes this a perfect hash */
602 perfect (phash_main_t * pm)
606 /* clear any state from previous attempts */
607 if (vec_bytes (pm->tabh))
608 memset (pm->tabh, ~0, vec_bytes (pm->tabh));
610 vec_validate (pm->tabb_sort, vec_len (pm->tabb) - 1);
611 for (i = 0; i < vec_len (pm->tabb_sort); i++)
612 pm->tabb_sort[i] = i;
614 sort_tabb = pm->tabb;
616 vec_sort_with_function (pm->tabb_sort, phash_tabb_compare);
618 /* In descending order by number of keys, map all *b*s */
619 for (i = 0; i < vec_len (pm->tabb_sort); i++)
621 if (!augment (pm, pm->tabb_sort[i], i + 1))
625 /* Success! We found a perfect hash of all keys into 0..nkeys-1. */
631 * Find initial a_bits = log2 (a_max), b_bits = log2 (b_max).
632 * Initial a_max and b_max values were found empirically. Some factors:
634 * If s_max<256 there is no scramble, so tab[b] needs to cover 0..s_max-1.
636 * a_max and b_max must be powers of 2 because the values in 0..a_max-1 and
637 * 0..b_max-1 are produced by applying a bitmask to the initial hash function.
639 * a_max must be less than s_max, in fact less than n_keys, because otherwise
640 * there would often be no i such that a^scramble[i] is in 0..n_keys-1 for
641 * all the *a*s associated with a given *b*, so there would be no legal
642 * value to assign to tab[b]. This only matters when we're doing a minimal
645 * It takes around 800 trials to find distinct (a,b) with nkey=s_max*(5/8)
646 * and a_max*b_max = s_max*s_max/32.
648 * Values of b_max less than s_max/4 never work, and s_max/2 always works.
650 * We want b_max as small as possible because it is the number of bytes in
651 * the huge array we must create for the perfect hash.
653 * When nkey <= s_max*(5/8), b_max=s_max/4 works much more often with
654 * a_max=s_max/8 than with a_max=s_max/4. Above s_max*(5/8), b_max=s_max/4
655 * doesn't seem to care whether a_max=s_max/8 or a_max=s_max/4. I think it
656 * has something to do with 5/8 = 1/8 * 5. For example examine 80000,
657 * 85000, and 90000 keys with different values of a_max. This only matters
658 * if we're doing a minimal perfect hash.
660 * When a_max*b_max <= 1<<U32BITS, the initial hash must produce one integer.
661 * Bigger than that it must produce two integers, which increases the
662 * cost of the hash per character hashed.
665 guess_initial_parameters (phash_main_t * pm)
667 u32 s_bits, s_max, a_max, b_max, n_keys;
668 int is_minimal, is_fast_mode;
669 const u32 b_max_use_scramble_threshold = 4096;
671 is_minimal = (pm->flags & PHASH_FLAG_MINIMAL) != 0;
672 is_fast_mode = (pm->flags & PHASH_FLAG_FAST_MODE) != 0;
674 n_keys = vec_len (pm->keys);
675 s_bits = max_log2 (n_keys);
695 * Was: a_max = is_minimal ? s_max / 2 : s_max;
696 * However, we know that is_minimal must be true, so the
697 * if-arm of the ternary expression is always executed.
716 else if (s_max / 4 < b_max_use_scramble_threshold)
718 if (n_keys <= s_max * 0.52)
719 a_max = b_max = s_max / 8;
721 a_max = b_max = s_max / 4;
725 a_max = ((n_keys <= s_max * (5.0 / 8.0)) ? s_max / 8 :
727 s_max * (3.0 / 4.0)) ? s_max / 4 : s_max / 2);
728 b_max = s_max / 4; /* always give the small size a shot */
733 a_max = b_max = s_max / 2;
736 a_max = s_max / 8; /* never require the multiword hash */
737 b_max = (n_keys <= s_max * (5.0 / 8.0)) ? s_max / 4 : s_max / 2;
742 a_max = (n_keys <= s_max * (5.0 / 8.0)) ? s_max / 8 : s_max / 2;
743 b_max = (n_keys <= s_max * (5.0 / 8.0)) ? s_max / 4 : s_max / 2;
746 /* Just find a hash as quick as possible.
747 We'll be thrashing virtual memory at this size. */
748 a_max = b_max = s_max / 2;
754 /* Non-minimal perfect hash. */
755 if (is_fast_mode && n_keys > s_max * 0.8)
761 if (s_max / 4 <= (1 << 14))
762 b_max = ((n_keys <= s_max * 0.56) ? s_max / 32 :
763 (n_keys <= s_max * 0.74) ? s_max / 16 : s_max / 8);
765 b_max = ((n_keys <= s_max * 0.6) ? s_max / 16 :
766 (n_keys <= s_max * 0.8) ? s_max / 8 : s_max / 4);
768 if (is_fast_mode && b_max < s_max / 8)
777 ASSERT (s_max == (1 << s_bits));
778 ASSERT (is_pow2 (a_max));
779 ASSERT (is_pow2 (b_max));
781 pm->a_bits = min_log2 (a_max);
782 pm->b_bits = min_log2 (b_max);
783 if (b_max >= b_max_use_scramble_threshold)
784 pm->flags |= PHASH_FLAG_USE_SCRAMBLE;
787 /* compute p(x), where p is a permutation of 0..(1<<nbits)-1 */
788 /* permute(0)=0. This is intended and useful. */
790 scramble_permute (u32 x, u32 nbits)
793 int mask = (1 << nbits) - 1;
794 int const2 = 1 + nbits / 2;
795 int const3 = 1 + nbits / 3;
796 int const4 = 1 + nbits / 4;
797 int const5 = 1 + nbits / 5;
798 for (i = 0; i < 20; i++)
800 x = (x + (x << const2)) & mask;
801 x = (x ^ (x >> const3));
802 x = (x + (x << const4)) & mask;
803 x = (x ^ (x >> const5));
808 /* initialize scramble[] with distinct random values in 0..smax-1 */
810 scramble_init (phash_main_t * pm)
814 /* fill scramble[] with distinct random integers in 0..smax-1 */
815 vec_validate (pm->scramble, (1 << (pm->s_bits < 8 ? 8 : pm->s_bits)) - 1);
816 for (i = 0; i < vec_len (pm->scramble); i++)
817 pm->scramble[i] = scramble_permute (i, pm->s_bits);
820 /* Try to find a perfect hash function. */
822 phash_find_perfect_hash (phash_main_t * pm)
824 clib_error_t *error = 0;
825 u32 max_a_bits, n_tries_this_a_b, want_minimal;
827 /* guess initial values for s_max, a_max and b_max */
828 guess_initial_parameters (pm);
830 want_minimal = pm->flags & PHASH_FLAG_MINIMAL;
834 pm->a_bits = pm->s_bits;
836 max_a_bits = pm->s_bits - want_minimal;
840 pm->hash_max = want_minimal ? vec_len (pm->keys) : (1 << pm->s_bits);
844 /* Allocate working memory. */
846 vec_validate_init_empty (pm->tabh, pm->hash_max - 1, ~0);
848 vec_validate (pm->tabq, 1 << pm->b_bits);
850 /* Actually find the perfect hash */
851 n_tries_this_a_b = 0;
854 /* Choose random hash seeds until keys become unique. */
855 pm->hash_seed = random_u64 (&pm->random_seed);
859 /* Found unique (A, B). */
861 /* Hash may already be perfect. */
865 pm->n_perfect_calls++;
872 /* Keep trying with different seed value. */
874 if (n_tries_this_a_b < 2048)
877 /* Try to put more bits in (A,B) to make distinct (A,B) more likely */
878 if (pm->a_bits < max_a_bits)
880 else if (pm->b_bits < pm->s_bits)
883 vec_resize (pm->tabb, vec_len (pm->tabb));
884 vec_resize (pm->tabq, vec_len (pm->tabq));
889 /* Can't increase (A, B) any more, so try increasing S. */
895 /* Construct mapping table for hash lookups. */
900 pm->a_shift = ((pm->flags & PHASH_FLAG_MIX64) ? 64 : 32) - pm->a_bits;
901 pm->b_mask = (1 << pm->b_bits) - 1;
903 vec_resize (pm->tab, vec_len (pm->tabb));
904 for (b = 0; b < vec_len (pm->tabb); b++)
906 v = pm->tabb[b].val_b;
908 /* Apply scramble now for small enough value of b_bits. */
909 if (!(pm->flags & PHASH_FLAG_USE_SCRAMBLE))
916 /* Free working memory. */
917 phash_main_free_working_memory (pm);
922 /* Slow hash computation for general keys. */
924 phash_hash_slow (phash_main_t * pm, uword key)
928 if (pm->flags & PHASH_FLAG_MIX64)
932 x0 = y0 = z0 = pm->hash_seed;
937 pm->key_seed1 (pm->private, key, &xyz);
945 hash_mix64 (x0, y0, z0);
947 a = z0 >> pm->a_shift;
954 x0 = y0 = z0 = pm->hash_seed;
959 pm->key_seed1 (pm->private, key, &xyz);
967 hash_mix32 (x0, y0, z0);
969 a = z0 >> pm->a_shift;
974 if (pm->flags & PHASH_FLAG_USE_SCRAMBLE)
979 /* Verify that perfect hash is perfect. */
981 phash_validate (phash_main_t * pm)
984 uword *unique_bitmap = 0;
985 clib_error_t *error = 0;
987 vec_foreach (k, pm->keys)
989 uword h = phash_hash_slow (pm, k->key);
991 if (h >= pm->hash_max)
993 error = clib_error_return (0, "hash out of range %wd", h);
997 if (clib_bitmap_get (unique_bitmap, h))
999 error = clib_error_return (0, "hash non-unique");
1003 unique_bitmap = clib_bitmap_ori (unique_bitmap, h);
1007 clib_bitmap_free (unique_bitmap);
1012 * fd.io coding-style-patch-verification: ON
1015 * eval: (c-set-style "gnu")