+#ifndef MAP_HUGE_SHIFT
+#define MAP_HUGE_SHIFT 26
+#endif
+
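+/*
+ * Bump allocator over the reserved arena: hand out the next nbytes,
+ * mapping more pages into the reservation whenever the high-water
+ * mark passes what has been mapped so far.
+ */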
+static inline void *BV (alloc_aligned) (BVT (clib_bihash) * h, uword nbytes)
+{
+ uword rv;
+
+ /* Round up to a whole number of cache lines */
+ nbytes += CLIB_CACHE_LINE_BYTES - 1;
+ nbytes &= ~(CLIB_CACHE_LINE_BYTES - 1);
+
+ rv = alloc_arena_next (h);
+ alloc_arena_next (h) += nbytes;
+
+ if (alloc_arena_next (h) > alloc_arena_size (h))
+ os_out_of_memory ();
+
+ if (alloc_arena_next (h) > alloc_arena_mapped (h))
+ {
+ void *base, *mm;
+ uword alloc = alloc_arena_next (h) - alloc_arena_mapped (h);
+ int mmap_flags = MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS;
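+ /* Ask for huge pages; the log2 page size rides in the high mmap
+ flag bits, shifted by MAP_HUGE_SHIFT as Linux expects */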
+ int mmap_flags_huge = (mmap_flags | MAP_HUGETLB |
+ (BIHASH_LOG2_HUGEPAGE_SIZE << MAP_HUGE_SHIFT));
+
+ /* Grow by at least 25% of what is already mapped, to amortize mmap calls */
+ if (alloc_arena_mapped (h) >> 2 > alloc)
+ alloc = alloc_arena_mapped (h) >> 2;
+
+ /* Round the allocation up to a whole number of huge pages;
+ e.g. a 3 MB request becomes 4 MB with 2 MB huge pages */
+ alloc = round_pow2 (alloc, 1 << BIHASH_LOG2_HUGEPAGE_SIZE);
+
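+ /* Map the new pages immediately after the already-mapped part of
+ the arena; MAP_FIXED is safe because this range was reserved up
+ front in clib_bihash_instantiate */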
+ base = (void *) (uword) (alloc_arena (h) + alloc_arena_mapped (h));
+
+ mm = mmap (base, alloc, PROT_READ | PROT_WRITE, mmap_flags_huge, -1, 0);
+
+ /* Fallback: huge pages may be unavailable; retry with normal pages */
+ if (mm == MAP_FAILED)
+ mm = mmap (base, alloc, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+
+ if (mm == MAP_FAILED)
+ os_out_of_memory ();
+
+ alloc_arena_mapped (h) += alloc;
+ }
+
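+ /* rv is an offset into the arena; convert it to a pointer */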
+ return (void *) (uword) (rv + alloc_arena (h));
+}
+
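+/*
+ * Reserve the arena and carve out the bucket array. With
+ * BIHASH_KVP_AT_BUCKET_LEVEL, one page of key-value pairs is laid
+ * out inline after each bucket header.
+ */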
+static void BV (clib_bihash_instantiate) (BVT (clib_bihash) * h)
+{
+ uword bucket_size;
+
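+ /* Reserve virtual address space for the whole arena up front;
+ alloc_aligned maps pages into it on demand */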
+ alloc_arena (h) = clib_mem_vm_reserve (0, h->memory_size,
+ BIHASH_LOG2_HUGEPAGE_SIZE);
+ if (alloc_arena (h) == ~0)
+ os_out_of_memory ();
+ alloc_arena_next (h) = 0;
+ alloc_arena_size (h) = h->memory_size;
+ alloc_arena_mapped (h) = 0;
+
+ bucket_size = h->nbuckets * sizeof (h->buckets[0]);
+
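+ /* Make room for one inline KVP page per bucket */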
+ if (BIHASH_KVP_AT_BUCKET_LEVEL)
+ bucket_size +=
+ h->nbuckets * BIHASH_KVP_PER_PAGE * sizeof (BVT (clib_bihash_kv));
+
+ h->buckets = BV (alloc_aligned) (h, bucket_size);
+
+ if (BIHASH_KVP_AT_BUCKET_LEVEL)
+ {
+ int i;
+ BVT (clib_bihash_bucket) * b;
+
+ b = h->buckets;
+
+ for (i = 0; i < h->nbuckets; i++)
+ {
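+ /* The bucket's inline KVP page starts right after the bucket header */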
+ b->offset = BV (clib_bihash_get_offset) (h, (void *) (b + 1));
+ b->refcnt = 1;
+ /* Mark all elements free */
+ clib_memset ((b + 1), 0xff,
+ BIHASH_KVP_PER_PAGE * sizeof (BVT (clib_bihash_kv)));
+
+ /* Compute next bucket start address */
+ b = (void *) (((uword) b) + sizeof (*b) +
+ (BIHASH_KVP_PER_PAGE *
+ sizeof (BVT (clib_bihash_kv))));
+ }
+ }
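+ /* Publish: make the initialized buckets visible to other threads
+ before instantiated is set */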
+ CLIB_MEMORY_STORE_BARRIER ();
+ h->instantiated = 1;
+}
+
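+/*
+ * Args-structure flavor of table init: record the table parameters,
+ * add the table to the global list, and instantiate the arena now or
+ * lazily on the first add, depending on BIHASH_LAZY_INSTANTIATE.
+ */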
+void BV (clib_bihash_init2) (BVT (clib_bihash_init2_args) * a)
+{
+ int i;
+ void *oldheap;
+ BVT (clib_bihash) * h = a->h;
+
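+ /* Round the bucket count up to a power of two so bucket selection
+ can mask the hash */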
+ a->nbuckets = 1 << (max_log2 (a->nbuckets));
+
+ h->name = (u8 *) a->name;
+ h->nbuckets = a->nbuckets;
+ h->log2_nbuckets = max_log2 (a->nbuckets);
+ h->memory_size = a->memory_size;
+ h->instantiated = 0;
+ h->fmt_fn = a->fmt_fn;
+
+ alloc_arena (h) = 0;
+
+ /*
+ * Make sure the requested size is reasonable. The max table
+ * size without playing the alignment card is 64 Gbytes.
+ * If someone starts complaining that's not enough, we can shift
+ * the offset by CLIB_LOG2_CACHE_LINE_BYTES...
+ */
+ ASSERT (h->memory_size < (1ULL << BIHASH_BUCKET_OFFSET_BITS));
+
+ /* Add this hash table to the list of all tables, unless it is
+ already there (re-init case) */
+ if (a->dont_add_to_all_bihash_list == 0)
+ {
+ for (i = 0; i < vec_len (clib_all_bihashes); i++)
+ if (clib_all_bihashes[i] == h)
+ goto do_lock;
+ oldheap = clib_all_bihash_set_heap ();
+ vec_add1 (clib_all_bihashes, (void *) h);
+ clib_mem_set_heap (oldheap);
+ }
+
+do_lock:
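+ /* On re-init, free the old lock before allocating a fresh one */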
+ if (h->alloc_lock)
+ clib_mem_free ((void *) h->alloc_lock);
+
+ /*
+ * Set up the lock now, so we can use it to make the first add
+ * thread-safe
+ */
+ h->alloc_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
+ CLIB_CACHE_LINE_BYTES);
+ h->alloc_lock[0] = 0;
+
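+ /* If lazy instantiation is compiled in, defer creating the arena
+ until the first add unless the caller asked for it now */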
+#if BIHASH_LAZY_INSTANTIATE
+ if (a->instantiate_immediately)
+#endif
+ BV (clib_bihash_instantiate) (h);
+}
+
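+/*
+ * Usage sketch (values are hypothetical): zero the args struct,
+ * fill in the table pointer and sizing, then call init2.
+ *
+ * BVT (clib_bihash_init2_args) _a, *a = &_a;
+ * clib_memset (a, 0, sizeof (*a));
+ * a->h = h;
+ * a->name = "example";
+ * a->nbuckets = 1024;
+ * a->memory_size = 64 << 20;
+ * BV (clib_bihash_init2) (a);
+ */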