+#ifndef MAP_HUGE_SHIFT
+/* Bit position of the huge-page-size encoding inside mmap() flags;
+   not exported by older libc headers, so provide the kernel's value. */
+#define MAP_HUGE_SHIFT 26
+#endif
+
+#ifndef BIIHASH_MIN_ALLOC_LOG2_PAGES
+/* log2 of the number of value-sized units grabbed per heap chunk in
+   alloc_aligned() below (default chunk = 1024 "pages").
+   NOTE(review): "BIIHASH" (double I) looks like a typo for "BIHASH" --
+   kept as-is because out-of-tree builds may already override this name. */
+#define BIIHASH_MIN_ALLOC_LOG2_PAGES 10
+#endif
+
+#ifndef BIHASH_USE_HEAP
+/* Non-zero: carve table storage from a clib heap (chunk list);
+   zero: use a privately reserved/mmap'd virtual arena. */
+#define BIHASH_USE_HEAP 1
+#endif
+
+/*
+ * Allocate nbytes of bihash storage, rounded up to a cache-line multiple.
+ *
+ * Two backing strategies, selected at compile time by BIHASH_USE_HEAP:
+ *  - heap mode: bump-allocate from per-table chunks obtained from the
+ *    table's clib heap (h->heap), kept on the h->chunks doubly-linked list;
+ *  - arena mode: bump-allocate offsets out of a reserved virtual arena,
+ *    mmap()ing additional (huge)pages on demand.
+ *
+ * Never returns on failure: calls os_out_of_memory() instead.
+ * Returned memory is NOT zeroed here; callers initialize it.
+ */
+static inline void *BV (alloc_aligned) (BVT (clib_bihash) * h, uword nbytes)
+{
+  uword rv;
+
+  /* Round to an even number of cache lines */
+  nbytes = round_pow2 (nbytes, CLIB_CACHE_LINE_BYTES);
+
+  if (BIHASH_USE_HEAP)
+    {
+      /* NB: this 'rv' (a pointer) intentionally shadows the outer uword
+         'rv' (an arena offset); the heap path always returns from within
+         this block. */
+      void *rv, *oldheap;
+      uword page_sz = sizeof (BVT (clib_bihash_value));
+      /* Default chunk size: 2^BIIHASH_MIN_ALLOC_LOG2_PAGES value-sized
+         units, rounded to a cache-line multiple. */
+      uword chunk_sz = round_pow2 (page_sz << BIIHASH_MIN_ALLOC_LOG2_PAGES,
+                                   CLIB_CACHE_LINE_BYTES);
+
+      BVT (clib_bihash_alloc_chunk) * chunk = h->chunks;
+
+      /* if there is enough space in the current chunk */
+      if (chunk && chunk->bytes_left >= nbytes)
+        {
+          /* Fast path: bump-allocate from the head chunk. */
+          rv = chunk->next_alloc;
+          chunk->bytes_left -= nbytes;
+          chunk->next_alloc += nbytes;
+          return rv;
+        }
+
+      /* requested allocation is bigger than chunk size */
+      if (nbytes >= chunk_sz)
+        {
+          /* Give the oversize request its own exactly-sized chunk.
+             bytes_left stays 0 (memset below), so it is never reused. */
+          oldheap = clib_mem_set_heap (h->heap);
+          chunk = clib_mem_alloc_aligned (nbytes + sizeof (*chunk),
+                                          CLIB_CACHE_LINE_BYTES);
+          clib_mem_set_heap (oldheap);
+          clib_memset_u8 (chunk, 0, sizeof (*chunk));
+          chunk->size = nbytes;
+          rv = (u8 *) (chunk + 1);
+          if (h->chunks)
+            {
+              /* take 2nd place in the list */
+              /* (keeps the current head -- which may still have free
+                 space -- as the fast-path chunk) */
+              chunk->next = h->chunks->next;
+              chunk->prev = h->chunks;
+              h->chunks->next = chunk;
+              if (chunk->next)
+                chunk->next->prev = chunk;
+            }
+          else
+            h->chunks = chunk;
+
+          return rv;
+        }
+
+      /* Head chunk exhausted (or none yet): allocate a fresh default-size
+         chunk, make it the new list head, and serve the request from it. */
+      oldheap = clib_mem_set_heap (h->heap);
+      chunk = clib_mem_alloc_aligned (chunk_sz + sizeof (*chunk),
+                                      CLIB_CACHE_LINE_BYTES);
+      clib_mem_set_heap (oldheap);
+      chunk->size = chunk_sz;
+      chunk->bytes_left = chunk_sz;
+      chunk->next_alloc = (u8 *) (chunk + 1);
+      chunk->next = h->chunks;
+      chunk->prev = 0;
+      if (chunk->next)
+        chunk->next->prev = chunk;
+      h->chunks = chunk;
+      rv = chunk->next_alloc;
+      chunk->bytes_left -= nbytes;
+      chunk->next_alloc += nbytes;
+      return rv;
+    }
+
+  /* Arena mode: 'rv' is an offset into the reserved virtual arena. */
+  rv = alloc_arena_next (h);
+  alloc_arena_next (h) += nbytes;
+
+  if (alloc_arena_next (h) > alloc_arena_size (h))
+    os_out_of_memory ();
+
+  /* Map more backing memory if the bump pointer ran past what is mapped. */
+  if (alloc_arena_next (h) > alloc_arena_mapped (h))
+    {
+      /* Another deliberate shadow: this 'rv' holds mmap()'s result only. */
+      void *base, *rv;
+      uword alloc = alloc_arena_next (h) - alloc_arena_mapped (h);
+      int mmap_flags = MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS;
+      int mmap_flags_huge = (mmap_flags | MAP_HUGETLB | MAP_LOCKED |
+                             BIHASH_LOG2_HUGEPAGE_SIZE << MAP_HUGE_SHIFT);
+
+      /* new allocation is 25% of existing one */
+      /* (grow geometrically to amortize the number of mmap calls) */
+      if (alloc_arena_mapped (h) >> 2 > alloc)
+        alloc = alloc_arena_mapped (h) >> 2;
+
+      /* round allocation to page size */
+      alloc = round_pow2 (alloc, 1 << BIHASH_LOG2_HUGEPAGE_SIZE);
+
+      /* MAP_FIXED inside the range reserved at instantiate time. */
+      base = (void *) (uword) (alloc_arena (h) + alloc_arena_mapped (h));
+
+      /* Try huge pages first; mlock() verifies they are really backed
+         (detects hugepage overcommit up front). */
+      rv = mmap (base, alloc, PROT_READ | PROT_WRITE, mmap_flags_huge, -1, 0);
+
+      /* fallback - maybe we are still able to allocate normal pages */
+      if (rv == MAP_FAILED || mlock (base, alloc) != 0)
+        rv = mmap (base, alloc, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+
+      if (rv == MAP_FAILED)
+        os_out_of_memory ();
+
+      alloc_arena_mapped (h) += alloc;
+    }
+
+  /* Convert the arena offset into a usable pointer. */
+  return (void *) (uword) (rv + alloc_arena (h));
+}
+
+/*
+ * Allocate and zero the table's bucket array (and, when
+ * BIHASH_KVP_AT_BUCKET_LEVEL, one inline page of KVPs directly behind
+ * each bucket), then publish the table as instantiated.  Until this
+ * runs, the hash table owns no storage.
+ */
+static void BV (clib_bihash_instantiate) (BVT (clib_bihash) * h)
+{
+  uword bucket_size;
+
+  if (BIHASH_USE_HEAP)
+    {
+      /* Heap mode: remember the creating heap and use its base address
+         as the origin for offset arithmetic; chunks come later, on
+         demand, from alloc_aligned(). */
+      h->heap = clib_mem_get_heap ();
+      h->chunks = 0;
+      alloc_arena (h) = (uword) clib_mem_get_heap_base (h->heap);
+    }
+  else
+    {
+      /* Arena mode: reserve (but do not yet map) the entire virtual
+         range; alloc_aligned() maps pages into it as needed. */
+      alloc_arena (h) = clib_mem_vm_reserve (0, h->memory_size,
+                                             BIHASH_LOG2_HUGEPAGE_SIZE);
+      if (alloc_arena (h) == ~0)
+        os_out_of_memory ();
+      alloc_arena_next (h) = 0;
+      alloc_arena_size (h) = h->memory_size;
+      alloc_arena_mapped (h) = 0;
+    }
+
+  bucket_size = h->nbuckets * sizeof (h->buckets[0]);
+
+  /* Inline KVP storage: one page of KVs immediately after each bucket. */
+  if (BIHASH_KVP_AT_BUCKET_LEVEL)
+    bucket_size +=
+      h->nbuckets * BIHASH_KVP_PER_PAGE * sizeof (BVT (clib_bihash_kv));
+
+  h->buckets = BV (alloc_aligned) (h, bucket_size);
+  clib_memset_u8 (h->buckets, 0, bucket_size);
+
+  if (BIHASH_KVP_AT_BUCKET_LEVEL)
+    {
+      int i, j;
+      BVT (clib_bihash_bucket) * b;
+
+      b = h->buckets;
+
+      /* Wire every bucket to its inline KVP page and mark the page free.
+         refcnt = 1 keeps the inline page from ever being returned. */
+      for (i = 0; i < h->nbuckets; i++)
+        {
+          BVT (clib_bihash_kv) * v;
+          b->offset = BV (clib_bihash_get_offset) (h, (void *) (b + 1));
+          b->refcnt = 1;
+          /* Mark all elements free */
+          v = (void *) (b + 1);
+          for (j = 0; j < BIHASH_KVP_PER_PAGE; j++)
+            {
+              BV (clib_bihash_mark_free) (v);
+              v++;
+            }
+          /* Compute next bucket start address */
+          b = (void *) (((uword) b) + sizeof (*b) +
+                        (BIHASH_KVP_PER_PAGE *
+                         sizeof (BVT (clib_bihash_kv))));
+        }
+    }
+  /* Make all bucket stores visible before flipping the flag, so a
+     concurrent reader never sees instantiated == 1 with unset buckets. */
+  CLIB_MEMORY_STORE_BARRIER ();
+  h->instantiated = 1;
+}
+
+/*
+ * Table constructor: record geometry and formatting callbacks from 'a',
+ * register the table on the global clib_all_bihashes list (unless opted
+ * out), and create the allocation lock.  Bucket storage itself is created
+ * lazily by clib_bihash_instantiate() unless a->instantiate_immediately
+ * is set (or BIHASH_LAZY_INSTANTIATE is disabled).  May be called on an
+ * already-initialized table: registration and lock setup are idempotent.
+ */
+void BV (clib_bihash_init2) (BVT (clib_bihash_init2_args) * a)
+{
+  int i;
+  void *oldheap;
+  BVT (clib_bihash) * h = a->h;
+
+  /* Bucket count must be a power of two (hash-masked indexing). */
+  a->nbuckets = 1 << (max_log2 (a->nbuckets));
+
+  h->name = (u8 *) a->name;
+  h->nbuckets = a->nbuckets;
+  h->log2_nbuckets = max_log2 (a->nbuckets);
+  /* Heap mode grows on demand; memory_size only sizes the private arena. */
+  h->memory_size = BIHASH_USE_HEAP ? 0 : a->memory_size;
+  h->instantiated = 0;
+  h->dont_add_to_all_bihash_list = a->dont_add_to_all_bihash_list;
+  h->fmt_fn = BV (format_bihash);
+  h->kvp_fmt_fn = a->kvp_fmt_fn;
+
+  alloc_arena (h) = 0;
+
+  /*
+   * Make sure the requested size is rational. The max table
+   * size without playing the alignment card is 64 Gbytes.
+   * If someone starts complaining that's not enough, we can shift
+   * the offset by CLIB_LOG2_CACHE_LINE_BYTES...
+   */
+  /* BUGFIX: the limit applies to the private-arena backend only.  The
+     original guard was 'if (BIHASH_USE_HEAP)', but in heap mode
+     memory_size was just forced to 0 above, so the ASSERT was vacuously
+     true and checked nothing.  Assert in the non-heap case instead. */
+  if (BIHASH_USE_HEAP == 0)
+    ASSERT (h->memory_size < (1ULL << BIHASH_BUCKET_OFFSET_BITS));
+
+  /* Add this hash table to the list */
+  if (a->dont_add_to_all_bihash_list == 0)
+    {
+      /* Skip registration if this table is already on the list
+         (init2 may be re-invoked on a live table). */
+      for (i = 0; i < vec_len (clib_all_bihashes); i++)
+        if (clib_all_bihashes[i] == h)
+          goto do_lock;
+      oldheap = clib_all_bihash_set_heap ();
+      vec_add1 (clib_all_bihashes, (void *) h);
+      clib_mem_set_heap (oldheap);
+    }
+
+do_lock:
+  /* Re-initialization: release any previous lock before allocating anew. */
+  if (h->alloc_lock)
+    clib_mem_free ((void *) h->alloc_lock);
+
+  /*
+   * Set up the lock now, so we can use it to make the first add
+   * thread-safe
+   */
+  h->alloc_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
+                                          CLIB_CACHE_LINE_BYTES);
+  h->alloc_lock[0] = 0;
+
+#if BIHASH_LAZY_INSTANTIATE
+  if (a->instantiate_immediately)
+#endif
+    BV (clib_bihash_instantiate) (h);
+}
+