#include <vppinfra/heap.h>
#include <vppinfra/format.h>
#include <vppinfra/pool.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/lock.h>
/*
 * BIHASH_TYPE is the suffix the BV()/BVT() macros below paste onto
 * every name, so the including file must define it first.
 */
#ifndef BIHASH_TYPE
#error BIHASH_TYPE not defined
#endif
/*
 * Extra platform plumbing used only by the shared-memory (SVM) flavor
 * of the template.
 */
#ifdef BIHASH_32_64_SVM
#undef HAVE_MEMFD_CREATE
#include <vppinfra/linux/syscall.h>
#include <fcntl.h>
/*
 * fcntl() sealing constants. Define them only when <fcntl.h> did not,
 * so that a libc which already provides them is not redefined.
 */
#ifndef F_LINUX_SPECIFIC_BASE
#define F_LINUX_SPECIFIC_BASE 1024
#endif
#ifndef F_ADD_SEALS
#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
#endif
#ifndef F_SEAL_SHRINK
#define F_SEAL_SHRINK (2)
#endif
/* Max page size 2**16 due to refcount width */
#define BIHASH_FREELIST_LENGTH 17
#endif
+
/*
 * Name-pasting helpers. Each one glues BIHASH_TYPE onto a base name;
 * the two-level indirection forces BIHASH_TYPE to be macro-expanded
 * before the ## operator is applied.
 *
 *   BV(x)  -> x<BIHASH_TYPE>           (functions, struct tags)
 *   BVT(x) -> x<BIHASH_TYPE>_t         (typedef names)
 *   BVS(x) -> struct x<BIHASH_TYPE>    (struct type specifier)
 */
#define _bv(a,b) a##b
#define __bv(a,b) _bv(a,b)
#define BV(a) __bv(a,BIHASH_TYPE)

/* _bvt was missing: __bvt expands to it, so BVT() could not work */
#define _bvt(a,b) a##b##_t
#define __bvt(a,b) _bvt(a,b)
#define BVT(a) __bvt(a,BIHASH_TYPE)

#define _bvs(a,b) struct a##b
#define __bvs(a,b) _bvs(a,b)
#define BVS(a) __bvs(a,BIHASH_TYPE)
+
/*
 * Conversions between pointers and the 8-byte integers used to store
 * them. When _LP64 is zero or undefined the value is narrowed through
 * u32 and OVERFLOW_ASSERT checks that the upper 32 bits are clear;
 * otherwise OVERFLOW_ASSERT expands to nothing.
 *
 * Every expansion is fully parenthesized so the macros behave as a
 * single primary expression (e.g. under sizeof or a postfix operator).
 */
#if _LP64 == 0
#define OVERFLOW_ASSERT(x) ASSERT(((x) & 0xFFFFFFFF00000000ULL) == 0)
#define u64_to_pointer(x) ((void *)(u32)((x)))
#define pointer_to_u64(x) ((u64)(u32)((x)))
#else
#define OVERFLOW_ASSERT(x)
#define u64_to_pointer(x) ((void *)((x)))
#define pointer_to_u64(x) ((u64)((x)))
#endif
+
/*
 * One page of key/value pairs. The same storage doubles as a
 * free-list link: next_free_as_u64 overlays the kvp array.
 */
typedef struct BV (clib_bihash_value)
{
  union
  {
    BVT (clib_bihash_kv) kvp[BIHASH_KVP_PER_PAGE];
    u64 next_free_as_u64;
  };
} BVT (clib_bihash_value);
-/*
- * This is shared across all uses of the template, so it needs
- * a "personal" #include recursion block
- */
-#ifndef __defined_clib_bihash_bucket_t__
-#define __defined_clib_bihash_bucket_t__
+#define BIHASH_BUCKET_OFFSET_BITS 36
+
typedef struct
{
union
{
struct
{
- u32 offset;
- u8 linear_search;
- u8 pad[2];
- u8 log2_pages;
+ u64 offset:BIHASH_BUCKET_OFFSET_BITS;
+ u64 lock:1;
+ u64 linear_search:1;
+ u64 log2_pages:8;
+ u64 refcnt:16;
};
u64 as_u64;
};
-} clib_bihash_bucket_t;
-#endif /* __defined_clib_bihash_bucket_t__ */
-
-typedef struct
+} BVT (clib_bihash_bucket);
+
+STATIC_ASSERT_SIZEOF (BVT (clib_bihash_bucket), sizeof (u64));
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ /*
+ * Backing store allocation. Since bihash manages its own
+ * freelists, we simple dole out memory starting from alloc_arena[alloc_arena_next].
+ */
+ u64 alloc_arena_next; /* Next offset from alloc_arena to allocate, definitely NOT a constant */
+ u64 alloc_arena_size; /* Size of the arena */
+ /* Two SVM pointers stored as 8-byte integers */
+ u64 alloc_lock_as_u64;
+ u64 buckets_as_u64;
+ /* freelist list-head arrays/vectors */
+ u64 freelists_as_u64;
+ u32 nbuckets; /* Number of buckets */
+ /* Set when header valid */
+ volatile u32 ready;
+ u64 pad[2];
+}) BVT (clib_bihash_shared_header);
+/* *INDENT-ON* */
+
+STATIC_ASSERT_SIZEOF (BVT (clib_bihash_shared_header), 8 * sizeof (u64));
+
+typedef
+BVS (clib_bihash)
{
- BVT (clib_bihash_value) * values;
- clib_bihash_bucket_t *buckets;
- volatile u32 *writer_lock;
+ BVT (clib_bihash_bucket) * buckets;
+ volatile u32 *alloc_lock;
- BVT (clib_bihash_value) ** working_copies;
- clib_bihash_bucket_t saved_bucket;
+ BVT (clib_bihash_value) ** working_copies;
+ int *working_copy_lengths;
+ BVT (clib_bihash_bucket) saved_bucket;
u32 nbuckets;
u32 log2_nbuckets;
- u32 linear_buckets;
+ u64 memory_size;
u8 *name;
- BVT (clib_bihash_value) ** freelists;
- void *mheap;
+ u64 *freelists;
+
+#if BIHASH_32_64_SVM
+ BVT (clib_bihash_shared_header) * sh;
+ int memfd;
+#else
+ BVT (clib_bihash_shared_header) sh;
+#endif
+
+ u64 alloc_arena; /* Base of the allocation arena */
+ volatile u8 instantiated;
+
+ /**
+ * A custom format function to print the Key and Value of bihash_key instead of default hexdump
+ */
+ format_function_t *fmt_fn;
+
+ /** Optional statistics-gathering callback */
+#if BIHASH_ENABLE_STATS
+ void (*inc_stats_callback) (BVS (clib_bihash) *, int stat_id, u64 count);
+
+ /** Statistics callback context (e.g. address of stats data structure) */
+ void *inc_stats_context;
+#endif
} BVT (clib_bihash);
+typedef struct
+{
+ BVT (clib_bihash) * h;
+ char *name;
+ u32 nbuckets;
+ uword memory_size;
+ format_function_t *fmt_fn;
+ u8 instantiate_immediately;
+ u8 dont_add_to_all_bihash_list;
+} BVT (clib_bihash_init2_args);
+
+extern void **clib_all_bihashes;
+
/*
 * Arena accessors. The template may be included several times, so any
 * earlier definitions are dropped first. Only the path to the shared
 * header and the "ready" magic differ between the two flavors; the
 * arena base always lives in the table itself.
 */
#undef alloc_arena_next
#undef alloc_arena_size
#undef alloc_arena
#undef CLIB_BIHASH_READY_MAGIC

#if BIHASH_32_64_SVM
/* shared header reached through a pointer */
#define alloc_arena_next(h) (((h)->sh)->alloc_arena_next)
#define alloc_arena_size(h) (((h)->sh)->alloc_arena_size)
#define CLIB_BIHASH_READY_MAGIC 0xFEEDFACE
#else
/* shared header embedded in the table */
#define alloc_arena_next(h) ((h)->sh.alloc_arena_next)
#define alloc_arena_size(h) ((h)->sh.alloc_arena_size)
#define CLIB_BIHASH_READY_MAGIC 0
#endif

#define alloc_arena(h) ((h)->alloc_arena)
/*
 * Statistic identifiers. Guarded by BIHASH_STAT_IDS so the list and
 * the enum are emitted once even when the template is included for
 * several BIHASH_TYPEs.
 */
#ifndef BIHASH_STAT_IDS
#define BIHASH_STAT_IDS 1

#define foreach_bihash_stat                     \
_(alloc_add)                                    \
_(add)                                          \
_(split_add)                                    \
_(replace)                                      \
_(update)                                       \
_(del)                                          \
_(del_free)                                     \
_(linear)                                       \
_(resplit)                                      \
_(working_copy_lost)                            \
_(splits)                       /* must be last */

typedef enum
{
  /* one BIHASH_STAT_<name> enumerator per list entry */
#define _(a) BIHASH_STAT_##a,
  foreach_bihash_stat
#undef _
    BIHASH_STAT_N_STATS,
} BVT (clib_bihash_stat_id);
#endif /* BIHASH_STAT_IDS */
+
+static inline void BV (clib_bihash_increment_stat) (BVT (clib_bihash) * h,
+ int stat_id, u64 count)
+{
+#if BIHASH_ENABLE_STATS
+ if (PREDICT_FALSE (h->inc_stats_callback != 0))
+ h->inc_stats_callback (h, stat_id, count);
+#endif
+}
+
#if BIHASH_ENABLE_STATS
/**
 * Install the statistics callback and its context on a table.
 *
 * @param h    table
 * @param cb   callback stored in h->inc_stats_callback
 * @param ctx  opaque pointer stored in h->inc_stats_context
 */
static inline void BV (clib_bihash_set_stats_callback)
  (BVT (clib_bihash) * h, void (*cb) (BVT (clib_bihash) *, int, u64),
   void *ctx)
{
  h->inc_stats_callback = cb;
  h->inc_stats_context = ctx;
}
#endif
+
+
+static inline void BV (clib_bihash_alloc_lock) (BVT (clib_bihash) * h)
+{
+ while (__atomic_test_and_set (h->alloc_lock, __ATOMIC_ACQUIRE))
+ CLIB_PAUSE ();
+}
+
+static inline void BV (clib_bihash_alloc_unlock) (BVT (clib_bihash) * h)
+{
+ __atomic_clear (h->alloc_lock, __ATOMIC_RELEASE);
+}
+
+static inline void BV (clib_bihash_lock_bucket) (BVT (clib_bihash_bucket) * b)
+{
+ BVT (clib_bihash_bucket) unlocked_bucket, locked_bucket;
+
+ do
+ {
+ locked_bucket.as_u64 = unlocked_bucket.as_u64 = b->as_u64;
+ unlocked_bucket.lock = 0;
+ locked_bucket.lock = 1;
+ CLIB_PAUSE ();
+ }
+ while (__atomic_compare_exchange_n (&b->as_u64, &unlocked_bucket.as_u64,
+ locked_bucket.as_u64, 1 /* weak */ ,
+ __ATOMIC_ACQUIRE,
+ __ATOMIC_ACQUIRE) == 0);
+}
+
+static inline void BV (clib_bihash_unlock_bucket)
+ (BVT (clib_bihash_bucket) * b)
+{
+ CLIB_MEMORY_BARRIER ();
+ b->lock = 0;
+}
+
+static inline void *BV (clib_bihash_get_value) (BVT (clib_bihash) * h,
uword offset)
{
- u8 *hp = h->mheap;
+ u8 *hp = (u8 *) (uword) alloc_arena (h);
u8 *vp = hp + offset;
return (void *) vp;
}
-static inline uword BV (clib_bihash_get_offset) (const BVT (clib_bihash) * h,
+static inline int BV (clib_bihash_bucket_is_empty)
+ (BVT (clib_bihash_bucket) * b)
+{
+ /* Note: applied to locked buckets, test offset */
+ return b->offset == 0;
+}
+
+static inline uword BV (clib_bihash_get_offset) (BVT (clib_bihash) * h,
void *v)
{
u8 *hp, *vp;
- hp = (u8 *) h->mheap;
+ hp = (u8 *) (uword) alloc_arena (h);
vp = (u8 *) v;
- ASSERT ((vp - hp) < 0x100000000ULL);
return vp - hp;
}
void BV (clib_bihash_init)
(BVT (clib_bihash) * h, char *name, u32 nbuckets, uword memory_size);
+void BV (clib_bihash_init2) (BVT (clib_bihash_init2_args) * a);
+
+#if BIHASH_32_64_SVM
+void BV (clib_bihash_master_init_svm)
+ (BVT (clib_bihash) * h, char *name, u32 nbuckets, u64 memory_size);
+void BV (clib_bihash_slave_init_svm)
+ (BVT (clib_bihash) * h, char *name, int fd);
+#endif
+
+void BV (clib_bihash_set_kvp_format_fn) (BVT (clib_bihash) * h,
+ format_function_t * fmt_fn);
+
void BV (clib_bihash_free) (BVT (clib_bihash) * h);
int BV (clib_bihash_add_del) (BVT (clib_bihash) * h,
BVT (clib_bihash_kv) * add_v, int is_add);
-int BV (clib_bihash_search) (const BVT (clib_bihash) * h,
+int BV (clib_bihash_add_or_overwrite_stale) (BVT (clib_bihash) * h,
+ BVT (clib_bihash_kv) * add_v,
+ int (*is_stale_cb) (BVT
+ (clib_bihash_kv)
+ *, void *),
+ void *arg);
+int BV (clib_bihash_search) (BVT (clib_bihash) * h,
BVT (clib_bihash_kv) * search_v,
BVT (clib_bihash_kv) * return_v);
+#define BIHASH_WALK_STOP 0
+#define BIHASH_WALK_CONTINUE 1
+
+typedef
+ int (*BV (clib_bihash_foreach_key_value_pair_cb)) (BVT (clib_bihash_kv) *,
+ void *);
void BV (clib_bihash_foreach_key_value_pair) (BVT (clib_bihash) * h,
- void *callback, void *arg);
+ BV
+ (clib_bihash_foreach_key_value_pair_cb)
+ cb, void *arg);
/* Helpers shared by all template instances (not name-pasted) */
void *clib_all_bihash_set_heap (void);
void clib_bihash_copied (void *dst, void *src);

/* Per-type format functions */
format_function_t BV (format_bihash);
format_function_t BV (format_bihash_kvp);
format_function_t BV (format_bihash_lru);
-
-static inline int BV (clib_bihash_search_inline)
- (const BVT (clib_bihash) * h, BVT (clib_bihash_kv) * kvp)
+static inline int BV (clib_bihash_search_inline_with_hash)
+ (BVT (clib_bihash) * h, u64 hash, BVT (clib_bihash_kv) * key_result)
{
- u64 hash;
u32 bucket_index;
BVT (clib_bihash_value) * v;
- clib_bihash_bucket_t *b;
+ BVT (clib_bihash_bucket) * b;
int i, limit;
- hash = BV (clib_bihash_hash) (kvp);
+ if (PREDICT_FALSE (alloc_arena (h) == 0))
+ return -1;
bucket_index = hash & (h->nbuckets - 1);
b = &h->buckets[bucket_index];
- if (b->offset == 0)
+ if (PREDICT_FALSE (BV (clib_bihash_bucket_is_empty) (b)))
return -1;
+ if (PREDICT_FALSE (b->lock))
+ {
+ volatile BVT (clib_bihash_bucket) * bv = b;
+ while (bv->lock)
+ CLIB_PAUSE ();
+ }
+
hash >>= h->log2_nbuckets;
v = BV (clib_bihash_get_value) (h, b->offset);
for (i = 0; i < limit; i++)
{
- if (BV (clib_bihash_key_compare) (v->kvp[i].key, kvp->key))
+ if (BV (clib_bihash_key_compare) (v->kvp[i].key, key_result->key))
{
- *kvp = v->kvp[i];
+ *key_result = v->kvp[i];
return 0;
}
}
return -1;
}
-static inline int BV (clib_bihash_search_inline_2)
- (const BVT (clib_bihash) * h,
- BVT (clib_bihash_kv) * search_key, BVT (clib_bihash_kv) * valuep)
+static inline int BV (clib_bihash_search_inline)
+ (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * key_result)
{
u64 hash;
+
+ hash = BV (clib_bihash_hash) (key_result);
+
+ return BV (clib_bihash_search_inline_with_hash) (h, hash, key_result);
+}
+
+static inline void BV (clib_bihash_prefetch_bucket)
+ (BVT (clib_bihash) * h, u64 hash)
+{
+ u32 bucket_index;
+ BVT (clib_bihash_bucket) * b;
+
+ bucket_index = hash & (h->nbuckets - 1);
+ b = &h->buckets[bucket_index];
+
+ CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, READ);
+}
+
+static inline void BV (clib_bihash_prefetch_data)
+ (BVT (clib_bihash) * h, u64 hash)
+{
u32 bucket_index;
BVT (clib_bihash_value) * v;
- clib_bihash_bucket_t *b;
+ BVT (clib_bihash_bucket) * b;
+
+ if (PREDICT_FALSE (alloc_arena (h) == 0))
+ return;
+
+ bucket_index = hash & (h->nbuckets - 1);
+ b = &h->buckets[bucket_index];
+
+ if (PREDICT_FALSE (BV (clib_bihash_bucket_is_empty) (b)))
+ return;
+
+ hash >>= h->log2_nbuckets;
+ v = BV (clib_bihash_get_value) (h, b->offset);
+
+ v += (b->linear_search == 0) ? hash & ((1 << b->log2_pages) - 1) : 0;
+
+ CLIB_PREFETCH (v, CLIB_CACHE_LINE_BYTES, READ);
+}
+
+static inline int BV (clib_bihash_search_inline_2_with_hash)
+ (BVT (clib_bihash) * h,
+ u64 hash, BVT (clib_bihash_kv) * search_key, BVT (clib_bihash_kv) * valuep)
+{
+ u32 bucket_index;
+ BVT (clib_bihash_value) * v;
+ BVT (clib_bihash_bucket) * b;
int i, limit;
ASSERT (valuep);
- hash = BV (clib_bihash_hash) (search_key);
+ if (PREDICT_FALSE (alloc_arena (h) == 0))
+ return -1;
bucket_index = hash & (h->nbuckets - 1);
b = &h->buckets[bucket_index];
- if (b->offset == 0)
+ if (PREDICT_FALSE (BV (clib_bihash_bucket_is_empty) (b)))
return -1;
+ if (PREDICT_FALSE (b->lock))
+ {
+ volatile BVT (clib_bihash_bucket) * bv = b;
+ while (bv->lock)
+ CLIB_PAUSE ();
+ }
+
hash >>= h->log2_nbuckets;
v = BV (clib_bihash_get_value) (h, b->offset);
return -1;
}
+static inline int BV (clib_bihash_search_inline_2)
+ (BVT (clib_bihash) * h,
+ BVT (clib_bihash_kv) * search_key, BVT (clib_bihash_kv) * valuep)
+{
+ u64 hash;
+
+ hash = BV (clib_bihash_hash) (search_key);
+
+ return BV (clib_bihash_search_inline_2_with_hash) (h, hash, search_key,
+ valuep);
+}
+
#endif /* __included_bihash_template_h__ */