X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvppinfra%2Fbihash_template.c;h=53ffbf156e69a64c0ad54b8032a5219a7150ab16;hb=882fcfe6f019f341e654daafe5afae9e69b64c50;hp=7c817a205890aa2ed0099ef9bd85ac0699ad77c9;hpb=5e6b9580f202188cbe158368bdbe35c3f39973d7;p=vpp.git diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c index 7c817a20589..53ffbf156e6 100644 --- a/src/vppinfra/bihash_template.c +++ b/src/vppinfra/bihash_template.c @@ -15,30 +15,65 @@ /** @cond DOCUMENTATION_IS_IN_BIHASH_DOC_H */ +static inline void *BV (alloc_aligned) (BVT (clib_bihash) * h, uword nbytes) +{ + uword rv; + + /* Round to an even number of cache lines */ + nbytes += CLIB_CACHE_LINE_BYTES - 1; + nbytes &= ~(CLIB_CACHE_LINE_BYTES - 1); + + rv = h->alloc_arena_next; + h->alloc_arena_next += nbytes; + + if (rv >= (h->alloc_arena + h->alloc_arena_size)) + os_out_of_memory (); + + return (void *) rv; +} + + void BV (clib_bihash_init) (BVT (clib_bihash) * h, char *name, u32 nbuckets, uword memory_size) { - void *oldheap; + uword bucket_size; + int i; nbuckets = 1 << (max_log2 (nbuckets)); h->name = (u8 *) name; h->nbuckets = nbuckets; h->log2_nbuckets = max_log2 (nbuckets); + h->cache_hits = 0; + h->cache_misses = 0; - h->mheap = mheap_alloc (0 /* use VM */ , memory_size); + h->alloc_arena = (uword) clib_mem_vm_alloc (memory_size); + h->alloc_arena_next = h->alloc_arena; + h->alloc_arena_size = memory_size; - oldheap = clib_mem_set_heap (h->mheap); - vec_validate_aligned (h->buckets, nbuckets - 1, CLIB_CACHE_LINE_BYTES); - h->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, - CLIB_CACHE_LINE_BYTES); + bucket_size = nbuckets * sizeof (h->buckets[0]); + h->buckets = BV (alloc_aligned) (h, bucket_size); - clib_mem_set_heap (oldheap); + h->writer_lock = BV (alloc_aligned) (h, CLIB_CACHE_LINE_BYTES); + h->writer_lock[0] = 0; + + for (i = 0; i < nbuckets; i++) + BV (clib_bihash_reset_cache) (h->buckets + i); + + h->fmt_fn = NULL; +} + +void BV (clib_bihash_set_kvp_format_fn) (BVT (clib_bihash) * h, + format_function_t * fmt_fn) +{ + h->fmt_fn = fmt_fn; } void BV (clib_bihash_free) (BVT (clib_bihash) * h) { - mheap_free (h->mheap); + vec_free (h->working_copies); + vec_free (h->freelists); + clib_mem_vm_free ((void *) (h->alloc_arena), h->alloc_arena_size); memset (h, 0, sizeof (*h)); } @@ -47,16 +82,12 @@ BVT (clib_bihash_value) * BV (value_alloc) (BVT (clib_bihash) * h, u32 log2_pages) { BVT (clib_bihash_value) * rv = 0; - void *oldheap; ASSERT (h->writer_lock[0]); if (log2_pages >= vec_len (h->freelists) || h->freelists[log2_pages] == 0) { - oldheap = clib_mem_set_heap (h->mheap); - - vec_validate (h->freelists, log2_pages); - vec_validate_aligned (rv, (1 << log2_pages) - 1, CLIB_CACHE_LINE_BYTES); - clib_mem_set_heap (oldheap); + vec_validate_init_empty (h->freelists, log2_pages, 0); + rv = BV (alloc_aligned) (h, (sizeof (*rv) * (1 << log2_pages))); goto initialize; } rv = h->freelists[log2_pages]; @@ -64,7 +95,6 @@ BV (value_alloc) (BVT (clib_bihash) * h, u32 log2_pages) initialize: ASSERT (rv); - ASSERT (vec_len (rv) == (1 << log2_pages)); /* * Latest gcc complains that the length arg is zero * if we replace (1<writer_lock[0]); - log2_pages = min_log2 (vec_len (v)); - ASSERT (vec_len (h->freelists) > log2_pages); v->next_free = h->freelists[log2_pages]; @@ -90,19 +117,18 @@ BV (value_free) (BVT (clib_bihash) * h, BVT (clib_bihash_value) * v) } static inline void -BV (make_working_copy) (BVT (clib_bihash) * h, clib_bihash_bucket_t * b) +BV (make_working_copy) (BVT (clib_bihash) * h, BVT (clib_bihash_bucket) * b) { BVT (clib_bihash_value) * v; - clib_bihash_bucket_t working_bucket __attribute__ ((aligned (8))); - void *oldheap; + BVT (clib_bihash_bucket) working_bucket __attribute__ ((aligned (8))); BVT (clib_bihash_value) * working_copy; - u32 cpu_number = os_get_cpu_number (); + u32 thread_index = os_get_thread_index (); + int log2_working_copy_length; - if (cpu_number >= vec_len (h->working_copies)) + if (thread_index >= vec_len (h->working_copies)) { - oldheap = clib_mem_set_heap (h->mheap); - vec_validate (h->working_copies, cpu_number); - clib_mem_set_heap (oldheap); + vec_validate (h->working_copies, thread_index); + vec_validate_init_empty (h->working_copy_lengths, thread_index, ~0); } /* @@ -110,20 +136,27 @@ BV (make_working_copy) (BVT (clib_bihash) * h, clib_bihash_bucket_t * b) * updates from multiple threads will not result in sporadic, spurious * lookup failures. */ - working_copy = h->working_copies[cpu_number]; + working_copy = h->working_copies[thread_index]; + log2_working_copy_length = h->working_copy_lengths[thread_index]; h->saved_bucket.as_u64 = b->as_u64; - oldheap = clib_mem_set_heap (h->mheap); - if ((1 << b->log2_pages) > vec_len (working_copy)) + if (b->log2_pages > log2_working_copy_length) { - vec_validate_aligned (working_copy, (1 << b->log2_pages) - 1, - sizeof (u64)); - h->working_copies[cpu_number] = working_copy; + /* + * It's not worth the bookkeeping to free working copies + * if (working_copy) + * clib_mem_free (working_copy); + */ + working_copy = BV (alloc_aligned) + (h, sizeof (working_copy[0]) * (1 << b->log2_pages)); + h->working_copy_lengths[thread_index] = b->log2_pages; + h->working_copies[thread_index] = working_copy; } - _vec_len (working_copy) = 1 << b->log2_pages; - clib_mem_set_heap (oldheap); + /* Lock the bucket... */ + while (BV (clib_bihash_lock_bucket) (b) == 0) + ; v = BV (clib_bihash_get_value) (h, b->offset); @@ -132,22 +165,23 @@ BV (make_working_copy) (BVT (clib_bihash) * h, clib_bihash_bucket_t * b) working_bucket.offset = BV (clib_bihash_get_offset) (h, working_copy); CLIB_MEMORY_BARRIER (); b->as_u64 = working_bucket.as_u64; - h->working_copies[cpu_number] = working_copy; + h->working_copies[thread_index] = working_copy; } static BVT (clib_bihash_value) * BV (split_and_rehash) (BVT (clib_bihash) * h, - BVT (clib_bihash_value) * old_values, u32 new_log2_pages) + BVT (clib_bihash_value) * old_values, u32 old_log2_pages, + u32 new_log2_pages) { BVT (clib_bihash_value) * new_values, *new_v; - int i, j, length; + int i, j, length_in_kvs; new_values = BV (value_alloc) (h, new_log2_pages); - length = vec_len (old_values) * BIHASH_KVP_PER_PAGE; + length_in_kvs = (1 << old_log2_pages) * BIHASH_KVP_PER_PAGE; - for (i = 0; i < length; i++) + for (i = 0; i < length_in_kvs; i++) { u64 new_hash; @@ -173,10 +207,11 @@ BV (split_and_rehash) } } /* Crap. Tell caller to try again */ - BV (value_free) (h, new_values); + BV (value_free) (h, new_values, new_log2_pages); return 0; doublebreak:; } + return new_values; } @@ -184,22 +219,24 @@ static BVT (clib_bihash_value) * BV (split_and_rehash_linear) (BVT (clib_bihash) * h, - BVT (clib_bihash_value) * old_values, u32 new_log2_pages) + BVT (clib_bihash_value) * old_values, u32 old_log2_pages, + u32 new_log2_pages) { BVT (clib_bihash_value) * new_values; - int i, j, new_length; + int i, j, new_length, old_length; new_values = BV (value_alloc) (h, new_log2_pages); new_length = (1 << new_log2_pages) * BIHASH_KVP_PER_PAGE; + old_length = (1 << old_log2_pages) * BIHASH_KVP_PER_PAGE; j = 0; /* Across the old value array */ - for (i = 0; i < vec_len (old_values) * BIHASH_KVP_PER_PAGE; i++) + for (i = 0; i < old_length; i++) { /* Find a free slot in the new linear scan bucket */ for (; j < new_length; j++) { - /* Old value in use? Forget it. */ + /* Old value not in use? Forget it. */ if (BV (clib_bihash_is_free) (&(old_values->kvp[i]))) goto doublebreak; @@ -212,11 +249,12 @@ BV (split_and_rehash_linear) j++; goto doublebreak; } - /* This should never happen... */ - clib_warning ("BUG: linear rehash failed!"); - BV (value_free) (h, new_values); - return 0; } + /* This should never happen... */ + clib_warning ("BUG: linear rehash failed!"); + BV (value_free) (h, new_values, new_log2_pages); + return 0; + doublebreak:; } return new_values; @@ -226,13 +264,13 @@ int BV (clib_bihash_add_del) (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v, int is_add) { u32 bucket_index; - clib_bihash_bucket_t *b, tmp_b; + BVT (clib_bihash_bucket) * b, tmp_b; BVT (clib_bihash_value) * v, *new_v, *save_new_v, *working_copy; int rv = 0; int i, limit; u64 hash, new_hash; - u32 new_log2_pages; - u32 cpu_number = os_get_cpu_number (); + u32 new_log2_pages, old_log2_pages; + u32 thread_index = os_get_thread_index (); int mark_bucket_linear; int resplit_once; @@ -243,11 +281,13 @@ int BV (clib_bihash_add_del) hash >>= h->log2_nbuckets; + tmp_b.linear_search = 0; + while (__sync_lock_test_and_set (h->writer_lock, 1)) ; /* First elt in the bucket? */ - if (b->offset == 0) + if (BV (clib_bihash_bucket_is_empty) (b)) { if (is_add == 0) { @@ -256,14 +296,17 @@ int BV (clib_bihash_add_del) } v = BV (value_alloc) (h, 0); + *v->kvp = *add_v; tmp_b.as_u64 = 0; tmp_b.offset = BV (clib_bihash_get_offset) (h, v); + tmp_b.refcnt = 1; b->as_u64 = tmp_b.as_u64; goto unlock; } + /* Note: this leaves the cache disabled */ BV (make_working_copy) (h, b); v = BV (clib_bihash_get_value) (h, h->saved_bucket.offset); @@ -297,6 +340,7 @@ int BV (clib_bihash_add_del) clib_memcpy (&(v->kvp[i]), add_v, sizeof (*add_v)); CLIB_MEMORY_BARRIER (); b->as_u64 = h->saved_bucket.as_u64; + b->refcnt++; goto unlock; } } @@ -310,8 +354,17 @@ int BV (clib_bihash_add_del) { memset (&(v->kvp[i]), 0xff, sizeof (*(add_v))); CLIB_MEMORY_BARRIER (); - b->as_u64 = h->saved_bucket.as_u64; - goto unlock; + if (PREDICT_TRUE (h->saved_bucket.refcnt > 1)) + { + h->saved_bucket.refcnt -= 1; + b->as_u64 = h->saved_bucket.as_u64; + goto unlock; + } + else + { + tmp_b.as_u64 = 0; + goto free_old_bucket; + } } } rv = -3; @@ -319,27 +372,31 @@ int BV (clib_bihash_add_del) goto unlock; } - new_log2_pages = h->saved_bucket.log2_pages + 1; + old_log2_pages = h->saved_bucket.log2_pages; + new_log2_pages = old_log2_pages + 1; mark_bucket_linear = 0; - working_copy = h->working_copies[cpu_number]; + working_copy = h->working_copies[thread_index]; resplit_once = 0; - new_v = BV (split_and_rehash) (h, working_copy, new_log2_pages); + new_v = BV (split_and_rehash) (h, working_copy, old_log2_pages, + new_log2_pages); if (new_v == 0) { try_resplit: resplit_once = 1; new_log2_pages++; /* Try re-splitting. If that fails, fall back to linear search */ - new_v = BV (split_and_rehash) (h, working_copy, new_log2_pages); + new_v = BV (split_and_rehash) (h, working_copy, old_log2_pages, + new_log2_pages); if (new_v == 0) { mark_linear: new_log2_pages--; /* pinned collisions, use linear search */ new_v = - BV (split_and_rehash_linear) (h, working_copy, new_log2_pages); + BV (split_and_rehash_linear) (h, working_copy, old_log2_pages, + new_log2_pages); mark_bucket_linear = 1; } } @@ -362,8 +419,9 @@ int BV (clib_bihash_add_del) goto expand_ok; } } + /* Crap. Try again */ - BV (value_free) (h, save_new_v); + BV (value_free) (h, save_new_v, new_log2_pages); /* * If we've already doubled the size of the bucket once, * fall back to linear search now. @@ -374,32 +432,38 @@ int BV (clib_bihash_add_del) goto try_resplit; expand_ok: - /* Keep track of the number of linear-scan buckets */ - if (tmp_b.linear_search ^ mark_bucket_linear) - h->linear_buckets += (mark_bucket_linear == 1) ? 1 : -1; - tmp_b.log2_pages = new_log2_pages; tmp_b.offset = BV (clib_bihash_get_offset) (h, save_new_v); tmp_b.linear_search = mark_bucket_linear; + tmp_b.refcnt = h->saved_bucket.refcnt + 1; + +free_old_bucket: + CLIB_MEMORY_BARRIER (); b->as_u64 = tmp_b.as_u64; v = BV (clib_bihash_get_value) (h, h->saved_bucket.offset); - BV (value_free) (h, v); + + BV (value_free) (h, v, h->saved_bucket.log2_pages); unlock: + BV (clib_bihash_reset_cache) (b); + BV (clib_bihash_unlock_bucket) (b); CLIB_MEMORY_BARRIER (); h->writer_lock[0] = 0; return rv; } int BV (clib_bihash_search) - (const BVT (clib_bihash) * h, + (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * search_key, BVT (clib_bihash_kv) * valuep) { u64 hash; u32 bucket_index; BVT (clib_bihash_value) * v; - clib_bihash_bucket_t *b; +#if BIHASH_KVP_CACHE_SIZE > 0 + BVT (clib_bihash_kv) * kvp; +#endif + BVT (clib_bihash_bucket) * b; int i, limit; ASSERT (valuep); @@ -409,9 +473,27 @@ int BV (clib_bihash_search) bucket_index = hash & (h->nbuckets - 1); b = &h->buckets[bucket_index]; - if (b->offset == 0) + if (BV (clib_bihash_bucket_is_empty) (b)) return -1; +#if BIHASH_KVP_CACHE_SIZE > 0 + /* Check the cache, if currently enabled */ + if (PREDICT_TRUE ((b->cache_lru & (1 << 15)) == 0)) + { + limit = BIHASH_KVP_CACHE_SIZE; + kvp = b->cache; + for (i = 0; i < limit; i++) + { + if (BV (clib_bihash_key_compare) (kvp[i].key, search_key->key)) + { + *valuep = kvp[i]; + h->cache_hits++; + return 0; + } + } + } +#endif + hash >>= h->log2_nbuckets; v = BV (clib_bihash_get_value) (h, b->offset); @@ -425,33 +507,82 @@ int BV (clib_bihash_search) if (BV (clib_bihash_key_compare) (v->kvp[i].key, search_key->key)) { *valuep = v->kvp[i]; + +#if BIHASH_KVP_CACHE_SIZE > 0 + u8 cache_slot; + /* Shut off the cache */ + if (BV (clib_bihash_lock_bucket) (b)) + { + cache_slot = BV (clib_bihash_get_lru) (b); + b->cache[cache_slot] = v->kvp[i]; + BV (clib_bihash_update_lru) (b, cache_slot); + + /* Reenable the cache */ + BV (clib_bihash_unlock_bucket) (b); + h->cache_misses++; + } +#endif return 0; } } return -1; } +u8 *BV (format_bihash_lru) (u8 * s, va_list * args) +{ +#if BIHASH_KVP_SIZE > 0 + int i; + BVT (clib_bihash_bucket) * b = va_arg (*args, BVT (clib_bihash_bucket) *); + u16 cache_lru = b->cache_lru; + + s = format (s, "cache %s, order ", cache_lru & (1 << 15) ? "on" : "off"); + + for (i = 0; i < BIHASH_KVP_CACHE_SIZE; i++) + s = format (s, "[%d] ", ((cache_lru >> (3 * i)) & 7)); + + return (s); +#else + return format (s, "cache not configured"); +#endif +} + +void +BV (clib_bihash_update_lru_not_inline) (BVT (clib_bihash_bucket) * b, u8 slot) +{ +#if BIHASH_KVP_SIZE > 0 + BV (clib_bihash_update_lru) (b, slot); +#endif +} + u8 *BV (format_bihash) (u8 * s, va_list * args) { BVT (clib_bihash) * h = va_arg (*args, BVT (clib_bihash) *); int verbose = va_arg (*args, int); - clib_bihash_bucket_t *b; + BVT (clib_bihash_bucket) * b; BVT (clib_bihash_value) * v; int i, j, k; u64 active_elements = 0; + u64 active_buckets = 0; + u64 linear_buckets = 0; + u64 used_bytes; s = format (s, "Hash table %s\n", h->name ? h->name : (u8 *) "(unnamed)"); for (i = 0; i < h->nbuckets; i++) { b = &h->buckets[i]; - if (b->offset == 0) + if (BV (clib_bihash_bucket_is_empty) (b)) { if (verbose > 1) s = format (s, "[%d]: empty\n", i); continue; } + active_buckets++; + + if (b->linear_search) + linear_buckets++; + if (verbose) { s = format (s, "[%d]: heap offset %d, len %d, linear %d\n", i, @@ -472,9 +603,18 @@ u8 *BV (format_bihash) (u8 * s, va_list * args) } if (verbose) { - s = format (s, " %d: %U\n", - j * BIHASH_KVP_PER_PAGE + k, - BV (format_bihash_kvp), &(v->kvp[k])); + if (h->fmt_fn) + { + s = format (s, " %d: %U\n", + j * BIHASH_KVP_PER_PAGE + k, + h->fmt_fn, &(v->kvp[k])); + } + else + { + s = format (s, " %d: %U\n", + j * BIHASH_KVP_PER_PAGE + k, + BV (format_bihash_kvp), &(v->kvp[k])); + } } active_elements++; } @@ -482,10 +622,35 @@ u8 *BV (format_bihash) (u8 * s, va_list * args) } } - s = format (s, " %lld active elements\n", active_elements); + s = format (s, " %lld active elements %lld active buckets\n", + active_elements, active_buckets); s = format (s, " %d free lists\n", vec_len (h->freelists)); - s = format (s, " %d linear search buckets\n", h->linear_buckets); + for (i = 0; i < vec_len (h->freelists); i++) + { + u32 nfree = 0; + BVT (clib_bihash_value) * free_elt; + + free_elt = h->freelists[i]; + while (free_elt) + { + nfree++; + free_elt = free_elt->next_free; + } + + s = format (s, " [len %d] %u free elts\n", 1 << i, nfree); + } + + s = format (s, " %lld linear search buckets\n", linear_buckets); + s = format (s, " %lld cache hits, %lld cache misses\n", + h->cache_hits, h->cache_misses); + used_bytes = h->alloc_arena_next - h->alloc_arena; + s = format (s, + " arena: base %llx, next %llx\n" + " used %lld b (%lld Mbytes) of %lld b (%lld Mbytes)\n", + h->alloc_arena, h->alloc_arena_next, + used_bytes, used_bytes >> 20, + h->alloc_arena_size, h->alloc_arena_size >> 20); return s; } @@ -493,14 +658,14 @@ void BV (clib_bihash_foreach_key_value_pair) (BVT (clib_bihash) * h, void *callback, void *arg) { int i, j, k; - clib_bihash_bucket_t *b; + BVT (clib_bihash_bucket) * b; BVT (clib_bihash_value) * v; void (*fp) (BVT (clib_bihash_kv) *, void *) = callback; for (i = 0; i < h->nbuckets; i++) { b = &h->buckets[i]; - if (b->offset == 0) + if (BV (clib_bihash_bucket_is_empty) (b)) continue; v = BV (clib_bihash_get_value) (h, b->offset);