From e17834d1f1a5f11568dbff287d28ed2f11c7d423 Mon Sep 17 00:00:00 2001 From: Vladimir Zhigulin Date: Fri, 12 Sep 2025 16:42:34 +0200 Subject: [PATCH] vppinfra: fix bihash atomicity and garbage values - Use atomics for buckets with proper memory orders - Use generation per bucket - Atomically check that generation not changed after search in bihash. This covers most of corner cases It is not ideal, since generation field is limited to some bits. But other corner cases are fixed. Change-Id: I4c264ba73472a7ddaab544a2fe6c42e61df7b223 Type: fix Signed-off-by: Vladimir Zhigulin --- src/vppinfra/bihash_template.h | 98 ++++++++++++++++++++++++---------- src/vppinfra/bihash_template_inlines.h | 16 +++--- 2 files changed, 77 insertions(+), 37 deletions(-) diff --git a/src/vppinfra/bihash_template.h b/src/vppinfra/bihash_template.h index 8f5879b4634..1dd43cd765a 100644 --- a/src/vppinfra/bihash_template.h +++ b/src/vppinfra/bihash_template.h @@ -91,7 +91,8 @@ typedef struct u64 lock:1; u64 linear_search:1; u64 log2_pages:8; - u64 refcnt:16; + u64 refcnt : 13; /* limit kvp per bucket */ + u64 generation : 5; /* incremented each update */ }; u64 as_u64; }; @@ -298,6 +299,7 @@ static inline void BV (clib_bihash_unlock_bucket) (BVT (clib_bihash_bucket) * b) { b->lock = 0; + clib_atomic_store_rel_n (&b->as_u64, b->as_u64); } static inline void *BV (clib_bihash_get_value) (BVT (clib_bihash) * h, @@ -403,12 +405,36 @@ BV (clib_bihash_get_bucket) (BVT (clib_bihash) * h, u64 hash) #endif } +static inline int +BV (clib_bihash_search_need_retry) (BVT (clib_bihash_bucket) * b, + BVT (clib_bihash_bucket) * savedb) +{ + BVT (clib_bihash_bucket) tmpb; + tmpb.as_u64 = clib_atomic_load_relax_n (&b->as_u64); + if (PREDICT_TRUE (tmpb.as_u64 == savedb->as_u64)) + return 0; + + savedb->as_u64 = tmpb.as_u64; + return 1; +} + +static inline void +BV (clib_bihash_wait_bucket_lock) (BVT (clib_bihash_bucket) * b, + BVT (clib_bihash_bucket) * savedb) +{ + while (PREDICT_FALSE (savedb->lock)) + { + CLIB_PAUSE (); + savedb->as_u64 = clib_atomic_load_acq_n (&b->as_u64); + } +} + static inline int BV (clib_bihash_search_inline_with_hash) (BVT (clib_bihash) * h, u64 hash, BVT (clib_bihash_kv) * key_result) { BVT (clib_bihash_kv) rv; BVT (clib_bihash_value) * v; - BVT (clib_bihash_bucket) * b; + BVT (clib_bihash_bucket) * b, localb; int i, limit; static const BVT (clib_bihash_bucket) mask = { @@ -422,28 +448,25 @@ static inline int BV (clib_bihash_search_inline_with_hash) #endif b = BV (clib_bihash_get_bucket) (h, hash); + localb.as_u64 = clib_atomic_load_acq_n (&b->as_u64); - if (PREDICT_FALSE (BV (clib_bihash_bucket_is_empty) (b))) - return -1; +bucket_changed_retry: + BV (clib_bihash_wait_bucket_lock) (b, &localb); - if (PREDICT_FALSE (b->lock)) - { - volatile BVT (clib_bihash_bucket) * bv = b; - while (bv->lock) - CLIB_PAUSE (); - } + if (PREDICT_FALSE (BV (clib_bihash_bucket_is_empty) (&localb))) + return -1; - v = BV (clib_bihash_get_value) (h, b->offset); + v = BV (clib_bihash_get_value) (h, localb.offset); /* If the bucket has unresolvable collisions, use linear search */ limit = BIHASH_KVP_PER_PAGE; - if (PREDICT_FALSE (b->as_u64 & mask.as_u64)) + if (PREDICT_FALSE (localb.as_u64 & mask.as_u64)) { - if (PREDICT_FALSE (b->linear_search)) - limit <<= b->log2_pages; + if (PREDICT_FALSE (localb.linear_search)) + limit <<= localb.log2_pages; else - v += extract_bits (hash, h->log2_nbuckets, b->log2_pages); + v += extract_bits (hash, h->log2_nbuckets, localb.log2_pages); } for (i = 0; i < limit; i++) @@ -453,10 +476,20 @@ static inline int BV (clib_bihash_search_inline_with_hash) rv = v->kvp[i]; if (BV (clib_bihash_is_free) (&rv)) return -1; + *key_result = rv; + + if (PREDICT_FALSE (BV (clib_bihash_search_need_retry) (b, &localb))) + goto bucket_changed_retry; + return 0; } } + + /* Could have been reading state in the middle of split operation */ + if (PREDICT_FALSE (BV (clib_bihash_search_need_retry) (b, &localb))) + goto bucket_changed_retry; + return -1; } @@ -509,7 +542,7 @@ static inline int BV (clib_bihash_search_inline_2_with_hash) { BVT (clib_bihash_kv) rv; BVT (clib_bihash_value) * v; - BVT (clib_bihash_bucket) * b; + BVT (clib_bihash_bucket) * b, localb; int i, limit; static const BVT (clib_bihash_bucket) mask = { @@ -525,28 +558,25 @@ static inline int BV (clib_bihash_search_inline_2_with_hash) #endif b = BV (clib_bihash_get_bucket) (h, hash); + localb.as_u64 = clib_atomic_load_acq_n (&b->as_u64); - if (PREDICT_FALSE (BV (clib_bihash_bucket_is_empty) (b))) - return -1; +bucket_changed_retry: + BV (clib_bihash_wait_bucket_lock) (b, &localb); - if (PREDICT_FALSE (b->lock)) - { - volatile BVT (clib_bihash_bucket) * bv = b; - while (bv->lock) - CLIB_PAUSE (); - } + if (PREDICT_FALSE (BV (clib_bihash_bucket_is_empty) (&localb))) + return -1; - v = BV (clib_bihash_get_value) (h, b->offset); + v = BV (clib_bihash_get_value) (h, localb.offset); /* If the bucket has unresolvable collisions, use linear search */ limit = BIHASH_KVP_PER_PAGE; - if (PREDICT_FALSE (b->as_u64 & mask.as_u64)) + if (PREDICT_FALSE (localb.as_u64 & mask.as_u64)) { - if (PREDICT_FALSE (b->linear_search)) - limit <<= b->log2_pages; + if (PREDICT_FALSE (localb.linear_search)) + limit <<= localb.log2_pages; else - v += extract_bits (hash, h->log2_nbuckets, b->log2_pages); + v += extract_bits (hash, h->log2_nbuckets, localb.log2_pages); } for (i = 0; i < limit; i++) @@ -556,10 +586,20 @@ static inline int BV (clib_bihash_search_inline_2_with_hash) rv = v->kvp[i]; if (BV (clib_bihash_is_free) (&rv)) return -1; + *valuep = rv; + + if (PREDICT_FALSE (BV (clib_bihash_search_need_retry) (b, &localb))) + goto bucket_changed_retry; + return 0; } } + + /* Could have been reading state in the middle of split operation */ + if (PREDICT_FALSE (BV (clib_bihash_search_need_retry) (b, &localb))) + goto bucket_changed_retry; + return -1; } diff --git a/src/vppinfra/bihash_template_inlines.h b/src/vppinfra/bihash_template_inlines.h index 046dd89f178..5411ee78556 100644 --- a/src/vppinfra/bihash_template_inlines.h +++ b/src/vppinfra/bihash_template_inlines.h @@ -307,8 +307,7 @@ BV (make_working_copy) (BVT (clib_bihash) * h, BVT (clib_bihash_bucket) * b) clib_memcpy_fast (working_copy, v, sizeof (*v) * (1 << b->log2_pages)); working_bucket.as_u64 = b->as_u64; working_bucket.offset = BV (clib_bihash_get_offset) (h, working_copy); - CLIB_MEMORY_STORE_BARRIER (); - b->as_u64 = working_bucket.as_u64; + clib_atomic_store_rel_n (&b->as_u64, working_bucket.as_u64); h->working_copies[thread_index] = working_copy; } @@ -450,6 +449,7 @@ BV (clib_bihash_add_del_inline_with_hash) ( b = BV (clib_bihash_get_bucket) (h, hash); BV (clib_bihash_lock_bucket) (b); + /* other writers will not touch this bucket */ /* First elt in the bucket? */ if (BIHASH_KVP_AT_BUCKET_LEVEL == 0 && BV (clib_bihash_bucket_is_empty) (b)) @@ -464,13 +464,13 @@ BV (clib_bihash_add_del_inline_with_hash) ( v = BV (value_alloc) (h, 0); BV (clib_bihash_alloc_unlock) (h); - *v->kvp = *add_v; + v->kvp[0] = *add_v; tmp_b.as_u64 = 0; /* clears bucket lock */ tmp_b.offset = BV (clib_bihash_get_offset) (h, v); tmp_b.refcnt = 1; - CLIB_MEMORY_STORE_BARRIER (); - b->as_u64 = tmp_b.as_u64; /* unlocks the bucket */ + clib_atomic_store_rel_n (&b->as_u64, + tmp_b.as_u64); /* unlocks the bucket */ BV (clib_bihash_increment_stat) (h, BIHASH_STAT_alloc_add, 1); return (0); @@ -554,7 +554,7 @@ BV (clib_bihash_add_del_inline_with_hash) ( if (is_stale_cb (&(v->kvp[i]), is_stale_arg)) { clib_memcpy_fast (&(v->kvp[i]), add_v, sizeof (*add_v)); - CLIB_MEMORY_STORE_BARRIER (); + b->generation++; BV (clib_bihash_unlock_bucket) (b); BV (clib_bihash_increment_stat) (h, BIHASH_STAT_replace, 1); return (0); @@ -705,6 +705,7 @@ expand_ok: tmp_b.log2_pages = new_log2_pages; tmp_b.offset = BV (clib_bihash_get_offset) (h, save_new_v); tmp_b.linear_search = mark_bucket_linear; + tmp_b.generation = h->saved_bucket.generation + 1; #if BIHASH_KVP_AT_BUCKET_LEVEL /* Compensate for permanent refcount bump at the bucket level */ if (new_log2_pages > 0) @@ -712,8 +713,7 @@ expand_ok: tmp_b.refcnt = h->saved_bucket.refcnt + 1; ASSERT (tmp_b.refcnt > 0); tmp_b.lock = 0; - CLIB_MEMORY_STORE_BARRIER (); - b->as_u64 = tmp_b.as_u64; + clib_atomic_store_rel_n (&b->as_u64, tmp_b.as_u64); #if BIHASH_KVP_AT_BUCKET_LEVEL if (h->saved_bucket.log2_pages > 0) -- 2.16.6