Previous implementation of clib_rwlock_t used two spinlocks: one
writer lock, and one to guard the counter for the number of readers.
This implementation uses a single condition variable rw_cnt which
has the following properties:
if a writer has the rwlock, rw_cnt = -1
if the rwlock is free, rw_cnt = 0
otherwise, rw_cnt > 0 and rw_cnt = number of readers
rw_cnt will never be less than -1
Benchmarking:
The results below are the cycle counts from test_rwlock.c, configured so
that for 10000 iterations, 6 reader and 6 writer threads on separate cores
are spawned such that each writer thread increments a global counter
10000 times in each iteration. For Taishan, 4 reader and 4 writer
threads are spawned in each test.
x86 Xeon old rwlock: 12.473e8, 11.655e8, 13.201e8, 11.347e8, 13.182e8
x86 Xeon new rwlock: 5.881e8, 5.796e8, 6.536e8, 5.540e8, 5.890e8
Aarch64 ThX2* old rwlock: 9.263e7, 8.933e7, 9.074e7, 8.979e7, 9.378e7
Aarch64 ThX2* new rwlock: 7.221e7, 8.107e7, 7.515e7, 7.672e7, 7.386e7
A72 old rwlock: 3.268e6, 3.200e6, 3.086e6, 3.176e6, 3.170e6
A72 new rwlock: 1.261e6, 1.288e6, 1.251e6, 1.229e6, 1.234e6
*ThunderX2 used additional gcc options "-march=armv8.1-a+crc+crypto+lse"
Type: refactor
Change-Id: I7c347d3037b36205ab532cbcb52a374c846eb275
Signed-off-by: Jason Zhang <jason.zhang2@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Lijian Zhang <Lijian.Zhang@arm.com>
void
segment_manager_segment_reader_unlock (segment_manager_t * sm)
{
void
segment_manager_segment_reader_unlock (segment_manager_t * sm)
{
- ASSERT (sm->segments_rwlock->n_readers > 0);
clib_rwlock_reader_unlock (&sm->segments_rwlock);
}
clib_rwlock_reader_unlock (&sm->segments_rwlock);
}
typedef struct clib_rw_lock_
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
typedef struct clib_rw_lock_
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- volatile u32 n_readers;
- volatile u32 n_readers_lock;
- volatile u32 writer_lock;
+ /* -1 when W lock held, > 0 when R lock held */
+ volatile i32 rw_cnt;
#if CLIB_DEBUG > 0
pid_t pid;
uword thread_index;
#if CLIB_DEBUG > 0
pid_t pid;
uword thread_index;
always_inline void
clib_rwlock_reader_lock (clib_rwlock_t * p)
{
always_inline void
clib_rwlock_reader_lock (clib_rwlock_t * p)
{
- while (clib_atomic_test_and_set (&(*p)->n_readers_lock))
- CLIB_PAUSE ();
-
- (*p)->n_readers += 1;
- if ((*p)->n_readers == 1)
- while (clib_atomic_test_and_set (&(*p)->writer_lock))
+ /* rwlock held by a writer */
+ while ((cnt = clib_atomic_load_relax_n (&(*p)->rw_cnt)) < 0)
- clib_atomic_release (&(*p)->n_readers_lock);
+ while (!clib_atomic_cmp_and_swap_acq_relax_n
+ (&(*p)->rw_cnt, &cnt, cnt + 1, 1));
CLIB_LOCK_DBG (p);
}
always_inline void
clib_rwlock_reader_unlock (clib_rwlock_t * p)
{
CLIB_LOCK_DBG (p);
}
always_inline void
clib_rwlock_reader_unlock (clib_rwlock_t * p)
{
- ASSERT ((*p)->n_readers > 0);
+ ASSERT ((*p)->rw_cnt > 0);
-
- while (clib_atomic_test_and_set (&(*p)->n_readers_lock))
- CLIB_PAUSE ();
-
- (*p)->n_readers -= 1;
- if ((*p)->n_readers == 0)
- {
- clib_atomic_release (&(*p)->writer_lock);
- }
- clib_atomic_release (&(*p)->n_readers_lock);
+ clib_atomic_fetch_sub_rel (&(*p)->rw_cnt, 1);
}
always_inline void
clib_rwlock_writer_lock (clib_rwlock_t * p)
{
}
always_inline void
clib_rwlock_writer_lock (clib_rwlock_t * p)
{
- while (clib_atomic_test_and_set (&(*p)->writer_lock))
- CLIB_PAUSE ();
+ i32 cnt = 0;
+ do
+ {
+ /* rwlock held by writer or reader(s) */
+ while ((cnt = clib_atomic_load_relax_n (&(*p)->rw_cnt)) != 0)
+ CLIB_PAUSE ();
+ }
+ while (!clib_atomic_cmp_and_swap_acq_relax_n (&(*p)->rw_cnt, &cnt, -1, 1));
clib_rwlock_writer_unlock (clib_rwlock_t * p)
{
CLIB_LOCK_DBG_CLEAR (p);
clib_rwlock_writer_unlock (clib_rwlock_t * p)
{
CLIB_LOCK_DBG_CLEAR (p);
- clib_atomic_release (&(*p)->writer_lock);
+ clib_atomic_release (&(*p)->rw_cnt);