From 19515acddc343d1680d4a5d27f39456999498591 Mon Sep 17 00:00:00 2001 From: Sirshak Das Date: Wed, 7 Nov 2018 18:46:42 -0600 Subject: [PATCH] Use acquire/release ordering when accessing svm_fifo shared variable cursize Improves TCP iperf3 performance by ~3% on AArch64. Change-Id: I1e51bd8403ba45ec6af4c2f96b95e884c1ae0d67 Signed-off-by: Sirshak Das Reviewed-by: Honnappa Nagarahalli Reviewed-by: Ola Liljedahl --- src/svm/svm_fifo.c | 10 +++++----- src/svm/svm_fifo.h | 6 +++--- src/vppinfra/atomics.h | 4 ++++ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c index fb942a63d43..4397ef8413e 100644 --- a/src/svm/svm_fifo.c +++ b/src/svm/svm_fifo.c @@ -519,7 +519,7 @@ CLIB_MARCH_FN (svm_fifo_enqueue_nowait, int, svm_fifo_t * f, u32 max_bytes, /* Atomically increase the queue length */ ASSERT (cursize + total_copy_bytes <= nitems); - clib_atomic_fetch_add (&f->cursize, total_copy_bytes); + clib_atomic_fetch_add_rel (&f->cursize, total_copy_bytes); return (total_copy_bytes); } @@ -666,7 +666,7 @@ CLIB_MARCH_FN (svm_fifo_dequeue_nowait, int, svm_fifo_t * f, u32 max_bytes, ASSERT (f->head <= nitems); ASSERT (cursize >= total_copy_bytes); - clib_atomic_fetch_sub (&f->cursize, total_copy_bytes); + clib_atomic_fetch_sub_rel (&f->cursize, total_copy_bytes); return (total_copy_bytes); } @@ -764,7 +764,7 @@ svm_fifo_dequeue_drop (svm_fifo_t * f, u32 max_bytes) ASSERT (f->head <= nitems); ASSERT (cursize >= total_drop_bytes); - clib_atomic_fetch_sub (&f->cursize, total_drop_bytes); + clib_atomic_fetch_sub_rel (&f->cursize, total_drop_bytes); return total_drop_bytes; } @@ -773,7 +773,7 @@ void svm_fifo_dequeue_drop_all (svm_fifo_t * f) { f->head = f->tail; - clib_atomic_fetch_sub (&f->cursize, f->cursize); + clib_atomic_fetch_sub_rel (&f->cursize, f->cursize); } int @@ -820,7 +820,7 @@ svm_fifo_segments_free (svm_fifo_t * f, svm_fifo_segment_t * fs) f->head = (f->head + fs[0].len) % f->nitems; total_drop_bytes = fs[0].len; } - clib_atomic_fetch_sub (&f->cursize, total_drop_bytes); + clib_atomic_fetch_sub_rel (&f->cursize, total_drop_bytes); } u32 diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h index e049d3e3147..791b513a4a6 100644 --- a/src/svm/svm_fifo.h +++ b/src/svm/svm_fifo.h @@ -113,19 +113,19 @@ u8 *svm_fifo_replay (u8 * s, svm_fifo_t * f, u8 no_read, u8 verbose); static inline u32 svm_fifo_max_dequeue (svm_fifo_t * f) { - return f->cursize; + return clib_atomic_load_acq_n (&f->cursize); } static inline int svm_fifo_is_full (svm_fifo_t * f) { - return (f->cursize == f->nitems); + return (clib_atomic_load_acq_n (&f->cursize) == f->nitems); } static inline int svm_fifo_is_empty (svm_fifo_t * f) { - return (f->cursize == 0); + return (clib_atomic_load_acq_n (&f->cursize) == 0); } static inline u32 diff --git a/src/vppinfra/atomics.h b/src/vppinfra/atomics.h index 420ae837fc0..8084bdc77d4 100644 --- a/src/vppinfra/atomics.h +++ b/src/vppinfra/atomics.h @@ -40,7 +40,11 @@ #define clib_atomic_test_and_set(a) __sync_lock_test_and_set(a, 1) #define clib_atomic_release(a) __sync_lock_release(a) +#define clib_atomic_load_acq_n(a) __atomic_load_n((a), __ATOMIC_ACQUIRE) #define clib_atomic_store_rel_n(a, b) __atomic_store_n ((a), (b), __ATOMIC_RELEASE) #define clib_atomic_swap_acq_n(a, b) __atomic_exchange_n ((a), (b), __ATOMIC_ACQUIRE) +#define clib_atomic_fetch_add_rel(a, b) __atomic_fetch_add((a), (b), __ATOMIC_RELEASE) +#define clib_atomic_fetch_sub_rel(a, b) __atomic_fetch_sub((a), (b), __ATOMIC_RELEASE) + #endif /* included_clib_atomics_h */ -- 2.16.6