Use acquire/release ordering when accessing svm_fifo shared variable cursize [refs/changes/84/16184/6]
author: Sirshak Das <sirshak.das@arm.com>
Thu, 8 Nov 2018 00:46:42 +0000 (18:46 -0600)
committer: Ole Trøan <otroan@employees.org>
Wed, 28 Nov 2018 20:50:21 +0000 (20:50 +0000)
Improves TCP iperf3 performance by ~3% on AArch64.

Change-Id: I1e51bd8403ba45ec6af4c2f96b95e884c1ae0d67
Signed-off-by: Sirshak Das <sirshak.das@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Ola Liljedahl <ola.liljedahl@arm.com>
src/svm/svm_fifo.c
src/svm/svm_fifo.h
src/vppinfra/atomics.h

diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c
index fb942a6..4397ef8 100644
--- a/src/svm/svm_fifo.c
+++ b/src/svm/svm_fifo.c
@@ -519,7 +519,7 @@ CLIB_MARCH_FN (svm_fifo_enqueue_nowait, int, svm_fifo_t * f, u32 max_bytes,
 
   /* Atomically increase the queue length */
   ASSERT (cursize + total_copy_bytes <= nitems);
-  clib_atomic_fetch_add (&f->cursize, total_copy_bytes);
+  clib_atomic_fetch_add_rel (&f->cursize, total_copy_bytes);
 
   return (total_copy_bytes);
 }
@@ -666,7 +666,7 @@ CLIB_MARCH_FN (svm_fifo_dequeue_nowait, int, svm_fifo_t * f, u32 max_bytes,
 
   ASSERT (f->head <= nitems);
   ASSERT (cursize >= total_copy_bytes);
-  clib_atomic_fetch_sub (&f->cursize, total_copy_bytes);
+  clib_atomic_fetch_sub_rel (&f->cursize, total_copy_bytes);
 
   return (total_copy_bytes);
 }
@@ -764,7 +764,7 @@ svm_fifo_dequeue_drop (svm_fifo_t * f, u32 max_bytes)
 
   ASSERT (f->head <= nitems);
   ASSERT (cursize >= total_drop_bytes);
-  clib_atomic_fetch_sub (&f->cursize, total_drop_bytes);
+  clib_atomic_fetch_sub_rel (&f->cursize, total_drop_bytes);
 
   return total_drop_bytes;
 }
@@ -773,7 +773,7 @@ void
 svm_fifo_dequeue_drop_all (svm_fifo_t * f)
 {
   f->head = f->tail;
-  clib_atomic_fetch_sub (&f->cursize, f->cursize);
+  clib_atomic_fetch_sub_rel (&f->cursize, f->cursize);
 }
 
 int
@@ -820,7 +820,7 @@ svm_fifo_segments_free (svm_fifo_t * f, svm_fifo_segment_t * fs)
       f->head = (f->head + fs[0].len) % f->nitems;
       total_drop_bytes = fs[0].len;
     }
-  clib_atomic_fetch_sub (&f->cursize, total_drop_bytes);
+  clib_atomic_fetch_sub_rel (&f->cursize, total_drop_bytes);
 }
 
 u32
diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h
index e049d3e..791b513 100644
--- a/src/svm/svm_fifo.h
+++ b/src/svm/svm_fifo.h
@@ -113,19 +113,19 @@ u8 *svm_fifo_replay (u8 * s, svm_fifo_t * f, u8 no_read, u8 verbose);
 static inline u32
 svm_fifo_max_dequeue (svm_fifo_t * f)
 {
-  return f->cursize;
+  return clib_atomic_load_acq_n (&f->cursize);
 }
 
 static inline int
 svm_fifo_is_full (svm_fifo_t * f)
 {
-  return (f->cursize == f->nitems);
+  return (clib_atomic_load_acq_n (&f->cursize) == f->nitems);
 }
 
 static inline int
 svm_fifo_is_empty (svm_fifo_t * f)
 {
-  return (f->cursize == 0);
+  return (clib_atomic_load_acq_n (&f->cursize) == 0);
 }
 
 static inline u32
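diff --git a/src/vppinfra/atomics.h b/src/vppinfra/atomics.h
index 420ae83..8084bdc 100644
--- a/src/vppinfra/atomics.h
+++ b/src/vppinfra/atomics.h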
 #define clib_atomic_test_and_set(a) __sync_lock_test_and_set(a, 1)
 #define clib_atomic_release(a) __sync_lock_release(a)
 
+#define clib_atomic_load_acq_n(a) __atomic_load_n((a), __ATOMIC_ACQUIRE)
 #define clib_atomic_store_rel_n(a, b) __atomic_store_n ((a), (b), __ATOMIC_RELEASE)
 #define clib_atomic_swap_acq_n(a, b) __atomic_exchange_n ((a), (b), __ATOMIC_ACQUIRE)
 
+#define clib_atomic_fetch_add_rel(a, b) __atomic_fetch_add((a), (b), __ATOMIC_RELEASE)
+#define clib_atomic_fetch_sub_rel(a, b) __atomic_fetch_sub((a), (b), __ATOMIC_RELEASE)
+
 #endif /* included_clib_atomics_h */