lib/librte_eal/common/include/arch/x86/rte_atomic.h

   1 /* SPDX-License-Identifier: BSD-3-Clause
   2  * Copyright(c) 2010-2014 Intel Corporation
   3  */
   4
   5 #ifndef _RTE_ATOMIC_X86_H_
   6 #define _RTE_ATOMIC_X86_H_
   7
   8 #ifdef __cplusplus
   9 extern "C" {
  10 #endif
  11
  12 #include <stdint.h>
  13 #include <rte_common.h>
  14 #include <rte_config.h>
  15 #include <emmintrin.h>
  16 #include "generic/rte_atomic.h"
  17
  18 #if RTE_MAX_LCORE == 1
  19 #define MPLOCKED                        /**< No need to insert MP lock prefix. */
  20 #else
  21 #define MPLOCKED        "lock ; "       /**< Insert MP lock prefix. */
  22 #endif
  23
  24 #define rte_mb() _mm_mfence() /**< Full memory barrier; expands to the MFENCE intrinsic. */
  25
  26 #define rte_wmb() _mm_sfence() /**< Write memory barrier; expands to the SFENCE intrinsic. */
  27
  28 #define rte_rmb() _mm_lfence() /**< Read memory barrier; expands to the LFENCE intrinsic. */
  29
  30 #define rte_smp_wmb() rte_compiler_barrier() /**< SMP write barrier; expands only to rte_compiler_barrier(), no fence instruction is named here. */
  31
  32 #define rte_smp_rmb() rte_compiler_barrier() /**< SMP read barrier; expands only to rte_compiler_barrier(), no fence instruction is named here. */
  33
  34 /*
  35  * From Intel Software Development Manual; Vol 3;
  36  * 8.2.2 Memory Ordering in P6 and More Recent Processor Families:
  37  * ...
  38  * . Reads are not reordered with other reads.
  39  * . Writes are not reordered with older reads.
  40  * . Writes to memory are not reordered with other writes,
  41  *   with the following exceptions:
  42  *   . streaming stores (writes) executed with the non-temporal move
  43  *     instructions (MOVNTI, MOVNTQ, MOVNTDQ, MOVNTPS, and MOVNTPD); and
  44  *   . string operations (see Section 8.2.4.1).
  45  *  ...
  46  * . Reads may be reordered with older writes to different locations but not
  47  * with older writes to the same location.
  48  * . Reads or writes cannot be reordered with I/O instructions,
  49  * locked instructions, or serializing instructions.
  50  * . Reads cannot pass earlier LFENCE and MFENCE instructions.
  51  * . Writes ... cannot pass earlier LFENCE, SFENCE, and MFENCE instructions.
  52  * . LFENCE instructions cannot pass earlier reads.
  53  * . SFENCE instructions cannot pass earlier writes ...
  54  * . MFENCE instructions cannot pass earlier reads, writes ...
  55  *
  56  * As pointed out by the Java developers, this makes it possible to use
  57  * lock-prefixed instructions to get the same effect as mfence, and on most
  58  * modern HW that gives better performance than using mfence:
  59  * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
  60  * The basic idea is to use a lock-prefixed add with some dummy memory
  61  * location as the destination. From their experiments, 128B (2 cache lines)
  62  * below the current stack pointer looks like a good candidate.
  63  * So below we use that technique for the rte_smp_mb() implementation.
  64  */
  65
  66 static __rte_always_inline void
  67 rte_smp_mb(void)
  68 {
  69 #ifdef RTE_ARCH_I686
  70         asm volatile("lock addl $0, -128(%%esp); " ::: "memory");
  71 #else
  72         asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
  73 #endif
  74 }
  75
  76 #define rte_io_mb() rte_mb() /**< I/O full barrier; identical to rte_mb(). */
  77
  78 #define rte_io_wmb() rte_compiler_barrier() /**< I/O write barrier; expands only to rte_compiler_barrier(). */
  79
  80 #define rte_io_rmb() rte_compiler_barrier() /**< I/O read barrier; expands only to rte_compiler_barrier(). */
  81
  82 #define rte_cio_wmb() rte_compiler_barrier() /**< Coherent I/O write barrier; expands only to rte_compiler_barrier(). */
  83
  84 #define rte_cio_rmb() rte_compiler_barrier() /**< Coherent I/O read barrier; expands only to rte_compiler_barrier(). */
  85
  86 /*------------------------- 16 bit atomic operations -------------------------*/
  87
  88 #ifndef RTE_FORCE_INTRINSICS
  89 static inline int
  90 rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
  91 {
  92         uint8_t res;
  93
  94         asm volatile(
  95                         MPLOCKED
  96                         "cmpxchgw %[src], %[dst];"
  97                         "sete %[res];"
  98                         : [res] "=a" (res),     /* output */
  99                           [dst] "=m" (*dst)
 100                         : [src] "r" (src),      /* input */
 101                           "a" (exp),
 102                           "m" (*dst)
 103                         : "memory");            /* no-clobber list */
 104         return res;
 105 }
 106
 107 static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
 108 {
 109         return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
 110 }
 111
 112 static inline void
 113 rte_atomic16_inc(rte_atomic16_t *v)
 114 {
 115         asm volatile(
 116                         MPLOCKED
 117                         "incw %[cnt]"
 118                         : [cnt] "=m" (v->cnt)   /* output */
 119                         : "m" (v->cnt)          /* input */
 120                         );
 121 }
 122
 123 static inline void
 124 rte_atomic16_dec(rte_atomic16_t *v)
 125 {
 126         asm volatile(
 127                         MPLOCKED
 128                         "decw %[cnt]"
 129                         : [cnt] "=m" (v->cnt)   /* output */
 130                         : "m" (v->cnt)          /* input */
 131                         );
 132 }
 133
 134 static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
 135 {
 136         uint8_t ret;
 137
 138         asm volatile(
 139                         MPLOCKED
 140                         "incw %[cnt] ; "
 141                         "sete %[ret]"
 142                         : [cnt] "+m" (v->cnt),  /* output */
 143                           [ret] "=qm" (ret)
 144                         );
 145         return ret != 0;
 146 }
 147
 148 static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
 149 {
 150         uint8_t ret;
 151
 152         asm volatile(MPLOCKED
 153                         "decw %[cnt] ; "
 154                         "sete %[ret]"
 155                         : [cnt] "+m" (v->cnt),  /* output */
 156                           [ret] "=qm" (ret)
 157                         );
 158         return ret != 0;
 159 }
 160
 161 /*------------------------- 32 bit atomic operations -------------------------*/
 162
 163 static inline int
 164 rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
 165 {
 166         uint8_t res;
 167
 168         asm volatile(
 169                         MPLOCKED
 170                         "cmpxchgl %[src], %[dst];"
 171                         "sete %[res];"
 172                         : [res] "=a" (res),     /* output */
 173                           [dst] "=m" (*dst)
 174                         : [src] "r" (src),      /* input */
 175                           "a" (exp),
 176                           "m" (*dst)
 177                         : "memory");            /* no-clobber list */
 178         return res;
 179 }
 180
 181 static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
 182 {
 183         return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1);
 184 }
 185
 186 static inline void
 187 rte_atomic32_inc(rte_atomic32_t *v)
 188 {
 189         asm volatile(
 190                         MPLOCKED
 191                         "incl %[cnt]"
 192                         : [cnt] "=m" (v->cnt)   /* output */
 193                         : "m" (v->cnt)          /* input */
 194                         );
 195 }
 196
 197 static inline void
 198 rte_atomic32_dec(rte_atomic32_t *v)
 199 {
 200         asm volatile(
 201                         MPLOCKED
 202                         "decl %[cnt]"
 203                         : [cnt] "=m" (v->cnt)   /* output */
 204                         : "m" (v->cnt)          /* input */
 205                         );
 206 }
 207
 208 static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
 209 {
 210         uint8_t ret;
 211
 212         asm volatile(
 213                         MPLOCKED
 214                         "incl %[cnt] ; "
 215                         "sete %[ret]"
 216                         : [cnt] "+m" (v->cnt),  /* output */
 217                           [ret] "=qm" (ret)
 218                         );
 219         return ret != 0;
 220 }
 221
 222 static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
 223 {
 224         uint8_t ret;
 225
 226         asm volatile(MPLOCKED
 227                         "decl %[cnt] ; "
 228                         "sete %[ret]"
 229                         : [cnt] "+m" (v->cnt),  /* output */
 230                           [ret] "=qm" (ret)
 231                         );
 232         return ret != 0;
 233 }
 234 #endif
 235
 236 #ifdef RTE_ARCH_I686
 237 #include "rte_atomic_32.h"
 238 #else
 239 #include "rte_atomic_64.h"
 240 #endif
 241
 242 #ifdef __cplusplus
 243 }
 244 #endif
 245
 246 #endif /* _RTE_ATOMIC_X86_H_ */