/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _RTE_ATOMIC_X86_H_
#define _RTE_ATOMIC_X86_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <rte_common.h>
#include <emmintrin.h>
#include "generic/rte_atomic.h"

#if RTE_MAX_LCORE == 1
#define MPLOCKED                        /**< No need to insert MP lock prefix. */
#else
#define MPLOCKED        "lock ; "       /**< Insert MP lock prefix. */
#endif

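/*
 * Note: MPLOCKED relies on C string concatenation inside the asm templates
 * below, so e.g. on a multi-lcore build
 *
 *	asm volatile(MPLOCKED "incw %[cnt]" ...);
 *
 * assembles as "lock ; incw ...", while an RTE_MAX_LCORE == 1 build drops
 * the lock prefix (and its cost) entirely.
 */
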
#define rte_mb() _mm_mfence()

#define rte_wmb() _mm_sfence()

#define rte_rmb() _mm_lfence()

#define rte_smp_wmb() rte_compiler_barrier()

#define rte_smp_rmb() rte_compiler_barrier()

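/*
 * Usage sketch (illustrative; flag, data and use() are hypothetical names):
 * x86 already keeps store-store and load-load order in hardware, so a
 * compiler barrier is all a classic message-then-flag handoff needs:
 *
 *	// producer                     // consumer
 *	data = 42;                      while (flag == 0)
 *	rte_smp_wmb();                          ;
 *	flag = 1;                       rte_smp_rmb();
 *	                                use(data);   // sees 42
 *
 * rte_smp_wmb() keeps the payload store before the flag store;
 * rte_smp_rmb() keeps the flag load before the payload load.
 */
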
/*
 * From Intel Software Development Manual; Vol 3;
 * 8.2.2 Memory Ordering in P6 and More Recent Processor Families:
 *
 * . Reads are not reordered with other reads.
 * . Writes are not reordered with older reads.
 * . Writes to memory are not reordered with other writes,
 *   with the following exceptions:
 *   . streaming stores (writes) executed with the non-temporal move
 *     instructions (MOVNTI, MOVNTQ, MOVNTDQ, MOVNTPS, and MOVNTPD); and
 *   . string operations (see Section 8.2.4.1).
 *
 * . Reads may be reordered with older writes to different locations but not
 *   with older writes to the same location.
 * . Reads or writes cannot be reordered with I/O instructions,
 *   locked instructions, or serializing instructions.
 * . Reads cannot pass earlier LFENCE and MFENCE instructions.
 * . Writes ... cannot pass earlier LFENCE, SFENCE, and MFENCE instructions.
 * . LFENCE instructions cannot pass earlier reads.
 * . SFENCE instructions cannot pass earlier writes ...
 * . MFENCE instructions cannot pass earlier reads, writes ...
 *
 * As pointed out by the Java folks, this makes it possible to use
 * lock-prefixed instructions to get the same effect as MFENCE, and on most
 * modern hardware that gives better performance than using MFENCE:
 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
 * The basic idea is to use a lock-prefixed add with some dummy memory
 * location as the destination. From their experiments, 128B (2 cache lines)
 * below the current stack pointer looks like a good candidate.
 * So below we use that technique for the rte_smp_mb() implementation.
 */

static inline void __attribute__((always_inline))
rte_smp_mb(void)
{
#ifdef RTE_ARCH_I686
	asm volatile("lock addl $0, -128(%%esp); " ::: "memory");
#else
	asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
#endif
}

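/*
 * Why a full barrier is sometimes required (illustrative sketch; flag_a and
 * flag_b are hypothetical names): per the rules quoted above, x86 does allow
 * a load to be reordered ahead of an older store to a different location, so
 * a handshake that stores its own flag and then reads the peer's flag needs
 * rte_smp_mb(), not just compiler barriers:
 *
 *	// lcore A                      // lcore B
 *	flag_a = 1;                     flag_b = 1;
 *	rte_smp_mb();                   rte_smp_mb();
 *	if (flag_b == 0) { ... }        if (flag_a == 0) { ... }
 *
 * Without the full barrier both lcores could observe 0 (the classic
 * store-load reordering).
 */
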
/*------------------------- 16 bit atomic operations -------------------------*/

#ifndef RTE_FORCE_INTRINSICS
static inline int
rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
{
	uint8_t res;

	asm volatile(
			MPLOCKED
			"cmpxchgw %[src], %[dst];"
			"sete %[res];"
			: [res] "=a" (res),      /* output */
			  [dst] "=m" (*dst)
			: [src] "r" (src),       /* input */
			  "a" (exp),
			  "m" (*dst)
			: "memory");             /* clobber list */
	return res;
}

static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
{
	return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
}

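/*
 * Usage sketch (sat_inc16 is a hypothetical helper, not part of the API):
 * cmpset returns non-zero on success, which makes the usual
 * compare-and-swap retry loop straightforward, e.g. an atomic saturating
 * increment:
 *
 *	static inline void
 *	sat_inc16(volatile uint16_t *p)
 *	{
 *		uint16_t old;
 *
 *		do {
 *			old = *p;
 *			if (old == UINT16_MAX)
 *				return;         // already saturated
 *		} while (rte_atomic16_cmpset(p, old, old + 1) == 0);
 *	}
 */
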
static inline void
rte_atomic16_inc(rte_atomic16_t *v)
{
	asm volatile(
			MPLOCKED
			"incw %[cnt]"
			: [cnt] "=m" (v->cnt)   /* output */
			: "m" (v->cnt)          /* input */
			);
}

static inline void
rte_atomic16_dec(rte_atomic16_t *v)
{
	asm volatile(
			MPLOCKED
			"decw %[cnt]"
			: [cnt] "=m" (v->cnt)   /* output */
			: "m" (v->cnt)          /* input */
			);
}

static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
{
	uint8_t ret;

	asm volatile(
			MPLOCKED
			"incw %[cnt] ; "
			"sete %[ret]"
			: [cnt] "+m" (v->cnt),  /* output */
			  [ret] "=qm" (ret)
			);
	return ret != 0;
}

static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
{
	uint8_t ret;

	asm volatile(MPLOCKED
			"decw %[cnt] ; "
			"sete %[ret]"
			: [cnt] "+m" (v->cnt),  /* output */
			  [ret] "=qm" (ret)
			);
	return ret != 0;
}

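/*
 * Usage sketch (obj and free_obj are hypothetical): dec_and_test returns
 * true only for the caller that drops the counter to zero, which is exactly
 * the reference-counting contract -- one and only one releaser frees the
 * object:
 *
 *	if (rte_atomic16_dec_and_test(&obj->refcnt))
 *		free_obj(obj);
 */
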
/*------------------------- 32 bit atomic operations -------------------------*/

static inline int
rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
{
	uint8_t res;

	asm volatile(
			MPLOCKED
			"cmpxchgl %[src], %[dst];"
			"sete %[res];"
			: [res] "=a" (res),      /* output */
			  [dst] "=m" (*dst)
			: [src] "r" (src),       /* input */
			  "a" (exp),
			  "m" (*dst)
			: "memory");             /* clobber list */
	return res;
}

static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
{
	return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1);
}

static inline void
rte_atomic32_inc(rte_atomic32_t *v)
{
	asm volatile(
			MPLOCKED
			"incl %[cnt]"
			: [cnt] "=m" (v->cnt)   /* output */
			: "m" (v->cnt)          /* input */
			);
}

static inline void
rte_atomic32_dec(rte_atomic32_t *v)
{
	asm volatile(
			MPLOCKED
			"decl %[cnt]"
			: [cnt] "=m" (v->cnt)   /* output */
			: "m" (v->cnt)          /* input */
			);
}

static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
{
	uint8_t ret;

	asm volatile(
			MPLOCKED
			"incl %[cnt] ; "
			"sete %[ret]"
			: [cnt] "+m" (v->cnt),  /* output */
			  [ret] "=qm" (ret)
			);
	return ret != 0;
}

static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
{
	uint8_t ret;

	asm volatile(MPLOCKED
			"decl %[cnt] ; "
			"sete %[ret]"
			: [cnt] "+m" (v->cnt),  /* output */
			  [ret] "=qm" (ret)
			);
	return ret != 0;
}
#endif /* RTE_FORCE_INTRINSICS */

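/*
 * Usage sketch (busy and do_exclusive_work are hypothetical names):
 * test_and_set atomically moves the counter from 0 to 1 and reports whether
 * it succeeded, so it can serve as a minimal try-lock:
 *
 *	rte_atomic32_t busy = RTE_ATOMIC32_INIT(0);
 *
 *	if (rte_atomic32_test_and_set(&busy)) {
 *		do_exclusive_work();
 *		rte_atomic32_clear(&busy);   // release
 *	}
 */
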
#ifdef RTE_ARCH_I686
#include "rte_atomic_32.h"
#else
#include "rte_atomic_64.h"
#endif

#ifdef __cplusplus
}
#endif

#endif /* _RTE_ATOMIC_X86_H_ */