vppinfra: introduce CLIB_CACHE_PREFETCH_BYTES 50/33150/11
author Damjan Marion <damarion@cisco.com>
Wed, 14 Jul 2021 16:18:08 +0000 (18:18 +0200)
committer Florin Coras <florin.coras@gmail.com>
Tue, 27 Jul 2021 23:40:28 +0000 (23:40 +0000)
Type: improvement
Change-Id: Ic07010f11ef303f5213a33b0faf24aaedb62f110
Signed-off-by: Damjan Marion <damarion@cisco.com>
src/cmake/cpu.cmake
src/plugins/dpdk/device/init.c
src/vppinfra/cache.h

index f4a57a3..dbe0f17 100644 (file)
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# set_log2_cacheline_size(<var> <n>)
+#
+# Translates a cache line size <n>, given in bytes, into its base-2
+# logarithm and stores the result in the variable named <var>.
+# Because this is a macro, <var> is set directly in the caller's scope.
+# Only 64 and 128 are recognised; any other value aborts configuration.
+macro(set_log2_cacheline_size var n)
+  if(${n} EQUAL 64)
+    set(${var} 6)
+  elseif(${n} EQUAL 128)
+    set(${var} 7)
+  else()
+    message(FATAL_ERROR "Cacheline size ${n} not supported")
+  endif()
+endmacro()
+
 ##############################################################################
-# Cache line size detection
+# Cache line size
 ##############################################################################
-if(CMAKE_CROSSCOMPILING)
-  message(STATUS "Cross-compiling - cache line size detection disabled")
-  set(VPP_LOG2_CACHE_LINE_SIZE 6)
-elseif(DEFINED VPP_LOG2_CACHE_LINE_SIZE)
+if(DEFINED VPP_CACHE_LINE_SIZE)
   # Cache line size assigned via cmake args
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
-  file(READ "/proc/cpuinfo" cpuinfo)
-  string(REPLACE "\n" ";" cpuinfo ${cpuinfo})
-  foreach(l ${cpuinfo})
-    string(REPLACE ":" ";" l ${l})
-    list(GET l 0 name)
-    list(GET l 1 value)
-    string(STRIP ${name} name)
-    string(STRIP ${value} value)
-    if(${name} STREQUAL "CPU implementer")
-      set(CPU_IMPLEMENTER ${value})
-    endif()
-    if(${name} STREQUAL "CPU part")
-      set(CPU_PART ${value})
-    endif()
-  endforeach()
-  # Implementer 0x43 - Cavium
-  #  Part 0x0af - ThunderX2 is 64B, rest all are 128B
-  if (${CPU_IMPLEMENTER} STREQUAL "0x43")
-    if (${CPU_PART} STREQUAL "0x0af")
-      set(VPP_LOG2_CACHE_LINE_SIZE 6)
-    else()
-      set(VPP_LOG2_CACHE_LINE_SIZE 7)
-    endif()
-  else()
-      set(VPP_LOG2_CACHE_LINE_SIZE 6)
-  endif()
-  math(EXPR VPP_CACHE_LINE_SIZE "1 << ${VPP_LOG2_CACHE_LINE_SIZE}")
-  message(STATUS "ARM AArch64 CPU implementer ${CPU_IMPLEMENTER} part ${CPU_PART} cacheline size ${VPP_CACHE_LINE_SIZE}")
+  set(VPP_CACHE_LINE_SIZE 128)
 else()
-  set(VPP_LOG2_CACHE_LINE_SIZE 6)
+  set(VPP_CACHE_LINE_SIZE 64)
 endif()
 
-set(VPP_LOG2_CACHE_LINE_SIZE ${VPP_LOG2_CACHE_LINE_SIZE}
-    CACHE STRING "Target CPU cache line size (power of 2)")
+set(VPP_CACHE_LINE_SIZE ${VPP_CACHE_LINE_SIZE}
+    CACHE STRING "Target CPU cache line size")
+
+set_log2_cacheline_size(VPP_LOG2_CACHE_LINE_SIZE ${VPP_CACHE_LINE_SIZE})
 
 ##############################################################################
 # Gnu Assembler AVX-512 bug detection
@@ -77,7 +59,7 @@ endif()
 macro(add_vpp_march_variant v)
   cmake_parse_arguments(ARG
     "OFF"
-    "N_PREFETCHES"
+    "N_PREFETCHES;CACHE_PREFETCH_BYTES"
     "FLAGS"
     ${ARGN}
   )
@@ -98,6 +80,10 @@ macro(add_vpp_march_variant v)
     if(ARG_N_PREFETCHES)
       string(APPEND fs " -DCLIB_N_PREFETCHES=${ARG_N_PREFETCHES}")
     endif()
+    if(ARG_CACHE_PREFETCH_BYTES)
+      set_log2_cacheline_size(log2 ${ARG_CACHE_PREFETCH_BYTES})
+      string(APPEND fs " -DCLIB_LOG2_CACHE_PREFETCH_BYTES=${log2}")
+    endif()
     if(flags_ok)
       string(TOUPPER ${v} uv)
       if(ARG_OFF)
@@ -143,6 +129,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
   add_vpp_march_variant(qdf24xx
     FLAGS -march=armv8-a+crc+crypto -mtune=qdf24xx
     N_PREFETCHES 8
+    CACHE_PREFETCH_BYTES 64
     OFF
   )
 
@@ -154,16 +141,19 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
   add_vpp_march_variant(thunderx2t99
     FLAGS -march=armv8.1-a+crc+crypto -mtune=thunderx2t99
     N_PREFETCHES 8
+    CACHE_PREFETCH_BYTES 64
   )
 
   add_vpp_march_variant(cortexa72
     FLAGS -march=armv8-a+crc+crypto -mtune=cortex-a72
     N_PREFETCHES 6
+    CACHE_PREFETCH_BYTES 64
   )
 
   add_vpp_march_variant(neoversen1
     FLAGS -march=armv8.2-a+crc+crypto -mtune=neoverse-n1
     N_PREFETCHES 6
+    CACHE_PREFETCH_BYTES 64
   )
 endif()
 
index aebbb64..f923da6 100644 (file)
@@ -1921,8 +1921,6 @@ dpdk_init (vlib_main_t * vm)
                 "Data in cache line 0 is bigger than cache line size");
   STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0,
                 "Cache line marker must be 1st element in frame_queue_trace_t");
-  STATIC_ASSERT (RTE_CACHE_LINE_SIZE == 1 << CLIB_LOG2_CACHE_LINE_BYTES,
-                "DPDK RTE CACHE LINE SIZE does not match with 1<<CLIB_LOG2_CACHE_LINE_BYTES");
 
   dpdk_cli_reference ();
 
index 04f91e0..4229a06 100644 (file)
 
 #include <vppinfra/error_bootstrap.h>
 
-/*
- * Allow CFLAGS to override the configured / deduced cache line size
- */
-#ifndef CLIB_LOG2_CACHE_LINE_BYTES
-
 /* Default cache line size of 64 bytes. */
 #ifndef CLIB_LOG2_CACHE_LINE_BYTES
 #define CLIB_LOG2_CACHE_LINE_BYTES 6
 #endif
 
-#endif /* CLIB_LOG2_CACHE_LINE_BYTES defined */
-
-#if (CLIB_LOG2_CACHE_LINE_BYTES >= 9)
-#error Cache line size 512 bytes or greater
+/* How much data prefetch instruction prefetches */
+#ifndef CLIB_LOG2_CACHE_PREFETCH_BYTES
+#define CLIB_LOG2_CACHE_PREFETCH_BYTES CLIB_LOG2_CACHE_LINE_BYTES
 #endif
 
-#define CLIB_CACHE_LINE_BYTES (1 << CLIB_LOG2_CACHE_LINE_BYTES)
-#define CLIB_CACHE_LINE_ALIGN_MARK(mark) u8 mark[0] __attribute__((aligned(CLIB_CACHE_LINE_BYTES)))
-#define CLIB_CACHE_LINE_ROUND(x) ((x + CLIB_CACHE_LINE_BYTES - 1) & ~(CLIB_CACHE_LINE_BYTES - 1))
-
 /* Default cache line fill buffers. */
 #ifndef CLIB_N_PREFETCHES
 #define CLIB_N_PREFETCHES 16
 #endif
 
+/* Sizes in bytes, derived from the corresponding log2 settings. */
+#define CLIB_CACHE_LINE_BYTES    (1 << CLIB_LOG2_CACHE_LINE_BYTES)
+#define CLIB_CACHE_PREFETCH_BYTES (1 << CLIB_LOG2_CACHE_PREFETCH_BYTES)
+
+/* Declares a zero-length u8 array named `mark` that is aligned to
+ * CLIB_CACHE_LINE_BYTES. */
+#define CLIB_CACHE_LINE_ALIGN_MARK(mark)                                      \
+  u8 mark[0] __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES)))
+
+/* Rounds x up to the next multiple of CLIB_CACHE_LINE_BYTES.  The argument
+ * is parenthesized so that expressions using operators with lower precedence
+ * than `+` (for example `a | b` or `c ? d : e`) are rounded as a whole. */
+#define CLIB_CACHE_LINE_ROUND(x)                                              \
+  (((x) + CLIB_CACHE_LINE_BYTES - 1) & ~(CLIB_CACHE_LINE_BYTES - 1))
+
 /* Read/write arguments to __builtin_prefetch. */
 #define CLIB_PREFETCH_READ 0
 #define CLIB_PREFETCH_LOAD 0   /* alias for read */
 #define CLIB_PREFETCH_WRITE 1
 #define CLIB_PREFETCH_STORE 1  /* alias for write */
 
-#define _CLIB_PREFETCH(n,size,type)                            \
-  if ((size) > (n)*CLIB_CACHE_LINE_BYTES)                      \
-    __builtin_prefetch (_addr + (n)*CLIB_CACHE_LINE_BYTES,     \
-                       CLIB_PREFETCH_##type,                   \
-                       /* locality */ 3);
-
-#define CLIB_PREFETCH(addr,size,type)          \
-do {                                           \
-  void * _addr = (addr);                       \
-                                               \
-  ASSERT ((size) <= 4*CLIB_CACHE_LINE_BYTES);  \
-  _CLIB_PREFETCH (0, size, type);              \
-  _CLIB_PREFETCH (1, size, type);              \
-  _CLIB_PREFETCH (2, size, type);              \
-  _CLIB_PREFETCH (3, size, type);              \
-} while (0)
+/* Helper for CLIB_PREFETCH: prefetches the chunk that starts
+ * n * CLIB_CACHE_PREFETCH_BYTES past `_addr`, but only when `size` extends
+ * into that chunk.  Expects a `_addr` variable in the expanding scope; the
+ * expansion already ends with a semicolon. */
+#define _CLIB_PREFETCH(n, size, type)                                         \
+  if ((size) > (n) * CLIB_CACHE_PREFETCH_BYTES)                               \
+    __builtin_prefetch (_addr + (n) * CLIB_CACHE_PREFETCH_BYTES,              \
+                       CLIB_PREFETCH_##type, /* locality */ 3);
+
+/* Prefetches `size` bytes starting at `addr` in steps of
+ * CLIB_CACHE_PREFETCH_BYTES, issuing at most four prefetches.  `type` is
+ * pasted onto CLIB_PREFETCH_ to select the rw argument (READ, LOAD, WRITE
+ * or STORE).  `size` is expanded several times, so it should be free of
+ * side effects.
+ * NOTE(review): the ASSERT limit is expressed in prefetch units; callers
+ * that size requests in CLIB_CACHE_LINE_BYTES could exceed it on builds
+ * where the two values differ - confirm against callers. */
+#define CLIB_PREFETCH(addr, size, type)                                       \
+  do                                                                          \
+    {                                                                         \
+      void *_addr = (addr);                                                   \
+                                                                              \
+      ASSERT ((size) <= 4 * CLIB_CACHE_PREFETCH_BYTES);                       \
+      _CLIB_PREFETCH (0, size, type);                                         \
+      _CLIB_PREFETCH (1, size, type);                                         \
+      _CLIB_PREFETCH (2, size, type);                                         \
+      _CLIB_PREFETCH (3, size, type);                                         \
+    }                                                                         \
+  while (0)
 
 #undef _
 
 /* Issue a single read prefetch for address p (rw = 0, locality = 3). */
 static_always_inline void
 clib_prefetch_load (void *p)
 {
-  CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
+  __builtin_prefetch (p, /* rw */ 0, /* locality */ 3);
 }
 
 /* Issue a single write prefetch for address p (rw = 1, locality = 3). */
 static_always_inline void
 clib_prefetch_store (void *p)
 {
-  CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, STORE);
+  __builtin_prefetch (p, /* rw */ 1, /* locality */ 3);
 }
 
 #endif /* included_clib_cache_h */