From 24d65a1c5aa18c107ae17115dfb2ea1e5cc05527 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 14 Jul 2021 18:18:08 +0200 Subject: [PATCH] vppinfra: introduce CLIB_CACHE_PREFETCH_BYTES Type: improvement Change-Id: Ic07010f11ef303f5213a33b0faf24aaedb62f110 Signed-off-by: Damjan Marion --- src/cmake/cpu.cmake | 64 ++++++++++++++++++------------------------ src/plugins/dpdk/device/init.c | 2 -- src/vppinfra/cache.h | 60 +++++++++++++++++++-------------------- 3 files changed, 56 insertions(+), 70 deletions(-) diff --git a/src/cmake/cpu.cmake b/src/cmake/cpu.cmake index f4a57a34281..dbe0f17b94f 100644 --- a/src/cmake/cpu.cmake +++ b/src/cmake/cpu.cmake @@ -11,49 +11,31 @@ # See the License for the specific language governing permissions and # limitations under the License. +macro(set_log2_cacheline_size var n) + if(${n} EQUAL 128) + set(${var} 7) + elseif(${n} EQUAL 64) + set(${var} 6) + else() + message(FATAL_ERROR "Cacheline size ${n} not supported") + endif() +endmacro() + ############################################################################## -# Cache line size detection +# Cache line size ############################################################################## -if(CMAKE_CROSSCOMPILING) - message(STATUS "Cross-compiling - cache line size detection disabled") - set(VPP_LOG2_CACHE_LINE_SIZE 6) -elseif(DEFINED VPP_LOG2_CACHE_LINE_SIZE) +if(DEFINED VPP_CACHE_LINE_SIZE) # Cache line size assigned via cmake args elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") - file(READ "/proc/cpuinfo" cpuinfo) - string(REPLACE "\n" ";" cpuinfo ${cpuinfo}) - foreach(l ${cpuinfo}) - string(REPLACE ":" ";" l ${l}) - list(GET l 0 name) - list(GET l 1 value) - string(STRIP ${name} name) - string(STRIP ${value} value) - if(${name} STREQUAL "CPU implementer") - set(CPU_IMPLEMENTER ${value}) - endif() - if(${name} STREQUAL "CPU part") - set(CPU_PART ${value}) - endif() - endforeach() - # Implementer 0x43 - Cavium - # Part 0x0af - ThunderX2 is 64B, rest all are 128B - if (${CPU_IMPLEMENTER} STREQUAL "0x43") - if (${CPU_PART} STREQUAL "0x0af") - set(VPP_LOG2_CACHE_LINE_SIZE 6) - else() - set(VPP_LOG2_CACHE_LINE_SIZE 7) - endif() - else() - set(VPP_LOG2_CACHE_LINE_SIZE 6) - endif() - math(EXPR VPP_CACHE_LINE_SIZE "1 << ${VPP_LOG2_CACHE_LINE_SIZE}") - message(STATUS "ARM AArch64 CPU implementer ${CPU_IMPLEMENTER} part ${CPU_PART} cacheline size ${VPP_CACHE_LINE_SIZE}") + set(VPP_CACHE_LINE_SIZE 128) else() - set(VPP_LOG2_CACHE_LINE_SIZE 6) + set(VPP_CACHE_LINE_SIZE 64) endif() -set(VPP_LOG2_CACHE_LINE_SIZE ${VPP_LOG2_CACHE_LINE_SIZE} - CACHE STRING "Target CPU cache line size (power of 2)") +set(VPP_CACHE_LINE_SIZE ${VPP_CACHE_LINE_SIZE} + CACHE STRING "Target CPU cache line size") + +set_log2_cacheline_size(VPP_LOG2_CACHE_LINE_SIZE ${VPP_CACHE_LINE_SIZE}) ############################################################################## # Gnu Assembler AVX-512 bug detection @@ -77,7 +59,7 @@ endif() macro(add_vpp_march_variant v) cmake_parse_arguments(ARG "OFF" - "N_PREFETCHES" + "N_PREFETCHES;CACHE_PREFETCH_BYTES" "FLAGS" ${ARGN} ) @@ -98,6 +80,10 @@ macro(add_vpp_march_variant v) if(ARG_N_PREFETCHES) string(APPEND fs " -DCLIB_N_PREFETCHES=${ARG_N_PREFETCHES}") endif() + if(ARG_CACHE_PREFETCH_BYTES) + set_log2_cacheline_size(log2 ${ARG_CACHE_PREFETCH_BYTES}) + string(APPEND fs " -DCLIB_LOG2_CACHE_PREFETCH_BYTES=${log2}") + endif() if(flags_ok) string(TOUPPER ${v} uv) if(ARG_OFF) @@ -143,6 +129,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") add_vpp_march_variant(qdf24xx FLAGS -march=armv8-a+crc+crypto -mtune=qdf24xx N_PREFETCHES 8 + CACHE_PREFETCH_BYTES 64 OFF ) @@ -154,16 +141,19 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") add_vpp_march_variant(thunderx2t99 FLAGS -march=armv8.1-a+crc+crypto -mtune=thunderx2t99 N_PREFETCHES 8 + CACHE_PREFETCH_BYTES 64 ) add_vpp_march_variant(cortexa72 FLAGS -march=armv8-a+crc+crypto -mtune=cortex-a72 N_PREFETCHES 6 + CACHE_PREFETCH_BYTES 64 ) add_vpp_march_variant(neoversen1 FLAGS -march=armv8.2-a+crc+crypto -mtune=neoverse-n1 N_PREFETCHES 6 + CACHE_PREFETCH_BYTES 64 ) endif() diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index aebbb64dd76..f923da6c09e 100644 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -1921,8 +1921,6 @@ dpdk_init (vlib_main_t * vm) "Data in cache line 0 is bigger than cache line size"); STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0, "Cache line marker must be 1st element in frame_queue_trace_t"); - STATIC_ASSERT (RTE_CACHE_LINE_SIZE == 1 << CLIB_LOG2_CACHE_LINE_BYTES, - "DPDK RTE CACHE LINE SIZE does not match with 1< -/* - * Allow CFLAGS to override the configured / deduced cache line size - */ -#ifndef CLIB_LOG2_CACHE_LINE_BYTES - /* Default cache line size of 64 bytes. */ #ifndef CLIB_LOG2_CACHE_LINE_BYTES #define CLIB_LOG2_CACHE_LINE_BYTES 6 #endif -#endif /* CLIB_LOG2_CACHE_LINE_BYTES defined */ - -#if (CLIB_LOG2_CACHE_LINE_BYTES >= 9) -#error Cache line size 512 bytes or greater +/* How much data prefetch instruction prefetches */ +#ifndef CLIB_LOG2_CACHE_PREFETCH_BYTES +#define CLIB_LOG2_CACHE_PREFETCH_BYTES CLIB_LOG2_CACHE_LINE_BYTES #endif -#define CLIB_CACHE_LINE_BYTES (1 << CLIB_LOG2_CACHE_LINE_BYTES) -#define CLIB_CACHE_LINE_ALIGN_MARK(mark) u8 mark[0] __attribute__((aligned(CLIB_CACHE_LINE_BYTES))) -#define CLIB_CACHE_LINE_ROUND(x) ((x + CLIB_CACHE_LINE_BYTES - 1) & ~(CLIB_CACHE_LINE_BYTES - 1)) - /* Default cache line fill buffers. */ #ifndef CLIB_N_PREFETCHES #define CLIB_N_PREFETCHES 16 #endif +#define CLIB_CACHE_LINE_BYTES (1 << CLIB_LOG2_CACHE_LINE_BYTES) +#define CLIB_CACHE_PREFETCH_BYTES (1 << CLIB_LOG2_CACHE_PREFETCH_BYTES) +#define CLIB_CACHE_LINE_ALIGN_MARK(mark) \ + u8 mark[0] __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES))) +#define CLIB_CACHE_LINE_ROUND(x) \ + ((x + CLIB_CACHE_LINE_BYTES - 1) & ~(CLIB_CACHE_LINE_BYTES - 1)) + /* Read/write arguments to __builtin_prefetch. */ #define CLIB_PREFETCH_READ 0 #define CLIB_PREFETCH_LOAD 0 /* alias for read */ #define CLIB_PREFETCH_WRITE 1 #define CLIB_PREFETCH_STORE 1 /* alias for write */ -#define _CLIB_PREFETCH(n,size,type) \ - if ((size) > (n)*CLIB_CACHE_LINE_BYTES) \ - __builtin_prefetch (_addr + (n)*CLIB_CACHE_LINE_BYTES, \ - CLIB_PREFETCH_##type, \ - /* locality */ 3); - -#define CLIB_PREFETCH(addr,size,type) \ -do { \ - void * _addr = (addr); \ - \ - ASSERT ((size) <= 4*CLIB_CACHE_LINE_BYTES); \ - _CLIB_PREFETCH (0, size, type); \ - _CLIB_PREFETCH (1, size, type); \ - _CLIB_PREFETCH (2, size, type); \ - _CLIB_PREFETCH (3, size, type); \ -} while (0) +#define _CLIB_PREFETCH(n, size, type) \ + if ((size) > (n) *CLIB_CACHE_PREFETCH_BYTES) \ + __builtin_prefetch (_addr + (n) *CLIB_CACHE_PREFETCH_BYTES, \ + CLIB_PREFETCH_##type, /* locality */ 3); + +#define CLIB_PREFETCH(addr, size, type) \ + do \ + { \ + void *_addr = (addr); \ + \ + ASSERT ((size) <= 4 * CLIB_CACHE_PREFETCH_BYTES); \ + _CLIB_PREFETCH (0, size, type); \ + _CLIB_PREFETCH (1, size, type); \ + _CLIB_PREFETCH (2, size, type); \ + _CLIB_PREFETCH (3, size, type); \ + } \ + while (0) #undef _ static_always_inline void clib_prefetch_load (void *p) { - CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD); + __builtin_prefetch (p, /* rw */ 0, /* locality */ 3); } static_always_inline void clib_prefetch_store (void *p) { - CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, STORE); + __builtin_prefetch (p, /* rw */ 1, /* locality */ 3); } #endif /* included_clib_cache_h */ -- 2.16.6