# See the License for the specific language governing permissions and
# limitations under the License.
-##############################################################################
-# Cache line size detection
-##############################################################################
-if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
- file(READ "/proc/cpuinfo" cpuinfo)
- string(REPLACE "\n" ";" cpuinfo ${cpuinfo})
- foreach(l ${cpuinfo})
- string(REPLACE ":" ";" l ${l})
- list(GET l 0 name)
- list(GET l 1 value)
- string(STRIP ${name} name)
- string(STRIP ${value} value)
- if(${name} STREQUAL "CPU implementer")
- set(CPU_IMPLEMENTER ${value})
- endif()
- if(${name} STREQUAL "CPU part")
- set(CPU_PART ${value})
- endif()
- endforeach()
- # Implementer 0x43 - Cavium
- # Part 0x0af - ThunderX2 is 64B, rest all are 128B
- if (${CPU_IMPLEMENTER} STREQUAL "0x43")
- if (${CPU_PART} STREQUAL "0x0af")
- set(VPP_LOG2_CACHE_LINE_SIZE 6)
- else()
- set(VPP_LOG2_CACHE_LINE_SIZE 7)
- endif()
+macro(set_log2_cacheline_size var n) # Store log2 of cache line size `n` in the variable named by `var` (macro: set in the caller's scope).
+ if(${n} EQUAL 128) # Only the sizes 128 and 64 are recognised.
+ set(${var} 7) # 128 == 1 << 7
+ elseif(${n} EQUAL 64)
+ set(${var} 6) # 64 == 1 << 6
else()
- set(VPP_LOG2_CACHE_LINE_SIZE 6)
+ message(FATAL_ERROR "Cacheline size ${n} not supported") # Any other size aborts configuration.
endif()
- math(EXPR VPP_CACHE_LINE_SIZE "1 << ${VPP_LOG2_CACHE_LINE_SIZE}")
- message(STATUS "ARM AArch64 CPU implementer ${CPU_IMPLEMENTER} part ${CPU_PART} cacheline size ${VPP_CACHE_LINE_SIZE}")
+endmacro()
+
+##############################################################################
+# Cache line size: keep a pre-defined VPP_CACHE_LINE_SIZE, else 128 on AArch64 and 64 elsewhere
+##############################################################################
+if(DEFINED VPP_CACHE_LINE_SIZE)
+ # Already defined (e.g. assigned via cmake args): keep that value untouched
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
+ set(VPP_CACHE_LINE_SIZE 128) # Default for AArch64 targets.
else()
- set(VPP_LOG2_CACHE_LINE_SIZE 6)
+ set(VPP_CACHE_LINE_SIZE 64) # Default for every other processor.
endif()
-set(VPP_LOG2_CACHE_LINE_SIZE ${VPP_LOG2_CACHE_LINE_SIZE}
- CACHE STRING "Target CPU cache line size (power of 2)")
+set(VPP_CACHE_LINE_SIZE ${VPP_CACHE_LINE_SIZE}
+ CACHE STRING "Target CPU cache line size") # Publish the selected value as a STRING cache entry.
+
+set_log2_cacheline_size(VPP_LOG2_CACHE_LINE_SIZE ${VPP_CACHE_LINE_SIZE}) # Derive the log2 form; sizes other than 64/128 stop configuration.
##############################################################################
-# CPU optimizations and multiarch support
+# GNU Assembler AVX-512 bug detection (sets GNU_ASSEMBLER_AVX512_BUG when the probe does not produce the expected bytes)
+# - see: https://sourceware.org/bugzilla/show_bug.cgi?id=23465
##############################################################################
if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
- set(CMAKE_C_FLAGS "-march=corei7 -mtune=corei7-avx ${CMAKE_C_FLAGS}")
- check_c_compiler_flag("-march=core-avx2" compiler_flag_march_core_avx2)
- if(compiler_flag_march_core_avx2)
- list(APPEND MARCH_VARIANTS "avx2\;-march=core-avx2 -mtune=core-avx2")
- endif()
- check_c_compiler_flag("-march=skylake-avx512" compiler_flag_march_skylake_avx512)
- if(compiler_flag_march_skylake_avx512)
- list(APPEND MARCH_VARIANTS "avx512\;-march=skylake-avx512 -mtune=skylake-avx512")
+ if (CMAKE_C_COMPILER_ID STREQUAL "GNU") # The probe only runs when the C compiler identifies as GNU.
+ set(pfx ${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/gas_avx512_bug_test) # Scratch file prefix inside the build tree.
+ file(WRITE ${pfx}.s "vmovaps 0x40(,%rax), %zmm0\n") # One-instruction assembly test input.
+ execute_process(COMMAND ${CMAKE_C_COMPILER} -c ${pfx}.s -o ${pfx}.o) # Assemble via the compiler driver; the exit status is not checked.
+ execute_process(COMMAND objdump -s ${pfx}.o OUTPUT_VARIABLE _output) # Dump the object contents. NOTE(review): bare `objdump` from PATH and unchecked exit status - confirm this is intended for cross toolchains.
+ if (NOT _output MATCHES "62f17c48 28040540 000000") # Expected byte sequence missing (also the case when nothing was captured).
+ set(GNU_ASSEMBLER_AVX512_BUG 1) # Read further down to skip the AVX-512 variants.
+ endif()
endif()
-elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
- set(CMAKE_C_FLAGS "-march=armv8-a+crc ${CMAKE_C_FLAGS}")
- check_c_compiler_flag("-march=armv8-a+crc+crypto -mtune=qdf24xx" compiler_flag_march_core_qdf24xx)
- if(compiler_flag_march_core_qdf24xx)
- list(APPEND MARCH_VARIANTS "qdf24xx\;-march=armv8-a+crc+crypto -DCLIB_N_PREFETCHES=8")
+endif()
+
+##############################################################################
+# CPU optimizations and multiarch support
+##############################################################################
+
+option(VPP_BUILD_NATIVE_ONLY "Build only for native CPU." OFF) # When ON, no add_vpp_march_variant() call is made and VPP_DEFAULT_MARCH_FLAGS becomes -march=${VPP_BUILD_NATIVE_ARCH}.
+
+macro(add_vpp_march_variant v) # Register multiarch variant `v` if the compiler accepts every one of its FLAGS.
+ cmake_parse_arguments(ARG
+ "OFF" # option keyword: the variant's build option defaults to OFF instead of ON
+ "N_PREFETCHES;CACHE_PREFETCH_BYTES" # single-value keywords, turned into -DCLIB_* definitions below
+ "FLAGS" # multi-value keyword: compiler flags of the variant; without it the macro does nothing
+ ${ARGN}
+ )
+
+ if(ARG_FLAGS)
+ set(flags_ok 1) # Cleared as soon as one flag is rejected by the compiler.
+ set(fs "") # Accumulates the variant's flag string (space separated).
+ foreach(f ${ARG_FLAGS})
+ string(APPEND fs " ${f}")
+ string(MAKE_C_IDENTIFIER ${f} sfx) # Every non-alphanumeric character (including '.', as in -march=armv8.2-a) becomes '_', so the result variable name stays a valid identifier when the check passes it to the compiler as -D<name>.
+ if(NOT DEFINED compiler_flag${sfx}) # Probe each distinct flag only once.
+ check_c_compiler_flag(${f} compiler_flag${sfx})
+ endif()
+ if(NOT compiler_flag${sfx})
+ unset(flags_ok)
+ endif()
+ endforeach()
+ if(ARG_N_PREFETCHES)
+ string(APPEND fs " -DCLIB_N_PREFETCHES=${ARG_N_PREFETCHES}")
+ endif()
+ if(ARG_CACHE_PREFETCH_BYTES)
+ set_log2_cacheline_size(log2 ${ARG_CACHE_PREFETCH_BYTES}) # Only 64 and 128 are accepted; other values stop configuration.
+ string(APPEND fs " -DCLIB_LOG2_CACHE_PREFETCH_BYTES=${log2}")
+ endif()
+ if(flags_ok)
+ string(TOUPPER ${v} uv)
+ if(ARG_OFF)
+ option(VPP_MARCH_VARIANT_${uv} "Build ${v} multiarch variant." OFF)
+ else()
+ option(VPP_MARCH_VARIANT_${uv} "Build ${v} multiarch variant." ON)
+ endif()
+ if (VPP_MARCH_VARIANT_${uv})
+ list(APPEND MARCH_VARIANTS "${v}\;${fs}") # Entry format: "<name>;<flags>" kept as one list element.
+ list(APPEND MARCH_VARIANTS_NAMES "${v}")
+ else()
+ list(APPEND MARCH_VARIANTS_DISABLED "${v}\;${fs}") # Supported by the compiler but switched off by its option.
+ endif()
+ endif()
endif()
- check_c_compiler_flag("-march=armv8.1-a+crc+crypto -mtune=thunderx2t99" compiler_flag_march_thunderx2t99)
- if(compiler_flag_march_thunderx2t99)
- if (CMAKE_C_COMPILER_VERSION VERSION_GREATER 7.3)
- list(APPEND MARCH_VARIANTS "thunderx2t99\;-march=armv8.1-a+crc+crypto -mtune=thunderx2t99 -DCLIB_N_PREFETCHES=8")
- else()
- list(APPEND MARCH_VARIANTS "thunderx2t99\;-march=armv8.1-a+crc+crypto -DCLIB_N_PREFETCHES=8")
+endmacro()
+
+if(VPP_BUILD_NATIVE_ONLY) # Native-only build: one -march value, no multiarch variants are registered.
+ set(VPP_BUILD_NATIVE_ARCH "native" CACHE STRING "native CPU -march= value.")
+ set(VPP_DEFAULT_MARCH_FLAGS -march=${VPP_BUILD_NATIVE_ARCH})
+ if(VPP_BUILD_NATIVE_ONLY) # NOTE(review): repeats the enclosing condition, so it is always true at this point.
+ check_c_compiler_flag(${VPP_DEFAULT_MARCH_FLAGS} compiler_flag_march)
+ if(NOT compiler_flag_march)
+ message(FATAL_ERROR "Native-only build with ${VPP_DEFAULT_MARCH_FLAGS} is not supported by compiler")
endif()
endif()
- check_c_compiler_flag("-march=armv8-a+crc+crypto -mtune=cortex-a72" compiler_flag_march_cortexa72)
- if(compiler_flag_march_cortexa72)
- list(APPEND MARCH_VARIANTS "cortexa72\;-march=armv8-a+crc+crypto -mtune=cortex-a72 -DCLIB_N_PREFETCHES=6")
+ set(MARCH_VARIANTS_NAMES "native-only")
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*") # x86-64 targets: baseline flags plus the variants below.
+ set(VPP_DEFAULT_MARCH_FLAGS -march=corei7 -mtune=corei7-avx)
+
+ add_vpp_march_variant(hsw # option defaults to ON
+ FLAGS -march=haswell -mtune=haswell
+ )
+
+ add_vpp_march_variant(trm # option defaults to OFF
+ FLAGS -march=tremont -mtune=tremont
+ OFF
+ )
+
+ add_vpp_march_variant(adl # option defaults to OFF
+ FLAGS -march=alderlake -mtune=alderlake -mprefer-vector-width=256
+ OFF
+ )
+
+ add_vpp_march_variant(scalar # option defaults to OFF
+ FLAGS -march=core2 -mno-mmx -mno-sse
+ OFF
+ )
+
+ add_vpp_march_variant(znver3 # option defaults to OFF
+ FLAGS -march=znver3 -mtune=znver3 -mprefer-vector-width=256
+ OFF
+ )
+
+ if (GNU_ASSEMBLER_AVX512_BUG) # Set by the assembler probe earlier in this file.
+ message(WARNING "AVX-512 multiarch variant(s) disabled due to GNU Assembler bug")
+ else()
+ add_vpp_march_variant(skx # option defaults to ON
+ FLAGS -march=skylake-avx512 -mtune=skylake-avx512 -mprefer-vector-width=256
+ )
+
+ add_vpp_march_variant(icl # option defaults to ON
+ FLAGS -march=icelake-client -mtune=icelake-client -mprefer-vector-width=512
+ )
+
+ add_vpp_march_variant(spr # option defaults to OFF
+ FLAGS -march=sapphirerapids -mtune=sapphirerapids -mprefer-vector-width=512
+ OFF
+ )
+
+ add_vpp_march_variant(znver4 # option defaults to OFF
+ FLAGS -march=znver4 -mtune=znver4 -mprefer-vector-width=512
+ OFF
+ )
endif()
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") # AArch64 targets: baseline flags plus the variants below.
+ set(VPP_DEFAULT_MARCH_FLAGS -march=armv8-a+crc)
+
+ add_vpp_march_variant(qdf24xx # option defaults to OFF
+ FLAGS -march=armv8-a+crc+crypto -mtune=qdf24xx
+ N_PREFETCHES 8
+ CACHE_PREFETCH_BYTES 64
+ OFF
+ )
+
+ add_vpp_march_variant(octeontx2 # option defaults to ON; no CACHE_PREFETCH_BYTES is passed for this variant
+ FLAGS -march=armv8.2-a+crc+crypto+lse
+ N_PREFETCHES 8
+ )
+
+ add_vpp_march_variant(thunderx2t99 # option defaults to ON
+ FLAGS -march=armv8.1-a+crc+crypto -mtune=thunderx2t99
+ N_PREFETCHES 8
+ CACHE_PREFETCH_BYTES 64
+ )
+
+ add_vpp_march_variant(cortexa72 # option defaults to ON
+ FLAGS -march=armv8-a+crc+crypto -mtune=cortex-a72
+ N_PREFETCHES 6
+ CACHE_PREFETCH_BYTES 64
+ )
+
+ add_vpp_march_variant(neoversen1 # option defaults to ON
+ FLAGS -march=armv8.2-a+crc+crypto -mtune=neoverse-n1
+ N_PREFETCHES 6
+ CACHE_PREFETCH_BYTES 64
+ )
+ add_vpp_march_variant(neoversen2 # option defaults to OFF
+ FLAGS -march=armv9-a+crypto -mtune=neoverse-n2
+ N_PREFETCHES 6
+ CACHE_PREFETCH_BYTES 64
+ OFF
+ )
endif()
macro(vpp_library_set_multiarch_sources lib)
- foreach(V ${MARCH_VARIANTS})
+ cmake_parse_arguments(ARG
+ ""
+ ""
+ "SOURCES;DEPENDS;FORCE_ON"
+ ${ARGN}
+ )
+
+ set(VARIANTS "${MARCH_VARIANTS}")
+
+ if(ARG_FORCE_ON)
+ foreach(F ${ARG_FORCE_ON})
+ foreach(V ${MARCH_VARIANTS_DISABLED})
+ list(GET V 0 VARIANT)
+ if (VARIANT STREQUAL F)
+ list(GET V 1 VARIANT_FLAGS)
+ list(APPEND VARIANTS "${VARIANT}\;${VARIANT_FLAGS}")
+ endif()
+ endforeach()
+ endforeach()
+ endif()
+
+ foreach(V ${VARIANTS})
list(GET V 0 VARIANT)
list(GET V 1 VARIANT_FLAGS)
set(l ${lib}_${VARIANT})
- add_library(${l} OBJECT ${ARGN})
+ add_library(${l} OBJECT ${ARG_SOURCES})
+ if(ARG_DEPENDS)
+ add_dependencies(${l} ${ARG_DEPENDS})
+ endif()
set_target_properties(${l} PROPERTIES POSITION_INDEPENDENT_CODE ON)
- target_compile_options(${l} PUBLIC "-DCLIB_MARCH_VARIANT=${VARIANT}")
+ target_compile_definitions(${l} PUBLIC CLIB_MARCH_VARIANT=${VARIANT})
separate_arguments(VARIANT_FLAGS)
target_compile_options(${l} PUBLIC ${VARIANT_FLAGS})
target_sources(${lib} PRIVATE $<TARGET_OBJECTS:${l}>)