Add a neoversev2 march variant for NVIDIA Grace (Neoverse-V2) CPUs.
It is built with -mcpu rather than -march or -mtune, as recommended
on Grace, because the -mcpu flag specifies both the appropriate
architecture and the tuning strategy [1].
Type: improvement
[1] https://github.com/NVIDIA/grace-cpu-benchmarking-guide/blob/main/src/developer/languages/c-c++.md#recommended-compiler-flags
Signed-off-by: Jay Wang <[email protected]>
Change-Id: I09b52e605f218e633c4b59519038646ac6eec870
CACHE_PREFETCH_BYTES 64
OFF
)
+
+ add_vpp_march_variant(neoversev2
+ FLAGS -mcpu=neoverse-v2+crypto
+ N_PREFETCHES 6
+ CACHE_PREFETCH_BYTES 64
+ )
+
endif()
macro(vpp_library_set_multiarch_sources lib)
_ (0x41, 0xd0b, "ARM", "Cortex-A76", 0) \
_ (0x41, 0xd0c, "ARM", "Neoverse-N1", 0) \
_ (0x41, 0xd49, "ARM", "Neoverse-N2", 0) \
+ _ (0x41, 0xd4f, "ARM", "Neoverse-V2", 0) \
_ (0x41, 0xd4a, "ARM", "Neoverse-E1", 0) \
_ (0x43, 0x0a1, "Marvell", "THUNDERX CN88XX", 0) \
_ (0x43, 0x0a2, "Marvell", "OCTEON TX CN81XX", 0) \
_ (qdf24xx, "Qualcomm CentriqTM 2400") \
_ (cortexa72, "ARM Cortex-A72") \
_ (neoversen1, "ARM Neoverse N1") \
- _ (neoversen2, "ARM Neoverse N2")
+ _ (neoversen2, "ARM Neoverse N2") \
+ _ (neoversev2, "ARM Neoverse V2")
#else
#define foreach_march_variant
#endif
#define AARCH64_CPU_PART_CORTEXA72 0xd08
#define AARCH64_CPU_PART_NEOVERSEN1 0xd0c
#define AARCH64_CPU_PART_NEOVERSEN2 0xd49
+#define AARCH64_CPU_PART_NEOVERSEV2 0xd4f
/*cavium */
#define AARCH64_CPU_IMPLEMENTER_CAVIUM 0x43
return -1;
}
+/*
+ * March-variant priority for Neoverse-V2.
+ *
+ * Returns 10 when the CPU info reports the ARM implementer together with
+ * the Neoverse-V2 part number. Returns -1 in every other case, including
+ * when no CPU info is available.
+ */
+static inline int
+clib_cpu_march_priority_neoversev2 ()
+{
+  const clib_cpu_info_t *cpu = clib_get_cpu_info ();
+
+  /* All three conditions must hold for this variant to be selectable. */
+  if (cpu && cpu->aarch64.implementer == AARCH64_CPU_IMPLEMENTER_ARM &&
+      cpu->aarch64.part_num == AARCH64_CPU_PART_NEOVERSEV2)
+    return 10;
+
+  return -1;
+}
+
#ifdef CLIB_MARCH_VARIANT
#define CLIB_MARCH_FN_PRIORITY() CLIB_MARCH_SFX(clib_cpu_march_priority)()
#else