X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvppinfra%2Fcpu.h;h=7a1b75fcf7d755c1995891d83bd8e2d551fc3e04;hb=ac80b8be8be5757b667a9aa7ddf258b36f695a5a;hp=3e52087f1f02935e074c28f94f6f7c601c12ae83;hpb=7f4fd22f2f56b2b4620139d377e8db0976a2ae3b;p=vpp.git diff --git a/src/vppinfra/cpu.h b/src/vppinfra/cpu.h index 3e52087f1f0..7a1b75fcf7d 100644 --- a/src/vppinfra/cpu.h +++ b/src/vppinfra/cpu.h @@ -19,34 +19,41 @@ #include #include -/* - * multiarchitecture support. Adding new entry will produce - * new graph node function variant optimized for specific cpu - * microarchitecture. - * Order is important for runtime selection, as 1st match wins... - */ - -#if __x86_64__ && CLIB_DEBUG == 0 -#define foreach_march_variant(macro, x) \ - macro(avx2, x, "arch=core-avx2") -#else -#define foreach_march_variant(macro, x) -#endif - - -#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0 -#define CLIB_CPU_OPTIMIZED __attribute__ ((optimize ("O3"))) +#if defined(__x86_64__) +#define foreach_march_variant \ + _ (scalar, "Generic (SIMD disabled)") \ + _ (hsw, "Intel Haswell") \ + _ (trm, "Intel Tremont") \ + _ (skx, "Intel Skylake (server) / Cascade Lake") \ + _ (icl, "Intel Ice Lake") \ + _ (adl, "Intel Alder Lake") \ + _ (spr, "Intel Sapphire Rapids") \ + _ (znver3, "AMD Milan") \ + _ (znver4, "AMD Genoa") +#elif defined(__aarch64__) +#define foreach_march_variant \ + _ (octeontx2, "Marvell Octeon TX2") \ + _ (thunderx2t99, "Marvell ThunderX2 T99") \ + _ (qdf24xx, "Qualcomm CentriqTM 2400") \ + _ (cortexa72, "ARM Cortex-A72") \ + _ (neoversen1, "ARM Neoverse N1") \ + _ (neoversen2, "ARM Neoverse N2") #else -#define CLIB_CPU_OPTIMIZED +#define foreach_march_variant #endif - -#define CLIB_MULTIARCH_ARCH_CHECK(arch, fn, tgt) \ - if (clib_cpu_supports_ ## arch()) \ - return & fn ## _ ##arch; - -/* FIXME to be removed */ -#define CLIB_MULTIARCH_SELECT_FN(fn,...) +#define amd_vendor(t1, t2, t3) \ + ((t1 == 0x68747541) && /* htuA */ \ + (t2 == 0x444d4163) && /* DMAc */ \ + (t3 == 0x69746e65)) /* itne */ +typedef enum +{ + CLIB_MARCH_VARIANT_TYPE = 0, +#define _(s, n) CLIB_MARCH_VARIANT_TYPE_##s, + foreach_march_variant +#undef _ + CLIB_MARCH_TYPE_N_VARIANTS +} clib_march_variant_type_t; #ifdef CLIB_MARCH_VARIANT #define __CLIB_MULTIARCH_FN(a,b) a##_##b @@ -84,7 +91,10 @@ clib_march_select_fn_ptr (clib_march_fn_registration * r) return rv; } -#define CLIB_MARCH_FN_POINTER(fn) \ +#define CLIB_MARCH_FN_POINTER(fn) \ + (__typeof__ (fn) *) clib_march_select_fn_ptr (fn##_march_fn_registrations); + +#define CLIB_MARCH_FN_VOID_POINTER(fn) \ clib_march_select_fn_ptr (fn##_march_fn_registrations); #define _CLIB_MARCH_FN_REGISTRATION(fn) \ @@ -114,25 +124,34 @@ _CLIB_MARCH_FN_REGISTRATION(fn) clib_march_fn_registration *fn##_march_fn_registrations = 0; \ _CLIB_MARCH_FN_REGISTRATION(fn) #endif -#define foreach_x86_64_flags \ -_ (sse3, 1, ecx, 0) \ -_ (ssse3, 1, ecx, 9) \ -_ (sse41, 1, ecx, 19) \ -_ (sse42, 1, ecx, 20) \ -_ (avx, 1, ecx, 28) \ -_ (rdrand, 1, ecx, 30) \ -_ (avx2, 7, ebx, 5) \ -_ (rtm, 7, ebx, 11) \ -_ (pqm, 7, ebx, 12) \ -_ (pqe, 7, ebx, 15) \ -_ (avx512f, 7, ebx, 16) \ -_ (rdseed, 7, ebx, 18) \ -_ (x86_aes, 1, ecx, 25) \ -_ (sha, 7, ebx, 29) \ -_ (vaes, 7, ecx, 9) \ -_ (vpclmulqdq, 7, ecx, 10) \ -_ (invariant_tsc, 0x80000007, edx, 8) - +#define foreach_x86_64_flags \ + _ (sse3, 1, ecx, 0) \ + _ (pclmulqdq, 1, ecx, 1) \ + _ (ssse3, 1, ecx, 9) \ + _ (sse41, 1, ecx, 19) \ + _ (sse42, 1, ecx, 20) \ + _ (avx, 1, ecx, 28) \ + _ (rdrand, 1, ecx, 30) \ + _ (avx2, 7, ebx, 5) \ + _ (bmi2, 7, ebx, 8) \ + _ (rtm, 7, ebx, 11) \ + _ (pqm, 7, ebx, 12) \ + _ (pqe, 7, ebx, 15) \ + _ (avx512f, 7, ebx, 16) \ + _ (rdseed, 7, ebx, 18) \ + _ (x86_aes, 1, ecx, 25) \ + _ (sha, 7, ebx, 29) \ + _ (vaes, 7, ecx, 9) \ + _ (vpclmulqdq, 7, ecx, 10) \ + _ (avx512_vnni, 7, ecx, 11) \ + _ (avx512_bitalg, 7, ecx, 12) \ + _ (avx512_vpopcntdq, 7, ecx, 14) \ + _ (movdiri, 7, ecx, 27) \ + _ (movdir64b, 7, ecx, 28) \ + _ (enqcmd, 7, ecx, 29) \ + _ (avx512_fp16, 7, edx, 23) \ + _ (invariant_tsc, 0x80000007, edx, 8) \ + _ (monitorx, 0x80000001, ecx, 29) #define foreach_aarch64_flags \ _ (fp, 0) \ @@ -159,21 +178,10 @@ _ (asimddp, 20) \ _ (sha512, 21) \ _ (sve, 22) -static inline u32 -clib_get_current_cpu_id () -{ - unsigned cpu, node; - syscall (__NR_getcpu, &cpu, &node, 0); - return cpu; -} +u32 clib_get_current_cpu_id (void); +u32 clib_get_current_numa_node (void); -static inline u32 -clib_get_current_numa_node () -{ - unsigned cpu, node; - syscall (__NR_getcpu, &cpu, &node, 0); - return node; -} +typedef int (*clib_cpu_supports_func_t) (void); #if defined(__x86_64__) #include "cpuid.h" @@ -190,7 +198,6 @@ clib_get_cpuid (const u32 lev, u32 * eax, u32 * ebx, u32 * ecx, u32 * edx) return 1; } - #define _(flag, func, reg, bit) \ static inline int \ clib_cpu_supports_ ## flag() \ @@ -244,108 +251,202 @@ clib_cpu_supports_aes () } static inline int -clib_cpu_march_priority_avx512 () +clib_cpu_march_priority_scalar () +{ + return 1; +} + +static inline int +clib_cpu_march_priority_spr () +{ + if (clib_cpu_supports_enqcmd ()) + return 300; + return -1; +} + +static inline int +clib_cpu_march_priority_icl () +{ + if (clib_cpu_supports_avx512_bitalg ()) + return 200; + return -1; +} + +static inline int +clib_cpu_march_priority_adl () +{ + if (clib_cpu_supports_movdiri () && clib_cpu_supports_avx2 ()) + return 150; + return -1; +} + +static inline int +clib_cpu_march_priority_skx () { if (clib_cpu_supports_avx512f ()) - return 20; + return 100; + return -1; +} + +static inline int +clib_cpu_march_priority_trm () +{ + if (clib_cpu_supports_movdiri ()) + return 40; return -1; } static inline int -clib_cpu_march_priority_avx2 () +clib_cpu_march_priority_hsw () { if (clib_cpu_supports_avx2 ()) return 50; return -1; } -static inline u32 -clib_cpu_implementer () +static inline int +clib_cpu_march_priority_znver4 () +{ + if (clib_cpu_supports_avx512_bitalg () && clib_cpu_supports_monitorx ()) + return 250; + return -1; +} + +static inline int +clib_cpu_march_priority_znver3 () { - char buf[128]; - static u32 implementer = -1; + if (clib_cpu_supports_avx2 () && clib_cpu_supports_monitorx ()) + return 70; + return -1; +} - if (-1 != implementer) - return implementer; +#define X86_CPU_ARCH_PERF_FUNC 0xA - FILE *fp = fopen ("/proc/cpuinfo", "r"); - if (!fp) - return implementer; +static inline int +clib_get_pmu_counter_count (u8 *fixed, u8 *general) +{ +#if defined(__x86_64__) + u32 __clib_unused eax = 0, ebx = 0, ecx = 0, edx = 0; + clib_get_cpuid (X86_CPU_ARCH_PERF_FUNC, &eax, &ebx, &ecx, &edx); - while (!feof (fp)) - { - if (!fgets (buf, sizeof (buf), fp)) - break; - buf[127] = '\0'; - if (strstr (buf, "CPU implementer")) - implementer = (u32) strtol (memchr (buf, ':', 128) + 2, NULL, 0); - if (-1 != implementer) - break; - } - fclose (fp); + *general = (eax & 0xFF00) >> 8; + *fixed = (edx & 0xF); - return implementer; + return 1; +#else + return 0; +#endif } -static inline u32 -clib_cpu_part () +typedef struct { - char buf[128]; - static u32 part = -1; + struct + { + u8 implementer; + u16 part_num; + } aarch64; +} clib_cpu_info_t; + +const clib_cpu_info_t *clib_get_cpu_info (); + +/* ARM */ +#define AARCH64_CPU_IMPLEMENTER_ARM 0x41 +#define AARCH64_CPU_PART_CORTEXA72 0xd08 +#define AARCH64_CPU_PART_NEOVERSEN1 0xd0c +#define AARCH64_CPU_PART_NEOVERSEN2 0xd49 + +/*cavium */ +#define AARCH64_CPU_IMPLEMENTER_CAVIUM 0x43 +#define AARCH64_CPU_PART_THUNDERX2 0x0af +#define AARCH64_CPU_PART_OCTEONTX2T96 0x0b2 +#define AARCH64_CPU_PART_OCTEONTX2T98 0x0b1 + +/* Qualcomm */ +#define AARCH64_CPU_IMPLEMENTER_QUALCOMM 0x51 +#define AARCH64_CPU_PART_QDF24XX 0xc00 - if (-1 != part) - return part; +static inline int +clib_cpu_march_priority_octeontx2 () +{ + const clib_cpu_info_t *info = clib_get_cpu_info (); - FILE *fp = fopen ("/proc/cpuinfo", "r"); - if (!fp) - return part; + if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_CAVIUM) + return -1; - while (!feof (fp)) - { - if (!fgets (buf, sizeof (buf), fp)) - break; - buf[127] = '\0'; - if (strstr (buf, "CPU part")) - part = (u32) strtol (memchr (buf, ':', 128) + 2, NULL, 0); - if (-1 != part) - break; - } - fclose (fp); + if (info->aarch64.part_num == AARCH64_CPU_PART_OCTEONTX2T96 || + info->aarch64.part_num == AARCH64_CPU_PART_OCTEONTX2T98) + return 20; - return part; + return -1; } -#define AARCH64_CPU_IMPLEMENTER_THUNERDERX2 0x43 -#define AARCH64_CPU_PART_THUNERDERX2 0x0af -#define AARCH64_CPU_IMPLEMENTER_QDF24XX 0x51 -#define AARCH64_CPU_PART_QDF24XX 0xc00 -#define AARCH64_CPU_IMPLEMENTER_CORTEXA72 0x41 -#define AARCH64_CPU_PART_CORTEXA72 0xd08 - static inline int clib_cpu_march_priority_thunderx2t99 () { - if ((AARCH64_CPU_IMPLEMENTER_THUNERDERX2 == clib_cpu_implementer ()) && - (AARCH64_CPU_PART_THUNERDERX2 == clib_cpu_part ())) + const clib_cpu_info_t *info = clib_get_cpu_info (); + + if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_CAVIUM) + return -1; + + if (info->aarch64.part_num == AARCH64_CPU_PART_THUNDERX2) return 20; + return -1; } static inline int clib_cpu_march_priority_qdf24xx () { - if ((AARCH64_CPU_IMPLEMENTER_QDF24XX == clib_cpu_implementer ()) && - (AARCH64_CPU_PART_QDF24XX == clib_cpu_part ())) + const clib_cpu_info_t *info = clib_get_cpu_info (); + + if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_QUALCOMM) + return -1; + + if (info->aarch64.part_num == AARCH64_CPU_PART_QDF24XX) return 20; + return -1; } static inline int clib_cpu_march_priority_cortexa72 () { - if ((AARCH64_CPU_IMPLEMENTER_CORTEXA72 == clib_cpu_implementer ()) && - (AARCH64_CPU_PART_CORTEXA72 == clib_cpu_part ())) + const clib_cpu_info_t *info = clib_get_cpu_info (); + + if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_ARM) + return -1; + + if (info->aarch64.part_num == AARCH64_CPU_PART_CORTEXA72) + return 10; + + return -1; +} + +static inline int +clib_cpu_march_priority_neoversen1 () +{ + const clib_cpu_info_t *info = clib_get_cpu_info (); + + if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_ARM) + return -1; + + if (info->aarch64.part_num == AARCH64_CPU_PART_NEOVERSEN1) + return 10; + + return -1; +} + +static inline int +clib_cpu_march_priority_neoversen2 () +{ + const clib_cpu_info_t *info = clib_get_cpu_info (); + + if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_ARM) + return -1; + + if (info->aarch64.part_num == AARCH64_CPU_PART_NEOVERSEN2) return 10; + return -1; } @@ -368,19 +469,18 @@ CLIB_MARCH_SFX(fn ## _march_constructor) (void) \ } \ #ifndef CLIB_MARCH_VARIANT -#define CLIB_MARCH_FN(fn, rtype, _args...) \ - static rtype CLIB_CPU_OPTIMIZED CLIB_MARCH_SFX (fn ## _ma)(_args); \ - rtype (*fn ## _selected) (_args) = & CLIB_MARCH_SFX (fn ## _ma); \ - int fn ## _selected_priority = 0; \ - static inline rtype CLIB_CPU_OPTIMIZED \ - CLIB_MARCH_SFX (fn ## _ma)(_args) +#define CLIB_MARCH_FN(fn, rtype, _args...) \ + static rtype CLIB_MARCH_SFX (fn##_ma) (_args); \ + rtype (*fn##_selected) (_args) = &CLIB_MARCH_SFX (fn##_ma); \ + int fn##_selected_priority = 0; \ + static inline rtype CLIB_MARCH_SFX (fn##_ma) (_args) #else -#define CLIB_MARCH_FN(fn, rtype, _args...) \ - static rtype CLIB_CPU_OPTIMIZED CLIB_MARCH_SFX (fn ## _ma)(_args); \ - extern rtype (*fn ## _selected) (_args); \ - extern int fn ## _selected_priority; \ - CLIB_MARCH_FN_CONSTRUCTOR (fn) \ - static rtype CLIB_CPU_OPTIMIZED CLIB_MARCH_SFX (fn ## _ma)(_args) +#define CLIB_MARCH_FN(fn, rtype, _args...) \ + static rtype CLIB_MARCH_SFX (fn##_ma) (_args); \ + extern rtype (*fn##_selected) (_args); \ + extern int fn##_selected_priority; \ + CLIB_MARCH_FN_CONSTRUCTOR (fn) \ + static rtype CLIB_MARCH_SFX (fn##_ma) (_args) #endif #define CLIB_MARCH_FN_SELECT(fn) (* fn ## _selected) @@ -388,6 +488,7 @@ CLIB_MARCH_SFX(fn ## _march_constructor) (void) \ format_function_t format_cpu_uarch; format_function_t format_cpu_model_name; format_function_t format_cpu_flags; +format_function_t format_march_variant; /* * fd.io coding-style-patch-verification: ON