6a812180008c85a864496ddbf37cdc85974f3629
[vpp.git] / src / vppinfra / cpu.h
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #ifndef included_clib_cpu_h
17 #define included_clib_cpu_h
18
19 #include <sys/syscall.h>
20 #include <vppinfra/format.h>
21
/*
 * Multi-architecture support.
 *
 * Every entry handed to `macro` adds one more graph node function variant
 * optimized for a specific CPU microarchitecture.  The order of entries
 * matters for runtime selection: the first match wins.
 *
 * The list is only populated for non-debug x86_64 builds; everywhere else
 * it expands to nothing.
 */

#if __x86_64__ && CLIB_DEBUG == 0
#define foreach_march_variant(macro, x)                                 \
  macro (avx2, x, "arch=core-avx2")
#else
#define foreach_march_variant(macro, x)
#endif
35
36
/*
 * Function attribute used on multiarch function variants.  It expands to
 * the GCC `optimize ("O3")` attribute when building with GCC newer than 4
 * (not clang) and CLIB_DEBUG is 0; otherwise it expands to nothing.
 */
#if CLIB_DEBUG == 0 && !__clang__ && __GNUC__ > 4
#define CLIB_CPU_OPTIMIZED __attribute__ ((optimize ("O3")))
#else
#define CLIB_CPU_OPTIMIZED
#endif
42
43
/*
 * Expands to a statement that returns the address of the `arch` flavour of
 * `fn` (the symbol fn_arch) when clib_cpu_supports_<arch> () reports that
 * the running CPU has the feature.  `tgt` is accepted but not used by the
 * expansion.
 */
#define CLIB_MULTIARCH_ARCH_CHECK(arch, fn, tgt)        \
  if (clib_cpu_supports_##arch ())                      \
    return &fn##_##arch;
47
/* FIXME: to be removed.  Expands to nothing, whatever the arguments. */
#define CLIB_MULTIARCH_SELECT_FN(fn, ...)
50
/*
 * Symbol naming for multiarch builds.
 *
 * Without CLIB_MARCH_VARIANT the name is left untouched.  With it, the
 * expanded value of CLIB_MARCH_VARIANT is appended after an underscore
 * (fn -> fn_<variant>); the two helper levels exist so that the variant
 * macro is expanded before token pasting.
 */
#ifndef CLIB_MARCH_VARIANT
#define CLIB_MULTIARCH_FN(fn) fn
#else
#define __CLIB_MULTIARCH_FN(sym, sfx) sym##_##sfx
#define _CLIB_MULTIARCH_FN(sym, sfx) __CLIB_MULTIARCH_FN (sym, sfx)
#define CLIB_MULTIARCH_FN(fn) _CLIB_MULTIARCH_FN (fn, CLIB_MARCH_VARIANT)
#endif

/* Shorter alias for CLIB_MULTIARCH_FN. */
#define CLIB_MARCH_SFX CLIB_MULTIARCH_FN
60
/*
 * One registered implementation of a multiarch function.  Registrations
 * for the same function are chained through `next` into a singly linked
 * list.
 */
typedef struct _clib_march_fn_registration
{
  /* address of the implementation */
  void *function;
  /* selection priority; the highest value in the list is chosen */
  int priority;
  /* next registration in the list, 0 terminates it */
  struct _clib_march_fn_registration *next;
  /* variant name string */
  char *name;
} clib_march_fn_registration;
68
69 static_always_inline void *
70 clib_march_select_fn_ptr (clib_march_fn_registration * r)
71 {
72   void *rv = 0;
73   int last_prio = -1;
74
75   while (r)
76     {
77       if (last_prio < r->priority)
78         {
79           last_prio = r->priority;
80           rv = r->function;
81         }
82       r = r->next;
83     }
84   return rv;
85 }
86
/*
 * Pick the best registered implementation of `fn` from its registration
 * list (fn_march_fn_registrations).
 *
 * NOTE(review): the expansion ends with a semicolon, so the macro forms a
 * complete statement tail and cannot be nested inside a larger expression.
 */
#define CLIB_MARCH_FN_POINTER(fn)                                       \
  clib_march_select_fn_ptr (fn##_march_fn_registrations);
89
/*
 * Emit, for the variant being compiled, a static registration record for
 * `fn` plus a constructor that fills it in (implementation address,
 * priority from CLIB_MARCH_FN_PRIORITY) and pushes it onto the head of
 * the fn_march_fn_registrations list.
 */
#define _CLIB_MARCH_FN_REGISTRATION(fn)                                 \
  static clib_march_fn_registration                                     \
  CLIB_MARCH_SFX (fn##_march_fn_registration) = {                       \
    .name = CLIB_MARCH_VARIANT_STR                                      \
  };                                                                    \
                                                                        \
  static void __clib_constructor                                        \
  fn##_march_register ()                                                \
  {                                                                     \
    clib_march_fn_registration *this_reg;                               \
                                                                        \
    this_reg = &CLIB_MARCH_SFX (fn##_march_fn_registration);            \
    this_reg->priority = CLIB_MARCH_FN_PRIORITY ();                     \
    this_reg->function = CLIB_MARCH_SFX (fn);                           \
    this_reg->next = fn##_march_fn_registrations;                       \
    fn##_march_fn_registrations = this_reg;                             \
  }
107
/*
 * Register `fn` for runtime selection.  The default (non-variant) build
 * defines the list head fn_march_fn_registrations; variant builds only
 * declare it extern.  Both then emit the per-variant registration.
 */
#ifndef CLIB_MARCH_VARIANT
#define CLIB_MARCH_FN_REGISTRATION(fn)                                  \
  clib_march_fn_registration *fn##_march_fn_registrations = 0;          \
  _CLIB_MARCH_FN_REGISTRATION (fn)
#else
#define CLIB_MARCH_FN_REGISTRATION(fn)                                  \
  extern clib_march_fn_registration *fn##_march_fn_registrations;       \
  _CLIB_MARCH_FN_REGISTRATION (fn)
#endif
/*
 * x86_64 CPU feature flags.
 * Columns: flag name, CPUID leaf, result register, bit within that register.
 */
#define foreach_x86_64_flags                    \
  _ (sse3,             1,          ecx, 0)      \
  _ (pclmulqdq,        1,          ecx, 1)      \
  _ (ssse3,            1,          ecx, 9)      \
  _ (sse41,            1,          ecx, 19)     \
  _ (sse42,            1,          ecx, 20)     \
  _ (avx,              1,          ecx, 28)     \
  _ (rdrand,           1,          ecx, 30)     \
  _ (avx2,             7,          ebx, 5)      \
  _ (rtm,              7,          ebx, 11)     \
  _ (pqm,              7,          ebx, 12)     \
  _ (pqe,              7,          ebx, 15)     \
  _ (avx512f,          7,          ebx, 16)     \
  _ (rdseed,           7,          ebx, 18)     \
  _ (x86_aes,          1,          ecx, 25)     \
  _ (sha,              7,          ebx, 29)     \
  _ (vaes,             7,          ecx, 9)      \
  _ (vpclmulqdq,       7,          ecx, 10)     \
  _ (avx512_vnni,      7,          ecx, 11)     \
  _ (avx512_bitalg,    7,          ecx, 12)     \
  _ (avx512_vpopcntdq, 7,          ecx, 14)     \
  _ (movdiri,          7,          ecx, 27)     \
  _ (movdir64b,        7,          ecx, 28)     \
  _ (invariant_tsc,    0x80000007, edx, 8)
141
/*
 * aarch64 CPU feature flags.
 * Columns: flag name, bit position tested in the AT_HWCAP auxiliary value.
 */
#define foreach_aarch64_flags   \
  _ (fp, 0)                     \
  _ (asimd, 1)                  \
  _ (evtstrm, 2)                \
  _ (aarch64_aes, 3)            \
  _ (pmull, 4)                  \
  _ (sha1, 5)                   \
  _ (sha2, 6)                   \
  _ (crc32, 7)                  \
  _ (atomics, 8)                \
  _ (fphp, 9)                   \
  _ (asimdhp, 10)               \
  _ (cpuid, 11)                 \
  _ (asimdrdm, 12)              \
  _ (jscvt, 13)                 \
  _ (fcma, 14)                  \
  _ (lrcpc, 15)                 \
  _ (dcpop, 16)                 \
  _ (sha3, 17)                  \
  _ (sm3, 18)                   \
  _ (sm4, 19)                   \
  _ (asimddp, 20)               \
  _ (sha512, 21)                \
  _ (sve, 22)
166
167 static inline u32
168 clib_get_current_cpu_id ()
169 {
170   unsigned cpu, node;
171   syscall (__NR_getcpu, &cpu, &node, 0);
172   return cpu;
173 }
174
175 static inline u32
176 clib_get_current_numa_node ()
177 {
178   unsigned cpu, node;
179   syscall (__NR_getcpu, &cpu, &node, 0);
180   return node;
181 }
182
183 #if defined(__x86_64__)
184 #include "cpuid.h"
185
186 static inline int
187 clib_get_cpuid (const u32 lev, u32 * eax, u32 * ebx, u32 * ecx, u32 * edx)
188 {
189   if ((u32) __get_cpuid_max (0x80000000 & lev, 0) < lev)
190     return 0;
191   if (lev == 7)
192     __cpuid_count (lev, 0, *eax, *ebx, *ecx, *edx);
193   else
194     __cpuid (lev, *eax, *ebx, *ecx, *edx);
195   return 1;
196 }
197
198
199 #define _(flag, func, reg, bit) \
200 static inline int                                                       \
201 clib_cpu_supports_ ## flag()                                            \
202 {                                                                       \
203   u32 __attribute__((unused)) eax, ebx = 0, ecx = 0, edx  = 0;          \
204   clib_get_cpuid (func, &eax, &ebx, &ecx, &edx);                        \
205                                                                         \
206   return ((reg & (1 << bit)) != 0);                                     \
207 }
208 foreach_x86_64_flags
209 #undef _
210 #else /* __x86_64__ */
211
212 #define _(flag, func, reg, bit) \
213 static inline int clib_cpu_supports_ ## flag() { return 0; }
214 foreach_x86_64_flags
215 #undef _
216 #endif /* __x86_64__ */
217 #if defined(__aarch64__)
218 #include <sys/auxv.h>
219 #define _(flag, bit) \
220 static inline int                                                       \
221 clib_cpu_supports_ ## flag()                                            \
222 {                                                                       \
223   unsigned long hwcap = getauxval(AT_HWCAP);                            \
224   return (hwcap & (1 << bit));                                          \
225 }
226   foreach_aarch64_flags
227 #undef _
228 #else /* ! __x86_64__ && !__aarch64__ */
229 #define _(flag, bit) \
230 static inline int clib_cpu_supports_ ## flag() { return 0; }
231   foreach_aarch64_flags
232 #undef _
233 #endif /* __x86_64__, __aarch64__ */
/*
 * "aes" is the only feature name that appears in both the x86_64 and the
 * aarch64 flag lists.  To avoid the clash the list entries carry the
 * architecture name as a prefix (x86_aes, aarch64_aes), and this wrapper
 * provides the architecture-neutral query.  It returns 0 on any other
 * architecture.
 */
static inline int
clib_cpu_supports_aes ()
{
  int has_aes = 0;

#if defined(__x86_64__)
  has_aes = clib_cpu_supports_x86_aes ();
#elif defined (__aarch64__)
  has_aes = clib_cpu_supports_aarch64_aes ();
#endif

  return has_aes;
}
250
/*
 * Selection priority of the "icl" variant: 200 when the CPU reports
 * avx512_bitalg, -1 (never selected) otherwise.
 */
static inline int
clib_cpu_march_priority_icl ()
{
  return clib_cpu_supports_avx512_bitalg () ? 200 : -1;
}
258
/*
 * Selection priority of the "skx" variant: 100 when the CPU reports
 * avx512f, -1 (never selected) otherwise.
 */
static inline int
clib_cpu_march_priority_skx ()
{
  return clib_cpu_supports_avx512f () ? 100 : -1;
}
266
/*
 * Selection priority of the "trm" variant: 60 when the CPU reports
 * movdiri, -1 (never selected) otherwise.
 */
static inline int
clib_cpu_march_priority_trm ()
{
  return clib_cpu_supports_movdiri () ? 60 : -1;
}
274
/*
 * Selection priority of the "hsw" variant: 50 when the CPU reports
 * avx2, -1 (never selected) otherwise.
 */
static inline int
clib_cpu_march_priority_hsw ()
{
  return clib_cpu_supports_avx2 () ? 50 : -1;
}
282
283 static inline u32
284 clib_cpu_implementer ()
285 {
286   char buf[128];
287   static u32 implementer = -1;
288
289   if (-1 != implementer)
290     return implementer;
291
292   FILE *fp = fopen ("/proc/cpuinfo", "r");
293   if (!fp)
294     return implementer;
295
296   while (!feof (fp))
297     {
298       if (!fgets (buf, sizeof (buf), fp))
299         break;
300       buf[127] = '\0';
301       if (strstr (buf, "CPU implementer"))
302         implementer = (u32) strtol (memchr (buf, ':', 128) + 2, NULL, 0);
303       if (-1 != implementer)
304         break;
305     }
306   fclose (fp);
307
308   return implementer;
309 }
310
311 static inline u32
312 clib_cpu_part ()
313 {
314   char buf[128];
315   static u32 part = -1;
316
317   if (-1 != part)
318     return part;
319
320   FILE *fp = fopen ("/proc/cpuinfo", "r");
321   if (!fp)
322     return part;
323
324   while (!feof (fp))
325     {
326       if (!fgets (buf, sizeof (buf), fp))
327         break;
328       buf[127] = '\0';
329       if (strstr (buf, "CPU part"))
330         part = (u32) strtol (memchr (buf, ':', 128) + 2, NULL, 0);
331       if (-1 != part)
332         break;
333     }
334   fclose (fp);
335
336   return part;
337 }
338
/*
 * Implementer and part codes matched against the "CPU implementer" and
 * "CPU part" values read from /proc/cpuinfo.
 */

/* implementer codes */
#define AARCH64_CPU_IMPLEMENTER_CAVIUM 0x43
#define AARCH64_CPU_IMPLEMENTER_QDF24XX 0x51
#define AARCH64_CPU_IMPLEMENTER_CORTEXA72 0x41
#define AARCH64_CPU_IMPLEMENTER_NEOVERSEN1 0x41

/* part codes */
#define AARCH64_CPU_PART_THUNDERX2 0x0af
#define AARCH64_CPU_PART_OCTEONTX2T96 0x0b2
#define AARCH64_CPU_PART_OCTEONTX2T98 0x0b1
#define AARCH64_CPU_PART_QDF24XX 0xc00
#define AARCH64_CPU_PART_CORTEXA72 0xd08
#define AARCH64_CPU_PART_NEOVERSEN1 0xd0c
349
350 static inline int
351 clib_cpu_march_priority_octeontx2 ()
352 {
353   if ((AARCH64_CPU_IMPLEMENTER_CAVIUM == clib_cpu_implementer ()) &&
354       ((AARCH64_CPU_PART_OCTEONTX2T96 == clib_cpu_part ())
355        || AARCH64_CPU_PART_OCTEONTX2T98 == clib_cpu_part ()))
356     return 20;
357   return -1;
358 }
359
360 static inline int
361 clib_cpu_march_priority_thunderx2t99 ()
362 {
363   if ((AARCH64_CPU_IMPLEMENTER_CAVIUM == clib_cpu_implementer ()) &&
364       (AARCH64_CPU_PART_THUNDERX2 == clib_cpu_part ()))
365     return 20;
366   return -1;
367 }
368
369 static inline int
370 clib_cpu_march_priority_qdf24xx ()
371 {
372   if ((AARCH64_CPU_IMPLEMENTER_QDF24XX == clib_cpu_implementer ()) &&
373       (AARCH64_CPU_PART_QDF24XX == clib_cpu_part ()))
374     return 20;
375   return -1;
376 }
377
378 static inline int
379 clib_cpu_march_priority_cortexa72 ()
380 {
381   if ((AARCH64_CPU_IMPLEMENTER_CORTEXA72 == clib_cpu_implementer ()) &&
382       (AARCH64_CPU_PART_CORTEXA72 == clib_cpu_part ()))
383     return 10;
384   return -1;
385 }
386
387 static inline int
388 clib_cpu_march_priority_neoversen1 ()
389 {
390   if ((AARCH64_CPU_IMPLEMENTER_NEOVERSEN1 == clib_cpu_implementer ()) &&
391       (AARCH64_CPU_PART_NEOVERSEN1 == clib_cpu_part ()))
392     return 10;
393   return -1;
394 }
395
396 #ifdef CLIB_MARCH_VARIANT
397 #define CLIB_MARCH_FN_PRIORITY() CLIB_MARCH_SFX(clib_cpu_march_priority)()
398 #else
399 #define CLIB_MARCH_FN_PRIORITY() 0
400 #endif
401 #endif /* included_clib_cpu_h */
402
403 #define CLIB_MARCH_FN_CONSTRUCTOR(fn)                                   \
404 static void __clib_constructor                                          \
405 CLIB_MARCH_SFX(fn ## _march_constructor) (void)                         \
406 {                                                                       \
407   if (CLIB_MARCH_FN_PRIORITY() > fn ## _selected_priority)              \
408     {                                                                   \
409       fn ## _selected = & CLIB_MARCH_SFX (fn ## _ma);                   \
410       fn ## _selected_priority = CLIB_MARCH_FN_PRIORITY();              \
411     }                                                                   \
412 }                                                                       \
413
414 #ifndef CLIB_MARCH_VARIANT
415 #define CLIB_MARCH_FN(fn, rtype, _args...)                              \
416   static rtype CLIB_CPU_OPTIMIZED CLIB_MARCH_SFX (fn ## _ma)(_args);    \
417   rtype (*fn ## _selected) (_args) = & CLIB_MARCH_SFX (fn ## _ma);      \
418   int fn ## _selected_priority = 0;                                     \
419   static inline rtype CLIB_CPU_OPTIMIZED                                \
420   CLIB_MARCH_SFX (fn ## _ma)(_args)
421 #else
422 #define CLIB_MARCH_FN(fn, rtype, _args...)                              \
423   static rtype CLIB_CPU_OPTIMIZED CLIB_MARCH_SFX (fn ## _ma)(_args);    \
424   extern rtype (*fn ## _selected) (_args);                              \
425   extern int fn ## _selected_priority;                                  \
426   CLIB_MARCH_FN_CONSTRUCTOR (fn)                                        \
427   static rtype CLIB_CPU_OPTIMIZED CLIB_MARCH_SFX (fn ## _ma)(_args)
428 #endif
429
430 #define CLIB_MARCH_FN_SELECT(fn) (* fn ## _selected)
431
432 format_function_t format_cpu_uarch;
433 format_function_t format_cpu_model_name;
434 format_function_t format_cpu_flags;
435
436 /*
437  * fd.io coding-style-patch-verification: ON
438  *
439  * Local Variables:
440  * eval: (c-set-style "gnu")
441  * End:
442  */