1 /* SPDX-License-Identifier: Apache-2.0
2 * Copyright(c) 2021 Cisco Systems, Inc.
5 #include <vppinfra/format.h>
6 #include <vppinfra/vector/test/test.h>
7 #include <vppinfra/error.h>
/* Return the CPU priority for a given multiarch variant type, used to decide
 * whether a variant is runnable on this CPU (callers treat < 0 as
 * "unsupported" — see test_funct/test_perf below).
 * NOTE(review): this excerpt is elided — the enclosing foreach-style macro
 * that expands the `s` token below is not visible here; confirm against the
 * full file. */
12 test_march_supported (clib_march_variant_type_t type)
15 if (CLIB_MARCH_VARIANT_TYPE_##s == type) \
16 return clib_cpu_march_priority_##s ();
/* Run all registered functional tests for every multiarch variant supported
 * by the current CPU.  For each variant: skip it if no registrations exist or
 * test_march_supported () reports it unrunnable, honor the optional name
 * filter (substring match via strstr), run each test, and print PASS/FAIL
 * with any error report.
 * NOTE(review): excerpt is elided — loop braces, the registration-walk, and
 * the call that produces `err` are not visible here. */
23 test_funct (test_main_t *tm)
25 for (int i = 0; i < CLIB_MARCH_TYPE_N_VARIANTS; i++)
27 test_registration_t *r = tm->registrations[i];
/* Skip variants with no tests or not supported on this CPU. */
29 if (r == 0 || test_march_supported (i) < 0)
32 fformat (stdout, "\nMultiarch Variant: %U\n", format_march_variant, i);
34 "-------------------------------------------------------\n");
/* Optional name filter: run only tests whose name contains tm->filter. */
38 if (tm->filter && strstr (r->name, (char *) tm->filter) == 0)
41 fformat (stdout, "%-50s %s\n", r->name, err ? "FAIL" : "PASS");
44 clib_error_report (err);
45 fformat (stdout, "\n");
52 fformat (stdout, "\n");
/* Maximum number of perf events carried by one bundle (sizes the config[]
 * array below and the fd/counter arrays in test_perf). */
56 #define TEST_PERF_MAX_EVENTS 7
/* Describes a named group of perf events that are opened, read, and printed
 * together.  NOTE(review): struct definition is elided here — only some
 * members (config[], format_fn) are visible; others (e.g. name/desc/type,
 * referenced by the initializers below) are not shown. */
61 u64 config[TEST_PERF_MAX_EVENTS];
64 format_function_t *format_fn;
65 } test_perf_event_bundle_t;
/* format_function_t for the "default" bundle: prints IPC, clocks/op,
 * instructions/op, branches/op and branch-misses/op derived from the raw
 * counter array, where data[0]=cycles, data[1]=instructions, data[2]=branches,
 * data[3]=branch misses (matching perf_bundles[0].config[] below).
 * The paired format() calls suggest a value row vs. a header row — presumably
 * selected by whether `data` is non-null; the guarding conditionals are
 * elided from this excerpt, so confirm against the full file. */
68 format_test_perf_bundle_default (u8 *s, va_list *args)
70 test_perf_event_bundle_t __clib_unused *b =
71 va_arg (*args, test_perf_event_bundle_t *);
72 test_perf_t *tp = va_arg (*args, test_perf_t *);
73 u64 *data = va_arg (*args, u64 *);
/* instructions per cycle */
76 s = format (s, "%5.2f", (f64) data[1] / data[0]);
78 s = format (s, "%5s", "IPC");
/* cycles per operation */
81 s = format (s, "%8.2f", (f64) data[0] / tp->n_ops);
83 s = format (s, "%8s", "Clks/Op");
/* instructions per operation */
86 s = format (s, "%8.2f", (f64) data[1] / tp->n_ops);
88 s = format (s, "%8s", "Inst/Op");
/* branches per operation */
91 s = format (s, "%9.2f", (f64) data[2] / tp->n_ops);
93 s = format (s, "%9s", "Brnch/Op");
/* branch misses per operation */
96 s = format (s, "%10.2f", (f64) data[3] / tp->n_ops);
98 s = format (s, "%10s", "BrMiss/Op");
/* format_function_t for the "core-power" bundle: prints the percentage of
 * cycles spent in each turbo license level, with data[0] as the reference
 * cycle count and data[1..3] the per-level counts (matching
 * perf_bundles[1].config[] below).  As in the default formatter, value rows
 * vs. header rows appear to alternate — the guarding conditionals are elided
 * from this excerpt. */
103 format_test_perf_bundle_core_power (u8 *s, va_list *args)
105 test_perf_event_bundle_t __clib_unused *b =
106 va_arg (*args, test_perf_event_bundle_t *);
107 test_perf_t __clib_unused *tp = va_arg (*args, test_perf_t *);
108 u64 *data = va_arg (*args, u64 *);
111 s = format (s, "%7.1f %%", (f64) 100 * data[1] / data[0]);
113 s = format (s, "%9s", "Level 0");
116 s = format (s, "%8.1f %%", (f64) 100 * data[2] / data[0]);
118 s = format (s, "%9s", "Level 1");
121 s = format (s, "%7.1f %%", (f64) 100 * data[3] / data[0]);
123 s = format (s, "%9s", "Level 2");
/* Table of selectable perf-event bundles, matched by name in test_perf ().
 * Fix: corrected "Operatiom" typo in the user-visible description string.
 * NOTE(review): this excerpt is elided — the .name of the first bundle and
 * the .n_events members are not visible here. */
128 test_perf_event_bundle_t perf_bundles[] = {
131 .desc = "IPC, Clocks/Operation, Instr/Operation, Branch Total & Miss",
132 .type = PERF_TYPE_HARDWARE,
/* Generic hardware counters consumed by format_test_perf_bundle_default:
 * [0]=cycles [1]=instructions [2]=branches [3]=branch misses. */
133 .config[0] = PERF_COUNT_HW_CPU_CYCLES,
134 .config[1] = PERF_COUNT_HW_INSTRUCTIONS,
135 .config[2] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
136 .config[3] = PERF_COUNT_HW_BRANCH_MISSES,
138 .format_fn = format_test_perf_bundle_default,
/* Raw Intel event encoding: low byte is the event code, next byte the umask. */
141 #define PERF_INTEL_CODE(event, umask) ((event) | (umask) << 8)
144 .name = "core-power",
146 "Core cycles where the core was running under specific turbo schedule.",
147 .type = PERF_TYPE_RAW,
/* [0]=0x3c reference cycles; [1..4]=0x28 CORE_POWER.LVL* license cycles,
 * consumed by format_test_perf_bundle_core_power. */
148 .config[0] = PERF_INTEL_CODE (0x3c, 0x00),
149 .config[1] = PERF_INTEL_CODE (0x28, 0x07),
150 .config[2] = PERF_INTEL_CODE (0x28, 0x18),
151 .config[3] = PERF_INTEL_CODE (0x28, 0x20),
152 .config[4] = PERF_INTEL_CODE (0x28, 0x40),
154 .format_fn = format_test_perf_bundle_core_power,
/* Run the registered perf tests under a Linux perf-event counter group.
 * Looks up the bundle named by tm->bundle, opens one perf event per
 * bundle config[] entry via the perf_event_open syscall (first fd becomes
 * the group leader), then for each supported multiarch variant runs every
 * perf test tm->repeat times, reading the grouped counters after each run
 * and printing them through the bundle's format_fn.
 * NOTE(review): excerpt is elided — braces, error-exit paths, the fd
 * bookkeeping and the closing cleanup code are not fully visible here. */
161 test_perf (test_main_t *tm)
163 clib_error_t *err = 0;
164 test_perf_event_bundle_t *b = 0;
165 int group_fd = -1, fds[TEST_PERF_MAX_EVENTS];
/* +3 slots: with PERF_FORMAT_GROUP the read buffer starts with
 * nr / time_enabled / time_running before the per-event values —
 * see the read_format flags below. */
166 u64 count[TEST_PERF_MAX_EVENTS + 3] = {};
167 struct perf_event_attr pe = {
168 .size = sizeof (struct perf_event_attr),
174 .read_format = (PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED |
175 PERF_FORMAT_TOTAL_TIME_RUNNING),
178 for (int i = 0; i < TEST_PERF_MAX_EVENTS; i++)
/* Select the bundle by (prefix) name match against tm->bundle. */
183 for (int i = 0; i < ARRAY_LEN (perf_bundles); i++)
184 if (strncmp ((char *) tm->bundle, perf_bundles[i].name,
185 vec_len (tm->bundle)) == 0)
187 b = perf_bundles + i;
191 return clib_error_return (0, "Unknown bundle '%s'", tm->bundle);
/* Open one counter per configured event; the first open (group_fd == -1)
 * creates the group leader the later events attach to. */
196 for (int i = 0; i < b->n_events; i++)
198 pe.config = b->config[i];
200 int fd = syscall (__NR_perf_event_open, &pe, /* pid */ 0, /* cpu */ -1,
201 /* group_fd */ group_fd, /* flags */ 0);
204 err = clib_error_return_unix (0, "perf_event_open");
217 for (int i = 0; i < CLIB_MARCH_TYPE_N_VARIANTS; i++)
219 test_registration_t *r = tm->registrations[i];
221 if (r == 0 || test_march_supported (i) < 0)
224 fformat (stdout, "\nMultiarch Variant: %U\n", format_march_variant, i);
226 "-------------------------------------------------------\n");
231 test_perf_t *pt = r->perf_tests;
232 if (tm->filter && strstr (r->name, (char *) tm->filter) == 0)
/* Header row: data pointer 0UL makes format_fn print column labels. */
234 fformat (stdout, "%-22s%-12s%U\n", r->name, "OpType",
235 b->format_fn, b, pt, 0UL);
238 u32 read_size = (b->n_events + 3) * sizeof (u64);
239 for (int i = 0; i < tm->repeat; i++)
241 test_perf_event_reset (group_fd);
242 pt->fn (group_fd, pt);
243 if ((read (group_fd, &count, read_size) != read_size))
245 err = clib_error_return_unix (0, "read");
/* time_enabled != time_running means the counters were multiplexed
 * or throttled, so the numbers are not trustworthy. */
248 if (count[1] != count[2])
250 "perf counters were not running all the time."
252 "\nConsider turning NMI watchdog off ('sysctl -w "
253 "kernel.nmi_watchdog=0')."
256 fformat (stdout, " %-20s%-12s%U\n", pt->name,
257 pt->op_name ? pt->op_name : "", b->format_fn, b,
269 for (int i = 0; i < TEST_PERF_MAX_EVENTS; i++)
/* Entry point: initialize clib memory (64 MB heap), parse command-line
 * options (perf / filter <name> / bundle <name> / repeat <n>), then run
 * either the perf benchmarks or the functional tests and report any error.
 * NOTE(review): excerpt is elided — the flag set by "perf", the default
 * for tm->repeat, and the exit-status logic are not visible here. */
277 main (int argc, char *argv[])
279 test_main_t *tm = &test_main;
280 unformat_input_t _i = {}, *i = &_i;
281 clib_mem_init (0, 64ULL << 20);
288 unformat_init_command_line (i, argv);
290 while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
292 if (unformat (i, "perf"))
294 else if (unformat (i, "filter %s", &tm->filter))
296 else if (unformat (i, "bundle %s", &tm->bundle))
298 else if (unformat (i, "repeat %d", &tm->repeat))
302 clib_warning ("unknown input '%U'", format_unformat_error, i);
308 err = test_perf (tm);
310 err = test_funct (tm);
314 clib_error_report (err);
315 fformat (stderr, "\n");
/* Allocate a zero-filled, cache-line-aligned buffer whose size is rounded
 * up to a multiple of CLIB_CACHE_LINE_BYTES.  Caller frees (presumably via
 * test_mem_free below).
 * NOTE(review): excerpt is elided — the declaration of `rv` and the return
 * statement are not visible here. */
322 test_mem_alloc (uword size)
325 size = round_pow2 (size, CLIB_CACHE_LINE_BYTES);
326 rv = clib_mem_alloc_aligned (size, CLIB_CACHE_LINE_BYTES);
327 clib_memset_u8 (rv, 0, size);
/* Allocate a cache-line-aligned buffer (size rounded up to a cache-line
 * multiple) filled with an incrementing byte pattern: rv[i] = (i + start)
 * & mask.  A mask of 0 is treated as 0xff (no masking).
 * NOTE(review): excerpt is elided — the declaration of `rv` and the return
 * statement are not visible here. */
332 test_mem_alloc_and_fill_inc_u8 (uword size, u8 start, u8 mask)
335 mask = mask ? mask : 0xff;
336 size = round_pow2 (size, CLIB_CACHE_LINE_BYTES);
337 rv = clib_mem_alloc_aligned (size, CLIB_CACHE_LINE_BYTES);
338 for (uword i = 0; i < size; i++)
339 rv[i] = ((u8) i + start) & mask;
/* Allocate a cache-line-aligned buffer holding n_elts copies of the
 * elt_size-byte element pointed to by `elt`, zero-filling any padding that
 * the round-up to a cache-line multiple adds past the replicated data.
 * NOTE(review): excerpt is elided — the declarations of `rv`/`e`, the
 * advance of `e` inside the copy loop, and the return statement are not
 * visible here. */
344 test_mem_alloc_and_splat (uword elt_size, uword n_elts, void *elt)
347 uword data_size = elt_size * n_elts;
348 uword alloc_size = round_pow2 (data_size, CLIB_CACHE_LINE_BYTES);
349 e = rv = clib_mem_alloc_aligned (alloc_size, CLIB_CACHE_LINE_BYTES);
/* Replicate the element until the data region is full. */
350 while (e - rv < data_size)
352 clib_memcpy_fast (e, elt, elt_size);
/* Zero the alignment padding beyond the replicated data. */
356 if (data_size < alloc_size)
357 clib_memset_u8 (e, 0, alloc_size - data_size);
362 test_mem_free (void *p)