2 * perfmon.c - performance monitor plug-in
4 * Copyright (c) <current-year> <your-organization>
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 #include <vnet/vnet.h>
19 #include <vnet/plugin/plugin.h>
20 #include <perfmon/perfmon.h>
22 #include <vlibapi/api.h>
23 #include <vlibmemory/api.h>
24 #include <vpp/app/version.h>
25 #include <linux/limits.h>
/* Plugin-wide state; the functions in this file reach it via &perfmon_main. */
27 perfmon_main_t perfmon_main;
/*
 * Directory searched for the per-CPU .json event tables. This literal is
 * the initial value; set_perfmon_json_path() overwrites the pointer with a
 * path built from the location of the running executable.
 */
29 static char *perfmon_json_path = "/usr/share/vpp/plugins/perfmon";
37 } file_by_model_and_stepping_t;
/*
 * CPU model / stepping to event-table filename map, scanned linearly by
 * perfmon_init(). The third column is the "stepping valid" flag: when it is
 * non-zero perfmon_init() also requires the stepping to match, otherwise
 * only the model is compared.
 */
39 /* Created by parsing mapfile.csv, see mapfile_tool.c */
41 static const file_by_model_and_stepping_t fms_table[] = {
42 /* model, stepping, stepping valid, file */
43 {0x2E, 0x0, 0, "NehalemEX_core_V2.json"},
44 {0x1E, 0x0, 0, "NehalemEP_core_V2.json"},
45 {0x1F, 0x0, 0, "NehalemEP_core_V2.json"},
46 {0x1A, 0x0, 0, "NehalemEP_core_V2.json"},
47 {0x2F, 0x0, 0, "WestmereEX_core_V2.json"},
48 {0x25, 0x0, 0, "WestmereEP-SP_core_V2.json"},
49 {0x2C, 0x0, 0, "WestmereEP-DP_core_V2.json"},
50 {0x37, 0x0, 0, "Silvermont_core_V14.json"},
51 {0x4D, 0x0, 0, "Silvermont_core_V14.json"},
52 {0x4C, 0x0, 0, "Silvermont_core_V14.json"},
53 {0x5C, 0x0, 0, "goldmont_core_v13.json"},
54 {0x5F, 0x0, 0, "goldmont_core_v13.json"},
55 {0x1C, 0x0, 0, "Bonnell_core_V4.json"},
56 {0x26, 0x0, 0, "Bonnell_core_V4.json"},
57 {0x27, 0x0, 0, "Bonnell_core_V4.json"},
58 {0x36, 0x0, 0, "Bonnell_core_V4.json"},
59 {0x35, 0x0, 0, "Bonnell_core_V4.json"},
60 {0x2A, 0x0, 0, "sandybridge_core_v16.json"},
61 {0x2D, 0x0, 0, "Jaketown_core_V20.json"},
62 {0x3A, 0x0, 0, "ivybridge_core_v21.json"},
63 {0x3E, 0x0, 0, "ivytown_core_v20.json"},
64 {0x3C, 0x0, 0, "haswell_core_v28.json"},
65 {0x45, 0x0, 0, "haswell_core_v28.json"},
66 {0x46, 0x0, 0, "haswell_core_v28.json"},
67 {0x3F, 0x0, 0, "haswellx_core_v20.json"},
68 {0x3D, 0x0, 0, "broadwell_core_v23.json"},
69 {0x47, 0x0, 0, "broadwell_core_v23.json"},
70 {0x4F, 0x0, 0, "broadwellx_core_v14.json"},
71 {0x56, 0x0, 0, "broadwellde_core_v7.json"},
72 {0x4E, 0x0, 0, "skylake_core_v42.json"},
73 {0x5E, 0x0, 0, "skylake_core_v42.json"},
74 {0x8E, 0x0, 0, "skylake_core_v42.json"},
75 {0x9E, 0x0, 0, "skylake_core_v42.json"},
76 {0x57, 0x0, 0, "KnightsLanding_core_V9.json"},
77 {0x85, 0x0, 0, "KnightsLanding_core_V9.json"},
/* Model 0x55 is the only entry set that is split by stepping. */
78 {0x55, 0x0, 1, "skylakex_core_v1.12.json"},
79 {0x55, 0x1, 1, "skylakex_core_v1.12.json"},
80 {0x55, 0x2, 1, "skylakex_core_v1.12.json"},
81 {0x55, 0x3, 1, "skylakex_core_v1.12.json"},
82 {0x55, 0x4, 1, "skylakex_core_v1.12.json"},
83 {0x55, 0x5, 1, "cascadelakex_core_v1.00.json"},
84 {0x55, 0x6, 1, "cascadelakex_core_v1.00.json"},
85 {0x55, 0x7, 1, "cascadelakex_core_v1.00.json"},
86 {0x55, 0x8, 1, "cascadelakex_core_v1.00.json"},
87 {0x55, 0x9, 1, "cascadelakex_core_v1.00.json"},
88 {0x55, 0xA, 1, "cascadelakex_core_v1.00.json"},
89 {0x55, 0xB, 1, "cascadelakex_core_v1.00.json"},
90 {0x55, 0xC, 1, "cascadelakex_core_v1.00.json"},
91 {0x55, 0xD, 1, "cascadelakex_core_v1.00.json"},
92 {0x55, 0xE, 1, "cascadelakex_core_v1.00.json"},
93 {0x55, 0xF, 1, "cascadelakex_core_v1.00.json"},
94 {0x7A, 0x0, 0, "goldmontplus_core_v1.01.json"},
/*
 * Point perfmon_json_path at a directory derived from the location of the
 * running executable: resolve /proc/self/exe, look for the last '/' twice,
 * then append "/share/vpp/plugins/perfmon" to what is left in path.
 * NOTE(review): the two strrchr() lookups presumably strip the executable
 * name and its parent directory (e.g. ".../bin/vpp" -> "..."); the
 * truncation statements are not visible here - confirm.
 */
98 set_perfmon_json_path ()
100 char *p, path[PATH_MAX];
104 /* find executable path */
/* Read at most PATH_MAX - 1 bytes so there is room for a terminator. */
105 if ((rv = readlink ("/proc/self/exe", path, PATH_MAX - 1)) == -1)
108 /* readlink doesn't provide null termination */
112 if ((p = strrchr (path, '/')) == 0)
117 if ((p = strrchr (path, '/')) == 0)
121 /* cons up the .json file path */
122 s = format (0, "%s/share/vpp/plugins/perfmon", path);
/* The formatted vector replaces the compiled-in default path. */
124 perfmon_json_path = (char *) s;
130 #if defined(__x86_64__)
/*
 * x86_64 only: load 1 into EAX, execute CPUID, and copy the resulting EAX
 * into cpuid. perfmon_init() decodes model and stepping from that value.
 * eax, edx, ecx and rbx are declared as clobbered.
 */
132 asm volatile ("mov $1, %%eax; cpuid; mov %%eax, %0":"=r" (cpuid)::"%eax",
133 "%edx", "%ecx", "%rbx");
/*
 * Plugin init function (registered with VLIB_INIT_FUNCTION below).
 * Initializes perfmon_main, resolves the .json table directory, reads the
 * CPU id, and looks for a matching fms_table entry whose file is then
 * parsed into pm->perfmon_table. Logs an error when no usable table results.
 */
140 static clib_error_t *
141 perfmon_init (vlib_main_t * vm)
143 perfmon_main_t *pm = &perfmon_main;
144 clib_error_t *error = 0;
147 int found_a_table = 0;
152 pm->vnet_main = vnet_get_main ();
/* String-keyed hash; values are uword sized. */
154 pm->capture_by_thread_and_node_name =
155 hash_create_string (0, sizeof (uword));
157 pm->log_class = vlib_log_register_class ("perfmon", 0);
159 /* Default data collection interval */
160 pm->timeout_interval = 2.0; /* seconds */
/*
 * Each of the three per-thread tables gets outer indices 0 and 1, and each
 * inner vector is sized to cover every entry of vlib_mains.
 */
161 vec_validate (pm->pm_fds, 1);
162 vec_validate (pm->pm_fds[0], vec_len (vlib_mains) - 1);
163 vec_validate (pm->pm_fds[1], vec_len (vlib_mains) - 1);
164 vec_validate (pm->perf_event_pages, 1);
165 vec_validate (pm->perf_event_pages[0], vec_len (vlib_mains) - 1);
166 vec_validate (pm->perf_event_pages[1], vec_len (vlib_mains) - 1);
167 vec_validate (pm->rdpmc_indices, 1);
168 vec_validate (pm->rdpmc_indices[0], vec_len (vlib_mains) - 1);
169 vec_validate (pm->rdpmc_indices[1], vec_len (vlib_mains) - 1);
170 pm->page_size = getpagesize ();
172 ht = pm->perfmon_table = 0;
174 set_perfmon_json_path ();
176 cpuid = get_cpuid ();
/* Linear scan of the model/stepping table for this CPU. */
178 for (i = 0; i < ARRAY_LEN (fms_table); i++)
/*
 * model: bits 19:16 of cpuid form the high nibble, bits 7:4 the low nibble.
 * stepping: bits 3:0 of cpuid.
 */
180 model = ((cpuid >> 12) & 0xf0) | ((cpuid >> 4) & 0xf);
181 stepping = cpuid & 0xf;
183 if (fms_table[i].model != model)
/* Stepping is compared only for entries that flag it as significant. */
186 if (fms_table[i].has_stepping)
188 if (fms_table[i].stepping != stepping)
193 ht = perfmon_parse_table (pm, perfmon_json_path, fms_table[i].filename);
196 pm->perfmon_table = ht;
/* No match, no parsed table, or an empty table: report the CPU id. */
198 if (found_a_table == 0 || pm->perfmon_table == 0 || hash_elts (ht) == 0)
200 vlib_log_err (pm->log_class, "No table for cpuid %x", cpuid);
201 vlib_log_err (pm->log_class, " model %x, stepping %x",
208 VLIB_INIT_FUNCTION (perfmon_init);
/*
 * Plugin registration record. On builds where __x86_64__ is not defined the
 * plugin is marked default_disabled.
 */
211 VLIB_PLUGIN_REGISTER () =
213 .version = VPP_BUILD_VER,
214 .description = "Performance monitor plugin",
215 #if !defined(__x86_64__)
216 .default_disabled = 1,
/*
 * Fold one hexadecimal digit into rv, four bits per character. Accepts
 * '0'-'9', 'a'-'f' and 'A'-'F'. The upper-case range previously ended at
 * 'A', so 'B'..'F' were not treated as digits.
 */
228 if (*s >= '0' && *s <= '9')
229 rv = (rv << 4) | (*s - '0');
230 else if (*s >= 'a' && *s <= 'f')
231 rv = (rv << 4) | (*s - 'a' + 10);
232 else if (*s >= 'A' && *s <= 'F')
233 rv = (rv << 4) | (*s - 'A' + 10);
/*
 * unformat helper: parse a processor-specific event name and fill in a
 * perfmon_event_config_t for it.
 *
 * Variadic arguments, in order: perfmon_main_t *pm, then
 * perfmon_event_config_t *ep (output).
 *
 * The name is looked up in pm->perfmon_table; the entry's name/value pairs
 * supply "EventCode" (OR-ed into the low bits of the config) and "UMask"
 * (OR-ed in shifted left by 8). The result is stored as a PERF_TYPE_RAW
 * event.
 */
244 unformat_processor_event (unformat_input_t * input, va_list * args)
246 perfmon_main_t *pm = va_arg (*args, perfmon_main_t *);
247 perfmon_event_config_t *ep = va_arg (*args, perfmon_event_config_t *);
249 name_value_pair_t **nvps, *nvp;
/* Without a parsed table there are no processor events to match. */
255 if (pm->perfmon_table == 0)
258 if (!unformat (input, "%s", &s))
261 hp = hash_get_pair_mem (pm->perfmon_table, s);
/* The hash value is the vector of name/value pairs for this event. */
268 nvps = (name_value_pair_t **) (hp->value[0]);
270 for (i = 0; i < vec_len (nvps); i++)
/* Both values are hex strings decoded by atox(). */
273 if (!strncmp ((char *) nvp->name, "EventCode", 9))
275 pe_config |= atox (nvp->value);
278 else if (!strncmp ((char *) nvp->name, "UMask", 5))
280 pe_config |= (atox (nvp->value) << 8);
289 clib_warning ("BUG: only found %d values", set_values);
/* The event name aliases the hash key rather than a copy. */
293 ep->name = (char *) hp->key;
294 ep->pe_type = PERF_TYPE_RAW;
295 ep->pe_config = pe_config;
/*
 * CLI handler registered as set_pmc_command.
 *
 * Parses the rest of the command line into the set of events to collect
 * (synthetic pairs, processor-specific events, generic events), plus an
 * optional timeout and thread list. It then signals perfmon_periodic_node
 * and suspends until collection is no longer in PERFMON_STATE_RUNNING.
 *
 * Returns a clib error for missing input, unknown input, a thread index
 * that does not exist, or an empty event list.
 */
299 static clib_error_t *
300 set_pmc_command_fn (vlib_main_t * vm,
301 unformat_input_t * input, vlib_cli_command_t * cmd)
303 perfmon_main_t *pm = &perfmon_main;
304 vlib_thread_main_t *vtm = vlib_get_thread_main ();
/* Thread count used for validation: 1 plus vtm->n_threads. */
305 int num_threads = 1 + vtm->n_threads;
306 unformat_input_t _line_input, *line_input = &_line_input;
307 perfmon_event_config_t ec;
/* Drop any previous selection; ~0 is the "not selected" index value. */
314 vec_reset_length (pm->single_events_to_collect);
315 vec_reset_length (pm->paired_events_to_collect);
316 pm->ipc_event_index = ~0;
317 pm->mispredict_event_index = ~0;
319 if (!unformat_user (input, unformat_line_input, line_input))
320 return clib_error_return (0, "counter names required...");
322 clib_bitmap_zero (pm->thread_bitmap);
324 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
326 if (unformat (line_input, "timeout %u", &timeout_seconds))
327 pm->timeout_interval = (f64) timeout_seconds;
/*
 * Synthetic event: queues "instructions" followed by "cpu-cycles" and
 * records the index of the first in ipc_event_index.
 */
328 else if (unformat (line_input, "instructions-per-clock"))
330 ec.name = "instructions";
331 ec.pe_type = PERF_TYPE_HARDWARE;
332 ec.pe_config = PERF_COUNT_HW_INSTRUCTIONS;
333 pm->ipc_event_index = vec_len (pm->paired_events_to_collect);
334 vec_add1 (pm->paired_events_to_collect, ec);
335 ec.name = "cpu-cycles";
336 ec.pe_type = PERF_TYPE_HARDWARE;
337 ec.pe_config = PERF_COUNT_HW_CPU_CYCLES;
338 vec_add1 (pm->paired_events_to_collect, ec);
/*
 * Synthetic event: queues "branch-misses" followed by "branches" and
 * records the index of the first in mispredict_event_index.
 */
340 else if (unformat (line_input, "branch-mispredict-rate"))
342 ec.name = "branch-misses";
343 ec.pe_type = PERF_TYPE_HARDWARE;
344 ec.pe_config = PERF_COUNT_HW_BRANCH_MISSES;
345 pm->mispredict_event_index = vec_len (pm->paired_events_to_collect);
346 vec_add1 (pm->paired_events_to_collect, ec);
347 ec.name = "branches";
348 ec.pe_type = PERF_TYPE_HARDWARE;
349 ec.pe_config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
350 vec_add1 (pm->paired_events_to_collect, ec);
/* "threads" and "thread" are both accepted and fill the same bitmap. */
352 else if (unformat (line_input, "threads %U",
353 unformat_bitmap_list, &pm->thread_bitmap))
355 else if (unformat (line_input, "thread %U",
356 unformat_bitmap_list, &pm->thread_bitmap))
/* Processor-specific event looked up in the parsed .json table. */
358 else if (unformat (line_input, "%U", unformat_processor_event, pm, &ec))
360 vec_add1 (pm->single_events_to_collect, ec);
/* One else-if branch is generated per entry of foreach_perfmon_event. */
362 #define _(type,event,str) \
363 else if (unformat (line_input, str)) \
367 ec.pe_config = event; \
368 vec_add1 (pm->single_events_to_collect, ec); \
370 foreach_perfmon_event
374 error = clib_error_return (0, "unknown input '%U'",
375 format_unformat_error, line_input);
376 unformat_free (line_input);
381 unformat_free (line_input);
/* Reject a thread list whose highest set bit is not a valid thread. */
383 last_set = clib_bitmap_last_set (pm->thread_bitmap);
384 if (last_set != ~0 && last_set >= num_threads)
385 return clib_error_return (0, "thread %d does not exist", last_set);
387 /* Stick paired events at the front of the (unified) list */
388 if (vec_len (pm->paired_events_to_collect) > 0)
390 perfmon_event_config_t *tmp;
391 /* first 2n events are pairs... */
/*
 * Append the single events to the paired vector, then swap the two
 * pointers: single_events_to_collect ends up holding the unified list
 * and paired_events_to_collect holds the old single-event vector.
 */
392 vec_append (pm->paired_events_to_collect, pm->single_events_to_collect);
393 tmp = pm->single_events_to_collect;
394 pm->single_events_to_collect = pm->paired_events_to_collect;
395 pm->paired_events_to_collect = tmp;
398 if (vec_len (pm->single_events_to_collect) == 0)
399 return clib_error_return (0, "no events specified...");
401 /* Figure out how long data collection will take */
403 ((f64) vec_len (pm->single_events_to_collect)) * pm->timeout_interval;
404 delay /= 2.0; /* collect 2 stats at once */
406 vlib_cli_output (vm, "Start collection for %d events, wait %.2f seconds",
407 vec_len (pm->single_events_to_collect), delay);
409 vlib_process_signal_event (pm->vlib_main, perfmon_periodic_node.index,
412 /* Coarse-grained wait */
413 vlib_process_suspend (vm, delay);
416 /* Reasonable to guess that collection may not be quite done... */
/* Poll in 10 ms steps until the state leaves PERFMON_STATE_RUNNING. */
417 while (pm->state == PERFMON_STATE_RUNNING)
419 vlib_process_suspend (vm, 10e-3);
422 vlib_cli_output (vm, "DEADMAN: collection still running...");
427 vlib_cli_output (vm, "Data collection complete...");
/* CLI registration that binds set_pmc_command_fn() as the handler. */
432 VLIB_CLI_COMMAND (set_pmc_command, static) =
435 .short_help = "set pmc [threads n,n1-n2] c1... [see \"show pmc events\"]",
436 .function = set_pmc_command_fn,
/*
 * Sort comparator for perfmon_capture_t elements: orders them by
 * thread_and_node_name using strcmp(). Used by show_pmc_command_fn() via
 * vec_sort_with_function().
 */
442 capture_name_sort (void *a1, void *a2)
444 perfmon_capture_t *c1 = a1;
445 perfmon_capture_t *c2 = a2;
447 return strcmp ((char *) c1->thread_and_node_name,
448 (char *) c2->thread_and_node_name);
/*
 * format() helper that renders one capture as table rows.
 *
 * Variadic arguments, in order: perfmon_main_t *pm, perfmon_capture_t *c,
 * int verbose (unused). show_pmc_command_fn() passes c == 0 to request the
 * column header line.
 *
 * Counters at pm->ipc_event_index and pm->mispredict_event_index are
 * treated as the first half of a synthetic pair: the row shows counter i,
 * counter i + 1, and their ratio. Every other counter is shown with its
 * vector count and a per-packet ratio.
 */
452 format_capture (u8 * s, va_list * args)
454 perfmon_main_t *pm = va_arg (*args, perfmon_main_t *);
455 perfmon_capture_t *c = va_arg (*args, perfmon_capture_t *);
456 int verbose __attribute__ ((unused)) = va_arg (*args, int);
/* Column header. */
462 s = format (s, "%=40s%=20s%=16s%=16s%=16s",
463 "Name", "Counter", "Count", "Pkts", "Counts/Pkt");
467 for (i = 0; i < vec_len (c->counter_names); i++)
472 name = c->thread_and_node_name;
479 /* Deal with synthetic events right here */
480 if (i == pm->ipc_event_index)
/* The paired counter must exist at i + 1. */
483 ASSERT ((i + 1) < vec_len (c->counter_names));
/* Divide only when the denominator counter is non-zero. */
485 if (c->counter_values[i + 1] > 0)
486 ipc_rate = (f64) c->counter_values[i]
487 / (f64) c->counter_values[i + 1];
491 s = format (s, "%-40s%+20s%+16llu%+16llu%+16.2e\n",
492 name, "instructions-per-clock",
493 c->counter_values[i],
494 c->counter_values[i + 1], ipc_rate);
498 if (i == pm->mispredict_event_index)
501 ASSERT (i + 1 < vec_len (c->counter_names));
503 if (c->counter_values[i + 1] > 0)
504 mispredict_rate = (f64) c->counter_values[i]
505 / (f64) c->counter_values[i + 1];
507 mispredict_rate = 0.0;
509 s = format (s, "%-40s%+20s%+16llu%+16llu%+16.2e\n",
510 name, "branch-mispredict-rate",
511 c->counter_values[i],
512 c->counter_values[i + 1], mispredict_rate);
/* Ordinary counter: ratio of counter value to vectors seen. */
516 if (c->vectors_this_counter[i])
518 ((f64) c->counter_values[i]) / ((f64) c->vectors_this_counter[i]);
522 s = format (s, "%-40s%+20s%+16llu%+16llu%+16.2e",
523 name, c->counter_names[i],
524 c->counter_values[i],
525 c->vectors_this_counter[i], ticks_per_pkt);
/*
 * format() helper that lists every entry of foreach_perfmon_event, one per
 * line. Variadic argument: int verbose.
 * Two output forms exist: the name alone, or the name followed by
 * "(type, config)". NOTE(review): the choice is presumably driven by
 * verbose; the condition is not visible here - confirm.
 */
531 format_generic_events (u8 * s, va_list * args)
533 int verbose = va_arg (*args, int);
535 #define _(type,config,name) \
537 s = format (s, "\n %s", name); \
539 s = format (s, "\n %s (%d, %d)", name, type, config);
540 foreach_perfmon_event;
548 name_value_pair_t **nvps;
/*
 * Sort comparator for sort_nvp_t elements: orders them by name using
 * strcmp(). Used by format_processor_events().
 */
552 sort_nvps_by_name (void *a1, void *a2)
554 sort_nvp_t *nvp1 = a1;
555 sort_nvp_t *nvp2 = a2;
557 return strcmp ((char *) nvp1->name, (char *) nvp2->name);
/*
 * format() helper that lists the processor-specific events held in
 * pm->perfmon_table, sorted by name.
 *
 * Variadic arguments, in order: perfmon_main_t *pm, int verbose.
 * Two output forms exist: names only, or each name followed by its
 * "name = value" pairs. NOTE(review): presumably selected by verbose; the
 * condition is not visible here - confirm.
 */
561 format_processor_events (u8 * s, va_list * args)
563 perfmon_main_t *pm = va_arg (*args, perfmon_main_t *);
564 int verbose = va_arg (*args, int);
566 sort_nvp_t *sort_nvps = 0;
569 name_value_pair_t **value;
/* Collect one sort_nvp_t per hash entry so the output can be ordered. */
572 hash_foreach_mem (key, value, pm->perfmon_table,
574 vec_add2 (sort_nvps, sn, 1);
579 vec_sort_with_function (sort_nvps, sort_nvps_by_name);
/* Names-only listing. */
583 for (i = 0; i < vec_len (sort_nvps); i++)
584 s = format (s, "\n %s ", sort_nvps[i].name);
/* Listing with every name/value pair of each event. */
588 for (i = 0; i < vec_len (sort_nvps); i++)
590 name_value_pair_t **nvps;
591 s = format (s, "\n %s:", sort_nvps[i].name);
593 nvps = sort_nvps[i].nvps;
595 for (j = 0; j < vec_len (nvps); j++)
596 s = format (s, "\n %s = %s", nvps[j]->name, nvps[j]->value);
/* Only the temporary sort vector is freed here. */
599 vec_free (sort_nvps);
/*
 * CLI handler registered as show_pmc_command.
 *
 * Recognizes the keywords "events" and "verbose". The event listing prints
 * the generic events, the two synthetic events, and (when a table was
 * loaded) the processor events. The capture listing prints a header and
 * one formatted entry per capture, sorted by thread-and-node name; it
 * reports "Data collection in progress..." while collection is running and
 * "No data..." when the capture pool is empty.
 */
604 static clib_error_t *
605 show_pmc_command_fn (vlib_main_t * vm,
606 unformat_input_t * input, vlib_cli_command_t * cmd)
608 perfmon_main_t *pm = &perfmon_main;
612 perfmon_capture_t *c;
613 perfmon_capture_t *captures = 0;
615 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
617 if (unformat (input, "events"))
619 else if (unformat (input, "verbose"))
/* Event listing. */
627 vlib_cli_output (vm, "Generic Events %U",
628 format_generic_events, verbose);
629 vlib_cli_output (vm, "Synthetic Events");
630 vlib_cli_output (vm, " instructions-per-clock");
631 vlib_cli_output (vm, " branch-mispredict-rate");
/* Processor events exist only if perfmon_init() loaded a table. */
632 if (pm->perfmon_table)
633 vlib_cli_output (vm, "Processor Events %U",
634 format_processor_events, pm, verbose);
638 if (pm->state == PERFMON_STATE_RUNNING)
640 vlib_cli_output (vm, "Data collection in progress...");
644 if (pool_elts (pm->capture_pool) == 0)
646 vlib_cli_output (vm, "No data...");
/* Copy the pool elements into a vector so they can be sorted. */
651 pool_foreach (c, pm->capture_pool,
653 vec_add1 (captures, *c);
657 vec_sort_with_function (captures, capture_name_sort);
659 vlib_cli_output (vm, "%U", format_capture, pm, 0 /* header */ ,
662 for (i = 0; i < vec_len (captures); i++)
666 vlib_cli_output (vm, "%U", format_capture, pm, c, verbose);
/*
 * CLI registration that binds show_pmc_command_fn() as the handler. The
 * help text lists both keywords the handler parses: "events" and "verbose".
 */
675 VLIB_CLI_COMMAND (show_pmc_command, static) =
678 .short_help = "show pmc [events] [verbose]",
679 .function = show_pmc_command_fn,
/*
 * CLI handler registered as clear_pmc_command.
 *
 * Prints "Performance monitor is still running..." when collection is in
 * PERFMON_STATE_RUNNING. Otherwise it frees the capture pool, walks the
 * name hash, frees it, and creates a fresh empty string-keyed hash in its
 * place.
 */
684 static clib_error_t *
685 clear_pmc_command_fn (vlib_main_t * vm,
686 unformat_input_t * input, vlib_cli_command_t * cmd)
688 perfmon_main_t *pm = &perfmon_main;
692 if (pm->state == PERFMON_STATE_RUNNING)
694 vlib_cli_output (vm, "Performance monitor is still running...");
698 pool_free (pm->capture_pool);
/* NOTE(review): the per-entry body is not visible here; presumably it
 * releases each key - confirm. */
701 hash_foreach_mem (key, value, pm->capture_by_thread_and_node_name,
706 hash_free (pm->capture_by_thread_and_node_name);
/* Recreate the hash with the same parameters perfmon_init() uses. */
707 pm->capture_by_thread_and_node_name =
708 hash_create_string (0, sizeof (uword));
/* CLI registration that binds clear_pmc_command_fn() as the handler. */
713 VLIB_CLI_COMMAND (clear_pmc_command, static) =
716 .short_help = "clear the performance monitor counters",
717 .function = clear_pmc_command_fn,
723 * fd.io coding-style-patch-verification: ON
726 * eval: (c-set-style "gnu")