static char *perfmon_json_path = "/usr/share/vpp/plugins/perfmon";
+typedef struct
+{
+ u8 model;
+ u8 stepping;
+ u8 has_stepping;
+ char *filename;
+} file_by_model_and_stepping_t;
+
+/* Created by parsing mapfile.csv, see mapfile_tool.c */
+
+static const file_by_model_and_stepping_t fms_table[] = {
+ /* model, stepping, stepping valid, file */
+ {0x2E, 0x0, 0, "NehalemEX_core_V2.json"},
+ {0x1E, 0x0, 0, "NehalemEP_core_V2.json"},
+ {0x1F, 0x0, 0, "NehalemEP_core_V2.json"},
+ {0x1A, 0x0, 0, "NehalemEP_core_V2.json"},
+ {0x2F, 0x0, 0, "WestmereEX_core_V2.json"},
+ {0x25, 0x0, 0, "WestmereEP-SP_core_V2.json"},
+ {0x2C, 0x0, 0, "WestmereEP-DP_core_V2.json"},
+ {0x37, 0x0, 0, "Silvermont_core_V14.json"},
+ {0x4D, 0x0, 0, "Silvermont_core_V14.json"},
+ {0x4C, 0x0, 0, "Silvermont_core_V14.json"},
+ {0x5C, 0x0, 0, "goldmont_core_v13.json"},
+ {0x5F, 0x0, 0, "goldmont_core_v13.json"},
+ {0x1C, 0x0, 0, "Bonnell_core_V4.json"},
+ {0x26, 0x0, 0, "Bonnell_core_V4.json"},
+ {0x27, 0x0, 0, "Bonnell_core_V4.json"},
+ {0x36, 0x0, 0, "Bonnell_core_V4.json"},
+ {0x35, 0x0, 0, "Bonnell_core_V4.json"},
+ {0x2A, 0x0, 0, "sandybridge_core_v16.json"},
+ {0x2D, 0x0, 0, "Jaketown_core_V20.json"},
+ {0x3A, 0x0, 0, "ivybridge_core_v21.json"},
+ {0x3E, 0x0, 0, "ivytown_core_v20.json"},
+ {0x3C, 0x0, 0, "haswell_core_v28.json"},
+ {0x45, 0x0, 0, "haswell_core_v28.json"},
+ {0x46, 0x0, 0, "haswell_core_v28.json"},
+ {0x3F, 0x0, 0, "haswellx_core_v20.json"},
+ {0x3D, 0x0, 0, "broadwell_core_v23.json"},
+ {0x47, 0x0, 0, "broadwell_core_v23.json"},
+ {0x4F, 0x0, 0, "broadwellx_core_v14.json"},
+ {0x56, 0x0, 0, "broadwellde_core_v7.json"},
+ {0x4E, 0x0, 0, "skylake_core_v42.json"},
+ {0x5E, 0x0, 0, "skylake_core_v42.json"},
+ {0x8E, 0x0, 0, "skylake_core_v42.json"},
+ {0x9E, 0x0, 0, "skylake_core_v42.json"},
+ {0x57, 0x0, 0, "KnightsLanding_core_V9.json"},
+ {0x85, 0x0, 0, "KnightsLanding_core_V9.json"},
+ {0x55, 0x0, 1, "skylakex_core_v1.12.json"},
+ {0x55, 0x1, 1, "skylakex_core_v1.12.json"},
+ {0x55, 0x2, 1, "skylakex_core_v1.12.json"},
+ {0x55, 0x3, 1, "skylakex_core_v1.12.json"},
+ {0x55, 0x4, 1, "skylakex_core_v1.12.json"},
+ {0x55, 0x5, 1, "cascadelakex_core_v1.00.json"},
+ {0x55, 0x6, 1, "cascadelakex_core_v1.00.json"},
+ {0x55, 0x7, 1, "cascadelakex_core_v1.00.json"},
+ {0x55, 0x8, 1, "cascadelakex_core_v1.00.json"},
+ {0x55, 0x9, 1, "cascadelakex_core_v1.00.json"},
+ {0x55, 0xA, 1, "cascadelakex_core_v1.00.json"},
+ {0x55, 0xB, 1, "cascadelakex_core_v1.00.json"},
+ {0x55, 0xC, 1, "cascadelakex_core_v1.00.json"},
+ {0x55, 0xD, 1, "cascadelakex_core_v1.00.json"},
+ {0x55, 0xE, 1, "cascadelakex_core_v1.00.json"},
+ {0x55, 0xF, 1, "cascadelakex_core_v1.00.json"},
+ {0x7A, 0x0, 0, "goldmontplus_core_v1.01.json"},
+};
+
static void
set_perfmon_json_path ()
{
perfmon_json_path = (char *) s;
}
-#define foreach_cpuid_table \
-_(0x0306C3, haswell_core_v28.json) \
-_(0x0306F2, haswell_core_v28.json) \
-_(0x0406E3, skylake_core_v42.json) \
-_(0x0506E3, skylake_core_v42.json)
-
static inline u32
get_cpuid (void)
{
u32 cpuid;
uword *ht;
int found_a_table = 0;
+ int i;
+ u8 model, stepping;
pm->vlib_main = vm;
pm->vnet_main = vnet_get_main ();
pm->log_class = vlib_log_register_class ("perfmon", 0);
/* Default data collection interval */
- pm->timeout_interval = 3.0;
- vec_validate (pm->pm_fds, vec_len (vlib_mains) - 1);
- vec_validate (pm->perf_event_pages, vec_len (vlib_mains) - 1);
- vec_validate (pm->rdpmc_indices, vec_len (vlib_mains) - 1);
+ pm->timeout_interval = 2.0; /* seconds */
+ vec_validate (pm->pm_fds, 1);
+ vec_validate (pm->pm_fds[0], vec_len (vlib_mains) - 1);
+ vec_validate (pm->pm_fds[1], vec_len (vlib_mains) - 1);
+ vec_validate (pm->perf_event_pages, 1);
+ vec_validate (pm->perf_event_pages[0], vec_len (vlib_mains) - 1);
+ vec_validate (pm->perf_event_pages[1], vec_len (vlib_mains) - 1);
+ vec_validate (pm->rdpmc_indices, 1);
+ vec_validate (pm->rdpmc_indices[0], vec_len (vlib_mains) - 1);
+ vec_validate (pm->rdpmc_indices[1], vec_len (vlib_mains) - 1);
pm->page_size = getpagesize ();
ht = pm->perfmon_table = 0;
cpuid = get_cpuid ();
- if (0)
+ for (i = 0; i < ARRAY_LEN (fms_table); i++)
{
- }
-#define _(id,table) \
- else if (cpuid == id) \
- { \
- vlib_log_debug (pm->log_class, "Found table %s", #table); \
- ht = perfmon_parse_table (pm, perfmon_json_path, #table); \
- found_a_table = 1; \
- }
- foreach_cpuid_table;
-#undef _
+ model = ((cpuid >> 12) & 0xf0) | ((cpuid >> 4) & 0xf);
+ stepping = cpuid & 0xf;
+ if (fms_table[i].model != model)
+ continue;
+
+ if (fms_table[i].has_stepping)
+ {
+ if (fms_table[i].stepping != stepping)
+ continue;
+ }
+
+ found_a_table = 1;
+ ht = perfmon_parse_table (pm, perfmon_json_path, fms_table[i].filename);
+ break;
+ }
pm->perfmon_table = ht;
- if (found_a_table == 0)
- vlib_log_err (pm->log_class, "No table for cpuid %x", cpuid);
+ if (found_a_table == 0 || pm->perfmon_table == 0 || hash_elts (ht) == 0)
+ {
+ vlib_log_err (pm->log_class, "No table for cpuid %x", cpuid);
+ vlib_log_err (pm->log_class, " model %x, stepping %x",
+ model, stepping);
+ }
return error;
}
unformat_input_t * input, vlib_cli_command_t * cmd)
{
perfmon_main_t *pm = &perfmon_main;
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ int num_threads = 1 + vtm->n_threads;
unformat_input_t _line_input, *line_input = &_line_input;
perfmon_event_config_t ec;
+ f64 delay;
u32 timeout_seconds;
u32 deadman;
+ int last_set;
+ clib_error_t *error;
- vec_reset_length (pm->events_to_collect);
+ vec_reset_length (pm->single_events_to_collect);
+ vec_reset_length (pm->paired_events_to_collect);
pm->ipc_event_index = ~0;
pm->mispredict_event_index = ~0;
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, "counter names required...");
+ clib_bitmap_zero (pm->thread_bitmap);
+
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (line_input, "timeout %u", &timeout_seconds))
ec.name = "instructions";
ec.pe_type = PERF_TYPE_HARDWARE;
ec.pe_config = PERF_COUNT_HW_INSTRUCTIONS;
- pm->ipc_event_index = vec_len (pm->events_to_collect);
- vec_add1 (pm->events_to_collect, ec);
+ pm->ipc_event_index = vec_len (pm->paired_events_to_collect);
+ vec_add1 (pm->paired_events_to_collect, ec);
ec.name = "cpu-cycles";
ec.pe_type = PERF_TYPE_HARDWARE;
ec.pe_config = PERF_COUNT_HW_CPU_CYCLES;
- vec_add1 (pm->events_to_collect, ec);
+ vec_add1 (pm->paired_events_to_collect, ec);
}
else if (unformat (line_input, "branch-mispredict-rate"))
{
ec.name = "branch-misses";
ec.pe_type = PERF_TYPE_HARDWARE;
ec.pe_config = PERF_COUNT_HW_BRANCH_MISSES;
- pm->mispredict_event_index = vec_len (pm->events_to_collect);
- vec_add1 (pm->events_to_collect, ec);
+ pm->mispredict_event_index = vec_len (pm->paired_events_to_collect);
+ vec_add1 (pm->paired_events_to_collect, ec);
ec.name = "branches";
ec.pe_type = PERF_TYPE_HARDWARE;
ec.pe_config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
- vec_add1 (pm->events_to_collect, ec);
+ vec_add1 (pm->paired_events_to_collect, ec);
}
+ else if (unformat (line_input, "threads %U",
+ unformat_bitmap_list, &pm->thread_bitmap))
+ ;
+ else if (unformat (line_input, "thread %U",
+ unformat_bitmap_list, &pm->thread_bitmap))
+ ;
else if (unformat (line_input, "%U", unformat_processor_event, pm, &ec))
{
- vec_add1 (pm->events_to_collect, ec);
+ vec_add1 (pm->single_events_to_collect, ec);
}
#define _(type,event,str) \
else if (unformat (line_input, str)) \
ec.name = str; \
ec.pe_type = type; \
ec.pe_config = event; \
- vec_add1 (pm->events_to_collect, ec); \
+ vec_add1 (pm->single_events_to_collect, ec); \
}
foreach_perfmon_event
#undef _
else
- return clib_error_return (0, "unknown input '%U'",
- format_unformat_error, line_input);
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ unformat_free (line_input);
+ return error;
+ }
}
- if (vec_len (pm->events_to_collect) == 0)
+ unformat_free (line_input);
+
+ last_set = clib_bitmap_last_set (pm->thread_bitmap);
+ if (last_set != ~0 && last_set >= num_threads)
+ return clib_error_return (0, "thread %d does not exist", last_set);
+
+ /* Stick paired events at the front of the (unified) list */
+ if (vec_len (pm->paired_events_to_collect) > 0)
+ {
+ perfmon_event_config_t *tmp;
+ /* first 2n events are pairs... */
+ vec_append (pm->paired_events_to_collect, pm->single_events_to_collect);
+ tmp = pm->single_events_to_collect;
+ pm->single_events_to_collect = pm->paired_events_to_collect;
+ pm->paired_events_to_collect = tmp;
+ }
+
+ if (vec_len (pm->single_events_to_collect) == 0)
return clib_error_return (0, "no events specified...");
+ /* Figure out how long data collection will take */
+ delay =
+ ((f64) vec_len (pm->single_events_to_collect)) * pm->timeout_interval;
+ delay /= 2.0; /* collect 2 stats at once */
+
vlib_cli_output (vm, "Start collection for %d events, wait %.2f seconds",
- vec_len (pm->events_to_collect),
- (f64) (vec_len (pm->events_to_collect))
- * pm->timeout_interval);
+ vec_len (pm->single_events_to_collect), delay);
vlib_process_signal_event (pm->vlib_main, perfmon_periodic_node.index,
PERFMON_START, 0);
/* Coarse-grained wait */
- vlib_process_suspend (vm,
- ((f64) (vec_len (pm->events_to_collect)
- * pm->timeout_interval)));
+ vlib_process_suspend (vm, delay);
deadman = 0;
/* Reasonable to guess that collection may not be quite done... */
VLIB_CLI_COMMAND (set_pmc_command, static) =
{
.path = "set pmc",
- .short_help = "set pmc c1 [..., use \"show pmc events\"]",
+ .short_help = "set pmc [threads n,n1-n2] c1... [see \"show pmc events\"]",
.function = set_pmc_command_fn,
.is_mp_safe = 1,
};
if (i == pm->ipc_event_index)
{
f64 ipc_rate;
- ASSERT (i + 1 < vec_len (c->counter_names));
+ ASSERT ((i + 1) < vec_len (c->counter_names));
if (c->counter_values[i + 1] > 0)
ipc_rate = (f64) c->counter_values[i]