From 4d1a866aff6ceb03025990b6e60b42faf09ef486 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Mon, 10 Sep 2018 12:31:15 -0400 Subject: [PATCH] X86_64 perf counter plugin Change-Id: Ie5a00c15ee9536cc61afab57f6cadc1aa1972f3c Signed-off-by: Dave Barach --- build-root/scripts/find-dev-contents | 5 + src/plugins/perfmon/CMakeLists.txt | 38 ++ src/plugins/perfmon/parse_util.c | 235 +++++++++++++ src/plugins/perfmon/perfmon.c | 615 +++++++++++++++++++++++++++++++++ src/plugins/perfmon/perfmon.h | 145 ++++++++ src/plugins/perfmon/perfmon_periodic.c | 433 +++++++++++++++++++++++ src/vlib/main.c | 83 ++++- src/vlib/main.h | 9 + src/vlib/node.h | 5 + src/vlib/node_cli.c | 32 +- src/vlibapi/node_serialize.c | 11 +- src/vppinfra/CMakeLists.txt | 1 + src/vppinfra/pmc.h | 46 +++ 13 files changed, 1632 insertions(+), 26 deletions(-) create mode 100644 src/plugins/perfmon/CMakeLists.txt create mode 100644 src/plugins/perfmon/parse_util.c create mode 100644 src/plugins/perfmon/perfmon.c create mode 100644 src/plugins/perfmon/perfmon.h create mode 100644 src/plugins/perfmon/perfmon_periodic.c create mode 100644 src/vppinfra/pmc.h diff --git a/build-root/scripts/find-dev-contents b/build-root/scripts/find-dev-contents index 623121ef95d..73ba95bc79c 100755 --- a/build-root/scripts/find-dev-contents +++ b/build-root/scripts/find-dev-contents @@ -28,6 +28,11 @@ do fi done +# Perf monitor .json files +for i in $(find ${1}/vpp/share/vpp/plugins/perfmon -name *.json -type f -print); do + echo ../${i} /usr/share/vpp/plugins/perfmon >> ${2} +done + # sample plugin paths=`(cd ..; find src/examples/sample-plugin -type f -print | grep -v autom4te)` diff --git a/src/plugins/perfmon/CMakeLists.txt b/src/plugins/perfmon/CMakeLists.txt new file mode 100644 index 00000000000..30e1f2caf3e --- /dev/null +++ b/src/plugins/perfmon/CMakeLists.txt @@ -0,0 +1,38 @@ +# Copyright (c) 2018 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_vpp_plugin(perfmon + SOURCES + perfmon.c + perfmon_periodic.c + parse_util.c +) + +# Reenable / extend when .json file license issue fixed +# +# set (PERFMON_JSON_FILES +# haswell_core_v28.json +# haswellx_core_v20.json +# ivybridge_core_v21.json +# ivytown_core_v20.json +# jaketown_core_v20.json +# sandybridge_core_v16.json +# skylake_core_v42.json +# skylakex_core_v1.12.json +# ) + +# install( +# FILES ${PERFMON_JSON_FILES} +# DESTINATION share/vpp/plugins/perfmon +# COMPONENT vpp-dev +# ) diff --git a/src/plugins/perfmon/parse_util.c b/src/plugins/perfmon/parse_util.c new file mode 100644 index 00000000000..436acaa6de0 --- /dev/null +++ b/src/plugins/perfmon/parse_util.c @@ -0,0 +1,235 @@ +/* + * parse_util.c - halfhearted json parser + * + * Copyright (c) 2018 Cisco Systems and/or its affiliates + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +typedef enum +{ + STATE_START, + STATE_READ_NAME, + STATE_READ_VALUE, +} parse_state_t; + +static u8 * +downcase (u8 * s) +{ + u8 *rv = 0; + u8 c; + int i; + + for (i = 0; i < vec_len (s); i++) + { + c = s[i]; + if (c >= 'A' && c <= 'Z') + c = c + ('a' - 'A'); + vec_add1 (rv, c); + } + return (rv); +} + +uword * +perfmon_parse_table (perfmon_main_t * pm, char *path, char *table_name) +{ + u8 *cp; + u8 *event_name; + int state = STATE_START; + uword *ht; + name_value_pair_t *nvp = 0; + name_value_pair_t **nvps = 0; + u8 *v; + int i; + u8 *json_filename; + clib_error_t *error; + + /* Create the name/value hash table in any case... */ + ht = hash_create_string (0, sizeof (uword)); + + json_filename = format (0, "%s/%s%c", path, table_name, 0); + + vlib_log_debug (pm->log_class, "Try to read perfmon events from %s", + json_filename); + + error = unix_proc_file_contents ((char *) json_filename, &cp); + + if (error) + { + vlib_log_err (pm->log_class, + "Failed to read CPU-specific counter table"); + vlib_log_err (pm->log_class, + "Download from https://download.01.org/perfmon, " + "and install as %s", json_filename); + vec_free (json_filename); + clib_error_report (error); + return ht; + } + vlib_log_debug (pm->log_class, "Read OK, parse the event table..."); + vec_free (json_filename); + +again: + while (*cp) + { + switch (state) + { + case STATE_START: + while (*cp && *cp != '{' && *cp != '}' && *cp != ',') + cp++; + if (*cp == 0) + goto done; + + /* Look for a new event */ + if (*cp == '{') + { + if (*cp == 0) + { + error: + clib_warning ("parse fail"); + hash_free (ht); + return 0; + } + cp++; + state = STATE_READ_NAME; + goto again; + } + else if (*cp == '}') /* end of event */ + { + /* Look for the "EventName" nvp */ + for (i = 0; i < vec_len (nvps); i++) + { + nvp = nvps[i]; + if (!strncmp ((char *) nvp->name, "EventName", 9)) + { + event_name = nvp->value; + goto found; + } + } + /* no name? */ + for (i = 0; i < vec_len (nvps); i++) + { + vec_free (nvps[i]->name); + vec_free (nvps[i]->value); + } + vec_free (nvps); + cp++; + goto again; + + found: + event_name = downcase (event_name); + hash_set_mem (ht, event_name, nvps); + nvp = 0; + nvps = 0; + cp++; + goto again; + } + else if (*cp == ',') /* punctuation */ + { + cp++; + goto again; + } + + case STATE_READ_NAME: + vec_validate (nvp, 0); + v = 0; + while (*cp && *cp != '"') + cp++; + + if (*cp == 0) + { + vec_free (nvp); + goto error; + } + + cp++; + while (*cp && *cp != '"') + { + vec_add1 (v, *cp); + cp++; + } + if (*cp == 0) + { + vec_free (v); + goto error; + } + cp++; + vec_add1 (v, 0); + nvp->name = v; + state = STATE_READ_VALUE; + goto again; + + case STATE_READ_VALUE: + while (*cp && *cp != ':') + cp++; + if (*cp == 0) + { + vec_free (nvp->name); + goto error; + } + while (*cp && *cp != '"') + cp++; + if (*cp == 0) + { + vec_free (nvp->name); + goto error; + } + else + cp++; + v = 0; + while (*cp && *cp != '"') + { + vec_add1 (v, *cp); + cp++; + } + if (*cp == 0) + { + vec_free (nvp->name); + vec_free (v); + goto error; + } + vec_add1 (v, 0); + nvp->value = v; + vec_add1 (nvps, nvp); + while (*cp && *cp != ',' && *cp != '}') + cp++; + if (*cp == 0) + { + vec_free (nvp->name); + vec_free (nvp->value); + goto error; + } + else if (*cp == '}') + state = STATE_START; + else + { + cp++; + state = STATE_READ_NAME; + } + nvp = 0; + goto again; + } + } + +done: + return (ht); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c new file mode 100644 index 00000000000..61dbe5cd918 --- /dev/null +++ b/src/plugins/perfmon/perfmon.c @@ -0,0 +1,615 @@ +/* + * perfmon.c - skeleton vpp engine plug-in + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include + +perfmon_main_t perfmon_main; + +static char *perfmon_json_path = "/usr/share/vpp/plugins/perfmon"; + +static void +set_perfmon_json_path () +{ + char *p, path[PATH_MAX]; + int rv; + u8 *s; + + /* find executable path */ + if ((rv = readlink ("/proc/self/exe", path, PATH_MAX - 1)) == -1) + return; + + /* readlink doesn't provide null termination */ + path[rv] = 0; + + /* strip filename */ + if ((p = strrchr (path, '/')) == 0) + return; + *p = 0; + + /* strip bin/ */ + if ((p = strrchr (path, '/')) == 0) + return; + *p = 0; + + /* cons up the .json file path */ + s = format (0, "%s/share/vpp/plugins/perfmon", path); + vec_add1 (s, 0); + perfmon_json_path = (char *) s; +} + +#define foreach_cpuid_table \ +_(0x0306C3, haswell_core_v28.json) \ +_(0x0306F2, haswell_core_v28.json) \ +_(0x0406E3, skylake_core_v42.json) \ +_(0x0506E3, skylake_core_v42.json) + +static inline u32 +get_cpuid (void) +{ +#if defined(__x86_64__) + u32 cpuid; + asm volatile ("mov $1, %%eax; cpuid; mov %%eax, %0":"=r" (cpuid)::"%eax", + "%edx", "%ecx", "%rbx"); + return cpuid; +#else + return 0; +#endif +} + +static clib_error_t * +perfmon_init (vlib_main_t * vm) +{ + perfmon_main_t *pm = &perfmon_main; + clib_error_t *error = 0; + u32 cpuid; + uword *ht; + int found_a_table = 0; + + pm->vlib_main = vm; + pm->vnet_main = vnet_get_main (); + + pm->capture_by_thread_and_node_name = + hash_create_string (0, sizeof (uword)); + + pm->log_class = vlib_log_register_class ("perfmon", 0); + + /* Default data collection interval */ + pm->timeout_interval = 3.0; + vec_validate (pm->pm_fds, vec_len (vlib_mains) - 1); + vec_validate (pm->perf_event_pages, vec_len (vlib_mains) - 1); + vec_validate (pm->rdpmc_indices, vec_len (vlib_mains) - 1); + pm->page_size = getpagesize (); + + ht = pm->perfmon_table = 0; + + set_perfmon_json_path (); + + cpuid = get_cpuid (); + + if (0) + { + } +#define _(id,table) \ + else if (cpuid == id) \ + { \ + vlib_log_debug (pm->log_class, "Found table %s", #table); \ + ht = perfmon_parse_table (pm, perfmon_json_path, #table); \ + found_a_table = 1; \ + } + foreach_cpuid_table; +#undef _ + + pm->perfmon_table = ht; + + if (found_a_table == 0) + vlib_log_err (pm->log_class, "No table for cpuid %x", cpuid); + + return error; +} + +VLIB_INIT_FUNCTION (perfmon_init); + +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = +{ + .version = VPP_BUILD_VER, + .description = "Performance monitor plugin", +#if !defined(__x86_64__) + .default_disabled = 1, +#endif +}; +/* *INDENT-ON* */ + +static uword +atox (u8 * s) +{ + uword rv = 0; + + while (*s) + { + if (*s >= '0' && *s <= '9') + rv = (rv << 4) | (*s - '0'); + else if (*s >= 'a' && *s <= 'f') + rv = (rv << 4) | (*s - 'a' + 10); + else if (*s >= 'A' && *s <= 'A') + rv = (rv << 4) | (*s - 'A' + 10); + else if (*s == 'x') + ; + else + break; + s++; + } + return rv; +} + +static uword +unformat_processor_event (unformat_input_t * input, va_list * args) +{ + perfmon_main_t *pm = va_arg (*args, perfmon_main_t *); + perfmon_event_config_t *ep = va_arg (*args, perfmon_event_config_t *); + u8 *s = 0; + name_value_pair_t **nvps, *nvp; + hash_pair_t *hp; + int i; + int set_values = 0; + u32 pe_config = 0; + + if (pm->perfmon_table == 0) + return 0; + + if (!unformat (input, "%s", &s)) + return 0; + + hp = hash_get_pair_mem (pm->perfmon_table, s); + + vec_free (s); + + if (hp == 0) + return 0; + + nvps = (name_value_pair_t **) (hp->value[0]); + + for (i = 0; i < vec_len (nvps); i++) + { + nvp = nvps[i]; + if (!strncmp ((char *) nvp->name, "EventCode", 9)) + { + pe_config |= atox (nvp->value); + set_values++; + } + else if (!strncmp ((char *) nvp->name, "UMask", 5)) + { + pe_config |= (atox (nvp->value) << 8); + set_values++; + } + if (set_values == 2) + break; + } + + if (set_values != 2) + { + clib_warning ("BUG: only found %d values", set_values); + return 0; + } + + ep->name = (char *) hp->key; + ep->pe_type = PERF_TYPE_RAW; + ep->pe_config = pe_config; + return 1; +} + +static clib_error_t * +set_pmc_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + perfmon_main_t *pm = &perfmon_main; + unformat_input_t _line_input, *line_input = &_line_input; + perfmon_event_config_t ec; + u32 timeout_seconds; + u32 deadman; + + vec_reset_length (pm->events_to_collect); + pm->ipc_event_index = ~0; + pm->mispredict_event_index = ~0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "counter names required..."); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "timeout %u", &timeout_seconds)) + pm->timeout_interval = (f64) timeout_seconds; + else if (unformat (line_input, "instructions-per-clock")) + { + ec.name = "instructions"; + ec.pe_type = PERF_TYPE_HARDWARE; + ec.pe_config = PERF_COUNT_HW_INSTRUCTIONS; + pm->ipc_event_index = vec_len (pm->events_to_collect); + vec_add1 (pm->events_to_collect, ec); + ec.name = "cpu-cycles"; + ec.pe_type = PERF_TYPE_HARDWARE; + ec.pe_config = PERF_COUNT_HW_CPU_CYCLES; + vec_add1 (pm->events_to_collect, ec); + } + else if (unformat (line_input, "branch-mispredict-rate")) + { + ec.name = "branch-misses"; + ec.pe_type = PERF_TYPE_HARDWARE; + ec.pe_config = PERF_COUNT_HW_BRANCH_MISSES; + pm->mispredict_event_index = vec_len (pm->events_to_collect); + vec_add1 (pm->events_to_collect, ec); + ec.name = "branches"; + ec.pe_type = PERF_TYPE_HARDWARE; + ec.pe_config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; + vec_add1 (pm->events_to_collect, ec); + } + else if (unformat (line_input, "%U", unformat_processor_event, pm, &ec)) + { + vec_add1 (pm->events_to_collect, ec); + } +#define _(type,event,str) \ + else if (unformat (line_input, str)) \ + { \ + ec.name = str; \ + ec.pe_type = type; \ + ec.pe_config = event; \ + vec_add1 (pm->events_to_collect, ec); \ + } + foreach_perfmon_event +#undef _ + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + } + + if (vec_len (pm->events_to_collect) == 0) + return clib_error_return (0, "no events specified..."); + + vlib_cli_output (vm, "Start collection for %d events, wait %.2f seconds", + vec_len (pm->events_to_collect), + (f64) (vec_len (pm->events_to_collect)) + * pm->timeout_interval); + + vlib_process_signal_event (pm->vlib_main, perfmon_periodic_node.index, + PERFMON_START, 0); + + /* Coarse-grained wait */ + vlib_process_suspend (vm, + ((f64) (vec_len (pm->events_to_collect) + * pm->timeout_interval))); + + deadman = 0; + /* Reasonable to guess that collection may not be quite done... */ + while (pm->state == PERFMON_STATE_RUNNING) + { + vlib_process_suspend (vm, 10e-3); + if (deadman++ > 200) + { + vlib_cli_output (vm, "DEADMAN: collection still running..."); + break; + } + } + + vlib_cli_output (vm, "Data collection complete..."); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_pmc_command, static) = +{ + .path = "set pmc", + .short_help = "set pmc c1 [..., use \"show pmc events\"]", + .function = set_pmc_command_fn, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +static int +capture_name_sort (void *a1, void *a2) +{ + perfmon_capture_t *c1 = a1; + perfmon_capture_t *c2 = a2; + + return strcmp ((char *) c1->thread_and_node_name, + (char *) c2->thread_and_node_name); +} + +static u8 * +format_capture (u8 * s, va_list * args) +{ + perfmon_main_t *pm = va_arg (*args, perfmon_main_t *); + perfmon_capture_t *c = va_arg (*args, perfmon_capture_t *); + int verbose __attribute__ ((unused)) = va_arg (*args, int); + f64 ticks_per_pkt; + int i; + + if (c == 0) + { + s = format (s, "%=40s%=20s%=16s%=16s%=16s", + "Name", "Counter", "Count", "Pkts", "Counts/Pkt"); + return s; + } + + for (i = 0; i < vec_len (c->counter_names); i++) + { + u8 *name; + + if (i == 0) + name = c->thread_and_node_name; + else + { + vec_add1 (s, '\n'); + name = (u8 *) ""; + } + + /* Deal with synthetic events right here */ + if (i == pm->ipc_event_index) + { + f64 ipc_rate; + ASSERT (i + 1 < vec_len (c->counter_names)); + + if (c->counter_values[i + 1] > 0) + ipc_rate = (f64) c->counter_values[i] + / (f64) c->counter_values[i + 1]; + else + ipc_rate = 0.0; + + s = format (s, "%-40s%+20s%+16llu%+16llu%+16.2e\n", + name, "instructions-per-clock", + c->counter_values[i], + c->counter_values[i + 1], ipc_rate); + name = (u8 *) ""; + } + + if (i == pm->mispredict_event_index) + { + f64 mispredict_rate; + ASSERT (i + 1 < vec_len (c->counter_names)); + + if (c->counter_values[i + 1] > 0) + mispredict_rate = (f64) c->counter_values[i] + / (f64) c->counter_values[i + 1]; + else + mispredict_rate = 0.0; + + s = format (s, "%-40s%+20s%+16llu%+16llu%+16.2e\n", + name, "branch-mispredict-rate", + c->counter_values[i], + c->counter_values[i + 1], mispredict_rate); + name = (u8 *) ""; + } + + if (c->vectors_this_counter[i]) + ticks_per_pkt = + ((f64) c->counter_values[i]) / ((f64) c->vectors_this_counter[i]); + else + ticks_per_pkt = 0.0; + + s = format (s, "%-40s%+20s%+16llu%+16llu%+16.2e", + name, c->counter_names[i], + c->counter_values[i], + c->vectors_this_counter[i], ticks_per_pkt); + } + return s; +} + +static u8 * +format_generic_events (u8 * s, va_list * args) +{ + int verbose = va_arg (*args, int); + +#define _(type,config,name) \ + if (verbose == 0) \ + s = format (s, "\n %s", name); \ + else \ + s = format (s, "\n %s (%d, %d)", name, type, config); + foreach_perfmon_event; +#undef _ + return s; +} + +typedef struct +{ + u8 *name; + name_value_pair_t **nvps; +} sort_nvp_t; + +static int +sort_nvps_by_name (void *a1, void *a2) +{ + sort_nvp_t *nvp1 = a1; + sort_nvp_t *nvp2 = a2; + + return strcmp ((char *) nvp1->name, (char *) nvp2->name); +} + +static u8 * +format_processor_events (u8 * s, va_list * args) +{ + perfmon_main_t *pm = va_arg (*args, perfmon_main_t *); + int verbose = va_arg (*args, int); + int i, j; + sort_nvp_t *sort_nvps = 0; + sort_nvp_t *sn; + u8 *key; + name_value_pair_t **value; + + /* *INDENT-OFF* */ + hash_foreach_mem (key, value, pm->perfmon_table, + ({ + vec_add2 (sort_nvps, sn, 1); + sn->name = key; + sn->nvps = value; + })); + + vec_sort_with_function (sort_nvps, sort_nvps_by_name); + + if (verbose == 0) + { + for (i = 0; i < vec_len (sort_nvps); i++) + s = format (s, "\n %s ", sort_nvps[i].name); + } + else + { + for (i = 0; i < vec_len (sort_nvps); i++) + { + name_value_pair_t **nvps; + s = format (s, "\n %s:", sort_nvps[i].name); + + nvps = sort_nvps[i].nvps; + + for (j = 0; j < vec_len (nvps); j++) + s = format (s, "\n %s = %s", nvps[j]->name, nvps[j]->value); + } + } + vec_free (sort_nvps); + return s; +} + + +static clib_error_t * +show_pmc_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + perfmon_main_t *pm = &perfmon_main; + int verbose = 0; + int events = 0; + int i; + perfmon_capture_t *c; + perfmon_capture_t *captures = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "events")) + events = 1; + else if (unformat (input, "verbose")) + verbose = 1; + else + break; + } + + if (events) + { + vlib_cli_output (vm, "Generic Events %U", + format_generic_events, verbose); + vlib_cli_output (vm, "Synthetic Events"); + vlib_cli_output (vm, " instructions-per-clock"); + vlib_cli_output (vm, " branch-mispredict-rate"); + if (pm->perfmon_table) + vlib_cli_output (vm, "Processor Events %U", + format_processor_events, pm, verbose); + return 0; + } + + if (pm->state == PERFMON_STATE_RUNNING) + { + vlib_cli_output (vm, "Data collection in progress..."); + return 0; + } + + if (pool_elts (pm->capture_pool) == 0) + { + vlib_cli_output (vm, "No data..."); + return 0; + } + + /* *INDENT-OFF* */ + pool_foreach (c, pm->capture_pool, + ({ + vec_add1 (captures, *c); + })); + /* *INDENT-ON* */ + + vec_sort_with_function (captures, capture_name_sort); + + vlib_cli_output (vm, "%U", format_capture, pm, 0 /* header */ , + 0 /* verbose */ ); + + for (i = 0; i < vec_len (captures); i++) + { + c = captures + i; + + vlib_cli_output (vm, "%U", format_capture, pm, c, verbose); + } + + vec_free (captures); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_pmc_command, static) = +{ + .path = "show pmc", + .short_help = "show pmc [verbose]", + .function = show_pmc_command_fn, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +static clib_error_t * +clear_pmc_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + perfmon_main_t *pm = &perfmon_main; + u8 *key; + u32 *value; + + if (pm->state == PERFMON_STATE_RUNNING) + { + vlib_cli_output (vm, "Performance monitor is still running..."); + return 0; + } + + pool_free (pm->capture_pool); + + /* *INDENT-OFF* */ + hash_foreach_mem (key, value, pm->capture_by_thread_and_node_name, + ({ + vec_free (key); + })); + /* *INDENT-ON* */ + hash_free (pm->capture_by_thread_and_node_name); + pm->capture_by_thread_and_node_name = + hash_create_string (0, sizeof (uword)); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (clear_pmc_command, static) = +{ + .path = "clear pmc", + .short_help = "clear the performance monitor counters", + .function = clear_pmc_command_fn, +}; +/* *INDENT-ON* */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h new file mode 100644 index 00000000000..47ee471d5fc --- /dev/null +++ b/src/plugins/perfmon/perfmon.h @@ -0,0 +1,145 @@ +/* + * perfmon.h - performance monitor + * + * Copyright (c) 2018 Cisco Systems and/or its affiliates + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_perfmon_h__ +#define __included_perfmon_h__ + +#include +#include +#include +#include + +#include +#include + +#include + +#define foreach_perfmon_event \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, "cpu-cycles") \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, "instructions") \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, \ + "cache-references") \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, "cache-misses") \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branches") \ + _(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses") \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES, "bus-cycles") \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, \ + "stall-frontend") \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, \ + "stall-backend") \ +_(PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, "ref-cpu-cycles") \ +_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, "page-faults") \ +_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, "context-switches") \ +_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, "cpu-migrations") \ +_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, "minor-pagefaults") \ +_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, "major-pagefaults") \ +_(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, "emulation-faults") + +typedef struct +{ + char *name; + int pe_type; + int pe_config; +} perfmon_event_config_t; + +typedef enum +{ + PERFMON_STATE_OFF = 0, + PERFMON_STATE_RUNNING, +} perfmon_state_t; + +typedef struct +{ + u8 *thread_and_node_name; + u8 **counter_names; + u64 *counter_values; + u64 *vectors_this_counter; +} perfmon_capture_t; + +typedef struct +{ + u32 cpuid; + const char **table; +} perfmon_cpuid_and_table_t; + +typedef struct +{ + u8 *name; + u8 *value; +} name_value_pair_t; + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + + /* on/off switch for the periodic function */ + volatile u8 state; + + /* capture pool, hash table */ + perfmon_capture_t *capture_pool; + uword *capture_by_thread_and_node_name; + + /* CPU-specific event tables, hash table of selected table (if any) */ + perfmon_cpuid_and_table_t *perfmon_tables; + uword *perfmon_table; + + /* vector of events to collect */ + perfmon_event_config_t *events_to_collect; + + /* Base indices of synthetic event tuples */ + u32 ipc_event_index; + u32 mispredict_event_index; + + /* Length of time to capture a single event */ + f64 timeout_interval; + + /* Current event (index) being collected */ + u32 current_event; + u32 *rdpmc_indices; + /* mmap base / size of (mapped) struct perf_event_mmap_page */ + u8 **perf_event_pages; + u32 page_size; + + /* Current perf_event file descriptors, per thread */ + int *pm_fds; + + /* Logging */ + vlib_log_class_t log_class; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + ethernet_main_t *ethernet_main; +} perfmon_main_t; + +extern perfmon_main_t perfmon_main; + +extern vlib_node_registration_t perfmon_periodic_node; +uword *perfmon_parse_table (perfmon_main_t * pm, char *path, char *filename); + +/* Periodic function events */ +#define PERFMON_START 1 + +#endif /* __included_perfmon_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/perfmon/perfmon_periodic.c b/src/plugins/perfmon/perfmon_periodic.c new file mode 100644 index 00000000000..329a75d7305 --- /dev/null +++ b/src/plugins/perfmon/perfmon_periodic.c @@ -0,0 +1,433 @@ +/* + * perfmon_periodic.c - skeleton plug-in periodic function + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +static long +perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu, + int group_fd, unsigned long flags) +{ + int ret; + + ret = syscall (__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); + return ret; +} + +static u64 +read_current_perf_counter (vlib_main_t * vm) +{ + if (vm->perf_counter_id) + return clib_rdpmc (vm->perf_counter_id); + else + { + u64 sw_value; + if (read (vm->perf_counter_fd, &sw_value, sizeof (sw_value)) != + sizeof (sw_value)) + { + clib_unix_warning ("counter read failed, disable collection..."); + vm->vlib_node_runtime_perf_counter_cb = 0; + return 0ULL; + } + return sw_value; + } +} + +static void +clear_counters (perfmon_main_t * pm) +{ + int i, j; + vlib_main_t *vm = pm->vlib_main; + vlib_main_t *stat_vm; + vlib_node_main_t *nm; + vlib_node_t *n; + + vlib_worker_thread_barrier_sync (vm); + + for (j = 0; j < vec_len (vlib_mains); j++) + { + stat_vm = vlib_mains[j]; + if (stat_vm == 0) + continue; + + nm = &stat_vm->node_main; + + /* Clear the node runtime perfmon counters */ + for (i = 0; i < vec_len (nm->nodes); i++) + { + n = nm->nodes[i]; + vlib_node_sync_stats (stat_vm, n); + } + + /* And clear the node perfmon counters */ + for (i = 0; i < vec_len (nm->nodes); i++) + { + n = nm->nodes[i]; + n->stats_total.perf_counter_ticks = 0; + n->stats_total.perf_counter_vectors = 0; + n->stats_last_clear.perf_counter_ticks = 0; + n->stats_last_clear.perf_counter_vectors = 0; + } + } + vlib_worker_thread_barrier_release (vm); +} + +static void +enable_current_event (perfmon_main_t * pm) +{ + struct perf_event_attr pe; + int fd; + struct perf_event_mmap_page *p = 0; + perfmon_event_config_t *c; + vlib_main_t *vm = vlib_get_main (); + u32 my_thread_index = vm->thread_index; + + c = vec_elt_at_index (pm->events_to_collect, pm->current_event); + + memset (&pe, 0, sizeof (struct perf_event_attr)); + pe.type = c->pe_type; + pe.size = sizeof (struct perf_event_attr); + pe.config = c->pe_config; + pe.disabled = 1; + pe.pinned = 1; + /* + * Note: excluding the kernel makes the + * (software) context-switch counter read 0... + */ + if (pe.type != PERF_TYPE_SOFTWARE) + { + /* Exclude kernel and hypervisor */ + pe.exclude_kernel = 1; + pe.exclude_hv = 1; + } + + fd = perf_event_open (&pe, 0, -1, -1, 0); + if (fd == -1) + { + clib_unix_warning ("event open: type %d config %d", c->pe_type, + c->pe_config); + return; + } + + if (pe.type != PERF_TYPE_SOFTWARE) + { + p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0); + if (p == MAP_FAILED) + { + clib_unix_warning ("mmap"); + close (fd); + return; + } + } + + if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0) + clib_unix_warning ("reset ioctl"); + + if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0) + clib_unix_warning ("enable ioctl"); + + /* + * Software event counters - and others not capable of being + * read via the "rdpmc" instruction - will be read + * by system calls. + */ + if (pe.type == PERF_TYPE_SOFTWARE || p->cap_user_rdpmc == 0) + pm->rdpmc_indices[my_thread_index] = 0; + else /* use rdpmc instrs */ + pm->rdpmc_indices[my_thread_index] = p->index - 1; + pm->perf_event_pages[my_thread_index] = (void *) p; + + pm->pm_fds[my_thread_index] = fd; + + /* Enable the main loop counter snapshot mechanism */ + vm->perf_counter_id = pm->rdpmc_indices[my_thread_index]; + vm->perf_counter_fd = fd; + vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counter; +} + +static void +disable_event (perfmon_main_t * pm) +{ + vlib_main_t *vm = vlib_get_main (); + u32 my_thread_index = vm->thread_index; + + if (pm->pm_fds[my_thread_index] == 0) + return; + + /* Stop main loop collection */ + vm->vlib_node_runtime_perf_counter_cb = 0; + + if (ioctl (pm->pm_fds[my_thread_index], PERF_EVENT_IOC_DISABLE, 0) < 0) + clib_unix_warning ("disable ioctl"); + + if (pm->perf_event_pages[my_thread_index]) + if (munmap (pm->perf_event_pages[my_thread_index], pm->page_size) < 0) + clib_unix_warning ("munmap"); + + (void) close (pm->pm_fds[my_thread_index]); + pm->pm_fds[my_thread_index] = 0; +} + +static void +worker_thread_start_event (vlib_main_t * vm) +{ + perfmon_main_t *pm = &perfmon_main; + + enable_current_event (pm); + vm->worker_thread_main_loop_callback = 0; +} + +static void +worker_thread_stop_event (vlib_main_t * vm) +{ + perfmon_main_t *pm = &perfmon_main; + disable_event (pm); + vm->worker_thread_main_loop_callback = 0; +} + +static void +start_event (perfmon_main_t * pm, f64 now, uword event_data) +{ + int i; + pm->current_event = 0; + if (vec_len (pm->events_to_collect) == 0) + { + pm->state = PERFMON_STATE_OFF; + return; + } + pm->state = PERFMON_STATE_RUNNING; + clear_counters (pm); + + /* Start collection on this thread */ + enable_current_event (pm); + + /* And also on worker threads */ + for (i = 1; i < vec_len (vlib_mains); i++) + { + if (vlib_mains[i] == 0) + continue; + vlib_mains[i]->worker_thread_main_loop_callback = (void *) + worker_thread_start_event; + } +} + +void +scrape_and_clear_counters (perfmon_main_t * pm) +{ + int i, j; + vlib_main_t *vm = pm->vlib_main; + vlib_main_t *stat_vm; + vlib_node_main_t *nm; + vlib_node_t ***node_dups = 0; + vlib_node_t **nodes; + vlib_node_t *n; + perfmon_capture_t *c; + perfmon_event_config_t *current_event; + uword *p; + u8 *counter_name; + u64 counter_value; + u64 vectors_this_counter; + + /* snapshoot the nodes, including pm counters */ + vlib_worker_thread_barrier_sync (vm); + + for (j = 0; j < vec_len (vlib_mains); j++) + { + stat_vm = vlib_mains[j]; + if (stat_vm == 0) + continue; + + nm = &stat_vm->node_main; + + for (i = 0; i < vec_len (nm->nodes); i++) + { + n = nm->nodes[i]; + vlib_node_sync_stats (stat_vm, n); + } + + nodes = 0; + vec_validate (nodes, vec_len (nm->nodes) - 1); + vec_add1 (node_dups, nodes); + + /* Snapshoot and clear the per-node perfmon counters */ + for (i = 0; i < vec_len (nm->nodes); i++) + { + n = nm->nodes[i]; + nodes[i] = clib_mem_alloc (sizeof (*n)); + clib_memcpy (nodes[i], n, sizeof (*n)); + n->stats_total.perf_counter_ticks = 0; + n->stats_total.perf_counter_vectors = 0; + n->stats_last_clear.perf_counter_ticks = 0; + n->stats_last_clear.perf_counter_vectors = 0; + } + } + + vlib_worker_thread_barrier_release (vm); + + current_event = pm->events_to_collect + pm->current_event; + + for (j = 0; j < vec_len (vlib_mains); j++) + { + stat_vm = vlib_mains[j]; + if (stat_vm == 0) + continue; + + nodes = node_dups[j]; + + for (i = 0; i < vec_len (nodes); i++) + { + u8 *capture_name; + + n = nodes[i]; + if (n->stats_total.perf_counter_ticks == 0) + { + clib_mem_free (n); + continue; + } + + capture_name = format (0, "t%d-%v%c", j, n->name, 0); + + p = hash_get_mem (pm->capture_by_thread_and_node_name, + capture_name); + + if (p == 0) + { + pool_get (pm->capture_pool, c); + memset (c, 0, sizeof (*c)); + c->thread_and_node_name = capture_name; + hash_set_mem (pm->capture_by_thread_and_node_name, + capture_name, c - pm->capture_pool); + } + else + c = pool_elt_at_index (pm->capture_pool, p[0]); + + /* Snapshoot counters, etc. into the capture */ + counter_name = (u8 *) current_event->name; + counter_value = n->stats_total.perf_counter_ticks - + n->stats_last_clear.perf_counter_ticks; + vectors_this_counter = n->stats_total.perf_counter_vectors - + n->stats_last_clear.perf_counter_vectors; + + vec_add1 (c->counter_names, counter_name); + vec_add1 (c->counter_values, counter_value); + vec_add1 (c->vectors_this_counter, vectors_this_counter); + clib_mem_free (n); + } + vec_free (nodes); + } + vec_free (node_dups); +} + +static void +handle_timeout (perfmon_main_t * pm, f64 now) +{ + int i; + disable_event (pm); + + /* And also on worker threads */ + for (i = 1; i < vec_len (vlib_mains); i++) + { + if (vlib_mains[i] == 0) + continue; + vlib_mains[i]->worker_thread_main_loop_callback = (void *) + worker_thread_stop_event; + } + + /* Short delay to make sure workers have stopped collection */ + if (i > 1) + vlib_process_suspend (pm->vlib_main, 1e-3); + scrape_and_clear_counters (pm); + pm->current_event++; + if (pm->current_event >= vec_len (pm->events_to_collect)) + { + pm->current_event = 0; + pm->state = PERFMON_STATE_OFF; + return; + } + enable_current_event (pm); + + /* And also on worker threads */ + for (i = 1; i < vec_len (vlib_mains); i++) + { + if (vlib_mains[i] == 0) + continue; + vlib_mains[i]->worker_thread_main_loop_callback = (void *) + worker_thread_start_event; + } +} + +static uword +perfmon_periodic_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + perfmon_main_t *pm = &perfmon_main; + f64 now; + uword *event_data = 0; + uword event_type; + int i; + + while (1) + { + if (pm->state == PERFMON_STATE_RUNNING) + vlib_process_wait_for_event_or_clock (vm, pm->timeout_interval); + else + vlib_process_wait_for_event (vm); + + now = vlib_time_now (vm); + + event_type = vlib_process_get_events (vm, (uword **) & event_data); + + switch (event_type) + { + case PERFMON_START: + for (i = 0; i < vec_len (event_data); i++) + start_event (pm, now, event_data[i]); + break; + + /* Handle timeout */ + case ~0: + handle_timeout (pm, now); + break; + + default: + clib_warning ("Unexpected event %d", event_type); + break; + } + vec_reset_length (event_data); + } + return 0; /* or not */ +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (perfmon_periodic_node) = +{ + .function = perfmon_periodic_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "perfmon-periodic-process", +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/main.c b/src/vlib/main.c index 62599437f35..273ae5256a5 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -540,29 +540,38 @@ vlib_put_next_frame (vlib_main_t * vm, never_inline void vlib_node_runtime_sync_stats (vlib_main_t * vm, vlib_node_runtime_t * r, - uword n_calls, uword n_vectors, uword n_clocks) + uword n_calls, uword n_vectors, uword n_clocks, + uword n_ticks) { vlib_node_t *n = vlib_get_node (vm, r->node_index); n->stats_total.calls += n_calls + r->calls_since_last_overflow; n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow; n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow; + n->stats_total.perf_counter_ticks += n_ticks + + r->perf_counter_ticks_since_last_overflow; + n->stats_total.perf_counter_vectors += n_vectors + + r->perf_counter_vectors_since_last_overflow; n->stats_total.max_clock = r->max_clock; n->stats_total.max_clock_n = r->max_clock_n; r->calls_since_last_overflow = 0; r->vectors_since_last_overflow = 0; r->clocks_since_last_overflow = 0; + r->perf_counter_ticks_since_last_overflow = 0ULL; + r->perf_counter_vectors_since_last_overflow = 0ULL; } always_inline void __attribute__ ((unused)) vlib_process_sync_stats (vlib_main_t * vm, vlib_process_t * p, - uword n_calls, uword n_vectors, uword n_clocks) + uword n_calls, uword n_vectors, uword n_clocks, + uword n_ticks) { vlib_node_runtime_t *rt = &p->node_runtime; vlib_node_t *n = vlib_get_node (vm, rt->node_index); - vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks); + vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks, + n_ticks); n->stats_total.suspends += p->n_suspends; p->n_suspends = 0; } @@ -588,7 +597,7 @@ vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n) vec_elt_at_index (vm->node_main.nodes_by_type[n->type], n->runtime_index); - vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0); + vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0, 0); /* Sync up runtime next frame vector counters with main node structure. */ { @@ -608,45 +617,68 @@ always_inline u32 vlib_node_runtime_update_stats (vlib_main_t * vm, vlib_node_runtime_t * node, uword n_calls, - uword n_vectors, uword n_clocks) + uword n_vectors, uword n_clocks, + uword n_ticks) { u32 ca0, ca1, v0, v1, cl0, cl1, r; + u32 ptick0, ptick1, pvec0, pvec1; cl0 = cl1 = node->clocks_since_last_overflow; ca0 = ca1 = node->calls_since_last_overflow; v0 = v1 = node->vectors_since_last_overflow; + ptick0 = ptick1 = node->perf_counter_ticks_since_last_overflow; + pvec0 = pvec1 = node->perf_counter_vectors_since_last_overflow; ca1 = ca0 + n_calls; v1 = v0 + n_vectors; cl1 = cl0 + n_clocks; + ptick1 = ptick0 + n_ticks; + pvec1 = pvec0 + n_vectors; node->calls_since_last_overflow = ca1; node->clocks_since_last_overflow = cl1; node->vectors_since_last_overflow = v1; + node->perf_counter_ticks_since_last_overflow = ptick1; + node->perf_counter_vectors_since_last_overflow = pvec1; + node->max_clock_n = node->max_clock > n_clocks ? node->max_clock_n : n_vectors; node->max_clock = node->max_clock > n_clocks ? node->max_clock : n_clocks; r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors); - if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0)) + if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0) || (ptick1 < ptick0) + || (pvec1 < pvec0)) { node->calls_since_last_overflow = ca0; node->clocks_since_last_overflow = cl0; node->vectors_since_last_overflow = v0; - vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks); + node->perf_counter_ticks_since_last_overflow = ptick0; + node->perf_counter_vectors_since_last_overflow = pvec0; + + vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks, + n_ticks); } return r; } +static inline u64 +vlib_node_runtime_perf_counter (vlib_main_t * vm) +{ + if (PREDICT_FALSE (vm->vlib_node_runtime_perf_counter_cb != 0)) + return ((*vm->vlib_node_runtime_perf_counter_cb) (vm)); + return 0ULL; +} + always_inline void vlib_process_update_stats (vlib_main_t * vm, vlib_process_t * p, - uword n_calls, uword n_vectors, uword n_clocks) + uword n_calls, uword n_vectors, uword n_clocks, + uword n_ticks) { vlib_node_runtime_update_stats (vm, &p->node_runtime, - n_calls, n_vectors, n_clocks); + n_calls, n_vectors, n_clocks, n_ticks); } static clib_error_t * @@ -959,15 +991,19 @@ dispatch_node (vlib_main_t * vm, if (1 /* || vm->thread_index == node->thread_index */ ) { - vlib_main_t *stat_vm; - - stat_vm = /* vlib_mains ? vlib_mains[0] : */ vm; + u64 pmc_before, pmc_delta; vlib_elog_main_loop_event (vm, node->node_index, last_time_stamp, frame ? frame->n_vectors : 0, /* is_after */ 0); + /* + * To validate accounting: pmc_before = last_time_stamp + * perf ticks should equal clocks/pkt... + */ + pmc_before = vlib_node_runtime_perf_counter (vm); + /* * Turn this on if you run into * "bad monkey" contexts, and you want to know exactly @@ -990,16 +1026,23 @@ dispatch_node (vlib_main_t * vm, t = clib_cpu_time_now (); + /* + * To validate accounting: pmc_delta = t - pmc_before; + * perf ticks should equal clocks/pkt... + */ + pmc_delta = vlib_node_runtime_perf_counter (vm) - pmc_before; + vlib_elog_main_loop_event (vm, node->node_index, t, n, /* is_after */ 1); vm->main_loop_vectors_processed += n; vm->main_loop_nodes_processed += n > 0; - v = vlib_node_runtime_update_stats (stat_vm, node, + v = vlib_node_runtime_update_stats (vm, node, /* n_calls */ 1, /* n_vectors */ n, - /* n_clocks */ t - last_time_stamp); + /* n_clocks */ t - last_time_stamp, + pmc_delta /* PMC ticks */ ); /* When in interrupt mode and vector rate crosses threshold switch to polling mode. */ @@ -1338,7 +1381,8 @@ dispatch_process (vlib_main_t * vm, vlib_process_update_stats (vm, p, /* n_calls */ !is_suspend, /* n_vectors */ n_vectors, - /* n_clocks */ t - last_time_stamp); + /* n_clocks */ t - last_time_stamp, + /* pmc_ticks */ 0ULL); return t; } @@ -1421,7 +1465,8 @@ dispatch_suspended_process (vlib_main_t * vm, vlib_process_update_stats (vm, p, /* n_calls */ !is_suspend, /* n_vectors */ n_vectors, - /* n_clocks */ t - last_time_stamp); + /* n_clocks */ t - last_time_stamp, + /* pmc_ticks */ 0ULL); return t; } @@ -1471,6 +1516,9 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (!nm->interrupt_threshold_vector_length) nm->interrupt_threshold_vector_length = 5; + /* Make sure the performance monitor counter is disabled */ + vm->perf_counter_id = ~0; + /* Start all processes. */ if (is_main) { @@ -1493,6 +1541,9 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) vlib_worker_thread_barrier_check (); vec_foreach (fqm, tm->frame_queue_mains) vlib_frame_queue_dequeue (vm, fqm); + if (PREDICT_FALSE (vm->worker_thread_main_loop_callback != 0)) + ((void (*)(vlib_main_t *)) vm->worker_thread_main_loop_callback) + (vm); } /* Process pre-input nodes. */ diff --git a/src/vlib/main.h b/src/vlib/main.h index ce42b6ea442..ddc14df5360 100644 --- a/src/vlib/main.h +++ b/src/vlib/main.h @@ -46,6 +46,7 @@ #include #include #include +#include #include @@ -81,6 +82,11 @@ typedef struct vlib_main_t u32 vector_counts_per_main_loop[2]; u32 node_counts_per_main_loop[2]; + /* Main loop hw / sw performance counters */ + u64 (*vlib_node_runtime_perf_counter_cb) (struct vlib_main_t *); + int perf_counter_id; + int perf_counter_fd; + /* Every so often we switch to the next counter. */ #define VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE 7 @@ -192,6 +198,9 @@ typedef struct vlib_main_t void (*queue_signal_callback) (struct vlib_main_t *); u8 **argv; + /* Top of (worker) dispatch loop callback */ + volatile void (*worker_thread_main_loop_callback) (struct vlib_main_t *); + /* debugging */ volatile int parked_at_barrier; diff --git a/src/vlib/node.h b/src/vlib/node.h index 6efb6f3e4fe..fd245d59def 100644 --- a/src/vlib/node.h +++ b/src/vlib/node.h @@ -244,6 +244,8 @@ typedef struct u64 calls, vectors, clocks, suspends; u64 max_clock; u64 max_clock_n; + u64 perf_counter_ticks; + u64 perf_counter_vectors; } vlib_node_stats_t; #define foreach_vlib_node_state \ @@ -488,6 +490,9 @@ typedef struct vlib_node_runtime_t u32 vectors_since_last_overflow; /**< Number of vector elements processed by this node. */ + u32 perf_counter_ticks_since_last_overflow; /**< Perf counter ticks */ + u32 perf_counter_vectors_since_last_overflow; /**< Perf counter vectors */ + u32 next_frame_index; /**< Start of next frames for this node. */ diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c index 2523b41c404..062854af5bc 100644 --- a/src/vlib/node_cli.c +++ b/src/vlib/node_cli.c @@ -148,19 +148,25 @@ format_vlib_node_stats (u8 * s, va_list * va) f64 maxc, maxcn; u32 maxn; u32 indent; + u64 pmc_ticks; + f64 pmc_ticks_per_packet; if (!n) { if (max) - return format (s, - "%=30s%=17s%=16s%=16s%=16s%=16s", - "Name", "Max Node Clocks", "Vectors at Max", - "Max Clocks", "Avg Clocks", "Avg Vectors/Call"); + s = format (s, + "%=30s%=17s%=16s%=16s%=16s%=16s", + "Name", "Max Node Clocks", "Vectors at Max", + "Max Clocks", "Avg Clocks", "Avg Vectors/Call"); else - return format (s, - "%=30s%=12s%=16s%=16s%=16s%=16s%=16s", - "Name", "State", "Calls", "Vectors", "Suspends", - "Clocks", "Vectors/Call"); + s = format (s, + "%=30s%=12s%=16s%=16s%=16s%=16s%=16s", + "Name", "State", "Calls", "Vectors", "Suspends", + "Clocks", "Vectors/Call"); + if (vm->perf_counter_id) + s = format (s, "%=16s", "Perf Ticks"); + + return s; } indent = format_get_indent (s); @@ -176,6 +182,13 @@ format_vlib_node_stats (u8 * s, va_list * va) else maxcn = 0.0; + pmc_ticks = n->stats_total.perf_counter_ticks - + n->stats_last_clear.perf_counter_ticks; + if (p > 0) + pmc_ticks_per_packet = (f64) pmc_ticks / (f64) p; + else + pmc_ticks_per_packet = 0.0; + /* Clocks per packet, per call or per suspend. */ x = 0; if (p > 0) @@ -208,6 +221,9 @@ format_vlib_node_stats (u8 * s, va_list * va) s = format (s, "%-30v%=12U%16Ld%16Ld%16Ld%16.2e%16.2f", ns, format_vlib_node_state, vm, n, c, p, d, x, v); + if (pmc_ticks_per_packet > 0.0) + s = format (s, "%16.2e", pmc_ticks_per_packet); + if (ns != n->name) vec_free (ns); diff --git a/src/vlibapi/node_serialize.c b/src/vlibapi/node_serialize.c index b50d79e2922..0774eea251e 100644 --- a/src/vlibapi/node_serialize.c +++ b/src/vlibapi/node_serialize.c @@ -57,7 +57,7 @@ vlib_node_serialize (vlib_main_t * vm, vlib_node_t *** node_dups, u8 * vector, u8 *namep; u32 name_bytes; uword i, j, k; - u64 l, v, c, d; + u64 l, v, c, d, pmc; state_string_enum_t state_code; serialize_open_vector (sm, vector); @@ -77,6 +77,8 @@ vlib_node_serialize (vlib_main_t * vm, vlib_node_t *** node_dups, u8 * vector, v = n->stats_total.vectors - n->stats_last_clear.vectors; c = n->stats_total.calls - n->stats_last_clear.calls; d = n->stats_total.suspends - n->stats_last_clear.suspends; + pmc = n->stats_total.perf_counter_ticks + - n->stats_last_clear.perf_counter_ticks; state_code = STATE_INTERNAL; @@ -149,6 +151,8 @@ vlib_node_serialize (vlib_main_t * vm, vlib_node_t *** node_dups, u8 * vector, serialize_integer (sm, v, 8); /* Total suspends */ serialize_integer (sm, d, 8); + /* PMC counter */ + serialize_integer (sm, pmc, 8); } else /* no stats */ serialize_likely_small_unsigned_integer (sm, 0); @@ -167,7 +171,7 @@ vlib_node_unserialize (u8 * vector) vlib_node_t **nodes; vlib_node_t ***nodes_by_thread = 0; int i, j, k; - u64 l, v, c, d; + u64 l, v, c, d, pmc; state_string_enum_t state_code; int stats_present; @@ -225,6 +229,9 @@ vlib_node_unserialize (u8 * vector) /* Total suspends */ unserialize_integer (sm, &d, 8); node->stats_total.suspends = d; + /* PMC counter */ + unserialize_integer (sm, &pmc, 8); + node->stats_total.perf_counter_ticks = pmc; } } } diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt index 7103d600780..26368493382 100644 --- a/src/vppinfra/CMakeLists.txt +++ b/src/vppinfra/CMakeLists.txt @@ -136,6 +136,7 @@ set(VPPINFRA_HEADERS os.h pipeline.h pool.h + pmc.h ptclosure.h random_buffer.h random.h diff --git a/src/vppinfra/pmc.h b/src/vppinfra/pmc.h new file mode 100644 index 00000000000..258b925127d --- /dev/null +++ b/src/vppinfra/pmc.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_clib_pmc_h +#define included_clib_pmc_h + +#if defined (__x86_64__) + +always_inline u64 +clib_rdpmc (int counter_id) +{ + u32 a, d; + + asm volatile ("rdpmc":"=a" (a), "=d" (d):"c" (counter_id)); + return (u64) a + ((u64) d << (u64) 32); +} + +#else +always_inline u64 +clib_rdpmc (int counter_id) +{ + return 0ULL; +} +#endif /* __aarch64__ */ + +#endif /* included_clib_pmc_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- 2.16.6