From 53fe4a79269671fd37bf8a1fbb147bcc99b04fab Mon Sep 17 00:00:00 2001
From: Dave Barach
Date: Sat, 26 Jan 2019 09:50:26 -0500
Subject: [PATCH] perfmon: collect data on selected thread(s)

Add missing pre-input node runtime fork and refork code.
unix-epoll-input runs on all threads; each instance needs its own
runtime stats.

Change-Id: I16b02e42d0c95f863161176c4bb9f9917bef809d
Signed-off-by: Dave Barach
---
 src/plugins/perfmon/perfmon.c          | 28 +++++++++++--
 src/plugins/perfmon/perfmon.h          |  3 ++
 src/plugins/perfmon/perfmon_periodic.c | 73 +++++++++++++++++++++++++++-------
 src/vlib/main.c                        |  8 ----
 src/vlib/threads.c                     | 44 +++++++++++++++++++-
 5 files changed, 129 insertions(+), 27 deletions(-)

diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c
index 359555705aa..7f621a1e671 100644
--- a/src/plugins/perfmon/perfmon.c
+++ b/src/plugins/perfmon/perfmon.c
@@ -301,11 +301,15 @@ set_pmc_command_fn (vlib_main_t * vm,
                     unformat_input_t * input, vlib_cli_command_t * cmd)
 {
   perfmon_main_t *pm = &perfmon_main;
+  vlib_thread_main_t *vtm = vlib_get_thread_main ();
+  int num_threads = 1 + vtm->n_threads;
   unformat_input_t _line_input, *line_input = &_line_input;
   perfmon_event_config_t ec;
   f64 delay;
   u32 timeout_seconds;
   u32 deadman;
+  int last_set;
+  clib_error_t *error;

   vec_reset_length (pm->single_events_to_collect);
   vec_reset_length (pm->paired_events_to_collect);
@@ -315,6 +319,8 @@ set_pmc_command_fn (vlib_main_t * vm,
   if (!unformat_user (input, unformat_line_input, line_input))
     return clib_error_return (0, "counter names required...");

+  clib_bitmap_zero (pm->thread_bitmap);
+
   while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
     {
       if (unformat (line_input, "timeout %u", &timeout_seconds))
@@ -343,6 +349,12 @@ set_pmc_command_fn (vlib_main_t * vm,
           ec.pe_config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
           vec_add1 (pm->paired_events_to_collect, ec);
         }
+      else if (unformat (line_input, "threads %U",
+                         unformat_bitmap_list, &pm->thread_bitmap))
+        ;
+      else if (unformat (line_input, "thread %U",
+                         unformat_bitmap_list, &pm->thread_bitmap))
+        ;
       else if (unformat (line_input, "%U", unformat_processor_event, pm, &ec))
         {
           vec_add1 (pm->single_events_to_collect, ec);
@@ -358,10 +370,20 @@ set_pmc_command_fn (vlib_main_t * vm,
       foreach_perfmon_event
 #undef _
         else
-        return clib_error_return (0, "unknown input '%U'",
-                                  format_unformat_error, line_input);
+        {
+          error = clib_error_return (0, "unknown input '%U'",
+                                     format_unformat_error, line_input);
+          unformat_free (line_input);
+          return error;
+        }
     }

+  unformat_free (line_input);
+
+  last_set = clib_bitmap_last_set (pm->thread_bitmap);
+  if (last_set != ~0 && last_set >= num_threads)
+    return clib_error_return (0, "thread %d does not exist", last_set);
+
   /* Stick paired events at the front of the (unified) list */
   if (vec_len (pm->paired_events_to_collect) > 0)
     {
@@ -410,7 +432,7 @@ set_pmc_command_fn (vlib_main_t * vm,
 VLIB_CLI_COMMAND (set_pmc_command, static) =
 {
   .path = "set pmc",
-  .short_help = "set pmc c1 [..., use \"show pmc events\"]",
+  .short_help = "set pmc [threads n,n1-n2] c1... [see \"show pmc events\"]",
[see \"show pmc events\"]", .function = set_pmc_command_fn, .is_mp_safe = 1, }; diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h index 9663dae36d1..9c4c34e36c1 100644 --- a/src/plugins/perfmon/perfmon.h +++ b/src/plugins/perfmon/perfmon.h @@ -121,6 +121,9 @@ typedef struct /* Current perf_event file descriptors, per thread */ int **pm_fds; + /* thread bitmap */ + uword *thread_bitmap; + /* Logging */ vlib_log_class_t log_class; diff --git a/src/plugins/perfmon/perfmon_periodic.c b/src/plugins/perfmon/perfmon_periodic.c index ccf3e9eed81..0811439d7eb 100644 --- a/src/plugins/perfmon/perfmon_periodic.c +++ b/src/plugins/perfmon/perfmon_periodic.c @@ -21,6 +21,7 @@ #include #include +/* "not in glibc" */ static long perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) @@ -114,6 +115,7 @@ enable_current_events (perfmon_main_t * pm) u32 my_thread_index = vm->thread_index; u32 index; int i, limit = 1; + int cpu; if ((pm->current_event + 1) < vec_len (pm->single_events_to_collect)) limit = 2; @@ -140,7 +142,9 @@ enable_current_events (perfmon_main_t * pm) pe.exclude_hv = 1; } - fd = perf_event_open (&pe, 0, -1, -1, 0); + cpu = vm->cpu_index; + + fd = perf_event_open (&pe, 0, cpu, -1, 0); if (fd == -1) { clib_unix_warning ("event open: type %d config %d", c->pe_type, @@ -237,25 +241,38 @@ static void start_event (perfmon_main_t * pm, f64 now, uword event_data) { int i; + int last_set; + int all = 0; pm->current_event = 0; + if (vec_len (pm->single_events_to_collect) == 0) { pm->state = PERFMON_STATE_OFF; return; } + + last_set = clib_bitmap_last_set (pm->thread_bitmap); + all = (last_set == ~0); + pm->state = PERFMON_STATE_RUNNING; clear_counters (pm); - /* Start collection on this thread */ - enable_current_events (pm); + /* Start collection on thread 0? */ + if (all || clib_bitmap_get (pm->thread_bitmap, 0)) + { + /* Start collection on this thread */ + enable_current_events (pm); + } /* And also on worker threads */ for (i = 1; i < vec_len (vlib_mains); i++) { if (vlib_mains[i] == 0) continue; - vlib_mains[i]->worker_thread_main_loop_callback = (void *) - worker_thread_start_event; + + if (all || clib_bitmap_get (pm->thread_bitmap, i)) + vlib_mains[i]->worker_thread_main_loop_callback = (void *) + worker_thread_start_event; } } @@ -397,23 +414,46 @@ scrape_and_clear_counters (perfmon_main_t * pm) } static void -handle_timeout (perfmon_main_t * pm, f64 now) +handle_timeout (vlib_main_t * vm, perfmon_main_t * pm, f64 now) { int i; - disable_events (pm); + int last_set, all; + + last_set = clib_bitmap_last_set (pm->thread_bitmap); + all = (last_set == ~0); + + if (all || clib_bitmap_get (pm->thread_bitmap, 0)) + disable_events (pm); /* And also on worker threads */ for (i = 1; i < vec_len (vlib_mains); i++) { if (vlib_mains[i] == 0) continue; - vlib_mains[i]->worker_thread_main_loop_callback = (void *) - worker_thread_stop_event; + if (all || clib_bitmap_get (pm->thread_bitmap, i)) + vlib_mains[i]->worker_thread_main_loop_callback = (void *) + worker_thread_stop_event; } - /* Short delay to make sure workers have stopped collection */ + /* Make sure workers have stopped collection */ if (i > 1) - vlib_process_suspend (pm->vlib_main, 1e-3); + { + f64 deadman = vlib_time_now (vm) + 1.0; + + for (i = 1; i < vec_len (vlib_mains); i++) + { + /* Has the worker actually stopped collecting data? 
+          while (vlib_mains[i]->worker_thread_main_loop_callback)
+            {
+              if (vlib_time_now (vm) > deadman)
+                {
+                  clib_warning ("Thread %d deadman timeout!", i);
+                  break;
+                }
+              vlib_process_suspend (pm->vlib_main, 1e-3);
+            }
+        }
+    }
   scrape_and_clear_counters (pm);
   pm->current_event += pm->n_active;
   if (pm->current_event >= vec_len (pm->single_events_to_collect))
@@ -422,15 +462,18 @@ handle_timeout (perfmon_main_t * pm, f64 now)
       pm->state = PERFMON_STATE_OFF;
       return;
     }
-  enable_current_events (pm);
+
+  if (all || clib_bitmap_get (pm->thread_bitmap, 0))
+    enable_current_events (pm);

   /* And also on worker threads */
   for (i = 1; i < vec_len (vlib_mains); i++)
     {
       if (vlib_mains[i] == 0)
         continue;
-      vlib_mains[i]->worker_thread_main_loop_callback = (void *)
-        worker_thread_start_event;
+      if (all || clib_bitmap_get (pm->thread_bitmap, i))
+        vlib_mains[i]->worker_thread_main_loop_callback = (void *)
+          worker_thread_start_event;
     }
 }

@@ -464,7 +507,7 @@ perfmon_periodic_process (vlib_main_t * vm,

           /* Handle timeout */
         case ~0:
-          handle_timeout (pm, now);
+          handle_timeout (vm, pm, now);
           break;

         default:
diff --git a/src/vlib/main.c b/src/vlib/main.c
index 0e480fabe2a..3048a0d72de 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -1105,8 +1105,6 @@ dispatch_pcap_trace (vlib_main_t * vm,
     }
 }

-u64 oingo0, oingo1;
-
 static_always_inline u64
 dispatch_node (vlib_main_t * vm,
                vlib_node_runtime_t * node,
@@ -1207,12 +1205,6 @@ dispatch_node (vlib_main_t * vm,
   vm->main_loop_vectors_processed += n;
   vm->main_loop_nodes_processed += n > 0;

-  if (pmc_delta[0] || pmc_delta[1])
-    {
-      oingo0 += pmc_delta[0];
-      oingo1 += pmc_delta[1];
-    }
-
   v = vlib_node_runtime_update_stats (vm, node,
                                       /* n_calls */ 1,
                                       /* n_vectors */ n,
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index 45e4d89b7ab..e6ac6db543a 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -801,7 +801,7 @@ start_workers (vlib_main_t * vm)

           /* fork the frame dispatch queue */
           nm_clone->pending_frames = 0;
-          vec_validate (nm_clone->pending_frames, 10);  /* $$$$$?????? */
+          vec_validate (nm_clone->pending_frames, 10);
           _vec_len (nm_clone->pending_frames) = 0;

           /* fork nodes */
@@ -850,6 +850,21 @@ start_workers (vlib_main_t * vm)
                                                     n->runtime_data_bytes));
           }

+          nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT] =
+            vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT],
+                             CLIB_CACHE_LINE_BYTES);
+          vec_foreach (rt,
+                       nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
+          {
+            vlib_node_t *n = vlib_get_node (vm, rt->node_index);
+            rt->thread_index = vm_clone->thread_index;
+            /* copy initial runtime_data from node */
+            if (n->runtime_data && n->runtime_data_bytes > 0)
+              clib_memcpy (rt->runtime_data, n->runtime_data,
+                           clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
+                                     n->runtime_data_bytes));
+          }
+
           nm_clone->processes = vec_dup_aligned (nm->processes,
                                                  CLIB_CACHE_LINE_BYTES);

@@ -1173,6 +1188,33 @@ vlib_worker_thread_node_refork (void)

   vec_free (old_rt);

+  /* re-clone pre-input nodes */
+  old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT];
+  nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT] =
+    vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT],
+                     CLIB_CACHE_LINE_BYTES);
+
+  vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
+  {
+    vlib_node_t *n = vlib_get_node (vm, rt->node_index);
+    rt->thread_index = vm_clone->thread_index;
+    /* copy runtime_data, will be overwritten later for existing rt */
+    if (n->runtime_data && n->runtime_data_bytes > 0)
+      clib_memcpy_fast (rt->runtime_data, n->runtime_data,
+                        clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
+                                  n->runtime_data_bytes));
+  }
+
+  for (j = 0; j < vec_len (old_rt); j++)
+    {
+      rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
+      rt->state = old_rt[j].state;
+      clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data,
+                        VLIB_NODE_RUNTIME_DATA_SIZE);
+    }
+
+  vec_free (old_rt);
+
   nm_clone->processes = vec_dup_aligned (nm->processes,
                                          CLIB_CACHE_LINE_BYTES);
 }
-- 
2.16.6
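
The thread-selection logic this patch adds reduces to a simple pattern: parse an
optional "threads n,n1-n2" list into a bitmap, reject any thread index that does
not exist, and treat an empty bitmap as "collect on every thread" (the patch
encodes that case as clib_bitmap_last_set() returning ~0). Below is a minimal
standalone sketch of the same idea in plain C, deliberately not using VPP's clib
bitmap or unformat APIs; parse_thread_list and thread_selected are illustrative
names, not part of this patch.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Parse a list such as "0,2-3" into a bitmap of selected threads.
   Returns 0 on success, -1 on a parse error or out-of-range thread. */
static int
parse_thread_list (const char *s, int num_threads, uint64_t * bitmap)
{
  *bitmap = 0;
  while (*s)
    {
      char *end;
      long lo = strtol (s, &end, 10);
      long hi = lo;
      if (end == s)
        return -1;
      s = end;
      if (*s == '-')
        {
          hi = strtol (s + 1, &end, 10);
          if (end == s + 1)
            return -1;
          s = end;
        }
      if (lo < 0 || hi < lo || hi >= num_threads || hi >= 64)
        return -1;
      for (long i = lo; i <= hi; i++)
        *bitmap |= 1ULL << i;
      if (*s == ',')
        s++;
      else if (*s)
        return -1;
    }
  return 0;
}

/* An empty bitmap means "all threads", mirroring the patch's
   all = (last_set == ~0) convention. */
static int
thread_selected (uint64_t bitmap, int thread_index)
{
  return bitmap == 0 || ((bitmap >> thread_index) & 1);
}

int
main (void)
{
  uint64_t bm;
  /* 1 main thread + 4 workers; select main plus workers 2-3 */
  if (parse_thread_list ("0,2-3", 5, &bm) < 0)
    return 1;
  for (int i = 0; i < 5; i++)
    printf ("thread %d: %s\n", i, thread_selected (bm, i) ? "collect" : "skip");
  return 0;
}

With the patch applied, the CLI form is "set pmc threads 0,2-3 <counter> ...",
where counter names come from "show pmc events"; omitting the threads list keeps
the old behavior of collecting on all threads.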