Add missing pre-input node runtime fork and refork code.
unix-epoll-input runs on all threads; each instance needs its own
runtime stats.

Also add a thread bitmap to the perfmon plugin: "set pmc
[threads n,n1-n2] <counter>..." restricts statistics collection to the
listed threads, validates thread indices against the configured thread
count, opens per-CPU perf events, and waits (with a deadman timeout)
for workers to stop collecting before scraping counters.
Change-Id: I16b02e42d0c95f863161176c4bb9f9917bef809d
Signed-off-by: Dave Barach <dave@barachs.net>
unformat_input_t * input, vlib_cli_command_t * cmd)
{
perfmon_main_t *pm = &perfmon_main;
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ int num_threads = 1 + vtm->n_threads;
unformat_input_t _line_input, *line_input = &_line_input;
perfmon_event_config_t ec;
f64 delay;
u32 timeout_seconds;
u32 deadman;
+ int last_set;
+ clib_error_t *error;
vec_reset_length (pm->single_events_to_collect);
vec_reset_length (pm->paired_events_to_collect);
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, "counter names required...");
+ clib_bitmap_zero (pm->thread_bitmap);
+
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (line_input, "timeout %u", &timeout_seconds))
ec.pe_config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
vec_add1 (pm->paired_events_to_collect, ec);
}
+ else if (unformat (line_input, "threads %U",
+ unformat_bitmap_list, &pm->thread_bitmap))
+ ;
+ else if (unformat (line_input, "thread %U",
+ unformat_bitmap_list, &pm->thread_bitmap))
+ ;
else if (unformat (line_input, "%U", unformat_processor_event, pm, &ec))
{
vec_add1 (pm->single_events_to_collect, ec);
foreach_perfmon_event
#undef _
else
- return clib_error_return (0, "unknown input '%U'",
- format_unformat_error, line_input);
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ unformat_free (line_input);
+ return error;
+ }
}
+ unformat_free (line_input);
+
+ last_set = clib_bitmap_last_set (pm->thread_bitmap);
+ if (last_set != ~0 && last_set >= num_threads)
+ return clib_error_return (0, "thread %d does not exist", last_set);
+
/* Stick paired events at the front of the (unified) list */
if (vec_len (pm->paired_events_to_collect) > 0)
{
VLIB_CLI_COMMAND (set_pmc_command, static) =
{
.path = "set pmc",
- .short_help = "set pmc c1 [..., use \"show pmc events\"]",
+ .short_help = "set pmc [threads n,n1-n2] c1... [see \"show pmc events\"]",
.function = set_pmc_command_fn,
.is_mp_safe = 1,
};
/* Current perf_event file descriptors, per thread */
int **pm_fds;
+ /* thread bitmap */
+ uword *thread_bitmap;
+
/* Logging */
vlib_log_class_t log_class;
#include <asm/unistd.h>
#include <sys/ioctl.h>
+/* "not in glibc" */
static long
perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu,
int group_fd, unsigned long flags)
u32 my_thread_index = vm->thread_index;
u32 index;
int i, limit = 1;
+ int cpu;
if ((pm->current_event + 1) < vec_len (pm->single_events_to_collect))
limit = 2;
pe.exclude_hv = 1;
}
- fd = perf_event_open (&pe, 0, -1, -1, 0);
+ cpu = vm->cpu_index;
+
+ fd = perf_event_open (&pe, 0, cpu, -1, 0);
if (fd == -1)
{
clib_unix_warning ("event open: type %d config %d", c->pe_type,
start_event (perfmon_main_t * pm, f64 now, uword event_data)
{
int i;
+ int last_set;
+ int all = 0;
pm->current_event = 0;
+
if (vec_len (pm->single_events_to_collect) == 0)
{
pm->state = PERFMON_STATE_OFF;
return;
}
+
+ last_set = clib_bitmap_last_set (pm->thread_bitmap);
+ all = (last_set == ~0);
+
pm->state = PERFMON_STATE_RUNNING;
clear_counters (pm);
- /* Start collection on this thread */
- enable_current_events (pm);
+ /* Start collection on thread 0? */
+ if (all || clib_bitmap_get (pm->thread_bitmap, 0))
+ {
+ /* Start collection on this thread */
+ enable_current_events (pm);
+ }
/* And also on worker threads */
for (i = 1; i < vec_len (vlib_mains); i++)
{
if (vlib_mains[i] == 0)
continue;
- vlib_mains[i]->worker_thread_main_loop_callback = (void *)
- worker_thread_start_event;
+
+ if (all || clib_bitmap_get (pm->thread_bitmap, i))
+ vlib_mains[i]->worker_thread_main_loop_callback = (void *)
+ worker_thread_start_event;
}
}
}
static void
-handle_timeout (perfmon_main_t * pm, f64 now)
+handle_timeout (vlib_main_t * vm, perfmon_main_t * pm, f64 now)
{
int i;
- disable_events (pm);
+ int last_set, all;
+
+ last_set = clib_bitmap_last_set (pm->thread_bitmap);
+ all = (last_set == ~0);
+
+ if (all || clib_bitmap_get (pm->thread_bitmap, 0))
+ disable_events (pm);
/* And also on worker threads */
for (i = 1; i < vec_len (vlib_mains); i++)
{
if (vlib_mains[i] == 0)
continue;
- vlib_mains[i]->worker_thread_main_loop_callback = (void *)
- worker_thread_stop_event;
+ if (all || clib_bitmap_get (pm->thread_bitmap, i))
+ vlib_mains[i]->worker_thread_main_loop_callback = (void *)
+ worker_thread_stop_event;
}
- /* Short delay to make sure workers have stopped collection */
+ /* Make sure workers have stopped collection */
if (i > 1)
- vlib_process_suspend (pm->vlib_main, 1e-3);
+ {
+ f64 deadman = vlib_time_now (vm) + 1.0;
+
+ for (i = 1; i < vec_len (vlib_mains); i++)
+ {
+ /* Has the worker actually stopped collecting data? */
+ while (vlib_mains[i]->worker_thread_main_loop_callback)
+ {
+ if (vlib_time_now (vm) > deadman)
+ {
+ clib_warning ("Thread %d deadman timeout!", i);
+ break;
+ }
+ vlib_process_suspend (pm->vlib_main, 1e-3);
+ }
+ }
+ }
scrape_and_clear_counters (pm);
pm->current_event += pm->n_active;
if (pm->current_event >= vec_len (pm->single_events_to_collect))
pm->state = PERFMON_STATE_OFF;
return;
}
- enable_current_events (pm);
+
+ if (all || clib_bitmap_get (pm->thread_bitmap, 0))
+ enable_current_events (pm);
/* And also on worker threads */
for (i = 1; i < vec_len (vlib_mains); i++)
{
if (vlib_mains[i] == 0)
continue;
- vlib_mains[i]->worker_thread_main_loop_callback = (void *)
- worker_thread_start_event;
+ if (all || clib_bitmap_get (pm->thread_bitmap, i))
+ vlib_mains[i]->worker_thread_main_loop_callback = (void *)
+ worker_thread_start_event;
}
}
/* Handle timeout */
case ~0:
- handle_timeout (pm, now);
+ handle_timeout (vm, pm, now);
break;
default:
}
}
-u64 oingo0, oingo1;
-
static_always_inline u64
dispatch_node (vlib_main_t * vm,
vlib_node_runtime_t * node,
vm->main_loop_vectors_processed += n;
vm->main_loop_nodes_processed += n > 0;
- if (pmc_delta[0] || pmc_delta[1])
- {
- oingo0 += pmc_delta[0];
- oingo1 += pmc_delta[1];
- }
-
v = vlib_node_runtime_update_stats (vm, node,
/* n_calls */ 1,
/* n_vectors */ n,
/* fork the frame dispatch queue */
nm_clone->pending_frames = 0;
- vec_validate (nm_clone->pending_frames, 10); /* $$$$$?????? */
+ vec_validate (nm_clone->pending_frames, 10);
_vec_len (nm_clone->pending_frames) = 0;
/* fork nodes */
n->runtime_data_bytes));
}
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT] =
+ vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT],
+ CLIB_CACHE_LINE_BYTES);
+ vec_foreach (rt,
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
+ {
+ vlib_node_t *n = vlib_get_node (vm, rt->node_index);
+ rt->thread_index = vm_clone->thread_index;
+ /* copy initial runtime_data from node */
+ if (n->runtime_data && n->runtime_data_bytes > 0)
+ clib_memcpy (rt->runtime_data, n->runtime_data,
+ clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
+ n->runtime_data_bytes));
+ }
+
nm_clone->processes = vec_dup_aligned (nm->processes,
CLIB_CACHE_LINE_BYTES);
vec_free (old_rt);
+ /* re-clone pre-input nodes */
+ old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT];
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT] =
+ vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT],
+ CLIB_CACHE_LINE_BYTES);
+
+ vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
+ {
+ vlib_node_t *n = vlib_get_node (vm, rt->node_index);
+ rt->thread_index = vm_clone->thread_index;
+ /* copy runtime_data, will be overwritten later for existing rt */
+ if (n->runtime_data && n->runtime_data_bytes > 0)
+ clib_memcpy_fast (rt->runtime_data, n->runtime_data,
+ clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
+ n->runtime_data_bytes));
+ }
+
+ for (j = 0; j < vec_len (old_rt); j++)
+ {
+ rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
+ rt->state = old_rt[j].state;
+ clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data,
+ VLIB_NODE_RUNTIME_DATA_SIZE);
+ }
+
+ vec_free (old_rt);
+
nm_clone->processes = vec_dup_aligned (nm->processes,
CLIB_CACHE_LINE_BYTES);
}