diff --git a/src/plugins/perfmon/perfmon_periodic.c b/src/plugins/perfmon/perfmon_periodic.c
index 4e7e2378320..37d669b8d13 100644
--- a/src/plugins/perfmon/perfmon_periodic.c
+++ b/src/plugins/perfmon/perfmon_periodic.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 
+/* "not in glibc" */
 static long
 perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu,
                  int group_fd, unsigned long flags)
@@ -31,22 +32,42 @@ perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu,
   return ret;
 }
 
-static u64
-read_current_perf_counter (vlib_main_t * vm)
+static void
+read_current_perf_counters (vlib_main_t * vm, u64 * c0, u64 * c1,
+                            vlib_node_runtime_t * node,
+                            vlib_frame_t * frame, int before_or_after)
 {
-  if (vm->perf_counter_id)
-    return clib_rdpmc (vm->perf_counter_id);
-  else
+  int i;
+  u64 *cc;
+  perfmon_main_t *pm = &perfmon_main;
+  uword my_thread_index = vm->thread_index;
+
+  *c0 = *c1 = 0;
+
+  for (i = 0; i < pm->n_active; i++)
     {
-      u64 sw_value;
-      if (read (vm->perf_counter_fd, &sw_value, sizeof (sw_value)) !=
-          sizeof (sw_value))
+      cc = (i == 0) ? c0 : c1;
+      if (pm->rdpmc_indices[i][my_thread_index] != ~0)
+        *cc = clib_rdpmc ((int) pm->rdpmc_indices[i][my_thread_index]);
+      else
         {
-          clib_unix_warning ("counter read failed, disable collection...");
-          vm->vlib_node_runtime_perf_counter_cb = 0;
-          return 0ULL;
+          u64 sw_value;
+          int read_result;
+          if ((read_result = read (pm->pm_fds[i][my_thread_index], &sw_value,
+                                   sizeof (sw_value))) != sizeof (sw_value))
+            {
+              clib_unix_warning
+                ("counter read returned %d, expected %d",
+                 read_result, sizeof (sw_value));
+              clib_callback_enable_disable
+                (vm->vlib_node_runtime_perf_counter_cbs,
+                 vm->vlib_node_runtime_perf_counter_cb_tmp,
+                 vm->worker_thread_main_loop_callback_lock,
+                 read_current_perf_counters, 0 /* enable */ );
+              return;
+            }
+          *cc = sw_value;
         }
-      return sw_value;
     }
 }
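The rewritten reader above picks, per event, between two read paths. Here is a standalone sketch of the same choice in plain Linux C; rdpmc_index and fd stand in for pm->rdpmc_indices[i][thread] and pm->pm_fds[i][thread], and the names are illustrative, not VPP API:

#include <stdint.h>
#include <unistd.h>

/* x86 only: read performance-monitoring counter 'index' in user space.
 * VPP's clib_rdpmc() wraps the same instruction. */
static inline uint64_t
rdpmc (int index)
{
  uint32_t lo, hi;
  __asm__ volatile ("rdpmc" : "=a" (lo), "=d" (hi) : "c" (index));
  return (uint64_t) hi << 32 | lo;
}

/* Mirror of the per-event choice above: rdpmc when the kernel exported
 * a usable index (the patch stores ~0 when it did not), read(2) on the
 * event fd otherwise. Returns 0 on a short read and sets *failed so the
 * caller can stop collecting, analogous to the patch disabling its own
 * callback. */
static uint64_t
read_one_counter (int rdpmc_index, int fd, int *failed)
{
  uint64_t value = 0;

  if (rdpmc_index != -1)
    return rdpmc (rdpmc_index);

  if (read (fd, &value, sizeof (value)) != sizeof (value))
    *failed = 1;

  return value;
}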
@@ -80,9 +101,11 @@ clear_counters (perfmon_main_t * pm)
       for (i = 0; i < vec_len (nm->nodes); i++)
         {
           n = nm->nodes[i];
-          n->stats_total.perf_counter_ticks = 0;
+          n->stats_total.perf_counter0_ticks = 0;
+          n->stats_total.perf_counter1_ticks = 0;
           n->stats_total.perf_counter_vectors = 0;
-          n->stats_last_clear.perf_counter_ticks = 0;
+          n->stats_last_clear.perf_counter0_ticks = 0;
+          n->stats_last_clear.perf_counter1_ticks = 0;
           n->stats_last_clear.perf_counter_vectors = 0;
         }
     }
@@ -90,7 +113,7 @@ clear_counters (perfmon_main_t * pm)
 }
 
 static void
-enable_current_event (perfmon_main_t * pm)
+enable_current_events (perfmon_main_t * pm)
 {
   struct perf_event_attr pe;
   int fd;
@@ -98,91 +121,140 @@ enable_current_event (perfmon_main_t * pm)
   perfmon_event_config_t *c;
   vlib_main_t *vm = vlib_get_main ();
   u32 my_thread_index = vm->thread_index;
+  u32 index;
+  int i, limit = 1;
+  int cpu;
 
-  c = vec_elt_at_index (pm->events_to_collect, pm->current_event);
+  if ((pm->current_event + 1) < vec_len (pm->single_events_to_collect))
+    limit = 2;
 
-  memset (&pe, 0, sizeof (struct perf_event_attr));
-  pe.type = c->pe_type;
-  pe.size = sizeof (struct perf_event_attr);
-  pe.config = c->pe_config;
-  pe.disabled = 1;
-  pe.pinned = 1;
-  /*
-   * Note: excluding the kernel makes the
-   * (software) context-switch counter read 0...
-   */
-  if (pe.type != PERF_TYPE_SOFTWARE)
+  for (i = 0; i < limit; i++)
     {
-      /* Exclude kernel and hypervisor */
-      pe.exclude_kernel = 1;
-      pe.exclude_hv = 1;
-    }
+      vec_validate (pm->pm_fds[i], vec_len (vlib_mains) - 1);
+      vec_validate (pm->perf_event_pages[i], vec_len (vlib_mains) - 1);
+      vec_validate (pm->rdpmc_indices[i], vec_len (vlib_mains) - 1);
+
+      c = vec_elt_at_index (pm->single_events_to_collect,
+                            pm->current_event + i);
+
+      memset (&pe, 0, sizeof (struct perf_event_attr));
+      pe.type = c->pe_type;
+      pe.size = sizeof (struct perf_event_attr);
+      pe.config = c->pe_config;
+      pe.disabled = 1;
+      pe.pinned = 1;
+      /*
+       * Note: excluding the kernel makes the
+       * (software) context-switch counter read 0...
+       */
+      if (pe.type != PERF_TYPE_SOFTWARE)
+        {
+          /* Exclude kernel and hypervisor */
+          pe.exclude_kernel = 1;
+          pe.exclude_hv = 1;
+        }
 
-  fd = perf_event_open (&pe, 0, -1, -1, 0);
-  if (fd == -1)
-    {
-      clib_unix_warning ("event open: type %d config %d", c->pe_type,
-                         c->pe_config);
-      return;
-    }
+      cpu = vm->cpu_id;
 
-  if (pe.type != PERF_TYPE_SOFTWARE)
-    {
-      p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0);
-      if (p == MAP_FAILED)
+      fd = perf_event_open (&pe, 0, cpu, -1, 0);
+      if (fd == -1)
         {
-          clib_unix_warning ("mmap");
-          close (fd);
+          clib_unix_warning ("event open: type %d config %d", c->pe_type,
+                             c->pe_config);
           return;
         }
-    }
 
-  if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0)
-    clib_unix_warning ("reset ioctl");
+      if (pe.type != PERF_TYPE_SOFTWARE)
+        {
+          p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0);
+          if (p == MAP_FAILED)
+            {
+              clib_unix_warning ("mmap");
+              close (fd);
+              return;
+            }
+          CLIB_MEM_UNPOISON (p, pm->page_size);
+        }
+      else
+        p = 0;
 
-  if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
-    clib_unix_warning ("enable ioctl");
+      if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0)
+        clib_unix_warning ("reset ioctl");
+
+      if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
+        clib_unix_warning ("enable ioctl");
+
+      pm->perf_event_pages[i][my_thread_index] = (void *) p;
+      pm->pm_fds[i][my_thread_index] = fd;
+    }
 
   /*
-   * Software event counters - and others not capable of being
-   * read via the "rdpmc" instruction - will be read
-   * by system calls.
+   * Hardware events must be all opened and enabled before acquiring
+   * pmc indices, otherwise the pmc indices might be outdated.
    */
-  if (pe.type == PERF_TYPE_SOFTWARE || p->cap_user_rdpmc == 0)
-    pm->rdpmc_indices[my_thread_index] = 0;
-  else                          /* use rdpmc instrs */
-    pm->rdpmc_indices[my_thread_index] = p->index - 1;
-  pm->perf_event_pages[my_thread_index] = (void *) p;
+  for (i = 0; i < limit; i++)
+    {
+      p =
+        (struct perf_event_mmap_page *)
+        pm->perf_event_pages[i][my_thread_index];
+
+      /*
+       * Software event counters - and others not capable of being
+       * read via the "rdpmc" instruction - will be read
+       * by system calls.
+       */
+      if (p == 0 || p->cap_user_rdpmc == 0)
+        index = ~0;
+      else
+        index = p->index - 1;
 
-  pm->pm_fds[my_thread_index] = fd;
+      pm->rdpmc_indices[i][my_thread_index] = index;
+    }
+  pm->n_active = i;
 
   /* Enable the main loop counter snapshot mechanism */
-  vm->perf_counter_id = pm->rdpmc_indices[my_thread_index];
-  vm->perf_counter_fd = fd;
-  vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counter;
+  clib_callback_enable_disable
+    (vm->vlib_node_runtime_perf_counter_cbs,
+     vm->vlib_node_runtime_perf_counter_cb_tmp,
+     vm->worker_thread_main_loop_callback_lock,
+     read_current_perf_counters, 1 /* enable */ );
 }
 
 static void
-disable_event (perfmon_main_t * pm)
+disable_events (perfmon_main_t * pm)
 {
   vlib_main_t *vm = vlib_get_main ();
   u32 my_thread_index = vm->thread_index;
-
-  if (pm->pm_fds[my_thread_index] == 0)
-    return;
+  int i;
 
   /* Stop main loop collection */
-  vm->vlib_node_runtime_perf_counter_cb = 0;
+  clib_callback_enable_disable
+    (vm->vlib_node_runtime_perf_counter_cbs,
+     vm->vlib_node_runtime_perf_counter_cb_tmp,
+     vm->worker_thread_main_loop_callback_lock,
+     read_current_perf_counters, 0 /* enable */ );
+
+  for (i = 0; i < pm->n_active; i++)
+    {
+      if (pm->pm_fds[i][my_thread_index] == 0)
+        continue;
 
-  if (ioctl (pm->pm_fds[my_thread_index], PERF_EVENT_IOC_DISABLE, 0) < 0)
-    clib_unix_warning ("disable ioctl");
+      if (ioctl (pm->pm_fds[i][my_thread_index], PERF_EVENT_IOC_DISABLE, 0) <
+          0)
+        clib_unix_warning ("disable ioctl");
 
-  if (pm->perf_event_pages[my_thread_index])
-    if (munmap (pm->perf_event_pages[my_thread_index], pm->page_size) < 0)
-      clib_unix_warning ("munmap");
+      if (pm->perf_event_pages[i][my_thread_index])
+        {
+          if (munmap (pm->perf_event_pages[i][my_thread_index],
+                      pm->page_size) < 0)
+            clib_unix_warning ("munmap");
+          pm->perf_event_pages[i][my_thread_index] = 0;
+        }
 
-  (void) close (pm->pm_fds[my_thread_index]);
-  pm->pm_fds[my_thread_index] = 0;
+      (void) close (pm->pm_fds[i][my_thread_index]);
+      pm->pm_fds[i][my_thread_index] = 0;
+    }
 }
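The loop above is the standard Linux perf_event_open() open-map-reset-enable sequence, one pinned event per pass; the second loop harvests rdpmc indices only after every event is enabled, because the kernel may reassign hardware counters while pinned events are still being added. A condensed standalone sketch of both phases for a single event follows; open_and_enable and rdpmc_index_of are illustrative names, not VPP or kernel API:

#include <linux/perf_event.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Phase one: open one counter on the calling thread and start it.
 * Returns the event fd or -1; *mp receives the mmap'd control page
 * (NULL for software events, which cannot be read via rdpmc). */
static int
open_and_enable (uint32_t type, uint64_t config, int cpu, long page_size,
                 struct perf_event_mmap_page **mp)
{
  struct perf_event_attr pe;
  void *p = NULL;
  int fd;

  memset (&pe, 0, sizeof (pe));
  pe.type = type;
  pe.size = sizeof (pe);
  pe.config = config;
  pe.disabled = 1;              /* created stopped; started by ioctl below */
  pe.pinned = 1;                /* fail rather than multiplex */
  if (type != PERF_TYPE_SOFTWARE)
    {
      pe.exclude_kernel = 1;    /* see the context-switch caveat above */
      pe.exclude_hv = 1;
    }

  fd = syscall (__NR_perf_event_open, &pe, /* pid */ 0, cpu,
                /* group_fd */ -1, /* flags */ 0);
  if (fd == -1)
    return -1;

  if (type != PERF_TYPE_SOFTWARE)
    {
      p = mmap (0, page_size, PROT_READ, MAP_SHARED, fd, 0);
      if (p == MAP_FAILED)
        {
          close (fd);
          return -1;
        }
    }
  *mp = p;

  ioctl (fd, PERF_EVENT_IOC_RESET, 0);
  ioctl (fd, PERF_EVENT_IOC_ENABLE, 0);
  return fd;
}

/* Phase two: derive the rdpmc operand from the control page, as the
 * second loop above does. p->index is the counter number plus one;
 * 0 means "not scheduled on a hardware counter". Returns -1 (the patch
 * uses ~0) when user-space reads are not possible. */
static int
rdpmc_index_of (struct perf_event_mmap_page *p)
{
  if (p == NULL || p->cap_user_rdpmc == 0)
    return -1;
  return (int) p->index - 1;
}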
+ */ + if (p == 0 || p->cap_user_rdpmc == 0) + index = ~0; + else + index = p->index - 1; - pm->pm_fds[my_thread_index] = fd; + pm->rdpmc_indices[i][my_thread_index] = index; + } + pm->n_active = i; /* Enable the main loop counter snapshot mechanism */ - vm->perf_counter_id = pm->rdpmc_indices[my_thread_index]; - vm->perf_counter_fd = fd; - vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counter; + clib_callback_enable_disable + (vm->vlib_node_runtime_perf_counter_cbs, + vm->vlib_node_runtime_perf_counter_cb_tmp, + vm->worker_thread_main_loop_callback_lock, + read_current_perf_counters, 1 /* enable */ ); } static void -disable_event (perfmon_main_t * pm) +disable_events (perfmon_main_t * pm) { vlib_main_t *vm = vlib_get_main (); u32 my_thread_index = vm->thread_index; - - if (pm->pm_fds[my_thread_index] == 0) - return; + int i; /* Stop main loop collection */ - vm->vlib_node_runtime_perf_counter_cb = 0; + clib_callback_enable_disable + (vm->vlib_node_runtime_perf_counter_cbs, + vm->vlib_node_runtime_perf_counter_cb_tmp, + vm->worker_thread_main_loop_callback_lock, + read_current_perf_counters, 0 /* enable */ ); + + for (i = 0; i < pm->n_active; i++) + { + if (pm->pm_fds[i][my_thread_index] == 0) + continue; - if (ioctl (pm->pm_fds[my_thread_index], PERF_EVENT_IOC_DISABLE, 0) < 0) - clib_unix_warning ("disable ioctl"); + if (ioctl (pm->pm_fds[i][my_thread_index], PERF_EVENT_IOC_DISABLE, 0) < + 0) + clib_unix_warning ("disable ioctl"); - if (pm->perf_event_pages[my_thread_index]) - if (munmap (pm->perf_event_pages[my_thread_index], pm->page_size) < 0) - clib_unix_warning ("munmap"); + if (pm->perf_event_pages[i][my_thread_index]) + { + if (munmap (pm->perf_event_pages[i][my_thread_index], + pm->page_size) < 0) + clib_unix_warning ("munmap"); + pm->perf_event_pages[i][my_thread_index] = 0; + } - (void) close (pm->pm_fds[my_thread_index]); - pm->pm_fds[my_thread_index] = 0; + (void) close (pm->pm_fds[i][my_thread_index]); + pm->pm_fds[i][my_thread_index] = 0; + + } } static void @@ -190,48 +262,70 @@ worker_thread_start_event (vlib_main_t * vm) { perfmon_main_t *pm = &perfmon_main; - enable_current_event (pm); - vm->worker_thread_main_loop_callback = 0; + clib_callback_enable_disable (vm->worker_thread_main_loop_callbacks, + vm->worker_thread_main_loop_callback_tmp, + vm->worker_thread_main_loop_callback_lock, + worker_thread_start_event, 0 /* enable */ ); + enable_current_events (pm); } static void worker_thread_stop_event (vlib_main_t * vm) { perfmon_main_t *pm = &perfmon_main; - disable_event (pm); - vm->worker_thread_main_loop_callback = 0; + clib_callback_enable_disable (vm->worker_thread_main_loop_callbacks, + vm->worker_thread_main_loop_callback_tmp, + vm->worker_thread_main_loop_callback_lock, + worker_thread_stop_event, 0 /* enable */ ); + disable_events (pm); } static void start_event (perfmon_main_t * pm, f64 now, uword event_data) { int i; + int last_set; + int all = 0; pm->current_event = 0; - if (vec_len (pm->events_to_collect) == 0) + + if (vec_len (pm->single_events_to_collect) == 0) { pm->state = PERFMON_STATE_OFF; return; } + + last_set = clib_bitmap_last_set (pm->thread_bitmap); + all = (last_set == ~0); + pm->state = PERFMON_STATE_RUNNING; clear_counters (pm); - /* Start collection on this thread */ - enable_current_event (pm); + /* Start collection on thread 0? 
@@ -242,7 +336,6 @@ scrape_and_clear_counters (perfmon_main_t * pm)
   perfmon_event_config_t *current_event;
   uword *p;
   u8 *counter_name;
-  u64 counter_value;
   u64 vectors_this_counter;
 
   /* snapshoot the nodes, including pm counters */
@@ -272,17 +365,17 @@ scrape_and_clear_counters (perfmon_main_t * pm)
           n = nm->nodes[i];
           nodes[i] = clib_mem_alloc (sizeof (*n));
           clib_memcpy_fast (nodes[i], n, sizeof (*n));
-          n->stats_total.perf_counter_ticks = 0;
+          n->stats_total.perf_counter0_ticks = 0;
+          n->stats_total.perf_counter1_ticks = 0;
           n->stats_total.perf_counter_vectors = 0;
-          n->stats_last_clear.perf_counter_ticks = 0;
+          n->stats_last_clear.perf_counter0_ticks = 0;
+          n->stats_last_clear.perf_counter1_ticks = 0;
           n->stats_last_clear.perf_counter_vectors = 0;
         }
     }
 
   vlib_worker_thread_barrier_release (vm);
 
-  current_event = pm->events_to_collect + pm->current_event;
-
   for (j = 0; j < vec_len (vlib_mains); j++)
     {
       stat_vm = vlib_mains[j];
@@ -296,38 +389,69 @@ scrape_and_clear_counters (perfmon_main_t * pm)
           u8 *capture_name;
 
           n = nodes[i];
-          if (n->stats_total.perf_counter_ticks == 0)
-            {
-              clib_mem_free (n);
-              continue;
-            }
-
-          capture_name = format (0, "t%d-%v%c", j, n->name, 0);
-
-          p = hash_get_mem (pm->capture_by_thread_and_node_name,
-                            capture_name);
+          if (n->stats_total.perf_counter0_ticks == 0 &&
+              n->stats_total.perf_counter1_ticks == 0)
+            goto skip_this_node;
 
-          if (p == 0)
+          for (k = 0; k < 2; k++)
             {
-              pool_get (pm->capture_pool, c);
-              memset (c, 0, sizeof (*c));
-              c->thread_and_node_name = capture_name;
-              hash_set_mem (pm->capture_by_thread_and_node_name,
-                            capture_name, c - pm->capture_pool);
+              u64 counter_value, counter_last_clear;
+
+              /*
+               * We collect 2 counters at once, except for the
+               * last counter when the user asks for an odd number of
+               * counters
+               */
+              if ((pm->current_event + k)
+                  >= vec_len (pm->single_events_to_collect))
+                break;
+
+              if (k == 0)
+                {
+                  counter_value = n->stats_total.perf_counter0_ticks;
+                  counter_last_clear =
+                    n->stats_last_clear.perf_counter0_ticks;
+                }
+              else
+                {
+                  counter_value = n->stats_total.perf_counter1_ticks;
+                  counter_last_clear =
+                    n->stats_last_clear.perf_counter1_ticks;
+                }
+
+              capture_name = format (0, "t%d-%v%c", j, n->name, 0);
+
+              p = hash_get_mem (pm->capture_by_thread_and_node_name,
+                                capture_name);
+
+              if (p == 0)
+                {
+                  pool_get (pm->capture_pool, c);
+                  memset (c, 0, sizeof (*c));
+                  c->thread_and_node_name = capture_name;
+                  hash_set_mem (pm->capture_by_thread_and_node_name,
+                                capture_name, c - pm->capture_pool);
+                }
+              else
+                {
+                  c = pool_elt_at_index (pm->capture_pool, p[0]);
+                  vec_free (capture_name);
+                }
+
+              /* Snapshot counters, etc. into the capture */
+              current_event = pm->single_events_to_collect +
+                pm->current_event + k;
+              counter_name = (u8 *) current_event->name;
+              vectors_this_counter = n->stats_total.perf_counter_vectors -
+                n->stats_last_clear.perf_counter_vectors;
+
+              vec_add1 (c->counter_names, counter_name);
+              vec_add1 (c->counter_values,
+                        counter_value - counter_last_clear);
+              vec_add1 (c->vectors_this_counter, vectors_this_counter);
             }
-          else
-            c = pool_elt_at_index (pm->capture_pool, p[0]);
-
-          /* Snapshoot counters, etc. into the capture */
-          counter_name = (u8 *) current_event->name;
-          counter_value = n->stats_total.perf_counter_ticks -
-            n->stats_last_clear.perf_counter_ticks;
-          vectors_this_counter = n->stats_total.perf_counter_vectors -
-            n->stats_last_clear.perf_counter_vectors;
-
-          vec_add1 (c->counter_names, counter_name);
-          vec_add1 (c->counter_values, counter_value);
-          vec_add1 (c->vectors_this_counter, vectors_this_counter);
+        skip_this_node:
           clib_mem_free (n);
         }
       vec_free (nodes);
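The k-loop above records up to two counter deltas per node per pass. The core arithmetic as a standalone sketch; node_stats_t, scrape_node, and the parameters are illustrative stand-ins for the vlib node stats and pm fields:

#include <stdint.h>

/* Per-node snapshot, as in vlib's stats_total / stats_last_clear. */
typedef struct
{
  uint64_t perf_counter0_ticks;
  uint64_t perf_counter1_ticks;
  uint64_t perf_counter_vectors;
} node_stats_t;

/* Compute the deltas the scraper records for one node: up to two
 * counters per pass, the second skipped on the final pass when the
 * user configured an odd number of events. 'current_event' and
 * 'n_events' stand in for pm->current_event and
 * vec_len (pm->single_events_to_collect). Returns the number of
 * deltas written into deltas[]. */
static int
scrape_node (const node_stats_t *total, const node_stats_t *last_clear,
             int current_event, int n_events,
             uint64_t deltas[2], uint64_t *vectors)
{
  int k, n_recorded = 0;

  for (k = 0; k < 2; k++)
    {
      if (current_event + k >= n_events)
        break;
      deltas[k] = (k == 0)
        ? total->perf_counter0_ticks - last_clear->perf_counter0_ticks
        : total->perf_counter1_ticks - last_clear->perf_counter1_ticks;
      n_recorded++;
    }
  *vectors = total->perf_counter_vectors - last_clear->perf_counter_vectors;
  return n_recorded;
}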
@@ -336,40 +460,75 @@ scrape_and_clear_counters (perfmon_main_t * pm)
 }
 
 static void
-handle_timeout (perfmon_main_t * pm, f64 now)
+handle_timeout (vlib_main_t * vm, perfmon_main_t * pm, f64 now)
 {
   int i;
-  disable_event (pm);
+  int last_set, all;
+
+  last_set = clib_bitmap_last_set (pm->thread_bitmap);
+  all = (last_set == ~0);
+
+  if (all || clib_bitmap_get (pm->thread_bitmap, 0))
+    disable_events (pm);
 
   /* And also on worker threads */
   for (i = 1; i < vec_len (vlib_mains); i++)
     {
       if (vlib_mains[i] == 0)
         continue;
-      vlib_mains[i]->worker_thread_main_loop_callback = (void *)
-        worker_thread_stop_event;
+      if (all || clib_bitmap_get (pm->thread_bitmap, i))
+        clib_callback_enable_disable
+          (vlib_mains[i]->worker_thread_main_loop_callbacks,
+           vlib_mains[i]->worker_thread_main_loop_callback_tmp,
+           vlib_mains[i]->worker_thread_main_loop_callback_lock,
+           (void *) worker_thread_stop_event, 1 /* enable */ );
     }
 
-  /* Short delay to make sure workers have stopped collection */
+  /* Make sure workers have stopped collection */
   if (i > 1)
-    vlib_process_suspend (pm->vlib_main, 1e-3);
+    {
+      f64 deadman = vlib_time_now (vm) + 1.0;
+
+      for (i = 1; i < vec_len (vlib_mains); i++)
+        {
+          /* Has the worker actually stopped collecting data? */
+          while (clib_callback_is_set
+                 (vlib_mains[i]->worker_thread_main_loop_callbacks,
+                  vlib_mains[i]->worker_thread_main_loop_callback_lock,
+                  read_current_perf_counters))
+            {
+              if (vlib_time_now (vm) > deadman)
+                {
+                  clib_warning ("Thread %d deadman timeout!", i);
+                  break;
+                }
+              vlib_process_suspend (pm->vlib_main, 1e-3);
+            }
+        }
+    }
 
   scrape_and_clear_counters (pm);
-  pm->current_event++;
-  if (pm->current_event >= vec_len (pm->events_to_collect))
+  pm->current_event += pm->n_active;
+  if (pm->current_event >= vec_len (pm->single_events_to_collect))
     {
       pm->current_event = 0;
       pm->state = PERFMON_STATE_OFF;
       return;
     }
-  enable_current_event (pm);
+
+  if (all || clib_bitmap_get (pm->thread_bitmap, 0))
+    enable_current_events (pm);
 
   /* And also on worker threads */
   for (i = 1; i < vec_len (vlib_mains); i++)
     {
       if (vlib_mains[i] == 0)
         continue;
-      vlib_mains[i]->worker_thread_main_loop_callback = (void *)
-        worker_thread_start_event;
+      if (all || clib_bitmap_get (pm->thread_bitmap, i))
+        clib_callback_enable_disable
+          (vlib_mains[i]->worker_thread_main_loop_callbacks,
+           vlib_mains[i]->worker_thread_main_loop_callback_tmp,
+           vlib_mains[i]->worker_thread_main_loop_callback_lock,
+           worker_thread_start_event, 1 /* enable */ );
     }
 }
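The old fixed 1 ms sleep becomes a bounded handshake: poll until each worker's read callback is gone, giving up at a one-second deadline shared by all workers. A sketch with the VPP calls abstracted into function pointers; now, callback_still_set, and yield stand in for vlib_time_now(), clib_callback_is_set(), and the 1 ms vlib_process_suspend():

/* Returns 0 on a clean stop, -1 if the deadline expired first. */
static int
wait_for_workers_to_stop (double (*now) (void),
                          int (*callback_still_set) (int thread),
                          void (*yield) (void), int n_threads)
{
  double deadman = now () + 1.0;        /* one deadline for all workers */
  int i, rv = 0;

  for (i = 1; i < n_threads; i++)
    while (callback_still_set (i))
      {
        if (now () > deadman)
          {
            rv = -1;                    /* worker never stopped; warn */
            break;
          }
        yield ();                       /* ~1 ms suspend in the patch */
      }
  return rv;
}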
@@ -403,7 +562,7 @@ perfmon_periodic_process (vlib_main_t * vm,
 
           /* Handle timeout */
         case ~0:
-          handle_timeout (pm, now);
+          handle_timeout (vm, pm, now);
           break;
 
         default:
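One convention runs through start_event() and handle_timeout() above: an empty pm->thread_bitmap (clib_bitmap_last_set() returning ~0) means "collect on all threads". The same test in a standalone sketch, using a plain 64-bit mask in place of a clib bitmap (so it assumes at most 64 threads):

#include <stdint.h>

/* Empty selection means "collect on every thread", matching the
 * all = (clib_bitmap_last_set (bitmap) == ~0) test above. */
static int
thread_selected (uint64_t thread_mask, int thread_index)
{
  if (thread_mask == 0)                 /* nothing set: all threads */
    return 1;
  return (int) ((thread_mask >> thread_index) & 1);
}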