From: Benoît Ganne Date: Thu, 5 Aug 2021 09:47:52 +0000 (+0200) Subject: perfmon: fix perf event user page read X-Git-Tag: v22.02-rc0~110 X-Git-Url: https://gerrit.fd.io/r/gitweb?a=commitdiff_plain;h=4e3af51a66384295eec5b1cf980ef4d88b949e1d;p=vpp.git perfmon: fix perf event user page read When mmap()-ing perf event in userspace, we must adhere to the kernel update protocol to read consistent values. Also, 'offset' is an offset to add to the counter value, not to apply to the PMC index. Type: fix Change-Id: I59106bb3a48185ff3fcb0d2f09097269a67bb6d6 Signed-off-by: Benoît Ganne --- diff --git a/src/plugins/perfmon/dispatch_wrapper.c b/src/plugins/perfmon/dispatch_wrapper.c index fe0a449df99..f5972f667a2 100644 --- a/src/plugins/perfmon/dispatch_wrapper.c +++ b/src/plugins/perfmon/dispatch_wrapper.c @@ -25,42 +25,64 @@ #include +static_always_inline u64 +perfmon_mmap_read_pmc1 (const struct perf_event_mmap_page *mmap_page) +{ + u64 count; + u32 seq; + + /* See documentation in /usr/include/linux/perf_event.h, for more details + * but the 2 main important things are: + * 1) if seq != mmap_page->lock, it means the kernel is currently updating + * the user page and we need to read it again + * 2) if idx == 0, it means the perf event is currently turned off and we + * just need to read the kernel-updated 'offset', otherwise we must also + * add the current hw value (hence rdmpc) */ + do + { + u32 idx; + + seq = mmap_page->lock; + CLIB_COMPILER_BARRIER (); + + idx = mmap_page->index; + count = mmap_page->offset; + if (idx) + count += _rdpmc (idx - 1); + + CLIB_COMPILER_BARRIER (); + } + while (mmap_page->lock != seq); + + return count; +} + static_always_inline void -perfmon_read_pmcs (u64 *counters, int *pmc_index, u8 n_counters) +perfmon_mmap_read_pmcs (u64 *counters, + struct perf_event_mmap_page **mmap_pages, + u8 n_counters) { switch (n_counters) { default: case 7: - counters[6] = _rdpmc (pmc_index[6]); + counters[6] = perfmon_mmap_read_pmc1 (mmap_pages[6]); case 6: - counters[5] = _rdpmc (pmc_index[5]); + counters[5] = perfmon_mmap_read_pmc1 (mmap_pages[5]); case 5: - counters[4] = _rdpmc (pmc_index[4]); + counters[4] = perfmon_mmap_read_pmc1 (mmap_pages[4]); case 4: - counters[3] = _rdpmc (pmc_index[3]); + counters[3] = perfmon_mmap_read_pmc1 (mmap_pages[3]); case 3: - counters[2] = _rdpmc (pmc_index[2]); + counters[2] = perfmon_mmap_read_pmc1 (mmap_pages[2]); case 2: - counters[1] = _rdpmc (pmc_index[1]); + counters[1] = perfmon_mmap_read_pmc1 (mmap_pages[1]); case 1: - counters[0] = _rdpmc (pmc_index[0]); + counters[0] = perfmon_mmap_read_pmc1 (mmap_pages[0]); break; } } -static_always_inline int -perfmon_calc_mmap_offset (perfmon_thread_runtime_t *tr, u8 i) -{ - return (int) (tr->mmap_pages[i]->index + tr->mmap_pages[i]->offset); -} - -static_always_inline int -perfmon_metric_index (perfmon_bundle_t *b, u8 i) -{ - return (int) (b->metrics[i]); -} - uword perfmon_dispatch_wrapper_mmap (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) @@ -75,34 +97,13 @@ perfmon_dispatch_wrapper_mmap (vlib_main_t *vm, vlib_node_runtime_t *node, u64 before[PERF_MAX_EVENTS]; u64 after[PERF_MAX_EVENTS]; - int pmc_index[PERF_MAX_EVENTS]; uword rv; clib_prefetch_load (s); - switch (n_events) - { - default: - case 7: - pmc_index[6] = perfmon_calc_mmap_offset (rt, 6); - case 6: - pmc_index[5] = perfmon_calc_mmap_offset (rt, 5); - case 5: - pmc_index[4] = perfmon_calc_mmap_offset (rt, 4); - case 4: - pmc_index[3] = perfmon_calc_mmap_offset (rt, 3); - case 3: - pmc_index[2] = perfmon_calc_mmap_offset (rt, 2); - case 2: - pmc_index[1] = perfmon_calc_mmap_offset (rt, 1); - case 1: - pmc_index[0] = perfmon_calc_mmap_offset (rt, 0); - break; - } - - perfmon_read_pmcs (&before[0], pmc_index, n_events); + perfmon_mmap_read_pmcs (&before[0], rt->mmap_pages, n_events); rv = node->function (vm, node, frame); - perfmon_read_pmcs (&after[0], pmc_index, n_events); + perfmon_mmap_read_pmcs (&after[0], rt->mmap_pages, n_events); if (rv == 0) return rv; @@ -116,6 +117,36 @@ perfmon_dispatch_wrapper_mmap (vlib_main_t *vm, vlib_node_runtime_t *node, return rv; } +static_always_inline void +perfmon_metric_read_pmcs (u64 *counters, int *pmc_index, u8 n_counters) +{ + switch (n_counters) + { + default: + case 7: + counters[6] = _rdpmc (pmc_index[6]); + case 6: + counters[5] = _rdpmc (pmc_index[5]); + case 5: + counters[4] = _rdpmc (pmc_index[4]); + case 4: + counters[3] = _rdpmc (pmc_index[3]); + case 3: + counters[2] = _rdpmc (pmc_index[2]); + case 2: + counters[1] = _rdpmc (pmc_index[1]); + case 1: + counters[0] = _rdpmc (pmc_index[0]); + break; + } +} + +static_always_inline int +perfmon_metric_index (perfmon_bundle_t *b, u8 i) +{ + return (int) (b->metrics[i]); +} + uword perfmon_dispatch_wrapper_metrics (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) @@ -154,11 +185,11 @@ perfmon_dispatch_wrapper_metrics (vlib_main_t *vm, vlib_node_runtime_t *node, break; } - perfmon_read_pmcs (&before[0], pmc_index, n_events); + perfmon_metric_read_pmcs (&before[0], pmc_index, n_events); rv = node->function (vm, node, frame); clib_memcpy_fast (&s->t[0].value[0], &before, sizeof (before)); - perfmon_read_pmcs (&s->t[1].value[0], pmc_index, n_events); + perfmon_metric_read_pmcs (&s->t[1].value[0], pmc_index, n_events); if (rv == 0) return rv;