summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Benoît Ganne <bganne@cisco.com> 2021-08-05 11:47:52 +0200
committer Damjan Marion <dmarion@me.com> 2021-08-20 11:22:29 +0000
commit 4e3af51a66384295eec5b1cf980ef4d88b949e1d (patch)
tree c7b3db4ba08b676099adbcf830232f7476fd37a2
parent 03f2a015999ed9ba34041afb408a22ea5fe601ff (diff)
perfmon: fix perf event user page read
When mmap()-ing a perf event in userspace, we must adhere to the kernel update protocol to read consistent values. Also, 'offset' is an offset to add to the counter value, not to apply to the PMC index.

Type: fix
Change-Id: I59106bb3a48185ff3fcb0d2f09097269a67bb6d6
Signed-off-by: Benoît Ganne <bganne@cisco.com>
-rw-r--r-- src/plugins/perfmon/dispatch_wrapper.c 121
1 files changed, 76 insertions, 45 deletions
diff --git a/src/plugins/perfmon/dispatch_wrapper.c b/src/plugins/perfmon/dispatch_wrapper.c
index fe0a449df99..f5972f667a2 100644
--- a/src/plugins/perfmon/dispatch_wrapper.c
+++ b/src/plugins/perfmon/dispatch_wrapper.c
@@ -25,42 +25,64 @@
#include <perfmon/perfmon.h>
+static_always_inline u64
+perfmon_mmap_read_pmc1 (const struct perf_event_mmap_page *mmap_page)
+{
+ u64 count;
+ u32 seq;
+
+ /* See the documentation in /usr/include/linux/perf_event.h for more
+ * details, but the two most important points are:
+ * 1) if seq != mmap_page->lock after the read, the kernel updated the
+ * user page while we were reading it and we need to read it again
+ * 2) if idx == 0, it means the perf event is currently turned off and we
+ * just need to read the kernel-updated 'offset', otherwise we must also
+ * add the current hw value (hence rdpmc) */
+ do
+ {
+ u32 idx;
+
+ seq = mmap_page->lock;
+ CLIB_COMPILER_BARRIER ();
+
+ idx = mmap_page->index;
+ count = mmap_page->offset;
+ if (idx)
+ count += _rdpmc (idx - 1);
+
+ CLIB_COMPILER_BARRIER ();
+ }
+ while (mmap_page->lock != seq);
+
+ return count;
+}
+
static_always_inline void
-perfmon_read_pmcs (u64 *counters, int *pmc_index, u8 n_counters)
+perfmon_mmap_read_pmcs (u64 *counters,
+ struct perf_event_mmap_page **mmap_pages,
+ u8 n_counters)
{
switch (n_counters)
{
default:
case 7:
- counters[6] = _rdpmc (pmc_index[6]);
+ counters[6] = perfmon_mmap_read_pmc1 (mmap_pages[6]);
case 6:
- counters[5] = _rdpmc (pmc_index[5]);
+ counters[5] = perfmon_mmap_read_pmc1 (mmap_pages[5]);
case 5:
- counters[4] = _rdpmc (pmc_index[4]);
+ counters[4] = perfmon_mmap_read_pmc1 (mmap_pages[4]);
case 4:
- counters[3] = _rdpmc (pmc_index[3]);
+ counters[3] = perfmon_mmap_read_pmc1 (mmap_pages[3]);
case 3:
- counters[2] = _rdpmc (pmc_index[2]);
+ counters[2] = perfmon_mmap_read_pmc1 (mmap_pages[2]);
case 2:
- counters[1] = _rdpmc (pmc_index[1]);
+ counters[1] = perfmon_mmap_read_pmc1 (mmap_pages[1]);
case 1:
- counters[0] = _rdpmc (pmc_index[0]);
+ counters[0] = perfmon_mmap_read_pmc1 (mmap_pages[0]);
break;
}
}
-static_always_inline int
-perfmon_calc_mmap_offset (perfmon_thread_runtime_t *tr, u8 i)
-{
- return (int) (tr->mmap_pages[i]->index + tr->mmap_pages[i]->offset);
-}
-
-static_always_inline int
-perfmon_metric_index (perfmon_bundle_t *b, u8 i)
-{
- return (int) (b->metrics[i]);
-}
-
uword
perfmon_dispatch_wrapper_mmap (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame)
@@ -75,34 +97,13 @@ perfmon_dispatch_wrapper_mmap (vlib_main_t *vm, vlib_node_runtime_t *node,
u64 before[PERF_MAX_EVENTS];
u64 after[PERF_MAX_EVENTS];
- int pmc_index[PERF_MAX_EVENTS];
uword rv;
clib_prefetch_load (s);
- switch (n_events)
- {
- default:
- case 7:
- pmc_index[6] = perfmon_calc_mmap_offset (rt, 6);
- case 6:
- pmc_index[5] = perfmon_calc_mmap_offset (rt, 5);
- case 5:
- pmc_index[4] = perfmon_calc_mmap_offset (rt, 4);
- case 4:
- pmc_index[3] = perfmon_calc_mmap_offset (rt, 3);
- case 3:
- pmc_index[2] = perfmon_calc_mmap_offset (rt, 2);
- case 2:
- pmc_index[1] = perfmon_calc_mmap_offset (rt, 1);
- case 1:
- pmc_index[0] = perfmon_calc_mmap_offset (rt, 0);
- break;
- }
-
- perfmon_read_pmcs (&before[0], pmc_index, n_events);
+ perfmon_mmap_read_pmcs (&before[0], rt->mmap_pages, n_events);
rv = node->function (vm, node, frame);
- perfmon_read_pmcs (&after[0], pmc_index, n_events);
+ perfmon_mmap_read_pmcs (&after[0], rt->mmap_pages, n_events);
if (rv == 0)
return rv;
@@ -116,6 +117,36 @@ perfmon_dispatch_wrapper_mmap (vlib_main_t *vm, vlib_node_runtime_t *node,
return rv;
}
+static_always_inline void
+perfmon_metric_read_pmcs (u64 *counters, int *pmc_index, u8 n_counters)
+{
+ switch (n_counters)
+ {
+ default:
+ case 7:
+ counters[6] = _rdpmc (pmc_index[6]);
+ case 6:
+ counters[5] = _rdpmc (pmc_index[5]);
+ case 5:
+ counters[4] = _rdpmc (pmc_index[4]);
+ case 4:
+ counters[3] = _rdpmc (pmc_index[3]);
+ case 3:
+ counters[2] = _rdpmc (pmc_index[2]);
+ case 2:
+ counters[1] = _rdpmc (pmc_index[1]);
+ case 1:
+ counters[0] = _rdpmc (pmc_index[0]);
+ break;
+ }
+}
+
+static_always_inline int
+perfmon_metric_index (perfmon_bundle_t *b, u8 i)
+{
+ return (int) (b->metrics[i]);
+}
+
uword
perfmon_dispatch_wrapper_metrics (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame)
@@ -154,11 +185,11 @@ perfmon_dispatch_wrapper_metrics (vlib_main_t *vm, vlib_node_runtime_t *node,
break;
}
- perfmon_read_pmcs (&before[0], pmc_index, n_events);
+ perfmon_metric_read_pmcs (&before[0], pmc_index, n_events);
rv = node->function (vm, node, frame);
clib_memcpy_fast (&s->t[0].value[0], &before, sizeof (before));
- perfmon_read_pmcs (&s->t[1].value[0], pmc_index, n_events);
+ perfmon_metric_read_pmcs (&s->t[1].value[0], pmc_index, n_events);
if (rv == 0)
return rv;