summaryrefslogtreecommitdiffstats
path: root/src/plugins/perfmon/arm
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/perfmon/arm')
-rw-r--r--src/plugins/perfmon/arm/dispatch_wrapper.c142
-rw-r--r--src/plugins/perfmon/arm/dispatch_wrapper.h18
-rw-r--r--src/plugins/perfmon/arm/events.c227
-rw-r--r--src/plugins/perfmon/arm/events.h130
4 files changed, 517 insertions, 0 deletions
diff --git a/src/plugins/perfmon/arm/dispatch_wrapper.c b/src/plugins/perfmon/arm/dispatch_wrapper.c
new file mode 100644
index 00000000000..df79bcd8631
--- /dev/null
+++ b/src/plugins/perfmon/arm/dispatch_wrapper.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <linux/limits.h>
+#include <sys/ioctl.h>
+
+#include <perfmon/perfmon.h>
+
+/*
+ * Data memory barrier (inner shareable domain).  The "memory" clobber also
+ * stops the compiler from moving memory accesses across it.
+ *
+ * The expansion deliberately has no trailing semicolon: the caller supplies
+ * it, so `barrier ();` is exactly one statement and stays safe inside an
+ * unbraced if/else.
+ */
+#define barrier() asm volatile("dmb ish" : : : "memory")
+
+/* signed 64-bit integer, used for sign-extending raw counter values */
+typedef int64_t s64;
+
+/*
+ * Read a PMU counter register directly from user space.
+ *
+ * pmc_idx: counter index.  Index 31 is read from pmccntr_el0; any other
+ *          index is first written to pmselr_el0 and the value is then read
+ *          back through pmxevcntr_el0.
+ *
+ * Returns the raw 64-bit value delivered by the mrs instruction.
+ *
+ * NOTE(review): there is no isb between the pmselr_el0 write and the
+ * pmxevcntr_el0 read - confirm whether a context synchronization event is
+ * required there for the new selection to be observed.
+ */
+static_always_inline u64
+get_pmc_register (u32 pmc_idx)
+{
+ u64 value = 0;
+ if (pmc_idx == 31)
+ /* i.e. CPU Cycle event code 0x11 - need to read via pmccntr_el0 */
+ asm volatile("mrs %x0, pmccntr_el0" : "=r"(value));
+ else
+ {
+ /* set event register 0x0-0x1F */
+ asm volatile("msr pmselr_el0, %x0" : : "r"((pmc_idx)));
+ /* get register value */
+ asm volatile("mrs %x0, pmxevcntr_el0" : "=r"(value));
+ }
+ /* instruction barrier issued after the counter read, on both paths */
+ asm volatile("isb" : : : "memory");
+ return value;
+}
+
+/*
+ * Read the current count of one perf event from user space through its
+ * mmap'ed perf_event_mmap_page.
+ *
+ * The page fields are sampled between two reads of pc->lock; if the lock
+ * value changed in between, the sample is discarded and taken again.  When
+ * the page advertises cap_user_rdpmc and a non-zero index, hardware counter
+ * (index - 1) is read, sign-extended from pc->pmc_width bits and added to
+ * pc->offset.  Otherwise only pc->offset is returned.
+ *
+ * pc: mmap'ed control page of the event.
+ * Returns the 64-bit event count.
+ */
+static_always_inline u64
+read_pmc_from_mmap (struct perf_event_mmap_page *pc)
+{
+  u32 seq, idx, width;
+  u64 offset = 0;
+  s64 pmc = 0;
+
+  do
+    {
+      /*
+       * Start every attempt from a clean slate: if a retry finds that the
+       * event is no longer on a hardware counter (idx == 0), a value read
+       * during the discarded attempt must not leak into the result.
+       */
+      pmc = 0;
+
+      seq = pc->lock;
+      barrier ();
+      idx = pc->index;
+      offset = pc->offset;
+      if (pc->cap_user_rdpmc && idx)
+        {
+          width = pc->pmc_width;
+          pmc = get_pmc_register (idx - 1);
+          /*
+           * Keep only the low `width` bits of the raw register value and
+           * sign-extend them: the left shift discards the upper bits (done
+           * on the unsigned representation to avoid shifting a signed
+           * value), the signed right shift brings the value back.
+           */
+          pmc = (s64) ((u64) pmc << (64 - width));
+          pmc >>= 64 - width;
+        }
+      barrier ();
+    }
+  while (pc->lock != seq);
+
+  return pmc + offset;
+}
+
+/*
+ * Read the first n_counters event counters of a thread into counters[].
+ *
+ * counters:   output array; slot i receives the count of rt->mmap_pages[i].
+ * rt:         per-thread runtime holding the mmap'ed perf pages.
+ * n_counters: number of counters to read.  The switch is an unrolled,
+ *             intentionally falling-through sequence that reads the
+ *             counters from the highest index down to 0.
+ *
+ * Any n_counters outside 1..7 (0 included) takes the default label and reads
+ * all 7 counters, so counters[] and rt->mmap_pages[] must hold 7 entries in
+ * that case.
+ */
+static_always_inline void
+perfmon_read_pmcs (u64 *counters, perfmon_thread_runtime_t *rt, u8 n_counters)
+{
+ switch (n_counters)
+ {
+ default:
+ case 7:
+ counters[6] = read_pmc_from_mmap (rt->mmap_pages[6]);
+ /* fallthrough */
+ case 6:
+ counters[5] = read_pmc_from_mmap (rt->mmap_pages[5]);
+ /* fallthrough */
+ case 5:
+ counters[4] = read_pmc_from_mmap (rt->mmap_pages[4]);
+ /* fallthrough */
+ case 4:
+ counters[3] = read_pmc_from_mmap (rt->mmap_pages[3]);
+ /* fallthrough */
+ case 3:
+ counters[2] = read_pmc_from_mmap (rt->mmap_pages[2]);
+ /* fallthrough */
+ case 2:
+ counters[1] = read_pmc_from_mmap (rt->mmap_pages[1]);
+ /* fallthrough */
+ case 1:
+ counters[0] = read_pmc_from_mmap (rt->mmap_pages[0]);
+ break;
+ }
+}
+
+/*
+ * Node dispatch wrapper: samples the event counters right before and right
+ * after calling the wrapped node function and adds the differences to the
+ * per-thread, per-node statistics.
+ *
+ * Calls for which the node function returns 0 are not accounted at all.
+ * Returns the value returned by the node function.
+ */
+uword
+perfmon_dispatch_wrapper (vlib_main_t *vm, vlib_node_runtime_t *node,
+                          vlib_frame_t *frame)
+{
+  perfmon_thread_runtime_t *rt =
+    vec_elt_at_index (perfmon_main.thread_runtimes, vm->thread_index);
+  perfmon_node_stats_t *stats =
+    vec_elt_at_index (rt->node_stats, node->node_index);
+  u8 n_events = rt->n_events;
+  u64 start[n_events];
+  u64 end[n_events];
+  uword n_vectors;
+  u32 ev;
+
+  clib_prefetch_load (stats);
+
+  /* keep the two samples as close to the node function as possible */
+  perfmon_read_pmcs (start, rt, n_events);
+  n_vectors = node->function (vm, node, frame);
+  perfmon_read_pmcs (end, rt, n_events);
+
+  if (n_vectors != 0)
+    {
+      stats->n_calls += 1;
+      stats->n_packets += n_vectors;
+
+      for (ev = 0; ev < n_events; ev++)
+        stats->value[ev] += end[ev] - start[ev];
+    }
+
+  return n_vectors;
+}
+
+/*
+ * Hand the Arm dispatch wrapper to the caller.
+ *
+ * b:                bundle being configured (not consulted).
+ * dispatch_wrapper: out parameter, set to perfmon_dispatch_wrapper.
+ *
+ * Always returns 0 (no error).
+ */
+clib_error_t *
+arm_config_dispatch_wrapper (perfmon_bundle_t *b,
+                             vlib_node_function_t **dispatch_wrapper)
+{
+  clib_error_t *no_error = 0;
+
+  *dispatch_wrapper = perfmon_dispatch_wrapper;
+
+  return no_error;
+}
diff --git a/src/plugins/perfmon/arm/dispatch_wrapper.h b/src/plugins/perfmon/arm/dispatch_wrapper.h
new file mode 100644
index 00000000000..903971f8b5e
--- /dev/null
+++ b/src/plugins/perfmon/arm/dispatch_wrapper.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+clib_error_t *
+arm_config_dispatch_wrapper (perfmon_bundle_t *b,
+ vlib_node_function_t **dispatch_wrapper);
diff --git a/src/plugins/perfmon/arm/events.c b/src/plugins/perfmon/arm/events.c
new file mode 100644
index 00000000000..bf73ad6e896
--- /dev/null
+++ b/src/plugins/perfmon/arm/events.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+#include <perfmon/arm/dispatch_wrapper.h>
+#include <linux/perf_event.h>
+#include <dirent.h>
+
+/* log class used by every message emitted from this file */
+VLIB_REGISTER_LOG_CLASS (if_default_log, static) = {
+ .class_name = "perfmon",
+};
+
+/*
+ * Convenience wrappers that log to the class registered above.
+ * Because the expansion is `fmt, __VA_ARGS__`, each call must pass at least
+ * one argument after the format string.
+ */
+#define log_debug(fmt, ...) \
+ vlib_log_debug (if_default_log.class, fmt, __VA_ARGS__)
+#define log_warn(fmt, ...) \
+ vlib_log_warn (if_default_log.class, fmt, __VA_ARGS__)
+#define log_err(fmt, ...) vlib_log_err (if_default_log.class, fmt, __VA_ARGS__)
+
+/*
+ * Table of Arm PMUv3 events, indexed by perf_arm_event_t and generated from
+ * foreach_perf_arm_event.  Every entry is a raw perf event that excludes
+ * kernel-mode counting.
+ *
+ * Meaning of config1:
+ *   config1 = 2 : user access enabled and always 32-bit
+ *   config1 = 3 : user access enabled and always 64-bit
+ *
+ * Whether 64-bit counters are supported cannot be discovered up front, so
+ * the table asks for 64-bit counters; the intent is to fall back to 32-bit
+ * when perf_event_open returns EOPNOTSUPP.
+ */
+static perfmon_event_t events[] = {
+#define _(code, sym, text) \
+  [ARMV8_PMUV3_##sym] = { \
+    .name = #sym, \
+    .description = text, \
+    .type = PERF_TYPE_RAW, \
+    .config = code, \
+    .config1 = 3, \
+    .exclude_kernel = 1, \
+  },
+  foreach_perf_arm_event
+#undef _
+};
+
+/*
+ * Format callback for an Arm event config value.
+ *
+ * s:    vector to append to.
+ * args: va_list holding a single u64 config value.
+ *
+ * Appends "event=0x??" built from the low 8 bits of the config and returns
+ * the (possibly reallocated) vector.
+ */
+u8 *
+format_arm_config (u8 *s, va_list *args)
+{
+  u64 config = va_arg (*args, u64);
+
+  /*
+   * "%x" without a length modifier consumes an unsigned int, so pass the
+   * masked value as a 32-bit quantity instead of a u64.
+   */
+  return format (s, "event=0x%02x", (u32) (config & 0xff));
+}
+
+/*
+ * Initialise the Arm perfmon source.
+ *
+ * 1. Verifies that user-space access to the PMU counters is enabled through
+ *    /proc/sys/kernel/perf_user_access.
+ * 2. Walks the sysfs event directory of armv8_pmuv3_0 and records which
+ *    event codes the PMU advertises.
+ * 3. Marks every entry of events[] whose code was found as implemented.
+ *
+ * vm, src: not used.
+ *
+ * Returns 0 on success, or an error (owned by the caller) when user access
+ * is unavailable or the sysfs event directory cannot be opened.  Problems
+ * with individual event files are logged and skipped.
+ */
+static clib_error_t *
+arm_init (vlib_main_t *vm, perfmon_source_t *src)
+{
+  /* event codes 0x00..0xFF are tracked */
+  const u32 n_event_codes = 256;
+  char *path = "/proc/sys/kernel/perf_user_access";
+  const char *event_path =
+    "/sys/bus/event_source/devices/armv8_pmuv3_0/events";
+  /* "%u" stores a 32-bit value, so the destination must be a u32 */
+  u32 perf_user_access_enabled = 0;
+  struct dirent *dir_entry;
+  uword *bitmap = NULL;
+  clib_error_t *err;
+  DIR *event_dir;
+
+  /*
+     check /proc/sys/kernel/perf_user_access flag to check if userspace
+     access to perf counters is enabled (disabled by default)
+     - if this file doesn't exist, we are on an unsupported kernel ver
+     - if the file exists and is 0, user access needs to be granted
+       with 'sudo sysctl kernel/perf_user_access=1'
+   */
+  err = clib_sysfs_read (path, "%u", &perf_user_access_enabled);
+  if (err)
+    {
+      /* the read error is replaced by a friendlier one: release it first */
+      int saved_errno = errno;
+      int code = err->code;
+
+      clib_error_free (err);
+
+      if (code == ENOENT) /* No such file or directory */
+        {
+          return clib_error_create (
+            "linux kernel version is unsupported, please upgrade to v5.17+ "
+            "- user access to perf counters is not possible");
+        }
+      errno = saved_errno;
+      return clib_error_return_unix (0, "failed to read: %s", path);
+    }
+
+  if (perf_user_access_enabled != 1)
+    {
+      return clib_error_create (
+        "user access to perf counters is not enabled: run"
+        " \'sudo sysctl kernel/perf_user_access=1\'");
+    }
+  log_debug ("user access to perf counters is enabled in %s", path);
+
+  /* open the directory before allocating, so failure has nothing to free */
+  event_dir = opendir (event_path);
+  if (event_dir == NULL)
+    {
+      err =
+        clib_error_return_unix (0, "error listing directory: %s", event_path);
+      log_err ("%U", format_clib_error, err);
+      return err;
+    }
+
+  /*
+     perfmon/arm/events.h has up to 0xFF/256 possible PMUv3 event codes
+     supported - create a bitmap to store whether each event is
+     implemented or not
+   */
+  clib_bitmap_alloc (bitmap, n_event_codes);
+
+  while ((dir_entry = readdir (event_dir)) != NULL)
+    {
+      unformat_input_t input;
+      u8 *tmpstr = NULL;
+      u8 *s = NULL;
+      u32 config;
+
+      /* skip ".", ".." and hidden entries */
+      if (dir_entry->d_name[0] == '.')
+        continue;
+
+      s = format (s, "%s/%s%c", event_path, dir_entry->d_name, 0);
+      err = clib_sysfs_read ((char *) s, "%s", &tmpstr);
+      vec_free (s);
+      if (err)
+        {
+          log_err ("%U", format_clib_error, err);
+          clib_error_free (err);
+          continue;
+        }
+
+      /* input takes ownership of tmpstr; unformat_free releases it below */
+      unformat_init_vector (&input, tmpstr);
+      if (unformat (&input, "event=0x%x", &config))
+        {
+          /* it's possible to have event codes up to 0xFFFF */
+          if (config < n_event_codes) /* perfmon supports up to 0xFF */
+            {
+              clib_bitmap_set (bitmap, config, 1);
+            }
+          log_debug ("found supported event in sysfs: %s \'%s\' 0x%x",
+                     dir_entry->d_name, tmpstr, config);
+        }
+      else
+        {
+          err = clib_error_create ("error parsing event: %s %s",
+                                   dir_entry->d_name, tmpstr);
+          log_err ("%U", format_clib_error, err);
+          clib_error_free (err);
+        }
+      unformat_free (&input);
+    }
+  closedir (event_dir);
+
+  for (u32 i = 0; i < ARRAY_LEN (events); i++)
+    {
+      if (clib_bitmap_get (bitmap, events[i].config))
+        events[i].implemented = 1;
+    }
+  clib_bitmap_free (bitmap);
+
+  return 0;
+}
+
+/*
+ * Decide whether a bundle can be used on this CPU.
+ *
+ * Fills b->event_disabled with one bit per bundle event that is not
+ * implemented.  If every event is disabled the bundle is rejected (returns
+ * 0).  Otherwise b->column_disabled is allocated and, when the bundle
+ * declares column_events, every column that depends on a disabled event is
+ * flagged; the function then returns 1.
+ */
+u8
+arm_bundle_supported (perfmon_bundle_t *b)
+{
+  u32 ev, col, off_ev;
+
+  clib_bitmap_alloc (b->event_disabled, b->n_events);
+  for (ev = 0; ev < b->n_events; ev++)
+    {
+      perfmon_event_t *pe = &b->src->events[b->events[ev]];
+
+      if (pe->implemented)
+        continue;
+
+      log_debug (
+        "bundle \'%s\': perf event %s is not implemented on this CPU",
+        b->name, pe->name);
+      clib_bitmap_set (b->event_disabled, ev, 1);
+    }
+
+  /* if no events are implemented, fail and do not register bundle */
+  if (clib_bitmap_count_set_bits (b->event_disabled) == b->n_events)
+    return 0;
+
+  /* disable columns that use unimplemented events */
+  clib_bitmap_alloc (b->column_disabled, b->n_columns);
+  if (!b->column_events)
+    return 1;
+
+  /* visit each disabled event, then each column not yet disabled */
+  clib_bitmap_foreach (off_ev, b->event_disabled)
+    {
+      for (col = 0; col < b->n_columns; col++)
+        {
+          if (clib_bitmap_get (b->column_disabled, col))
+            continue;
+
+          if (GET_BIT (b->column_events[col], off_ev))
+            {
+              clib_bitmap_set (b->column_disabled, col, 1);
+              log_debug (
+                "bundle \'%s\': disabling column %d as event unsupported",
+                b->name, col);
+            }
+        }
+    }
+
+  return 1;
+}
+
+/*
+ * Registration of the "arm" perfmon source: ties the event table and the
+ * callbacks defined in this file (init, config formatting, bundle support
+ * check) together with the Arm dispatch wrapper configuration.
+ */
+PERFMON_REGISTER_SOURCE (arm) = {
+ .name = "arm",
+ .description = "Arm PMU events",
+ .events = events,
+ .n_events = ARRAY_LEN (events),
+ .init_fn = arm_init,
+ .format_config = format_arm_config,
+ .bundle_support = arm_bundle_supported,
+ .config_dispatch_wrapper = arm_config_dispatch_wrapper,
+};
diff --git a/src/plugins/perfmon/arm/events.h b/src/plugins/perfmon/arm/events.h
new file mode 100644
index 00000000000..5b7c49801d0
--- /dev/null
+++ b/src/plugins/perfmon/arm/events.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __perfmon_arm_h
+#define __perfmon_arm_h
+
+/*
+ * Events from the Armv8 PMUv3 - See "Arm Architecture Reference Manual Armv8,
+ * for Armv8-A architecture profile" D7.10 PMU events and event numbers:
+ * https://developer.arm.com/documentation/ddi0487/latest/
+ *
+ * X-macro list: each entry is _ (EventCode, name, description).  Users
+ * define `_` before expanding the list and #undef it afterwards.  The
+ * descriptions are string literals that get concatenated, so a literal that
+ * continues on the next line must carry the separating space itself.
+ */
+#define foreach_perf_arm_event \
+  _ (0x0D, BR_IMMED_RETIRED, "Immediate branch architecturally executed") \
+  _ (0x10, BR_MIS_PRED, \
+     "Mispredicted or not predicted branch Speculatively executed") \
+  _ (0x22, BR_MIS_PRED_RETIRED, \
+     "Instruction architecturally executed, mispredicted branch") \
+  _ (0x12, BR_PRED, "Predictable branch Speculatively executed") \
+  _ (0x21, BR_RETIRED, "Branch instruction architecturally executed") \
+  _ (0x0E, BR_RETURN_RETIRED, \
+     "Function return instruction architecturally executed and the " \
+     "condition code check pass") \
+  _ (0x19, BUS_ACCESS, "Attributable Bus access") \
+  _ (0x1D, BUS_CYCLES, "Bus cycle") \
+  _ (0x1E, CHAIN, \
+     "For an odd numbered counter, increment when an overflow occurs on " \
+     "the preceding even-numbered counter on the same PE") \
+  _ (0x0B, CID_WRITE_RETIRED, \
+     "Instruction architecturally executed, Condition code check pass, " \
+     "write to CONTEXTIDR") \
+  _ (0x11, CPU_CYCLES, "Cycle counter") \
+  _ (0x34, DTLB_WALK, \
+     "Access to data or unified TLB causes a translation table walk") \
+  _ (0x0A, EXC_RETURN, \
+     "Exception return instruction architecturally executed and the " \
+     "condition code check pass") \
+  _ (0x09, EXC_TAKEN, "Exception entry") \
+  _ (0x08, INST_RETIRED, "Instruction architecturally executed") \
+  _ (0x1B, INST_SPEC, "Operation Speculatively executed") \
+  _ (0x35, ITLB_WALK, \
+     "Access to instruction TLB that causes a translation table walk") \
+  _ (0x04, L1D_CACHE, "Level 1 data cache access") \
+  _ (0x1F, L1D_CACHE_ALLOCATE, \
+     "Level 1 data cache allocation without refill") \
+  _ (0x39, L1D_CACHE_LMISS_RD, "Level 1 data cache long-latency read miss") \
+  _ (0x03, L1D_CACHE_REFILL, "Level 1 data cache refill") \
+  _ (0x15, L1D_CACHE_WB, "Attributable Level 1 data cache write-back") \
+  _ (0x25, L1D_TLB, "Level 1 data or unified TLB access") \
+  _ (0x05, L1D_TLB_REFILL, "Level 1 data or unified TLB refill") \
+  _ (0x14, L1I_CACHE, "Level 1 instruction cache access") \
+  _ (0x01, L1I_CACHE_REFILL, "Level 1 instruction cache refill") \
+  _ (0x26, L1I_TLB, "Level 1 instruction TLB access") \
+  _ (0x02, L1I_TLB_REFILL, "Level 1 instruction TLB refill") \
+  _ (0x16, L2D_CACHE, "Level 2 data cache access") \
+  _ (0x20, L2D_CACHE_ALLOCATE, \
+     "Level 2 data cache allocation without refill") \
+  _ (0x17, L2D_CACHE_REFILL, "Level 2 data cache refill") \
+  _ (0x18, L2D_CACHE_WB, "Attributable Level 2 data cache write-back") \
+  _ (0x2F, L2D_TLB, "Level 2 data or unified TLB access") \
+  _ (0x2D, L2D_TLB_REFILL, "Level 2 data or unified TLB refill") \
+  _ (0x27, L2I_CACHE, "Level 2 instruction cache access") \
+  _ (0x28, L2I_CACHE_REFILL, "Attributable Level 2 instruction cache refill") \
+  _ (0x30, L2I_TLB, "Level 2 instruction TLB access") \
+  _ (0x2E, L2I_TLB_REFILL, "Level 2 instruction TLB refill") \
+  _ (0x2B, L3D_CACHE, "Level 3 data cache access") \
+  _ (0x29, L3D_CACHE_ALLOCATE, \
+     "Level 3 data cache allocation without refill") \
+  _ (0x2A, L3D_CACHE_REFILL, "Attributable Level 3 data cache refill") \
+  _ (0x2C, L3D_CACHE_WB, "Attributable Level 3 data cache write-back") \
+  _ (0x06, LD_RETIRED, \
+     "Memory-reading instruction architecturally executed and condition" \
+     " code check pass") \
+  _ (0x32, LL_CACHE, "Last Level cache access") \
+  _ (0x33, LL_CACHE_MISS, "Last Level cache miss") \
+  _ (0x37, LL_CACHE_MISS_RD, "Last level cache miss, read") \
+  _ (0x36, LL_CACHE_RD, "Last level data cache access, read") \
+  _ (0x1A, MEMORY_ERROR, "Local memory error") \
+  _ (0x13, MEM_ACCESS, "Data memory access") \
+  _ (0x3A, OP_RETIRED, "Micro-operation architecturally executed") \
+  _ (0x3B, OP_SPEC, "Micro-operation Speculatively executed") \
+  _ (0x0C, PC_WRITE_RETIRED, \
+     "Software change to the Program Counter (PC). Instruction is " \
+     "architecturally executed and condition code check pass") \
+  _ (0x31, REMOTE_ACCESS, \
+     "Access to another socket in a multi-socket system") \
+  _ (0x38, REMOTE_ACCESS_RD, \
+     "Access to another socket in a multi-socket system, read") \
+  _ (0x3C, STALL, "No operation sent for execution") \
+  _ (0x24, STALL_BACKEND, "No operation issued due to the backend") \
+  _ (0x23, STALL_FRONTEND, "No operation issued due to the frontend") \
+  _ (0x3F, STALL_SLOT, "No operation sent for execution on a Slot") \
+  _ (0x3D, STALL_SLOT_BACKEND, \
+     "No operation sent for execution on a Slot due to the backend") \
+  _ (0x3E, STALL_SLOT_FRONTEND, \
+     "No operation sent for execution on a Slot due to the frontend") \
+  _ (0x07, ST_RETIRED, \
+     "Memory-writing instruction architecturally executed and condition" \
+     " code check pass") \
+  _ (0x00, SW_INCR, \
+     "Instruction architecturally executed, Condition code check pass, " \
+     "software increment") \
+  _ (0x1C, TTBR_WRITE_RETIRED, \
+     "Instruction architecturally executed, Condition code check pass, " \
+     "write to TTBR") \
+  _ (0x0F, UNALIGNED_LDST_RETIRED, \
+     "Unaligned memory-reading or memory-writing instruction " \
+     "architecturally executed and condition code check pass")
+
+/*
+ * One enumerator (ARMV8_PMUV3_<name>) per entry of foreach_perf_arm_event,
+ * numbered in list order starting at 0 - these are table indexes, not the
+ * hardware event codes.  ARM_N_EVENTS is the number of listed events.
+ */
+typedef enum
+{
+#define _(event, n, desc) ARMV8_PMUV3_##n,
+ foreach_perf_arm_event
+#undef _
+ ARM_N_EVENTS,
+} perf_arm_event_t;
+
+#endif