Diffstat (limited to 'src/plugins/perfmon/arm')
-rw-r--r-- | src/plugins/perfmon/arm/dispatch_wrapper.c | 142
-rw-r--r-- | src/plugins/perfmon/arm/dispatch_wrapper.h |  18
-rw-r--r-- | src/plugins/perfmon/arm/events.c           | 227
-rw-r--r-- | src/plugins/perfmon/arm/events.h           | 130
4 files changed, 517 insertions, 0 deletions
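Background for the diff below (not part of the commit): perfmon_dispatch_wrapper () reads PMU counters directly from user space with mrs instructions, which only works after the core perfmon plugin has opened each event with perf_event_open () and mmap'ed its perf_event_mmap_page, on a v5.17+ kernel with kernel.perf_user_access=1 (the condition arm_init () checks). A minimal standalone sketch of that setup, including the 64-bit-then-32-bit fallback described in events.c; the event choice (0x11, CPU_CYCLES) and all scaffolding here are illustrative assumptions, not code from this patch:

/* Hypothetical setup sketch (not from this commit): open one raw PMUv3
   event with user-space read access and map its perf_event_mmap_page,
   the structure consumed by read_pmc_from_mmap () in the diff below. */
#include <errno.h>
#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

static long
perf_event_open (struct perf_event_attr *attr, pid_t pid, int cpu,
                 int group_fd, unsigned long flags)
{
  return syscall (__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int
main (void)
{
  struct perf_event_attr attr;

  memset (&attr, 0, sizeof (attr));
  attr.size = sizeof (attr);
  attr.type = PERF_TYPE_RAW;
  attr.config = 0x11;      /* CPU_CYCLES - an arbitrary example event */
  attr.config1 = 3;        /* user access + 64-bit counter, per events.c */
  attr.exclude_kernel = 1; /* matches .exclude_kernel = 1 in events.c */

  int fd = perf_event_open (&attr, 0 /* this thread */, -1, -1, 0);
  if (fd < 0 && errno == EOPNOTSUPP)
    {
      attr.config1 = 2; /* fall back to a 32-bit counter, per events.c */
      fd = perf_event_open (&attr, 0, -1, -1, 0);
    }
  if (fd < 0)
    return 1;

  /* mapping only the first page is enough for self-monitoring reads */
  struct perf_event_mmap_page *pc =
    mmap (NULL, sysconf (_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0);
  if (pc == MAP_FAILED)
    return 1;

  /* read_pmc_from_mmap () requires cap_user_rdpmc and a non-zero index */
  printf ("cap_user_rdpmc=%u index=%u pmc_width=%u\n",
          (unsigned) pc->cap_user_rdpmc, (unsigned) pc->index,
          (unsigned) pc->pmc_width);
  return 0;
}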
diff --git a/src/plugins/perfmon/arm/dispatch_wrapper.c b/src/plugins/perfmon/arm/dispatch_wrapper.c
new file mode 100644
index 00000000000..df79bcd8631
--- /dev/null
+++ b/src/plugins/perfmon/arm/dispatch_wrapper.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <linux/limits.h>
+#include <sys/ioctl.h>
+
+#include <perfmon/perfmon.h>
+
+#define barrier() asm volatile("dmb ish" : : : "memory");
+
+typedef int64_t s64;
+
+static_always_inline u64
+get_pmc_register (u32 pmc_idx)
+{
+  u64 value = 0;
+  if (pmc_idx == 31)
+    /* i.e. CPU Cycle event code 0x11 - need to read via pmccntr_el0 */
+    asm volatile("mrs %x0, pmccntr_el0" : "=r"(value));
+  else
+    {
+      /* set event register 0x0-0x1F */
+      asm volatile("msr pmselr_el0, %x0" : : "r"((pmc_idx)));
+      /* get register value */
+      asm volatile("mrs %x0, pmxevcntr_el0" : "=r"(value));
+    }
+  asm volatile("isb" : : : "memory");
+  return value;
+}
+
+static_always_inline u64
+read_pmc_from_mmap (struct perf_event_mmap_page *pc)
+{
+  u32 seq, idx, width;
+  u64 offset = 0;
+  s64 pmc = 0;
+
+  do
+    {
+      seq = pc->lock;
+      barrier ();
+      idx = pc->index;
+      offset = pc->offset;
+      if (pc->cap_user_rdpmc && idx)
+        {
+          width = pc->pmc_width;
+          pmc = get_pmc_register (idx - 1);
+          /* for 32 bit regs, left shift 32b to zero/discard the top bits */
+          pmc <<= 64 - width;
+          pmc >>= 64 - width;
+        }
+      barrier ();
+    }
+  while (pc->lock != seq);
+
+  return pmc + offset;
+}
+
+static_always_inline void
+perfmon_read_pmcs (u64 *counters, perfmon_thread_runtime_t *rt, u8 n_counters)
+{
+  switch (n_counters)
+    {
+    default:
+    case 7:
+      counters[6] = read_pmc_from_mmap (rt->mmap_pages[6]);
+    case 6:
+      counters[5] = read_pmc_from_mmap (rt->mmap_pages[5]);
+    case 5:
+      counters[4] = read_pmc_from_mmap (rt->mmap_pages[4]);
+    case 4:
+      counters[3] = read_pmc_from_mmap (rt->mmap_pages[3]);
+    case 3:
+      counters[2] = read_pmc_from_mmap (rt->mmap_pages[2]);
+    case 2:
+      counters[1] = read_pmc_from_mmap (rt->mmap_pages[1]);
+    case 1:
+      counters[0] = read_pmc_from_mmap (rt->mmap_pages[0]);
+      break;
+    }
+}
+
+uword
+perfmon_dispatch_wrapper (vlib_main_t *vm, vlib_node_runtime_t *node,
+                          vlib_frame_t *frame)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  perfmon_thread_runtime_t *rt =
+    vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
+  perfmon_node_stats_t *s =
+    vec_elt_at_index (rt->node_stats, node->node_index);
+  u8 n_events = rt->n_events;
+  u64 before[n_events];
+  u64 after[n_events];
+
+  uword rv;
+
+  clib_prefetch_load (s);
+
+  perfmon_read_pmcs (before, rt, n_events);
+  rv = node->function (vm, node, frame);
+  perfmon_read_pmcs (after, rt, n_events);
+
+  if (rv == 0)
+    return rv;
+
+  s->n_calls += 1;
+  s->n_packets += rv;
+
+  for (int i = 0; i < n_events; i++)
+    {
+      s->value[i] += after[i] - before[i];
+    }
+
+  return rv;
+}
+
+clib_error_t *
+arm_config_dispatch_wrapper (perfmon_bundle_t *b,
+                             vlib_node_function_t **dispatch_wrapper)
+{
+  (*dispatch_wrapper) = perfmon_dispatch_wrapper;
+  return 0;
+}
diff --git a/src/plugins/perfmon/arm/dispatch_wrapper.h b/src/plugins/perfmon/arm/dispatch_wrapper.h
new file mode 100644
index 00000000000..903971f8b5e
--- /dev/null
+++ b/src/plugins/perfmon/arm/dispatch_wrapper.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+clib_error_t *
+arm_config_dispatch_wrapper (perfmon_bundle_t *b,
+                             vlib_node_function_t **dispatch_wrapper);
diff --git a/src/plugins/perfmon/arm/events.c b/src/plugins/perfmon/arm/events.c
new file mode 100644
index 00000000000..bf73ad6e896
--- /dev/null
+++ b/src/plugins/perfmon/arm/events.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+#include <perfmon/arm/dispatch_wrapper.h>
+#include <linux/perf_event.h>
+#include <dirent.h>
+
+VLIB_REGISTER_LOG_CLASS (if_default_log, static) = {
+  .class_name = "perfmon",
+};
+
+#define log_debug(fmt, ...)                                                  \
+  vlib_log_debug (if_default_log.class, fmt, __VA_ARGS__)
+#define log_warn(fmt, ...)                                                   \
+  vlib_log_warn (if_default_log.class, fmt, __VA_ARGS__)
+#define log_err(fmt, ...)                                                    \
+  vlib_log_err (if_default_log.class, fmt, __VA_ARGS__)
+
+/*
+ * config1 = 2 : user access enabled and always 32-bit
+ * config1 = 3 : user access enabled and always 64-bit
+ *
+ * Since there is no discovery into whether 64b counters are supported
+ * or not, first attempt to request 64b counters, then fall back to
+ * 32b if perf_event_open returns EOPNOTSUPP
+ */
+static perfmon_event_t events[] = {
+#define _(event, n, desc)                                                    \
+  [ARMV8_PMUV3_##n] = {                                                      \
+    .type = PERF_TYPE_RAW,                                                   \
+    .config = event,                                                         \
+    .config1 = 3,                                                            \
+    .name = #n,                                                              \
+    .description = desc,                                                     \
+    .exclude_kernel = 1,                                                     \
+  },
+  foreach_perf_arm_event
+#undef _
+};
+
+u8 *
+format_arm_config (u8 *s, va_list *args)
+{
+  u64 config = va_arg (*args, u64);
+
+  s = format (s, "event=0x%02x", config & 0xff);
+
+  return s;
+}
+
+static clib_error_t *
+arm_init (vlib_main_t *vm, perfmon_source_t *src)
+{
+  clib_error_t *err;
+
+  /*
+    check the /proc/sys/kernel/perf_user_access flag to see if userspace
+    access to perf counters is enabled (disabled by default)
+      - if this file doesn't exist, we are on an unsupported kernel version
+      - if the file exists and is 0, user access needs to be granted
+        with 'sudo sysctl kernel/perf_user_access=1'
+  */
+  u8 perf_user_access_enabled;
+  char *path = "/proc/sys/kernel/perf_user_access";
+  err = clib_sysfs_read (path, "%u", &perf_user_access_enabled);
+  if (err)
+    {
+      if (err->code == ENOENT) /* No such file or directory */
+        {
+          return clib_error_create (
+            "linux kernel version is unsupported, please upgrade to v5.17+ "
+            "- user access to perf counters is not possible");
+        }
+      return clib_error_return_unix (0, "failed to read: %s", path);
+    }
+
+  if (perf_user_access_enabled == 1)
+    log_debug ("user access to perf counters is enabled in %s", path);
+  else
+    {
+      return clib_error_create (
+        "user access to perf counters is not enabled: run"
+        " \'sudo sysctl kernel/perf_user_access=1\'");
+    }
+
+  /*
+    perfmon/arm/events.h has up to 0xFF/256 possible PMUv3 event codes
+    supported - create a bitmap to store whether each event is
+    implemented or not
+  */
+  uword *bitmap = NULL;
+  clib_bitmap_alloc (bitmap, 256);
+
+  struct dirent *dir_entry;
+  const char *event_path =
+    "/sys/bus/event_source/devices/armv8_pmuv3_0/events";
+  DIR *event_dir = opendir (event_path);
+
+  if (event_dir == NULL)
+    {
+      err =
+        clib_error_return_unix (0, "error listing directory: %s", event_path);
+      log_err ("%U", format_clib_error, err);
+      return err;
+    }
+
+  while ((dir_entry = readdir (event_dir)) != NULL)
+    {
+      if (dir_entry->d_name[0] != '.')
+        {
+          u8 *s = NULL;
+          u8 *tmpstr = NULL;
+          unformat_input_t input;
+          u32 config;
+
+          s = format (s, "%s/%s%c", event_path, dir_entry->d_name, 0);
+          err = clib_sysfs_read ((char *) s, "%s", &tmpstr);
+          if (err)
+            {
+              log_err ("%U", format_clib_error, err);
+              continue;
+            }
+          unformat_init_vector (&input, tmpstr);
+          if (unformat (&input, "event=0x%x", &config))
+            {
+              /* it's possible to have event codes up to 0xFFFF */
+              if (config < 0xFF) /* perfmon supports < 0xFF */
+                {
+                  clib_bitmap_set (bitmap, config, 1);
+                }
+              log_debug ("found supported event in sysfs: %s \'%s\' 0x%x",
+                         dir_entry->d_name, tmpstr, config);
+            }
+          else
+            {
+              err = clib_error_create ("error parsing event: %s %s",
                                       dir_entry->d_name, tmpstr);
+              log_err ("%U", format_clib_error, err);
+              continue;
+            }
+        }
+    }
+  closedir (event_dir);
+
+  for (int i = 0; i < ARRAY_LEN (events); i++)
+    {
+      if (clib_bitmap_get (bitmap, events[i].config))
+        events[i].implemented = 1;
+    }
+  clib_bitmap_free (bitmap);
+
+  return 0;
+}
+
+u8
+arm_bundle_supported (perfmon_bundle_t *b)
+{
+  clib_bitmap_alloc (b->event_disabled, b->n_events);
+  for (u32 i = 0; i < b->n_events; i++)
+    {
+      perfmon_event_t *e = b->src->events + b->events[i];
+      if (!e->implemented)
+        {
+          log_debug (
+            "bundle \'%s\': perf event %s is not implemented on this CPU",
+            b->name, e->name);
+          clib_bitmap_set (b->event_disabled, i, 1);
+        }
+    }
+
+  /* if no events are implemented, fail and do not register bundle */
+  if (clib_bitmap_count_set_bits (b->event_disabled) == b->n_events)
+    {
+      return 0;
+    }
+
+  /* disable columns that use unimplemented events */
+  clib_bitmap_alloc (b->column_disabled, b->n_columns);
+  if (b->column_events)
+    {
+      u32 disabled_event;
+      /* iterate through set bits */
+      clib_bitmap_foreach (disabled_event, b->event_disabled)
+        {
+          for (u32 j = 0; j < b->n_columns; j++)
+            {
+              if (clib_bitmap_get (b->column_disabled, j))
+                continue;
+              if (GET_BIT (b->column_events[j], disabled_event))
+                {
+                  clib_bitmap_set (b->column_disabled, j, 1);
+                  log_debug (
+                    "bundle \'%s\': disabling column %d as event unsupported",
+                    b->name, j);
+                }
+            }
+        }
+    }
+
+  return 1;
+}
+
+PERFMON_REGISTER_SOURCE (arm) = {
+  .name = "arm",
+  .description = "Arm PMU events",
+  .events = events,
+  .n_events = ARRAY_LEN (events),
+  .init_fn = arm_init,
+  .format_config = format_arm_config,
+  .bundle_support = arm_bundle_supported,
+  .config_dispatch_wrapper = arm_config_dispatch_wrapper,
+};
diff --git a/src/plugins/perfmon/arm/events.h b/src/plugins/perfmon/arm/events.h
new file mode 100644
index 00000000000..5b7c49801d0
--- /dev/null
+++ b/src/plugins/perfmon/arm/events.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __perfmon_arm_h
+#define __perfmon_arm_h
+
+/*
+ * Events from the Armv8 PMUv3 - See "Arm Architecture Reference Manual Armv8,
+ * for Armv8-A architecture profile" D7.10 PMU events and event numbers:
+ * https://developer.arm.com/documentation/ddi0487/latest/
+ * EventCode, name, description
+ */
+#define foreach_perf_arm_event                                               \
+  _ (0x0D, BR_IMMED_RETIRED, "Immediate branch architecturally executed")    \
+  _ (0x10, BR_MIS_PRED,                                                      \
+     "Mispredicted or not predicted branch Speculatively executed")          \
+  _ (0x22, BR_MIS_PRED_RETIRED,                                              \
+     "Instruction architecturally executed, mispredicted branch")            \
+  _ (0x12, BR_PRED, "Predictable branch Speculatively executed")             \
+  _ (0x21, BR_RETIRED, "Branch instruction architecturally executed")        \
+  _ (0x0E, BR_RETURN_RETIRED,                                                \
+     "Function return instruction architecturally executed and the "         \
+     "condition code check pass")                                            \
+  _ (0x19, BUS_ACCESS, "Attributable Bus access")                            \
+  _ (0x1D, BUS_CYCLES, "Bus cycle")                                          \
+  _ (0x1E, CHAIN,                                                            \
+     "For an odd numbered counter, increment when an overflow occurs on "    \
+     "the preceding even-numbered counter on the same PE")                   \
+  _ (0x0B, CID_WRITE_RETIRED,                                                \
+     "Instruction architecturally executed, Condition code check pass, "     \
+     "write to CONTEXTIDR")                                                  \
+  _ (0x11, CPU_CYCLES, "Cycle counter")                                      \
+  _ (0x34, DTLB_WALK,                                                        \
+     "Access to data or unified TLB causes a translation table walk")        \
+  _ (0x0A, EXC_RETURN,                                                       \
+     "Exception return instruction architecturally executed and the "        \
+     "condition code check pass")                                            \
+  _ (0x09, EXC_TAKEN, "Exception entry")                                     \
+  _ (0x08, INST_RETIRED, "Instruction architecturally executed")             \
+  _ (0x1B, INST_SPEC, "Operation Speculatively executed")                    \
+  _ (0x35, ITLB_WALK,                                                        \
+     "Access to instruction TLB that causes a translation table walk")       \
+  _ (0x04, L1D_CACHE, "Level 1 data cache access")                           \
+  _ (0x1F, L1D_CACHE_ALLOCATE,                                               \
+     "Level 1 data cache allocation without refill")                         \
+  _ (0x39, L1D_CACHE_LMISS_RD, "Level 1 data cache long-latency read miss")  \
+  _ (0x03, L1D_CACHE_REFILL, "Level 1 data cache refill")                    \
+  _ (0x15, L1D_CACHE_WB, "Attributable Level 1 data cache write-back")       \
+  _ (0x25, L1D_TLB, "Level 1 data or unified TLB access")                    \
+  _ (0x05, L1D_TLB_REFILL, "Level 1 data or unified TLB refill")             \
+  _ (0x14, L1I_CACHE, "Level 1 instruction cache access")                    \
+  _ (0x01, L1I_CACHE_REFILL, "Level 1 instruction cache refill")             \
+  _ (0x26, L1I_TLB, "Level 1 instruction TLB access")                        \
+  _ (0x02, L1I_TLB_REFILL, "Level 1 instruction TLB refill")                 \
+  _ (0x16, L2D_CACHE, "Level 2 data cache access")                           \
+  _ (0x20, L2D_CACHE_ALLOCATE,                                               \
+     "Level 2 data cache allocation without refill")                         \
+  _ (0x17, L2D_CACHE_REFILL, "Level 2 data cache refill")                    \
+  _ (0x18, L2D_CACHE_WB, "Attributable Level 2 data cache write-back")       \
+  _ (0x2F, L2D_TLB, "Level 2 data or unified TLB access")                    \
+  _ (0x2D, L2D_TLB_REFILL, "Level 2 data or unified TLB refill")             \
+  _ (0x27, L2I_CACHE, "Level 2 instruction cache access")                    \
+  _ (0x28, L2I_CACHE_REFILL, "Attributable Level 2 instruction cache refill")\
+  _ (0x30, L2I_TLB, "Level 2 instruction TLB access")                        \
+  _ (0x2E, L2I_TLB_REFILL, "Level 2 instruction TLB refill")                 \
+  _ (0x2B, L3D_CACHE, "Level 3 data cache access")                           \
+  _ (0x29, L3D_CACHE_ALLOCATE,                                               \
+     "Level 3 data cache allocation without refill")                         \
+  _ (0x2A, L3D_CACHE_REFILL, "Attributable Level 3 data cache refill")       \
+  _ (0x2C, L3D_CACHE_WB, "Attributable Level 3 data cache write-back")       \
+  _ (0x06, LD_RETIRED,                                                       \
+     "Memory-reading instruction architecturally executed and condition"     \
+     " code check pass")                                                     \
+  _ (0x32, LL_CACHE, "Last Level cache access")                              \
+  _ (0x33, LL_CACHE_MISS, "Last Level cache miss")                           \
+  _ (0x37, LL_CACHE_MISS_RD, "Last level cache miss, read")                  \
+  _ (0x36, LL_CACHE_RD, "Last level data cache access, read")                \
+  _ (0x1A, MEMORY_ERROR, "Local memory error")                               \
+  _ (0x13, MEM_ACCESS, "Data memory access")                                 \
+  _ (0x3A, OP_RETIRED, "Micro-operation architecturally executed")           \
+  _ (0x3B, OP_SPEC, "Micro-operation Speculatively executed")                \
+  _ (0x0C, PC_WRITE_RETIRED,                                                 \
+     "Software change to the Program Counter (PC). Instruction is "          \
+     "architecturally executed and condition code check pass")               \
+  _ (0x31, REMOTE_ACCESS,                                                    \
+     "Access to another socket in a multi-socket system")                    \
+  _ (0x38, REMOTE_ACCESS_RD,                                                 \
+     "Access to another socket in a multi-socket system, read")              \
+  _ (0x3C, STALL, "No operation sent for execution")                         \
+  _ (0x24, STALL_BACKEND, "No operation issued due to the backend")          \
+  _ (0x23, STALL_FRONTEND, "No operation issued due to the frontend")        \
+  _ (0x3F, STALL_SLOT, "No operation sent for execution on a Slot")          \
+  _ (0x3D, STALL_SLOT_BACKEND,                                               \
+     "No operation sent for execution on a Slot due to the backend")         \
+  _ (0x3E, STALL_SLOT_FRONTEND,                                              \
+     "No operation sent for execution on a Slot due to the frontend")        \
+  _ (0x07, ST_RETIRED,                                                       \
+     "Memory-writing instruction architecturally executed and condition"     \
+     " code check pass")                                                     \
+  _ (0x00, SW_INCR,                                                          \
+     "Instruction architecturally executed, Condition code check pass, "     \
+     "software increment")                                                   \
+  _ (0x1C, TTBR_WRITE_RETIRED,                                               \
+     "Instruction architecturally executed, Condition code check pass, "     \
+     "write to TTBR")                                                        \
+  _ (0x0F, UNALIGNED_LDST_RETIRED,                                           \
+     "Unaligned memory memory-reading or memory-writing instruction "        \
+     "architecturally executed and condition code check pass")
+
+typedef enum
+{
+#define _(event, n, desc) ARMV8_PMUV3_##n,
+  foreach_perf_arm_event
+#undef _
+    ARM_N_EVENTS,
+} perf_arm_event_t;
+
+#endif