author     Zachary Leaf <zachary.leaf@arm.com>        2022-05-12 02:26:00 -0500
committer  Damjan Marion <dmarion@0xa5.net>           2022-07-12 15:29:23 +0000
commit     268d7be66b8b48a230e06de645e3a8b7de29d93c (patch)
tree       098d15e0c3f6fa7864c3f581367d8d5bb80495fa
parent     c7d43a5eb19f2acab900274432cfd0e136d6cb44 (diff)
perfmon: enable perfmon plugin for Arm
This patch enables statistics from the Arm PMUv3 through the perfmon
plugin.
In comparison to using the Linux "perf" tool, it allows obtaining
direct, per-node statistics (rather than per-thread). By accessing the
PMU counter registers directly from userspace, we can avoid the
overhead of a read() system call and get more accurate, fine-grained
statistics about the running of individual nodes.
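Concretely, the per-counter read is a seqlock-style loop over the
event's mmap'd perf_event_mmap_page, with the value taken straight
from the PMU system registers. Below is a condensed sketch of what
arm/dispatch_wrapper.c (in the diff further down) does; the 32-bit
width masking and the dedicated cycle-counter register are omitted,
and VPP's u32/u64 typedefs plus <linux/perf_event.h> are assumed:

    /* condensed sketch of the userspace counter read; assumes the
       event fd has been mmap'd and kernel/perf_user_access=1 */
    static inline u64
    read_counter (struct perf_event_mmap_page *pc)
    {
      u32 seq, idx;
      u64 val = 0;
      do
        {
          seq = pc->lock;          /* seqlock: retry if it changes */
          asm volatile ("dmb ish" ::: "memory");
          idx = pc->index;         /* 1-based hardware counter index */
          if (pc->cap_user_rdpmc && idx)
            {
              /* select the event counter, then read it */
              asm volatile ("msr pmselr_el0, %x0" : : "r" (idx - 1));
              asm volatile ("mrs %x0, pmxevcntr_el0" : "=r" (val));
            }
          asm volatile ("dmb ish" ::: "memory");
        }
      while (pc->lock != seq);
      return val + pc->offset;     /* offset is maintained by kernel */
    }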
A demo of perfmon on Arm can be found at:
https://asciinema.org/a/egVNN1OF7JEKHYmfl5bpDYxfF
*Important Note*
Perfmon on Arm depends on, and works only with, Linux kernel v5.17+,
as this is when userspace access to Arm perf counters was introduced.
On most Arm systems, a maximum of 7 PMU events can be configured at
once (6 PMU events + 1 CPU_CYCLE counter). If some perf counters are in
use elsewhere by other applications and there are insufficient counters
remaining to open the bundle, the perf_event_open call will fail
(provided the events are grouped with the group_fd param, which perfmon
currently utilises).
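The grouping works by passing the group leader's fd as group_fd on
each subsequent perf_event_open call, so the kernel schedules the
whole group onto the PMU together or not at all. A minimal sketch of
the mechanism (not the plugin's actual code; the raw event codes
0x08/0x11 are illustrative INST_RETIRED/CPU_CYCLES values):

    #include <linux/perf_event.h>
    #include <stdint.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int
    open_raw_event (uint64_t config, int group_fd)
    {
      struct perf_event_attr pe = {
        .size = sizeof (pe),
        .type = PERF_TYPE_RAW,
        .config = config,
        .exclude_kernel = 1,
        .disabled = 1,
      };
      /* pid = 0 (this thread), cpu = -1 (any), flags = 0 */
      return syscall (SYS_perf_event_open, &pe, 0, -1, group_fd, 0);
    }

    static int
    open_bundle (void)
    {
      /* the first event creates the group; the rest join via its fd -
         if the PMU lacks free counters for the whole group, open fails */
      int leader = open_raw_event (0x08 /* INST_RETIRED */, -1);
      int member = open_raw_event (0x11 /* CPU_CYCLES */, leader);
      return (leader < 0 || member < 0) ? -1 : leader;
    }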
See arm/events.h for the list of available PMUv3 events, although it is
implementation-defined whether most of them are implemented on a given
CPU. Only a small set of 7 events is required to be implemented in
Armv8.0, with some additional events required in later versions.
Depending on the implementation, some statistics may therefore not be
available. See the Arm Architecture Reference Manual for Armv8-A,
D7.10.2 "The PMU event number space and common events", for more
information.
arm/events.c:arm_init() reads from sysfs, at runtime, which events are
implemented on a particular CPU. Arm's implementation of the perfmon
source callback .bundle_support uses this information to disable
unsupported events in a bundle or, where no events are supported at
all, to disable the entire bundle.
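Each implemented event appears as a file under sysfs whose contents
carry the raw event code. A standalone approximation of that discovery
step (arm_init itself uses VPP's clib_sysfs_read/unformat helpers and
a clib bitmap, as the diff below shows):

    #include <dirent.h>
    #include <stdio.h>

    /* mark implemented event codes in a 256-entry table by scanning
       e.g. .../events/br_retired, whose contents read "event=0x21" */
    static void
    probe_events (unsigned char implemented[256])
    {
      const char *dir =
        "/sys/bus/event_source/devices/armv8_pmuv3_0/events";
      DIR *d = opendir (dir);
      struct dirent *e;
      if (!d)
        return;
      while ((e = readdir (d)))
        {
          char path[512], buf[64];
          unsigned config;
          FILE *f;
          if (e->d_name[0] == '.')
            continue;
          snprintf (path, sizeof (path), "%s/%s", dir, e->d_name);
          if (!(f = fopen (path, "r")))
            continue;
          if (fgets (buf, sizeof (buf), f) &&
              sscanf (buf, "event=0x%x", &config) == 1 && config < 0xFF)
            implemented[config] = 1;
          fclose (f);
        }
      closedir (d);
    }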
Where a particular event in a bundle is not implemented, the statistic
for that event is shown as '-' in the 'show perfmon statistics' CLI
output by disabling the column.
There is additional code in perfmon.c to open only those events marked
as implemented. Since only implemented events are opened and read, some
extra logic is required in cli.c to re-align either
perfmon_node_stats_t or perfmon_reading_t with the column headings
configured in each bundle, taking disabled columns into account.
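The realignment is a two-index walk: readings are packed at indices
0..n_enabled-1 in the order the events were opened, and are copied out
to their bundle-relative positions. A standalone illustration in plain
C types (the real code in cli.c walks b->event_disabled with the clib
bitmap helpers):

    /* packed[] holds one value per *implemented* event, in open
       order; aligned[] is indexed by bundle event number */
    static void
    realign (const unsigned long long *packed, int n_packed,
             const unsigned char *disabled, /* 1 if event disabled */
             unsigned long long *aligned, int n_events)
    {
      for (int i = 0, k = 0; i < n_events && k < n_packed; i++)
        {
          if (disabled[i])
            continue; /* its column renders as '-' */
          aligned[i] = packed[k++];
        }
    }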
Userspace access to perf counters is disabled by default, and needs to
be enabled with 'sudo sysctl kernel/perf_user_access=1'.
The Arm event source init function (arm/events.c:arm_init) verifies
that userspace reading of perf counters is enabled in the
/proc/sys/kernel/perf_user_access file.
If the above file does not exist, it means the kernel version is
unsupported. Users without a supported kernel will see a warning
message, and no Arm bundles will be registered to use in perfmon.
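A standalone equivalent of that check (arm_init itself goes through
clib_sysfs_read and returns a clib_error_t, as in the diff below):

    #include <errno.h>
    #include <stdio.h>

    /* 0: ok; -1: file missing (kernel < v5.17);
       -2: access not enabled or file unreadable */
    static int
    check_perf_user_access (void)
    {
      FILE *f = fopen ("/proc/sys/kernel/perf_user_access", "r");
      int enabled = 0;
      if (!f)
        return errno == ENOENT ? -1 : -2;
      if (fscanf (f, "%d", &enabled) != 1)
        enabled = 0;
      fclose (f);
      return enabled == 1 ? 0 : -2;
    }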
Enabling/using the plugin:
- include the following in startup.conf (expanded example after this
  list):
- plugins { plugin perfmon_plugin.so { enable } }
- 'show perfmon bundle [verbose]' - show available statistics bundles
- 'perfmon start bundle <bundle-name>' - enable and start logging
- 'perfmon stop' - stop logging
- 'show perfmon statistics' - show output
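For example, the startup.conf stanza from the first item expanded:

    plugins {
      plugin perfmon_plugin.so { enable }
    }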
For a general guide on using and understanding Arm PMUv3 events, see
https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/arm-neoverse-n1-performance-analysis-methodology
Type: feature
Signed-off-by: Zachary Leaf <zachary.leaf@arm.com>
Tested-by: Jieqiang Wang <jieqiang.wang@arm.com>
Change-Id: I0620fe5b1bbe78842dfb1d0b6a060bb99e777651
-rw-r--r--  src/plugins/perfmon/CMakeLists.txt          51
-rw-r--r--  src/plugins/perfmon/arm/dispatch_wrapper.c  142
-rw-r--r--  src/plugins/perfmon/arm/dispatch_wrapper.h  18
-rw-r--r--  src/plugins/perfmon/arm/events.c            227
-rw-r--r--  src/plugins/perfmon/arm/events.h            130
-rw-r--r--  src/plugins/perfmon/cli.c                   73
-rw-r--r--  src/plugins/perfmon/intel/core.c            1
-rw-r--r--  src/plugins/perfmon/intel/uncore.c          1
-rw-r--r--  src/plugins/perfmon/linux.c                 7
-rw-r--r--  src/plugins/perfmon/perfmon.c               27
-rw-r--r--  src/plugins/perfmon/perfmon.h               12
-rw-r--r--  src/vppinfra/bitops.h                       3
-rw-r--r--  src/vppinfra/cpu.h                          4
13 files changed, 659 insertions, 37 deletions
diff --git a/src/plugins/perfmon/CMakeLists.txt b/src/plugins/perfmon/CMakeLists.txt
index 59eddf01145..d7d4f372da1 100644
--- a/src/plugins/perfmon/CMakeLists.txt
+++ b/src/plugins/perfmon/CMakeLists.txt
@@ -11,30 +11,39 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
+if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*|aarch64.*")
   return()
 endif()
 
-list(APPEND ARCH_PMU_SOURCES
-  intel/dispatch_wrapper.c
-  intel/core.c
-  intel/uncore.c
-  intel/bundle/backend_bound_core.c
-  intel/bundle/backend_bound_mem.c
-  intel/bundle/branch_mispred.c
-  intel/bundle/cache_hit_miss.c
-  intel/bundle/frontend_bound_bw_src.c
-  intel/bundle/frontend_bound_bw_uops.c
-  intel/bundle/frontend_bound_lat.c
-  intel/bundle/iio_bw.c
-  intel/bundle/inst_and_clock.c
-  intel/bundle/load_blocks.c
-  intel/bundle/mem_bw.c
-  intel/bundle/power_license.c
-  intel/bundle/topdown_icelake.c
-  intel/bundle/topdown_metrics.c
-  intel/bundle/topdown_tremont.c
-)
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
+  list(APPEND ARCH_PMU_SOURCES
+    intel/dispatch_wrapper.c
+    intel/core.c
+    intel/uncore.c
+    intel/bundle/backend_bound_core.c
+    intel/bundle/backend_bound_mem.c
+    intel/bundle/branch_mispred.c
+    intel/bundle/cache_hit_miss.c
+    intel/bundle/frontend_bound_bw_src.c
+    intel/bundle/frontend_bound_bw_uops.c
+    intel/bundle/frontend_bound_lat.c
+    intel/bundle/iio_bw.c
+    intel/bundle/inst_and_clock.c
+    intel/bundle/load_blocks.c
+    intel/bundle/mem_bw.c
+    intel/bundle/power_license.c
+    intel/bundle/topdown_icelake.c
+    intel/bundle/topdown_metrics.c
+    intel/bundle/topdown_tremont.c
+  )
+endif()
+
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64.*")
+  list(APPEND ARCH_PMU_SOURCES
+    arm/dispatch_wrapper.c
+    arm/events.c
+  )
+endif()
 
 add_vpp_plugin(perfmon
   SOURCES
diff --git a/src/plugins/perfmon/arm/dispatch_wrapper.c b/src/plugins/perfmon/arm/dispatch_wrapper.c
new file mode 100644
index 00000000000..df79bcd8631
--- /dev/null
+++ b/src/plugins/perfmon/arm/dispatch_wrapper.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <linux/limits.h>
+#include <sys/ioctl.h>
+
+#include <perfmon/perfmon.h>
+
+#define barrier() asm volatile("dmb ish" : : : "memory");
+
+typedef int64_t s64;
+
+static_always_inline u64
+get_pmc_register (u32 pmc_idx)
+{
+  u64 value = 0;
+  if (pmc_idx == 31)
+    /* i.e. CPU Cycle event code 0x11 - need to read via pmccntr_el0 */
+    asm volatile("mrs %x0, pmccntr_el0" : "=r"(value));
+  else
+    {
+      /* set event register 0x0-0x1F */
+      asm volatile("msr pmselr_el0, %x0" : : "r"((pmc_idx)));
+      /* get register value */
+      asm volatile("mrs %x0, pmxevcntr_el0" : "=r"(value));
+    }
+  asm volatile("isb" : : : "memory");
+  return value;
+}
+
+static_always_inline u64
+read_pmc_from_mmap (struct perf_event_mmap_page *pc)
+{
+  u32 seq, idx, width;
+  u64 offset = 0;
+  s64 pmc = 0;
+
+  do
+    {
+      seq = pc->lock;
+      barrier ();
+      idx = pc->index;
+      offset = pc->offset;
+      if (pc->cap_user_rdpmc && idx)
+        {
+          width = pc->pmc_width;
+          pmc = get_pmc_register (idx - 1);
+          /* for 32 bit regs, left shift 32b to zero/discard the top bits */
+          pmc <<= 64 - width;
+          pmc >>= 64 - width;
+        }
+      barrier ();
+    }
+  while (pc->lock != seq);
+
+  return pmc + offset;
+}
+
+static_always_inline void
+perfmon_read_pmcs (u64 *counters, perfmon_thread_runtime_t *rt, u8 n_counters)
+{
+  switch (n_counters)
+    {
+    default:
+    case 7:
+      counters[6] = read_pmc_from_mmap (rt->mmap_pages[6]);
+    case 6:
+      counters[5] = read_pmc_from_mmap (rt->mmap_pages[5]);
+    case 5:
+      counters[4] = read_pmc_from_mmap (rt->mmap_pages[4]);
+    case 4:
+      counters[3] = read_pmc_from_mmap (rt->mmap_pages[3]);
+    case 3:
+      counters[2] = read_pmc_from_mmap (rt->mmap_pages[2]);
+    case 2:
+      counters[1] = read_pmc_from_mmap (rt->mmap_pages[1]);
+    case 1:
+      counters[0] = read_pmc_from_mmap (rt->mmap_pages[0]);
+      break;
+    }
+}
+
+uword
+perfmon_dispatch_wrapper (vlib_main_t *vm, vlib_node_runtime_t *node,
+                          vlib_frame_t *frame)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  perfmon_thread_runtime_t *rt =
+    vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
+  perfmon_node_stats_t *s =
+    vec_elt_at_index (rt->node_stats, node->node_index);
+  u8 n_events = rt->n_events;
+  u64 before[n_events];
+  u64 after[n_events];
+
+  uword rv;
+
+  clib_prefetch_load (s);
+
+  perfmon_read_pmcs (before, rt, n_events);
+  rv = node->function (vm, node, frame);
+  perfmon_read_pmcs (after, rt, n_events);
+
+  if (rv == 0)
+    return rv;
+
+  s->n_calls += 1;
+  s->n_packets += rv;
+
+  for (int i = 0; i < n_events; i++)
+    {
+      s->value[i] += after[i] - before[i];
+    }
+
+  return rv;
+}
+
+clib_error_t *
+arm_config_dispatch_wrapper (perfmon_bundle_t *b,
+                             vlib_node_function_t **dispatch_wrapper)
+{
+  (*dispatch_wrapper) = perfmon_dispatch_wrapper;
+  return 0;
+}
diff --git a/src/plugins/perfmon/arm/dispatch_wrapper.h b/src/plugins/perfmon/arm/dispatch_wrapper.h
new file mode 100644
index 00000000000..903971f8b5e
--- /dev/null
+++ b/src/plugins/perfmon/arm/dispatch_wrapper.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+clib_error_t *
+arm_config_dispatch_wrapper (perfmon_bundle_t *b,
+                             vlib_node_function_t **dispatch_wrapper);
diff --git a/src/plugins/perfmon/arm/events.c b/src/plugins/perfmon/arm/events.c
new file mode 100644
index 00000000000..bf73ad6e896
--- /dev/null
+++ b/src/plugins/perfmon/arm/events.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+#include <perfmon/arm/dispatch_wrapper.h>
+#include <linux/perf_event.h>
+#include <dirent.h>
+
+VLIB_REGISTER_LOG_CLASS (if_default_log, static) = {
+  .class_name = "perfmon",
+};
+
+#define log_debug(fmt, ...)                                                  \
+  vlib_log_debug (if_default_log.class, fmt, __VA_ARGS__)
+#define log_warn(fmt, ...)                                                   \
+  vlib_log_warn (if_default_log.class, fmt, __VA_ARGS__)
+#define log_err(fmt, ...) vlib_log_err (if_default_log.class, fmt, __VA_ARGS__)
+
+/*
+ * config1 = 2 : user access enabled and always 32-bit
+ * config1 = 3 : user access enabled and always 64-bit
+ *
+ * Since there is no discovery into whether 64b counters are supported
+ * or not, first attempt to request 64b counters, then fall back to
+ * 32b if perf_event_open returns EOPNOTSUPP
+ */
+static perfmon_event_t events[] = {
+#define _(event, n, desc)                                                    \
+  [ARMV8_PMUV3_##n] = {                                                      \
+    .type = PERF_TYPE_RAW,                                                   \
+    .config = event,                                                         \
+    .config1 = 3,                                                            \
+    .name = #n,                                                              \
+    .description = desc,                                                     \
+    .exclude_kernel = 1,                                                     \
+  },
+  foreach_perf_arm_event
+#undef _
+};
+
+u8 *
+format_arm_config (u8 *s, va_list *args)
+{
+  u64 config = va_arg (*args, u64);
+
+  s = format (s, "event=0x%02x", config & 0xff);
+
+  return s;
+}
+
+static clib_error_t *
+arm_init (vlib_main_t *vm, perfmon_source_t *src)
+{
+  clib_error_t *err;
+
+  /*
+    check /proc/sys/kernel/perf_user_access flag to check if userspace
+    access to perf counters is enabled (disabled by default)
+     - if this file doesn't exist, we are on an unsupported kernel ver
+     - if the file exists and is 0, user access needs to be granted
+       with 'sudo sysctl kernel/perf_user_access=1'
+  */
+  u8 perf_user_access_enabled;
+  char *path = "/proc/sys/kernel/perf_user_access";
+  err = clib_sysfs_read (path, "%u", &perf_user_access_enabled);
+  if (err)
+    {
+      if (err->code == ENOENT) /* No such file or directory */
+        {
+          return clib_error_create (
+            "linux kernel version is unsupported, please upgrade to v5.17+ "
+            "- user access to perf counters is not possible");
+        }
+      return clib_error_return_unix (0, "failed to read: %s", path);
+    }
+
+  if (perf_user_access_enabled == 1)
+    log_debug ("user access to perf counters is enabled in %s", path);
+  else
+    {
+      return clib_error_create (
+        "user access to perf counters is not enabled: run"
+        " \'sudo sysctl kernel/perf_user_access=1\'");
+    }
+
+  /*
+    perfmon/arm/events.h has up to 0xFF/256 possible PMUv3 event codes
+    supported - create a bitmap to store whether each event is
+    implemented or not
+  */
+  uword *bitmap = NULL;
+  clib_bitmap_alloc (bitmap, 256);
+
+  struct dirent *dir_entry;
+  const char *event_path =
+    "/sys/bus/event_source/devices/armv8_pmuv3_0/events";
+  DIR *event_dir = opendir (event_path);
+
+  if (event_dir == NULL)
+    {
+      err =
+        clib_error_return_unix (0, "error listing directory: %s", event_path);
+      log_err ("%U", format_clib_error, err);
+      return err;
+    }
+
+  while ((dir_entry = readdir (event_dir)) != NULL)
+    {
+      if (dir_entry->d_name[0] != '.')
+        {
+          u8 *s = NULL;
+          u8 *tmpstr = NULL;
+          unformat_input_t input;
+          u32 config;
+
+          s = format (s, "%s/%s%c", event_path, dir_entry->d_name, 0);
+          err = clib_sysfs_read ((char *) s, "%s", &tmpstr);
+          if (err)
+            {
+              log_err ("%U", format_clib_error, err);
+              continue;
+            }
+          unformat_init_vector (&input, tmpstr);
+          if (unformat (&input, "event=0x%x", &config))
+            {
+              /* it's possible to have have event codes up to 0xFFFF */
+              if (config < 0xFF) /* perfmon supports < 0xFF */
+                {
+                  clib_bitmap_set (bitmap, config, 1);
+                }
+              log_debug ("found supported event in sysfs: %s \'%s\' 0x%x",
+                         dir_entry->d_name, tmpstr, config);
+            }
+          else
+            {
+              err = clib_error_create ("error parsing event: %s %s",
+                                       dir_entry->d_name, tmpstr);
+              log_err ("%U", format_clib_error, err);
+              continue;
+            }
+        }
+    }
+  closedir (event_dir);
+
+  for (int i = 0; i < ARRAY_LEN (events); i++)
+    {
+      if (clib_bitmap_get (bitmap, events[i].config))
+        events[i].implemented = 1;
+    }
+  clib_bitmap_free (bitmap);
+
+  return 0;
+}
+
+u8
+arm_bundle_supported (perfmon_bundle_t *b)
+{
+  clib_bitmap_alloc (b->event_disabled, b->n_events);
+  for (u32 i = 0; i < b->n_events; i++)
+    {
+      perfmon_event_t *e = b->src->events + b->events[i];
+      if (!e->implemented)
+        {
+          log_debug (
+            "bundle \'%s\': perf event %s is not implemented on this CPU",
+            b->name, e->name);
+          clib_bitmap_set (b->event_disabled, i, 1);
+        }
+    }
+
+  /* if no events are implemented, fail and do not register bundle */
+  if (clib_bitmap_count_set_bits (b->event_disabled) == b->n_events)
+    {
+      return 0;
+    }
+
+  /* disable columns that use unimplemented events */
+  clib_bitmap_alloc (b->column_disabled, b->n_columns);
+  if (b->column_events)
+    {
+      u32 disabled_event;
+      /* iterate through set bits */
+      clib_bitmap_foreach (disabled_event, b->event_disabled)
+        {
+          for (u32 j = 0; j < b->n_columns; j++)
+            {
+              if (clib_bitmap_get (b->column_disabled, j))
+                continue;
+              if (GET_BIT (b->column_events[j], disabled_event))
+                {
+                  clib_bitmap_set (b->column_disabled, j, 1);
+                  log_debug (
+                    "bundle \'%s\': disabling column %d as event unsupported",
+                    b->name, j);
+                }
+            }
+        }
+    }
+
+  return 1;
+}
+
+PERFMON_REGISTER_SOURCE (arm) = {
+  .name = "arm",
+  .description = "Arm PMU events",
+  .events = events,
+  .n_events = ARRAY_LEN (events),
+  .init_fn = arm_init,
+  .format_config = format_arm_config,
+  .bundle_support = arm_bundle_supported,
+  .config_dispatch_wrapper = arm_config_dispatch_wrapper,
+};
diff --git a/src/plugins/perfmon/arm/events.h b/src/plugins/perfmon/arm/events.h
new file mode 100644
index 00000000000..5b7c49801d0
--- /dev/null
+++ b/src/plugins/perfmon/arm/events.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __perfmon_arm_h
+#define __perfmon_arm_h
+
+/*
+ * Events from the Armv8 PMUv3 - See "Arm Architecture Reference Manual Armv8,
+ * for Armv8-A architecture profile" D7.10 PMU events and event numbers:
+ * https://developer.arm.com/documentation/ddi0487/latest/
+ * EventCode, name, description
+ */
+#define foreach_perf_arm_event                                               \
+  _ (0x0D, BR_IMMED_RETIRED, "Immediate branch architecturally executed")    \
+  _ (0x10, BR_MIS_PRED,                                                      \
+     "Mispredicted or not predicted branch Speculatively executed")          \
+  _ (0x22, BR_MIS_PRED_RETIRED,                                              \
+     "Instruction architecturally executed, mispredicted branch")            \
+  _ (0x12, BR_PRED, "Predictable branch Speculatively executed")             \
+  _ (0x21, BR_RETIRED, "Branch instruction architecturally executed")        \
+  _ (0x0E, BR_RETURN_RETIRED,                                                \
+     "Function return instruction architecturally executed and the "         \
+     "condition code check pass")                                            \
+  _ (0x19, BUS_ACCESS, "Attributable Bus access")                            \
+  _ (0x1D, BUS_CYCLES, "Bus cycle")                                          \
+  _ (0x1E, CHAIN,                                                            \
+     "For an odd numbered counter, increment when an overflow occurs on"     \
+     "the preceding even-numbered counter on the same PE")                   \
+  _ (0x0B, CID_WRITE_RETIRED,                                                \
+     "Instruction architecturally executed, Condition code check pass, "     \
+     "write to CONTEXTIDR")                                                  \
+  _ (0x11, CPU_CYCLES, "Cycle counter")                                      \
+  _ (0x34, DTLB_WALK,                                                        \
+     "Access to data or unified TLB causes a translation table walk")        \
+  _ (0x0A, EXC_RETURN,                                                       \
+     "Exception return instruction architecturally executed and the "        \
+     "condition code check pass")                                            \
+  _ (0x09, EXC_TAKEN, "Exception entry")                                     \
+  _ (0x08, INST_RETIRED, "Instruction architecturally executed")             \
+  _ (0x1B, INST_SPEC, "Operation Speculatively executed")                    \
+  _ (0x35, ITLB_WALK,                                                        \
+     "Access to instruction TLB that causes a translation table walk")       \
+  _ (0x04, L1D_CACHE, "Level 1 data cache access")                           \
+  _ (0x1F, L1D_CACHE_ALLOCATE,                                               \
+     "Level 1 data cache allocation without refill")                         \
+  _ (0x39, L1D_CACHE_LMISS_RD, "Level 1 data cache long-latency read miss")  \
+  _ (0x03, L1D_CACHE_REFILL, "Level 1 data cache refill")                    \
+  _ (0x15, L1D_CACHE_WB, "Attributable Level 1 data cache write-back")       \
+  _ (0x25, L1D_TLB, "Level 1 data or unified TLB access")                    \
+  _ (0x05, L1D_TLB_REFILL, "Level 1 data or unified TLB refill")             \
+  _ (0x14, L1I_CACHE, "Level 1 instruction cache access")                    \
+  _ (0x01, L1I_CACHE_REFILL, "Level 1 instruction cache refill")             \
+  _ (0x26, L1I_TLB, "Level 1 instruction TLB access")                        \
+  _ (0x02, L1I_TLB_REFILL, "Level 1 instruction TLB refill")                 \
+  _ (0x16, L2D_CACHE, "Level 2 data cache access")                           \
+  _ (0x20, L2D_CACHE_ALLOCATE,                                               \
+     "Level 2 data cache allocation without refill")                         \
+  _ (0x17, L2D_CACHE_REFILL, "Level 2 data cache refill")                    \
+  _ (0x18, L2D_CACHE_WB, "Attributable Level 2 data cache write-back")       \
+  _ (0x2F, L2D_TLB, "Level 2 data or unified TLB access")                    \
+  _ (0x2D, L2D_TLB_REFILL, "Level 2 data or unified TLB refill")             \
+  _ (0x27, L2I_CACHE, "Level 2 instruction cache access")                    \
+  _ (0x28, L2I_CACHE_REFILL, "Attributable Level 2 instruction cache refill")\
+  _ (0x30, L2I_TLB, "Level 2 instruction TLB access")                        \
+  _ (0x2E, L2I_TLB_REFILL, "Level 2 instruction TLB refill")                 \
+  _ (0x2B, L3D_CACHE, "Level 3 data cache access")                           \
+  _ (0x29, L3D_CACHE_ALLOCATE,                                               \
+     "Level 3 data cache allocation without refill")                         \
+  _ (0x2A, L3D_CACHE_REFILL, "Attributable Level 3 data cache refill")       \
+  _ (0x2C, L3D_CACHE_WB, "Attributable Level 3 data cache write-back")       \
+  _ (0x06, LD_RETIRED,                                                       \
+     "Memory-reading instruction architecturally executed and condition"     \
+     " code check pass")                                                     \
+  _ (0x32, LL_CACHE, "Last Level cache access")                              \
+  _ (0x33, LL_CACHE_MISS, "Last Level cache miss")                           \
+  _ (0x37, LL_CACHE_MISS_RD, "Last level cache miss, read")                  \
+  _ (0x36, LL_CACHE_RD, "Last level data cache access, read")                \
+  _ (0x1A, MEMORY_ERROR, "Local memory error")                               \
+  _ (0x13, MEM_ACCESS, "Data memory access")                                 \
+  _ (0x3A, OP_RETIRED, "Micro-operation architecturally executed")           \
+  _ (0x3B, OP_SPEC, "Micro-operation Speculatively executed")                \
+  _ (0x0C, PC_WRITE_RETIRED,                                                 \
+     "Software change to the Program Counter (PC). Instruction is "          \
+     "architecturally executed and condition code check pass")               \
+  _ (0x31, REMOTE_ACCESS,                                                    \
+     "Access to another socket in a multi-socket system")                    \
+  _ (0x38, REMOTE_ACCESS_RD,                                                 \
+     "Access to another socket in a multi-socket system, read")              \
+  _ (0x3C, STALL, "No operation sent for execution")                         \
+  _ (0x24, STALL_BACKEND, "No operation issued due to the backend")          \
+  _ (0x23, STALL_FRONTEND, "No operation issued due to the frontend")        \
+  _ (0x3F, STALL_SLOT, "No operation sent for execution on a Slot")          \
+  _ (0x3D, STALL_SLOT_BACKEND,                                               \
+     "No operation sent for execution on a Slot due to the backend")         \
+  _ (0x3E, STALL_SLOT_FRONTEND,                                              \
+     "No operation sent for execution on a Slot due to the frontend")        \
+  _ (0x07, ST_RETIRED,                                                       \
+     "Memory-writing instruction architecturally executed and condition"     \
+     " code check pass")                                                     \
+  _ (0x00, SW_INCR,                                                          \
+     "Instruction architecturally executed, Condition code check pass, "     \
+     "software increment")                                                   \
+  _ (0x1C, TTBR_WRITE_RETIRED,                                               \
+     "Instruction architecturally executed, Condition code check pass, "     \
+     "write to TTBR")                                                        \
+  _ (0x0F, UNALIGNED_LDST_RETIRED,                                           \
+     "Unaligned memory memory-reading or memory-writing instruction "        \
+     "architecturally executed and condition code check pass")
+
+typedef enum
+{
+#define _(event, n, desc) ARMV8_PMUV3_##n,
+  foreach_perf_arm_event
+#undef _
+    ARM_N_EVENTS,
+} perf_arm_event_t;
+
+#endif
diff --git a/src/plugins/perfmon/cli.c b/src/plugins/perfmon/cli.c
index aa5b5636235..0cdc4dba073 100644
--- a/src/plugins/perfmon/cli.c
+++ b/src/plugins/perfmon/cli.c
@@ -413,18 +413,77 @@ show_perfmon_stats_command_fn (vlib_main_t *vm, unformat_input_t *input,
                 vm, j, b->active_type);
               table_set_cell_align (t, col, -1, TTAA_RIGHT);
               table_set_cell_fg_color (t, col, -1, TTAC_CYAN);
-              clib_memcpy_fast (&ns, tr->node_stats + j, sizeof (ns));
+
+              if (PREDICT_TRUE (clib_bitmap_is_zero (b->event_disabled)))
+                clib_memcpy_fast (&ns, tr->node_stats + j, sizeof (ns));
+              /* if some events are not implemented, we need to realign these
+                 to display under the correct column headers */
+              else
+                {
+                  perfmon_node_stats_t *tr_ns = tr->node_stats + j;
+                  ns.n_calls = tr_ns->n_calls;
+                  ns.n_packets = tr_ns->n_packets;
+                  /* loop through all events in bundle + manually copy into
+                     the correct place, until we've read all values that are
+                     implemented */
+                  int num_enabled_events =
+                    b->n_events -
+                    clib_bitmap_count_set_bits (b->event_disabled);
+                  for (int i = 0, k = 0; k < num_enabled_events; i++)
+                    {
+                      if (!clib_bitmap_get (b->event_disabled, i))
+                        {
+                          ns.value[i] = tr_ns->value[k];
+                          k++;
+                        }
+                    }
+                }
 
               for (int j = 0; j < n_row; j++)
-                table_format_cell (t, col, j, "%U", b->format_fn, &ns, j,
-                                   b->active_type);
+                {
+                  if (clib_bitmap_get (b->column_disabled, j))
+                    table_format_cell (t, col, j, "-");
+                  else
+                    table_format_cell (t, col, j, "%U", b->format_fn, &ns, j,
+                                       b->active_type);
+                }
             }
         }
-      else
+      else /* b->type != PERFMON_BUNDLE_TYPE_NODE */
         {
-          for (int j = 0; j < n_row; j++)
-            table_format_cell (t, i, j, "%U", b->format_fn, r, j,
-                               b->active_type);
+          if (PREDICT_TRUE (clib_bitmap_is_zero (b->event_disabled)))
+            {
+              for (int j = 0; j < n_row; j++)
+                table_format_cell (t, i, j, "%U", b->format_fn, r, j,
+                                   b->active_type);
+            }
+          /* similarly for THREAD/SYSTEM bundles, if some events are not
+             implemented, we need to realign readings under column headings */
+          else
+            {
+              perfmon_reading_t aligned_r[b->n_events];
+              aligned_r->nr = r->nr;
+              aligned_r->time_enabled = r->time_enabled;
+              aligned_r->time_running = r->time_running;
+              int num_enabled_events =
+                b->n_events - clib_bitmap_count_set_bits (b->event_disabled);
+              for (int i = 0, k = 0; k < num_enabled_events; i++)
+                {
+                  if (!clib_bitmap_get (b->event_disabled, i))
+                    {
+                      aligned_r->value[i] = r->value[k];
+                      k++;
+                    }
+                }
+              for (int j = 0; j < n_row; j++)
+                {
+                  if (clib_bitmap_get (b->column_disabled, j))
+                    table_format_cell (t, col, j, "-");
+                  else
+                    table_format_cell (t, i, j, "%U", b->format_fn, aligned_r,
+                                       j, b->active_type);
+                }
+            }
         }
       col++;
     }
diff --git a/src/plugins/perfmon/intel/core.c b/src/plugins/perfmon/intel/core.c
index 5c4c336e2e8..d6a16b2125e 100644
--- a/src/plugins/perfmon/intel/core.c
+++ b/src/plugins/perfmon/intel/core.c
@@ -26,6 +26,7 @@ static perfmon_event_t events[] = {
                       event, umask, edge, any, inv, cmask),                 \
     .name = #n "." #suffix,                                                 \
     .description = desc,                                                    \
+    .implemented = 1,                                                       \
     .exclude_kernel = 1 },
 
   foreach_perf_intel_core_event foreach_perf_intel_peusdo_event
diff --git a/src/plugins/perfmon/intel/uncore.c b/src/plugins/perfmon/intel/uncore.c
index ac5580a3e62..a709a8d40be 100644
--- a/src/plugins/perfmon/intel/uncore.c
+++ b/src/plugins/perfmon/intel/uncore.c
@@ -49,6 +49,7 @@ static perfmon_event_t intel_uncore_events[] = {
     .description = desc,                                                    \
     .type_from_instance = 1,                                                \
     .instance_type = INTEL_UNCORE_UNIT_##unit,                              \
+    .implemented = 1,                                                       \
   },
 
   foreach_intel_uncore_event
diff --git a/src/plugins/perfmon/linux.c b/src/plugins/perfmon/linux.c
index 3715267266a..ef21f2d72fd 100644
--- a/src/plugins/perfmon/linux.c
+++ b/src/plugins/perfmon/linux.c
@@ -39,7 +39,12 @@ typedef enum
 
 static perfmon_event_t events[] = {
 #define _(n, s)                                                              \
-  [n] = { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##n, .name = s },
+  [n] = {                                                                    \
+    .type = PERF_TYPE_SOFTWARE,                                              \
+    .config = PERF_COUNT_SW_##n,                                             \
+    .name = s,                                                               \
+    .implemented = 1,                                                        \
+  },
   foreach_perf_sw_counter
 #undef _
 };
diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c
index 0643384957e..e618f9b314a 100644
--- a/src/plugins/perfmon/perfmon.c
+++ b/src/plugins/perfmon/perfmon.c
@@ -141,15 +141,19 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
       vec_validate (pm->group_fds, i);
       pm->group_fds[i] = -1;
+      u8 n_events_opened = 0;
 
       for (int j = 0; j < b->n_events; j++)
         {
           int fd;
           perfmon_event_t *e = s->events + b->events[j];
+          if (!e->implemented)
+            continue;
           struct perf_event_attr pe = {
             .size = sizeof (struct perf_event_attr),
             .type = e->type_from_instance ? in->type : e->type,
             .config = e->config,
+            .config1 = e->config1,
             .exclude_kernel = e->exclude_kernel,
             .read_format =
               (PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED |
@@ -157,6 +161,7 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
             .disabled = 1,
           };
 
+        perf_event_open:
           log_debug ("perf_event_open pe.type=%u pe.config=0x%x pid=%d "
                      "cpu=%d group_fd=%d",
                      pe.type, pe.config, in->pid, in->cpu, pm->group_fds[i]);
@@ -165,8 +170,17 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
 
           if (fd == -1)
             {
-              err = clib_error_return_unix (0, "perf_event_open");
-              goto error;
+              if (errno ==
+                  EOPNOTSUPP) /* 64b counters not supported on aarch64 */
+                {
+                  pe.config1 = 2; /* retry with 32b counter width */
+                  goto perf_event_open;
+                }
+              else
+                {
+                  err = clib_error_return_unix (0, "perf_event_open");
+                  goto error;
+                }
             }
 
           vec_add1 (pm->fds_to_close, fd);
@@ -178,23 +192,24 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
             {
               perfmon_thread_runtime_t *tr;
               tr = vec_elt_at_index (pm->thread_runtimes, i);
-              tr->mmap_pages[j] =
+              tr->mmap_pages[n_events_opened] =
                 mmap (0, page_size, PROT_READ, MAP_SHARED, fd, 0);
 
-              if (tr->mmap_pages[j] == MAP_FAILED)
+              if (tr->mmap_pages[n_events_opened] == MAP_FAILED)
                 {
                   err = clib_error_return_unix (0, "mmap");
                   goto error;
                 }
             }
+          n_events_opened++;
         }
 
-      if (is_node)
+      if (is_node && n_events_opened)
         {
           perfmon_thread_runtime_t *rt;
           rt = vec_elt_at_index (pm->thread_runtimes, i);
           rt->bundle = b;
-          rt->n_events = b->n_events;
+          rt->n_events = n_events_opened;
           rt->n_nodes = n_nodes;
           rt->preserve_samples = b->preserve_samples;
           vec_validate_aligned (rt->node_stats, n_nodes - 1,
diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h
index f08cc7a5dcb..b76cf4b2138 100644
--- a/src/plugins/perfmon/perfmon.h
+++ b/src/plugins/perfmon/perfmon.h
@@ -25,6 +25,8 @@
 
 #if defined(__x86_64__)
 #define PERF_MAX_EVENTS 12 /* 4 fixed and 8 programable on ICX */
+#elif defined(__aarch64__)
+#define PERF_MAX_EVENTS 7 /* 6 events + 1 CPU cycle counter */
 #endif
 
 typedef enum
@@ -63,6 +65,8 @@ typedef struct
 {
   u32 type_from_instance : 1;
   u32 exclude_kernel : 1;
+  u32 config1 : 2;
+  u32 implemented : 1;
   union
   {
     u32 type;
@@ -137,6 +141,11 @@ typedef struct perfmon_bundle
 
   u32 events[PERF_MAX_EVENTS];
   u32 n_events;
+  u32 n_columns;
+
+  uword *event_disabled;
+  uword *column_disabled;
+  u8 *column_events;
 
   u16 preserve_samples;
 
@@ -269,4 +278,7 @@ clib_error_t *perfmon_stop (vlib_main_t *vm);
 #define PERFMON_STRINGS(...)                                                 \
   (char *[]) { __VA_ARGS__, 0 }
 
+#define PERFMON_COLUMN_EVENTS(...)                                           \
+  (u8[]) { __VA_ARGS__ }
+
 #endif
diff --git a/src/vppinfra/bitops.h b/src/vppinfra/bitops.h
index 15454ca5036..7a4be3ce4c3 100644
--- a/src/vppinfra/bitops.h
+++ b/src/vppinfra/bitops.h
@@ -38,6 +38,9 @@
 #ifndef included_clib_bitops_h
 #define included_clib_bitops_h
 
+#define SET_BIT(i)    (1 << i)
+#define GET_BIT(n, i) (n >> i) & 1U
+
 static_always_inline uword
 clear_lowest_set_bit (uword x)
 {
diff --git a/src/vppinfra/cpu.h b/src/vppinfra/cpu.h
index d123f39871d..3c5e59e6e01 100644
--- a/src/vppinfra/cpu.h
+++ b/src/vppinfra/cpu.h
@@ -168,6 +168,8 @@ _ (sve, 22)
 u32 clib_get_current_cpu_id ();
 u32 clib_get_current_numa_node ();
 
+typedef int (*clib_cpu_supports_func_t) ();
+
 #if defined(__x86_64__)
 #include "cpuid.h"
 
@@ -183,8 +185,6 @@ clib_get_cpuid (const u32 lev, u32 * eax, u32 * ebx, u32 * ecx, u32 * edx)
   return 1;
 }
 
-typedef int (*clib_cpu_supports_func_t) ();
-
 #define _(flag, func, reg, bit) \
   static inline int \
   clib_cpu_supports_ ## flag() \