Diffstat (limited to 'src')
-rw-r--r--  src/plugins/perfmon/CMakeLists.txt           51
-rw-r--r--  src/plugins/perfmon/arm/dispatch_wrapper.c  142
-rw-r--r--  src/plugins/perfmon/arm/dispatch_wrapper.h   18
-rw-r--r--  src/plugins/perfmon/arm/events.c            227
-rw-r--r--  src/plugins/perfmon/arm/events.h            130
-rw-r--r--  src/plugins/perfmon/cli.c                    73
-rw-r--r--  src/plugins/perfmon/intel/core.c              1
-rw-r--r--  src/plugins/perfmon/intel/uncore.c            1
-rw-r--r--  src/plugins/perfmon/linux.c                   7
-rw-r--r--  src/plugins/perfmon/perfmon.c                27
-rw-r--r--  src/plugins/perfmon/perfmon.h                12
-rw-r--r--  src/vppinfra/bitops.h                         3
-rw-r--r--  src/vppinfra/cpu.h                            4
13 files changed, 659 insertions, 37 deletions
diff --git a/src/plugins/perfmon/CMakeLists.txt b/src/plugins/perfmon/CMakeLists.txt
index 59eddf01145..d7d4f372da1 100644
--- a/src/plugins/perfmon/CMakeLists.txt
+++ b/src/plugins/perfmon/CMakeLists.txt
@@ -11,30 +11,39 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
+if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*|aarch64.*")
return()
endif()
-list(APPEND ARCH_PMU_SOURCES
- intel/dispatch_wrapper.c
- intel/core.c
- intel/uncore.c
- intel/bundle/backend_bound_core.c
- intel/bundle/backend_bound_mem.c
- intel/bundle/branch_mispred.c
- intel/bundle/cache_hit_miss.c
- intel/bundle/frontend_bound_bw_src.c
- intel/bundle/frontend_bound_bw_uops.c
- intel/bundle/frontend_bound_lat.c
- intel/bundle/iio_bw.c
- intel/bundle/inst_and_clock.c
- intel/bundle/load_blocks.c
- intel/bundle/mem_bw.c
- intel/bundle/power_license.c
- intel/bundle/topdown_icelake.c
- intel/bundle/topdown_metrics.c
- intel/bundle/topdown_tremont.c
-)
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
+ list(APPEND ARCH_PMU_SOURCES
+ intel/dispatch_wrapper.c
+ intel/core.c
+ intel/uncore.c
+ intel/bundle/backend_bound_core.c
+ intel/bundle/backend_bound_mem.c
+ intel/bundle/branch_mispred.c
+ intel/bundle/cache_hit_miss.c
+ intel/bundle/frontend_bound_bw_src.c
+ intel/bundle/frontend_bound_bw_uops.c
+ intel/bundle/frontend_bound_lat.c
+ intel/bundle/iio_bw.c
+ intel/bundle/inst_and_clock.c
+ intel/bundle/load_blocks.c
+ intel/bundle/mem_bw.c
+ intel/bundle/power_license.c
+ intel/bundle/topdown_icelake.c
+ intel/bundle/topdown_metrics.c
+ intel/bundle/topdown_tremont.c
+ )
+endif()
+
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64.*")
+ list(APPEND ARCH_PMU_SOURCES
+ arm/dispatch_wrapper.c
+ arm/events.c
+ )
+endif()
add_vpp_plugin(perfmon
SOURCES
diff --git a/src/plugins/perfmon/arm/dispatch_wrapper.c b/src/plugins/perfmon/arm/dispatch_wrapper.c
new file mode 100644
index 00000000000..df79bcd8631
--- /dev/null
+++ b/src/plugins/perfmon/arm/dispatch_wrapper.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <linux/limits.h>
+#include <sys/ioctl.h>
+
+#include <perfmon/perfmon.h>
+
+#define barrier() asm volatile("dmb ish" : : : "memory");
+
+typedef int64_t s64;
+
+static_always_inline u64
+get_pmc_register (u32 pmc_idx)
+{
+ u64 value = 0;
+ if (pmc_idx == 31)
+ /* i.e. CPU Cycle event code 0x11 - need to read via pmccntr_el0 */
+ asm volatile("mrs %x0, pmccntr_el0" : "=r"(value));
+ else
+ {
+ /* set event register 0x0-0x1F */
+ asm volatile("msr pmselr_el0, %x0" : : "r"((pmc_idx)));
+ /* get register value */
+ asm volatile("mrs %x0, pmxevcntr_el0" : "=r"(value));
+ }
+ asm volatile("isb" : : : "memory");
+ return value;
+}
+
+static_always_inline u64
+read_pmc_from_mmap (struct perf_event_mmap_page *pc)
+{
+ u32 seq, idx, width;
+ u64 offset = 0;
+ s64 pmc = 0;
+
+ do
+ {
+ seq = pc->lock;
+ barrier ();
+ idx = pc->index;
+ offset = pc->offset;
+ if (pc->cap_user_rdpmc && idx)
+ {
+ width = pc->pmc_width;
+ pmc = get_pmc_register (idx - 1);
+ /* for 32 bit regs, left shift 32b to zero/discard the top bits */
+ pmc <<= 64 - width;
+ pmc >>= 64 - width;
+ }
+ barrier ();
+ }
+ while (pc->lock != seq);
+
+ return pmc + offset;
+}
+
+static_always_inline void
+perfmon_read_pmcs (u64 *counters, perfmon_thread_runtime_t *rt, u8 n_counters)
+{
+ switch (n_counters)
+ {
+ default:
+ case 7:
+ counters[6] = read_pmc_from_mmap (rt->mmap_pages[6]);
+ case 6:
+ counters[5] = read_pmc_from_mmap (rt->mmap_pages[5]);
+ case 5:
+ counters[4] = read_pmc_from_mmap (rt->mmap_pages[4]);
+ case 4:
+ counters[3] = read_pmc_from_mmap (rt->mmap_pages[3]);
+ case 3:
+ counters[2] = read_pmc_from_mmap (rt->mmap_pages[2]);
+ case 2:
+ counters[1] = read_pmc_from_mmap (rt->mmap_pages[1]);
+ case 1:
+ counters[0] = read_pmc_from_mmap (rt->mmap_pages[0]);
+ break;
+ }
+}
+
+uword
+perfmon_dispatch_wrapper (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ perfmon_main_t *pm = &perfmon_main;
+ perfmon_thread_runtime_t *rt =
+ vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
+ perfmon_node_stats_t *s =
+ vec_elt_at_index (rt->node_stats, node->node_index);
+ u8 n_events = rt->n_events;
+ u64 before[n_events];
+ u64 after[n_events];
+
+ uword rv;
+
+ clib_prefetch_load (s);
+
+ perfmon_read_pmcs (before, rt, n_events);
+ rv = node->function (vm, node, frame);
+ perfmon_read_pmcs (after, rt, n_events);
+
+ if (rv == 0)
+ return rv;
+
+ s->n_calls += 1;
+ s->n_packets += rv;
+
+ for (int i = 0; i < n_events; i++)
+ {
+ s->value[i] += after[i] - before[i];
+ }
+
+ return rv;
+}
+
+clib_error_t *
+arm_config_dispatch_wrapper (perfmon_bundle_t *b,
+ vlib_node_function_t **dispatch_wrapper)
+{
+ (*dispatch_wrapper) = perfmon_dispatch_wrapper;
+ return 0;
+}
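
The dispatch wrapper above only covers the read path; the perf_event file
descriptors and mmap'd pages it relies on are set up later in perfmon.c. As a
rough, self-contained illustration of the underlying mechanism (not part of
this patch; the event code 0x11 and all error handling are purely
illustrative), the sketch below opens one raw PMUv3 event with userspace
access requested via config1 and checks that the kernel granted direct
counter reads:

/* Minimal sketch, assuming an aarch64 kernel >= 5.17 with
 * kernel/perf_user_access=1. Opens one raw PMUv3 event asking for a
 * userspace-readable 64-bit counter (config1 = 0x3) and inspects the
 * mmap'd perf_event page that read_pmc_from_mmap() above consumes. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static long
sys_perf_event_open (struct perf_event_attr *attr, pid_t pid, int cpu,
		     int group_fd, unsigned long flags)
{
  return syscall (__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int
main (void)
{
  struct perf_event_attr pe;
  memset (&pe, 0, sizeof (pe));
  pe.size = sizeof (pe);
  pe.type = PERF_TYPE_RAW;
  pe.config = 0x11;  /* CPU_CYCLES, illustrative */
  pe.config1 = 0x3;  /* user access enabled, 64-bit counter */
  pe.exclude_kernel = 1;

  int fd = sys_perf_event_open (&pe, 0, -1, -1, 0);
  if (fd < 0)
    return 1;

  struct perf_event_mmap_page *pc =
    mmap (0, sysconf (_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0);
  if (pc == MAP_FAILED)
    return 1;

  /* cap_user_rdpmc set means the seqlock loop above may read the PMC
   * directly; index is the 1-based counter number it should use */
  printf ("cap_user_rdpmc=%u index=%u pmc_width=%u\n", pc->cap_user_rdpmc,
	  pc->index, pc->pmc_width);
  return 0;
}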
diff --git a/src/plugins/perfmon/arm/dispatch_wrapper.h b/src/plugins/perfmon/arm/dispatch_wrapper.h
new file mode 100644
index 00000000000..903971f8b5e
--- /dev/null
+++ b/src/plugins/perfmon/arm/dispatch_wrapper.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+clib_error_t *
+arm_config_dispatch_wrapper (perfmon_bundle_t *b,
+ vlib_node_function_t **dispatch_wrapper);
diff --git a/src/plugins/perfmon/arm/events.c b/src/plugins/perfmon/arm/events.c
new file mode 100644
index 00000000000..bf73ad6e896
--- /dev/null
+++ b/src/plugins/perfmon/arm/events.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+#include <perfmon/arm/dispatch_wrapper.h>
+#include <linux/perf_event.h>
+#include <dirent.h>
+
+VLIB_REGISTER_LOG_CLASS (if_default_log, static) = {
+ .class_name = "perfmon",
+};
+
+#define log_debug(fmt, ...) \
+ vlib_log_debug (if_default_log.class, fmt, __VA_ARGS__)
+#define log_warn(fmt, ...) \
+ vlib_log_warn (if_default_log.class, fmt, __VA_ARGS__)
+#define log_err(fmt, ...) vlib_log_err (if_default_log.class, fmt, __VA_ARGS__)
+
+/*
+ * config1 = 2 : user access enabled and always 32-bit
+ * config1 = 3 : user access enabled and always 64-bit
+ *
+ * Since there is no discovery into whether 64b counters are supported
+ * or not, first attempt to request 64b counters, then fall back to
+ * 32b if perf_event_open returns EOPNOTSUPP
+ */
+static perfmon_event_t events[] = {
+#define _(event, n, desc) \
+ [ARMV8_PMUV3_##n] = { \
+ .type = PERF_TYPE_RAW, \
+ .config = event, \
+ .config1 = 3, \
+ .name = #n, \
+ .description = desc, \
+ .exclude_kernel = 1, \
+ },
+ foreach_perf_arm_event
+#undef _
+};
+
+u8 *
+format_arm_config (u8 *s, va_list *args)
+{
+ u64 config = va_arg (*args, u64);
+
+ s = format (s, "event=0x%02x", config & 0xff);
+
+ return s;
+}
+
+static clib_error_t *
+arm_init (vlib_main_t *vm, perfmon_source_t *src)
+{
+ clib_error_t *err;
+
+ /*
+ check /proc/sys/kernel/perf_user_access flag to check if userspace
+ access to perf counters is enabled (disabled by default)
+ - if this file doesn't exist, we are on an unsupported kernel ver
+ - if the file exists and is 0, user access needs to be granted
+ with 'sudo sysctl kernel/perf_user_access=1'
+ */
+ u8 perf_user_access_enabled;
+ char *path = "/proc/sys/kernel/perf_user_access";
+ err = clib_sysfs_read (path, "%u", &perf_user_access_enabled);
+ if (err)
+ {
+ if (err->code == ENOENT) /* No such file or directory */
+ {
+ return clib_error_create (
+ "linux kernel version is unsupported, please upgrade to v5.17+ "
+ "- user access to perf counters is not possible");
+ }
+ return clib_error_return_unix (0, "failed to read: %s", path);
+ }
+
+ if (perf_user_access_enabled == 1)
+ log_debug ("user access to perf counters is enabled in %s", path);
+ else
+ {
+ return clib_error_create (
+ "user access to perf counters is not enabled: run"
+ " \'sudo sysctl kernel/perf_user_access=1\'");
+ }
+
+ /*
+ perfmon/arm/events.h has up to 0xFF/256 possible PMUv3 event codes
+ supported - create a bitmap to store whether each event is
+ implemented or not
+ */
+ uword *bitmap = NULL;
+ clib_bitmap_alloc (bitmap, 256);
+
+ struct dirent *dir_entry;
+ const char *event_path =
+ "/sys/bus/event_source/devices/armv8_pmuv3_0/events";
+ DIR *event_dir = opendir (event_path);
+
+ if (event_dir == NULL)
+ {
+ err =
+ clib_error_return_unix (0, "error listing directory: %s", event_path);
+ log_err ("%U", format_clib_error, err);
+ return err;
+ }
+
+ while ((dir_entry = readdir (event_dir)) != NULL)
+ {
+ if (dir_entry->d_name[0] != '.')
+ {
+ u8 *s = NULL;
+ u8 *tmpstr = NULL;
+ unformat_input_t input;
+ u32 config;
+
+ s = format (s, "%s/%s%c", event_path, dir_entry->d_name, 0);
+ err = clib_sysfs_read ((char *) s, "%s", &tmpstr);
+ if (err)
+ {
+ log_err ("%U", format_clib_error, err);
+ continue;
+ }
+ unformat_init_vector (&input, tmpstr);
+ if (unformat (&input, "event=0x%x", &config))
+ {
+		  /* it's possible to have event codes up to 0xFFFF */
+ if (config < 0xFF) /* perfmon supports < 0xFF */
+ {
+ clib_bitmap_set (bitmap, config, 1);
+ }
+ log_debug ("found supported event in sysfs: %s \'%s\' 0x%x",
+ dir_entry->d_name, tmpstr, config);
+ }
+ else
+ {
+ err = clib_error_create ("error parsing event: %s %s",
+ dir_entry->d_name, tmpstr);
+ log_err ("%U", format_clib_error, err);
+ continue;
+ }
+ }
+ }
+ closedir (event_dir);
+
+ for (int i = 0; i < ARRAY_LEN (events); i++)
+ {
+ if (clib_bitmap_get (bitmap, events[i].config))
+ events[i].implemented = 1;
+ }
+ clib_bitmap_free (bitmap);
+
+ return 0;
+}
+
+u8
+arm_bundle_supported (perfmon_bundle_t *b)
+{
+ clib_bitmap_alloc (b->event_disabled, b->n_events);
+ for (u32 i = 0; i < b->n_events; i++)
+ {
+ perfmon_event_t *e = b->src->events + b->events[i];
+ if (!e->implemented)
+ {
+ log_debug (
+ "bundle \'%s\': perf event %s is not implemented on this CPU",
+ b->name, e->name);
+ clib_bitmap_set (b->event_disabled, i, 1);
+ }
+ }
+
+ /* if no events are implemented, fail and do not register bundle */
+ if (clib_bitmap_count_set_bits (b->event_disabled) == b->n_events)
+ {
+ return 0;
+ }
+
+ /* disable columns that use unimplemented events */
+ clib_bitmap_alloc (b->column_disabled, b->n_columns);
+ if (b->column_events)
+ {
+ u32 disabled_event;
+ /* iterate through set bits */
+ clib_bitmap_foreach (disabled_event, b->event_disabled)
+ {
+ for (u32 j = 0; j < b->n_columns; j++)
+ {
+ if (clib_bitmap_get (b->column_disabled, j))
+ continue;
+ if (GET_BIT (b->column_events[j], disabled_event))
+ {
+ clib_bitmap_set (b->column_disabled, j, 1);
+ log_debug (
+ "bundle \'%s\': disabling column %d as event unsupported",
+ b->name, j);
+ }
+ }
+ }
+ }
+
+ return 1;
+}
+
+PERFMON_REGISTER_SOURCE (arm) = {
+ .name = "arm",
+ .description = "Arm PMU events",
+ .events = events,
+ .n_events = ARRAY_LEN (events),
+ .init_fn = arm_init,
+ .format_config = format_arm_config,
+ .bundle_support = arm_bundle_supported,
+ .config_dispatch_wrapper = arm_config_dispatch_wrapper,
+};
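
arm_init() above builds its "implemented" bitmap by reading every file under
/sys/bus/event_source/devices/armv8_pmuv3_0/events; each file holds a string
such as "event=0x11". A compressed sketch of that parse in plain C (helper
name, fixed 256-entry table and the missing error reporting are illustrative,
not from this patch):

/* Read one sysfs event file, e.g. ".../events/cpu_cycles" containing
 * "event=0x11", and mark that event code as implemented. */
#include <stdio.h>

static void
mark_event_implemented (const char *path, unsigned char implemented[256])
{
  FILE *f = fopen (path, "r");
  unsigned int code;

  if (f == NULL)
    return;
  if (fscanf (f, "event=0x%x", &code) == 1 && code < 256)
    implemented[code] = 1;
  fclose (f);
}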
diff --git a/src/plugins/perfmon/arm/events.h b/src/plugins/perfmon/arm/events.h
new file mode 100644
index 00000000000..5b7c49801d0
--- /dev/null
+++ b/src/plugins/perfmon/arm/events.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __perfmon_arm_h
+#define __perfmon_arm_h
+
+/*
+ * Events from the Armv8 PMUv3 - See "Arm Architecture Reference Manual Armv8,
+ * for Armv8-A architecture profile" D7.10 PMU events and event numbers:
+ * https://developer.arm.com/documentation/ddi0487/latest/
+ * EventCode, name, description
+ */
+#define foreach_perf_arm_event \
+ _ (0x0D, BR_IMMED_RETIRED, "Immediate branch architecturally executed") \
+ _ (0x10, BR_MIS_PRED, \
+ "Mispredicted or not predicted branch Speculatively executed") \
+ _ (0x22, BR_MIS_PRED_RETIRED, \
+ "Instruction architecturally executed, mispredicted branch") \
+ _ (0x12, BR_PRED, "Predictable branch Speculatively executed") \
+ _ (0x21, BR_RETIRED, "Branch instruction architecturally executed") \
+ _ (0x0E, BR_RETURN_RETIRED, \
+ "Function return instruction architecturally executed and the " \
+ "condition code check pass") \
+ _ (0x19, BUS_ACCESS, "Attributable Bus access") \
+ _ (0x1D, BUS_CYCLES, "Bus cycle") \
+ _ (0x1E, CHAIN, \
+     "For an odd numbered counter, increment when an overflow occurs on "    \
+ "the preceding even-numbered counter on the same PE") \
+ _ (0x0B, CID_WRITE_RETIRED, \
+ "Instruction architecturally executed, Condition code check pass, " \
+ "write to CONTEXTIDR") \
+ _ (0x11, CPU_CYCLES, "Cycle counter") \
+ _ (0x34, DTLB_WALK, \
+ "Access to data or unified TLB causes a translation table walk") \
+ _ (0x0A, EXC_RETURN, \
+ "Exception return instruction architecturally executed and the " \
+ "condition code check pass") \
+ _ (0x09, EXC_TAKEN, "Exception entry") \
+ _ (0x08, INST_RETIRED, "Instruction architecturally executed") \
+ _ (0x1B, INST_SPEC, "Operation Speculatively executed") \
+ _ (0x35, ITLB_WALK, \
+ "Access to instruction TLB that causes a translation table walk") \
+ _ (0x04, L1D_CACHE, "Level 1 data cache access") \
+ _ (0x1F, L1D_CACHE_ALLOCATE, \
+ "Level 1 data cache allocation without refill") \
+ _ (0x39, L1D_CACHE_LMISS_RD, "Level 1 data cache long-latency read miss") \
+ _ (0x03, L1D_CACHE_REFILL, "Level 1 data cache refill") \
+ _ (0x15, L1D_CACHE_WB, "Attributable Level 1 data cache write-back") \
+ _ (0x25, L1D_TLB, "Level 1 data or unified TLB access") \
+ _ (0x05, L1D_TLB_REFILL, "Level 1 data or unified TLB refill") \
+ _ (0x14, L1I_CACHE, "Level 1 instruction cache access") \
+ _ (0x01, L1I_CACHE_REFILL, "Level 1 instruction cache refill") \
+ _ (0x26, L1I_TLB, "Level 1 instruction TLB access") \
+ _ (0x02, L1I_TLB_REFILL, "Level 1 instruction TLB refill") \
+ _ (0x16, L2D_CACHE, "Level 2 data cache access") \
+ _ (0x20, L2D_CACHE_ALLOCATE, \
+ "Level 2 data cache allocation without refill") \
+ _ (0x17, L2D_CACHE_REFILL, "Level 2 data cache refill") \
+ _ (0x18, L2D_CACHE_WB, "Attributable Level 2 data cache write-back") \
+ _ (0x2F, L2D_TLB, "Level 2 data or unified TLB access") \
+ _ (0x2D, L2D_TLB_REFILL, "Level 2 data or unified TLB refill") \
+ _ (0x27, L2I_CACHE, "Level 2 instruction cache access") \
+ _ (0x28, L2I_CACHE_REFILL, "Attributable Level 2 instruction cache refill") \
+ _ (0x30, L2I_TLB, "Level 2 instruction TLB access") \
+ _ (0x2E, L2I_TLB_REFILL, "Level 2 instruction TLB refill") \
+ _ (0x2B, L3D_CACHE, "Level 3 data cache access") \
+ _ (0x29, L3D_CACHE_ALLOCATE, \
+ "Level 3 data cache allocation without refill") \
+ _ (0x2A, L3D_CACHE_REFILL, "Attributable Level 3 data cache refill") \
+ _ (0x2C, L3D_CACHE_WB, "Attributable Level 3 data cache write-back") \
+ _ (0x06, LD_RETIRED, \
+ "Memory-reading instruction architecturally executed and condition" \
+ " code check pass") \
+ _ (0x32, LL_CACHE, "Last Level cache access") \
+ _ (0x33, LL_CACHE_MISS, "Last Level cache miss") \
+ _ (0x37, LL_CACHE_MISS_RD, "Last level cache miss, read") \
+ _ (0x36, LL_CACHE_RD, "Last level data cache access, read") \
+ _ (0x1A, MEMORY_ERROR, "Local memory error") \
+ _ (0x13, MEM_ACCESS, "Data memory access") \
+ _ (0x3A, OP_RETIRED, "Micro-operation architecturally executed") \
+ _ (0x3B, OP_SPEC, "Micro-operation Speculatively executed") \
+ _ (0x0C, PC_WRITE_RETIRED, \
+ "Software change to the Program Counter (PC). Instruction is " \
+ "architecturally executed and condition code check pass") \
+ _ (0x31, REMOTE_ACCESS, \
+ "Access to another socket in a multi-socket system") \
+ _ (0x38, REMOTE_ACCESS_RD, \
+ "Access to another socket in a multi-socket system, read") \
+ _ (0x3C, STALL, "No operation sent for execution") \
+ _ (0x24, STALL_BACKEND, "No operation issued due to the backend") \
+ _ (0x23, STALL_FRONTEND, "No operation issued due to the frontend") \
+ _ (0x3F, STALL_SLOT, "No operation sent for execution on a Slot") \
+ _ (0x3D, STALL_SLOT_BACKEND, \
+ "No operation sent for execution on a Slot due to the backend") \
+ _ (0x3E, STALL_SLOT_FRONTEND, \
+ "No operation sent for execution on a Slot due to the frontend") \
+ _ (0x07, ST_RETIRED, \
+ "Memory-writing instruction architecturally executed and condition" \
+ " code check pass") \
+ _ (0x00, SW_INCR, \
+ "Instruction architecturally executed, Condition code check pass, " \
+ "software increment") \
+ _ (0x1C, TTBR_WRITE_RETIRED, \
+ "Instruction architecturally executed, Condition code check pass, " \
+ "write to TTBR") \
+ _ (0x0F, UNALIGNED_LDST_RETIRED, \
+     "Unaligned memory-reading or memory-writing instruction "               \
+ "architecturally executed and condition code check pass")
+
+typedef enum
+{
+#define _(event, n, desc) ARMV8_PMUV3_##n,
+ foreach_perf_arm_event
+#undef _
+ ARM_N_EVENTS,
+} perf_arm_event_t;
+
+#endif
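
For readers unfamiliar with the X-macro pattern used in this header: each
_ (event, n, desc) entry expands once into the ARMV8_PMUV3_* enum above and
once into the events[] table in arm/events.c (earlier in this diff). As an
illustration, the CPU_CYCLES entry expands to roughly:

/* enum expansion in events.h */
ARMV8_PMUV3_CPU_CYCLES,

/* table expansion in events.c */
[ARMV8_PMUV3_CPU_CYCLES] = {
  .type = PERF_TYPE_RAW,
  .config = 0x11,
  .config1 = 3,
  .name = "CPU_CYCLES",
  .description = "Cycle counter",
  .exclude_kernel = 1,
},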
diff --git a/src/plugins/perfmon/cli.c b/src/plugins/perfmon/cli.c
index aa5b5636235..0cdc4dba073 100644
--- a/src/plugins/perfmon/cli.c
+++ b/src/plugins/perfmon/cli.c
@@ -413,18 +413,77 @@ show_perfmon_stats_command_fn (vlib_main_t *vm, unformat_input_t *input,
vm, j, b->active_type);
table_set_cell_align (t, col, -1, TTAA_RIGHT);
table_set_cell_fg_color (t, col, -1, TTAC_CYAN);
- clib_memcpy_fast (&ns, tr->node_stats + j, sizeof (ns));
+
+ if (PREDICT_TRUE (clib_bitmap_is_zero (b->event_disabled)))
+ clib_memcpy_fast (&ns, tr->node_stats + j, sizeof (ns));
+ /* if some events are not implemented, we need to realign these
+ to display under the correct column headers */
+ else
+ {
+ perfmon_node_stats_t *tr_ns = tr->node_stats + j;
+ ns.n_calls = tr_ns->n_calls;
+ ns.n_packets = tr_ns->n_packets;
+ /* loop through all events in bundle + manually copy into
+ the correct place, until we've read all values that are
+ implemented */
+ int num_enabled_events =
+ b->n_events -
+ clib_bitmap_count_set_bits (b->event_disabled);
+ for (int i = 0, k = 0; k < num_enabled_events; i++)
+ {
+ if (!clib_bitmap_get (b->event_disabled, i))
+ {
+ ns.value[i] = tr_ns->value[k];
+ k++;
+ }
+ }
+ }
for (int j = 0; j < n_row; j++)
- table_format_cell (t, col, j, "%U", b->format_fn, &ns, j,
- b->active_type);
+ {
+ if (clib_bitmap_get (b->column_disabled, j))
+ table_format_cell (t, col, j, "-");
+ else
+ table_format_cell (t, col, j, "%U", b->format_fn, &ns, j,
+ b->active_type);
+ }
}
}
- else
+ else /* b->type != PERFMON_BUNDLE_TYPE_NODE */
{
- for (int j = 0; j < n_row; j++)
- table_format_cell (t, i, j, "%U", b->format_fn, r, j,
- b->active_type);
+ if (PREDICT_TRUE (clib_bitmap_is_zero (b->event_disabled)))
+ {
+ for (int j = 0; j < n_row; j++)
+ table_format_cell (t, i, j, "%U", b->format_fn, r, j,
+ b->active_type);
+ }
+ /* similarly for THREAD/SYSTEM bundles, if some events are not
+ implemented, we need to realign readings under column headings */
+ else
+ {
+ perfmon_reading_t aligned_r[b->n_events];
+ aligned_r->nr = r->nr;
+ aligned_r->time_enabled = r->time_enabled;
+ aligned_r->time_running = r->time_running;
+ int num_enabled_events =
+ b->n_events - clib_bitmap_count_set_bits (b->event_disabled);
+ for (int i = 0, k = 0; k < num_enabled_events; i++)
+ {
+ if (!clib_bitmap_get (b->event_disabled, i))
+ {
+ aligned_r->value[i] = r->value[k];
+ k++;
+ }
+ }
+ for (int j = 0; j < n_row; j++)
+ {
+ if (clib_bitmap_get (b->column_disabled, j))
+ table_format_cell (t, col, j, "-");
+ else
+ table_format_cell (t, i, j, "%U", b->format_fn, aligned_r,
+ j, b->active_type);
+ }
+ }
}
col++;
}
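
Both branches above perform the same realignment: perf returns a compacted
reading that contains only the counters that were actually opened, while the
format functions index values by the bundle's full event list, so each
compacted value must be copied back to the slot of its original event index.
A minimal standalone sketch of that mapping, using plain arrays instead of
clib bitmaps (names hypothetical):

/* Copy compacted counter readings back to per-event slots, leaving
 * zeroes for events disabled because the CPU does not implement them. */
static void
realign_readings (const unsigned char *event_disabled, int n_events,
		  const unsigned long long *compact, unsigned long long *full)
{
  int k = 0; /* index into the compacted perf reading */

  for (int i = 0; i < n_events; i++)
    {
      if (event_disabled[i])
	full[i] = 0;
      else
	full[i] = compact[k++];
    }
}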
diff --git a/src/plugins/perfmon/intel/core.c b/src/plugins/perfmon/intel/core.c
index 5c4c336e2e8..d6a16b2125e 100644
--- a/src/plugins/perfmon/intel/core.c
+++ b/src/plugins/perfmon/intel/core.c
@@ -26,6 +26,7 @@ static perfmon_event_t events[] = {
event, umask, edge, any, inv, cmask), \
.name = #n "." #suffix, \
.description = desc, \
+ .implemented = 1, \
.exclude_kernel = 1 },
foreach_perf_intel_core_event foreach_perf_intel_peusdo_event
diff --git a/src/plugins/perfmon/intel/uncore.c b/src/plugins/perfmon/intel/uncore.c
index ac5580a3e62..a709a8d40be 100644
--- a/src/plugins/perfmon/intel/uncore.c
+++ b/src/plugins/perfmon/intel/uncore.c
@@ -49,6 +49,7 @@ static perfmon_event_t intel_uncore_events[] = {
.description = desc, \
.type_from_instance = 1, \
.instance_type = INTEL_UNCORE_UNIT_##unit, \
+ .implemented = 1, \
},
foreach_intel_uncore_event
diff --git a/src/plugins/perfmon/linux.c b/src/plugins/perfmon/linux.c
index 3715267266a..ef21f2d72fd 100644
--- a/src/plugins/perfmon/linux.c
+++ b/src/plugins/perfmon/linux.c
@@ -39,7 +39,12 @@ typedef enum
static perfmon_event_t events[] = {
#define _(n, s) \
- [n] = { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##n, .name = s },
+ [n] = { \
+ .type = PERF_TYPE_SOFTWARE, \
+ .config = PERF_COUNT_SW_##n, \
+ .name = s, \
+ .implemented = 1, \
+ },
foreach_perf_sw_counter
#undef _
};
diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c
index 0643384957e..e618f9b314a 100644
--- a/src/plugins/perfmon/perfmon.c
+++ b/src/plugins/perfmon/perfmon.c
@@ -141,15 +141,19 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
vec_validate (pm->group_fds, i);
pm->group_fds[i] = -1;
+ u8 n_events_opened = 0;
for (int j = 0; j < b->n_events; j++)
{
int fd;
perfmon_event_t *e = s->events + b->events[j];
+ if (!e->implemented)
+ continue;
struct perf_event_attr pe = {
.size = sizeof (struct perf_event_attr),
.type = e->type_from_instance ? in->type : e->type,
.config = e->config,
+ .config1 = e->config1,
.exclude_kernel = e->exclude_kernel,
.read_format =
(PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED |
@@ -157,6 +161,7 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
.disabled = 1,
};
+ perf_event_open:
log_debug ("perf_event_open pe.type=%u pe.config=0x%x pid=%d "
"cpu=%d group_fd=%d",
pe.type, pe.config, in->pid, in->cpu, pm->group_fds[i]);
@@ -165,8 +170,17 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
if (fd == -1)
{
- err = clib_error_return_unix (0, "perf_event_open");
- goto error;
+ if (errno ==
+ EOPNOTSUPP) /* 64b counters not supported on aarch64 */
+ {
+ pe.config1 = 2; /* retry with 32b counter width */
+ goto perf_event_open;
+ }
+ else
+ {
+ err = clib_error_return_unix (0, "perf_event_open");
+ goto error;
+ }
}
vec_add1 (pm->fds_to_close, fd);
@@ -178,23 +192,24 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
{
perfmon_thread_runtime_t *tr;
tr = vec_elt_at_index (pm->thread_runtimes, i);
- tr->mmap_pages[j] =
+ tr->mmap_pages[n_events_opened] =
mmap (0, page_size, PROT_READ, MAP_SHARED, fd, 0);
- if (tr->mmap_pages[j] == MAP_FAILED)
+ if (tr->mmap_pages[n_events_opened] == MAP_FAILED)
{
err = clib_error_return_unix (0, "mmap");
goto error;
}
}
+ n_events_opened++;
}
- if (is_node)
+ if (is_node && n_events_opened)
{
perfmon_thread_runtime_t *rt;
rt = vec_elt_at_index (pm->thread_runtimes, i);
rt->bundle = b;
- rt->n_events = b->n_events;
+ rt->n_events = n_events_opened;
rt->n_nodes = n_nodes;
rt->preserve_samples = b->preserve_samples;
vec_validate_aligned (rt->node_stats, n_nodes - 1,
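
The perf_event_open retry above narrows config1 from 3 (64-bit userspace
counter) to 2 (32-bit) when the PMU driver rejects the 64-bit request with
EOPNOTSUPP. A condensed sketch of the same fallback outside the VPP group and
instance machinery (attribute values as in this patch, wrapper name
hypothetical):

#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* Try a 64-bit userspace-readable counter first; if the driver only
 * supports 32-bit userspace counters, retry with config1 = 2. */
static int
open_arm_event_with_fallback (struct perf_event_attr *pe, pid_t pid, int cpu)
{
  pe->config1 = 3;
  int fd = syscall (__NR_perf_event_open, pe, pid, cpu, -1, 0);

  if (fd == -1 && errno == EOPNOTSUPP)
    {
      pe->config1 = 2;
      fd = syscall (__NR_perf_event_open, pe, pid, cpu, -1, 0);
    }
  return fd;
}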
diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h
index f08cc7a5dcb..b76cf4b2138 100644
--- a/src/plugins/perfmon/perfmon.h
+++ b/src/plugins/perfmon/perfmon.h
@@ -25,6 +25,8 @@
#if defined(__x86_64__)
#define PERF_MAX_EVENTS 12 /* 4 fixed and 8 programable on ICX */
+#elif defined(__aarch64__)
+#define PERF_MAX_EVENTS 7 /* 6 events + 1 CPU cycle counter */
#endif
typedef enum
@@ -63,6 +65,8 @@ typedef struct
{
u32 type_from_instance : 1;
u32 exclude_kernel : 1;
+ u32 config1 : 2;
+ u32 implemented : 1;
union
{
u32 type;
@@ -137,6 +141,11 @@ typedef struct perfmon_bundle
u32 events[PERF_MAX_EVENTS];
u32 n_events;
+ u32 n_columns;
+
+ uword *event_disabled;
+ uword *column_disabled;
+ u8 *column_events;
u16 preserve_samples;
@@ -269,4 +278,7 @@ clib_error_t *perfmon_stop (vlib_main_t *vm);
#define PERFMON_STRINGS(...) \
(char *[]) { __VA_ARGS__, 0 }
+#define PERFMON_COLUMN_EVENTS(...) \
+ (u8[]) { __VA_ARGS__ }
+
#endif
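
The new n_columns / column_events fields let a bundle declare which events
feed each display column, so arm_bundle_supported() and the CLI can blank out
columns whose events are unimplemented on the running CPU. A hypothetical
fragment of a bundle definition using the new macro (event choice and column
layout invented for illustration; only fields added by this patch are shown,
and SET_BIT comes from the vppinfra/bitops.h change below):

/* column 0 shows cycles, column 1 derives a value from both events,
 * so column 1 is disabled if either event is unimplemented */
  .events[0] = ARMV8_PMUV3_CPU_CYCLES,
  .events[1] = ARMV8_PMUV3_INST_RETIRED,
  .n_events = 2,
  .n_columns = 2,
  .column_events = PERFMON_COLUMN_EVENTS (SET_BIT (0), SET_BIT (0) | SET_BIT (1)),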
diff --git a/src/vppinfra/bitops.h b/src/vppinfra/bitops.h
index 15454ca5036..7a4be3ce4c3 100644
--- a/src/vppinfra/bitops.h
+++ b/src/vppinfra/bitops.h
@@ -38,6 +38,9 @@
#ifndef included_clib_bitops_h
#define included_clib_bitops_h
+#define SET_BIT(i) (1 << i)
+#define GET_BIT(n, i) (n >> i) & 1U
+
static_always_inline uword
clear_lowest_set_bit (uword x)
{
diff --git a/src/vppinfra/cpu.h b/src/vppinfra/cpu.h
index d123f39871d..3c5e59e6e01 100644
--- a/src/vppinfra/cpu.h
+++ b/src/vppinfra/cpu.h
@@ -168,6 +168,8 @@ _ (sve, 22)
u32 clib_get_current_cpu_id ();
u32 clib_get_current_numa_node ();
+typedef int (*clib_cpu_supports_func_t) ();
+
#if defined(__x86_64__)
#include "cpuid.h"
@@ -183,8 +185,6 @@ clib_get_cpuid (const u32 lev, u32 * eax, u32 * ebx, u32 * ecx, u32 * edx)
return 1;
}
-typedef int (*clib_cpu_supports_func_t) ();
-
#define _(flag, func, reg, bit) \
static inline int \
clib_cpu_supports_ ## flag() \