aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/perfmon/intel/bundle
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/perfmon/intel/bundle')
-rw-r--r-- src/plugins/perfmon/intel/bundle/backend_bound_core.c 100
-rw-r--r-- src/plugins/perfmon/intel/bundle/backend_bound_mem.c 102
-rw-r--r-- src/plugins/perfmon/intel/bundle/frontend_bound_bw_src.c 90
-rw-r--r-- src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c 89
-rw-r--r-- src/plugins/perfmon/intel/bundle/frontend_bound_lat.c 99
-rw-r--r-- src/plugins/perfmon/intel/bundle/iio_bw.c 263
-rw-r--r-- src/plugins/perfmon/intel/bundle/membw_bound.c 59
-rw-r--r-- src/plugins/perfmon/intel/bundle/topdown_icelake.c 176
-rw-r--r-- src/plugins/perfmon/intel/bundle/topdown_metrics.c 172
-rw-r--r-- src/plugins/perfmon/intel/bundle/topdown_tremont.c 85
10 files changed, 1130 insertions, 105 deletions
diff --git a/src/plugins/perfmon/intel/bundle/backend_bound_core.c b/src/plugins/perfmon/intel/bundle/backend_bound_core.c
new file mode 100644
index 00000000000..16905235119
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/backend_bound_core.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+/* Positions of the counters inside perfmon_node_stats_t.value[]; the
+ * order mirrors the .events[] list of the td-backend-core bundle. */
+enum
+{
+  PORT0,       /* value[0] */
+  PORT1,       /* value[1] */
+  PORT5,       /* value[2] */
+  PORT6,       /* value[3] */
+  PORT2_3,     /* value[4] */
+  PORT4_9,     /* value[5] */
+  PORT7_8,     /* value[6] */
+  DISTRIBUTED, /* value[7] */
+};
+
+/*
+ * Format one column of the td-backend-core bundle for a graph node.
+ *
+ * args: perfmon_node_stats_t * (per-node counters), int (column index).
+ * Column 0 is DISTRIBUTED cycles per packet; columns 1-6 are port
+ * dispatch counts expressed as a percentage of DISTRIBUTED cycles.
+ * Nothing is appended when the node processed no packets.
+ */
+static u8 *
+format_intel_backend_bound_core (u8 *s, va_list *args)
+{
+  perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+  int row = va_arg (*args, int);
+  f64 cycles, sv = 0;
+
+  if (!ss->n_packets)
+    return s;
+
+  cycles = (f64) ss->value[DISTRIBUTED];
+
+  if (0 == row)
+    {
+      /* divide as f64: integer division would drop the fraction before
+       * "%.0f" gets a chance to round it */
+      sv = cycles / ss->n_packets;
+      return format (s, "%.0f", sv);
+    }
+
+  /* with no cycles counted the ratios would be inf/nan; report 0 */
+  if (cycles > 0)
+    {
+      switch (row)
+        {
+        case 1:
+          sv = ss->value[PORT0] / cycles;
+          break;
+        case 2:
+          sv = ss->value[PORT1] / cycles;
+          break;
+        case 3:
+          sv = ss->value[PORT5] / cycles;
+          break;
+        case 4:
+          sv = ss->value[PORT6] / cycles;
+          break;
+        case 5:
+          /* one counter covers two ports, so normalise by 2 */
+          sv = ss->value[PORT2_3] / (2 * cycles);
+          break;
+        case 6:
+          /* two counters covering four ports, so normalise by 4 */
+          sv = (ss->value[PORT4_9] + ss->value[PORT7_8]) / (4 * cycles);
+          break;
+        default:
+          break;
+        }
+    }
+
+  sv = clib_max (sv * 100, 0);
+  s = format (s, "%04.1f", sv);
+
+  return s;
+}
+
+/* CPU feature predicate for this bundle, paired with the NODE bundle type. */
+static perfmon_cpu_supports_t backend_bound_core_cpu_supports[] = {
+ { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+/* Bundle definition. The position of each event in .events[] is the index
+ * the PORT* / DISTRIBUTED enum uses to read ss->value[] in the format_fn;
+ * the seven column headers correspond to rows 0..6 of the format_fn. */
+PERFMON_REGISTER_BUNDLE (intel_core_backend_bound_core) = {
+ .name = "td-backend-core",
+ .description = "Topdown BackEnd-bound Core - % cycles core resources busy",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_0, /* 0xFF */
+ .events[1] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_1, /* 0xFF */
+ .events[2] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_5, /* 0xFF */
+ .events[3] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_6, /* 0xFF */
+ .events[4] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_2_3, /* 0xFF */
+ .events[5] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_4_9, /* 0xFF */
+ .events[6] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_7_8, /* 0xFF */
+ .events[7] = INTEL_CORE_E_CPU_CLK_UNHALTED_DISTRIBUTED, /* 0xFF */
+ .n_events = 8,
+ .format_fn = format_intel_backend_bound_core,
+ .cpu_supports = backend_bound_core_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (backend_bound_core_cpu_supports),
+ .column_headers = PERFMON_STRINGS ("Clocks/Packet", "%Port0", "%Port1",
+ "%Port5", "%Port6", "%Load", "%Store"),
+};
diff --git a/src/plugins/perfmon/intel/bundle/backend_bound_mem.c b/src/plugins/perfmon/intel/bundle/backend_bound_mem.c
new file mode 100644
index 00000000000..ccf1ed12153
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/backend_bound_mem.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2021 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+/* Positions of the counters inside perfmon_node_stats_t.value[]; the
+ * order mirrors the .events[] list of the td-backend-mem bundle. */
+enum
+{
+  STALLS_L1D_MISS, /* value[0] */
+  STALLS_L2_MISS,  /* value[1] */
+  STALLS_L3_MISS,  /* value[2] */
+  STALLS_MEM_ANY,  /* value[3] */
+  STALLS_TOTAL,    /* value[4] */
+  BOUND_ON_STORES, /* value[5] */
+  FB_FULL,         /* value[6] */
+  THREAD,          /* value[7] */
+};
+
+/*
+ * Format one column of the td-backend-mem bundle for a graph node.
+ *
+ * args: perfmon_node_stats_t * (per-node counters), int (column index).
+ * Column 0 is THREAD cycles per packet; columns 1-6 are stall cycle
+ * counts (or differences between cache-level stall counts) expressed as
+ * a percentage of THREAD cycles, clamped at 0.
+ * Nothing is appended when the node processed no packets.
+ */
+static u8 *
+format_intel_backend_bound_mem (u8 *s, va_list *args)
+{
+  perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+  int row = va_arg (*args, int);
+  f64 cycles, sv = 0;
+
+  if (!ss->n_packets)
+    return s;
+
+  cycles = (f64) ss->value[THREAD];
+
+  if (0 == row)
+    {
+      /* f64 division keeps the fraction for "%.0f" to round */
+      sv = cycles / ss->n_packets;
+      return format (s, "%.0f", sv);
+    }
+
+  /*
+   * The differences are taken in f64: in unsigned arithmetic a negative
+   * difference would wrap to a huge positive number and slip past the
+   * clib_max (..., 0) clamp below.
+   */
+  switch (row)
+    {
+    case 1:
+      sv = (f64) ss->value[BOUND_ON_STORES];
+      break;
+    case 2:
+      sv = (f64) ss->value[STALLS_MEM_ANY] - (f64) ss->value[STALLS_L1D_MISS];
+      break;
+    case 3:
+      sv = (f64) ss->value[FB_FULL];
+      break;
+    case 4:
+      sv = (f64) ss->value[STALLS_L1D_MISS] - (f64) ss->value[STALLS_L2_MISS];
+      break;
+    case 5:
+      sv = (f64) ss->value[STALLS_L2_MISS] - (f64) ss->value[STALLS_L3_MISS];
+      break;
+    case 6:
+      sv = (f64) ss->value[STALLS_L3_MISS];
+      break;
+    default:
+      break;
+    }
+
+  /* with no cycles counted the ratio would be inf/nan; report 0 */
+  if (cycles > 0)
+    sv = clib_max ((sv / cycles) * 100, 0);
+  else
+    sv = 0;
+
+  s = format (s, "%04.1f", sv);
+
+  return s;
+}
+
+/* CPU feature predicate for this bundle, paired with the NODE bundle type. */
+static perfmon_cpu_supports_t backend_bound_mem_cpu_supports[] = {
+ { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+/* Bundle definition. The position of each event in .events[] is the index
+ * the STALLS_* / BOUND_ON_STORES / FB_FULL / THREAD enum uses to read
+ * ss->value[] in the format_fn. events[4] (STALLS_TOTAL) is collected but
+ * not read by the format_fn. */
+PERFMON_REGISTER_BUNDLE (intel_core_backend_bound_mem) = {
+ .name = "td-backend-mem",
+ .description = "Topdown BackEnd-bound Memory - % cycles not retiring "
+ "instructions due to memory stalls",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L1D_MISS, /* 0x0F */
+ .events[1] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L2_MISS, /* 0x0F */
+ .events[2] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L3_MISS, /* 0x0F */
+ .events[3] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_MEM_ANY, /* 0xFF */
+ .events[4] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_TOTAL, /* 0xFF */
+ .events[5] = INTEL_CORE_E_EXE_ACTIVITY_BOUND_ON_STORES, /* 0xFF */
+ .events[6] = INTEL_CORE_E_L1D_PEND_MISS_FB_FULL, /* 0x0F */
+ .events[7] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P, /* 0xFF */
+ .n_events = 8,
+ .format_fn = format_intel_backend_bound_mem,
+ .cpu_supports = backend_bound_mem_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (backend_bound_mem_cpu_supports),
+ .column_headers = PERFMON_STRINGS ("Clocks/Packet", "%Store Bound",
+ "%L1 Bound", "%FB Full", "%L2 Bound",
+ "%L3 Bound", "%DRAM Bound"),
+};
diff --git a/src/plugins/perfmon/intel/bundle/frontend_bound_bw_src.c b/src/plugins/perfmon/intel/bundle/frontend_bound_bw_src.c
new file mode 100644
index 00000000000..3db4ca9c0f3
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/frontend_bound_bw_src.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+/* Positions of the counters inside perfmon_node_stats_t.value[]; the
+ * order mirrors the .events[] list of the td-frontend-bw-src bundle. */
+enum
+{
+  DSB_UOPS = 0,
+  MS_UOPS = 1,
+  MITE_UOPS = 2,
+  LSD_UOPS = 3,
+};
+
+/*
+ * Format one column of the td-frontend-bw-src bundle for a graph node.
+ *
+ * args: perfmon_node_stats_t * (per-node counters), int (column index).
+ * Column 0 is the total uop count (sum of the four sources) per packet;
+ * columns 1-4 are each source's share of that total, in percent.
+ * Nothing is appended when the node processed no packets.
+ */
+static u8 *
+format_intel_frontend_bound_bw_src (u8 *s, va_list *args)
+{
+  /* counter shown in columns 1..4, in column order */
+  static const u8 source_of_row[] = { DSB_UOPS, MS_UOPS, MITE_UOPS,
+                                      LSD_UOPS };
+  perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+  int row = va_arg (*args, int);
+  f64 total, pct = 0;
+
+  if (!ss->n_packets)
+    return s;
+
+  total = ss->value[DSB_UOPS] + ss->value[MS_UOPS] + ss->value[MITE_UOPS] +
+          ss->value[LSD_UOPS];
+
+  if (row == 0)
+    return format (s, "%.0f", total / ss->n_packets);
+
+  /* any other column index leaves the percentage at 0 */
+  if (row >= 1 && row <= (int) ARRAY_LEN (source_of_row))
+    pct = (ss->value[source_of_row[row - 1]] / total) * 100;
+
+  return format (s, "%04.1f", pct);
+}
+
+/* CPU feature predicate for this bundle, paired with the NODE bundle type. */
+static perfmon_cpu_supports_t frontend_bound_bw_cpu_supports_src[] = {
+ { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+/* Bundle definition. The position of each event in .events[] is the index
+ * the *_UOPS enum uses to read ss->value[] in the format_fn; the five
+ * column headers correspond to rows 0..4 of the format_fn. */
+PERFMON_REGISTER_BUNDLE (intel_core_frontend_bound_bw_src) = {
+ .name = "td-frontend-bw-src",
+ .description =
+ "Topdown FrontEnd-bound BandWidth - % uops from each uop fetch source",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_IDQ_DSB_UOPS, /* 0x0F */
+ .events[1] = INTEL_CORE_E_IDQ_MS_UOPS, /* 0x0F */
+ .events[2] = INTEL_CORE_E_IDQ_MITE_UOPS, /* 0x0F */
+ .events[3] = INTEL_CORE_E_LSD_UOPS, /* 0x0F */
+ .n_events = 4,
+ .format_fn = format_intel_frontend_bound_bw_src,
+ .cpu_supports = frontend_bound_bw_cpu_supports_src,
+ .n_cpu_supports = ARRAY_LEN (frontend_bound_bw_cpu_supports_src),
+ .column_headers = PERFMON_STRINGS ("UOPs/PKT", "% DSB UOPS", "% MS UOPS",
+ "% MITE UOPS", "% LSD UOPS"),
+ .footer =
+ "For more information, see the Intel(R) 64 and IA-32 Architectures\n"
+ "Optimization Reference Manual section on the Front End.",
+};
diff --git a/src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c b/src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c
new file mode 100644
index 00000000000..6bf08af8154
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+/* Positions of the counters inside perfmon_node_stats_t.value[]; the
+ * order mirrors the .events[] list of the td-frontend-bw-uops bundle. */
+enum
+{
+  THREAD_P = 0,
+  THREE_UOP = 1,
+  TWO_UOP = 2,
+  ONE_UOP = 3,
+  NO_UOP = 4,
+  FOUR_UOP = 5,
+};
+
+/*
+ * Format one column of the td-frontend-bw-uops bundle for a graph node.
+ *
+ * args: perfmon_node_stats_t * (per-node counters), int (column index).
+ * Each column is a counter, or the difference between two adjacent
+ * counters, expressed as a percentage of THREAD_P cycles.
+ */
+static u8 *
+format_intel_frontend_bound_bw_uops (u8 *s, va_list *args)
+{
+  perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+  int row = va_arg (*args, int);
+  f64 sv = 0;
+  f64 cycles = ss->value[THREAD_P];
+  /*
+   * Convert before subtracting: an unsigned difference would wrap to a
+   * huge value whenever the second counter is larger than the first.
+   */
+  f64 three = ss->value[THREE_UOP];
+  f64 two = ss->value[TWO_UOP];
+  f64 one = ss->value[ONE_UOP];
+  f64 none = ss->value[NO_UOP];
+
+  /* with no cycles counted the ratios would be inf/nan; report 0 */
+  if (cycles > 0)
+    {
+      switch (row)
+        {
+        case 0:
+          sv = (ss->value[FOUR_UOP] / cycles) * 100;
+          break;
+        case 1:
+          sv = ((three - two) / cycles) * 100;
+          break;
+        case 2:
+          sv = ((two - one) / cycles) * 100;
+          break;
+        case 3:
+          sv = ((one - none) / cycles) * 100;
+          break;
+        case 4:
+          sv = (none / cycles) * 100;
+          break;
+        default:
+          break;
+        }
+    }
+
+  s = format (s, "%04.1f", sv);
+
+  return s;
+}
+
+/* CPU feature predicate for this bundle, paired with the NODE bundle type. */
+static perfmon_cpu_supports_t frontend_bound_bw_cpu_supports_uops[] = {
+ { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+/* Bundle definition. The position of each event in .events[] is the index
+ * the THREAD_P / *_UOP enum uses to read ss->value[] in the format_fn
+ * (events[5], FE_WAS_OK, is read as FOUR_UOP for the first column). */
+PERFMON_REGISTER_BUNDLE (intel_core_frontend_bound_bw_uops) = {
+ .name = "td-frontend-bw-uops",
+ .description = "Topdown FrontEnd-bound BandWidth - distribution of "
+ "uops delivered to frontend",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P, /* 0x0F */
+ .events[1] =
+ INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_3_UOP_DELIV_CORE, /* 0xFF */
+ .events[2] =
+ INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_2_UOP_DELIV_CORE, /* 0xFF */
+ .events[3] =
+ INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_1_UOP_DELIV_CORE, /* 0xFF */
+ .events[4] =
+ INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_0_UOP_DELIV_CORE, /* 0xFF */
+ .events[5] = INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_FE_WAS_OK, /* 0xFF */
+ .n_events = 6,
+ .format_fn = format_intel_frontend_bound_bw_uops,
+ .cpu_supports = frontend_bound_bw_cpu_supports_uops,
+ .n_cpu_supports = ARRAY_LEN (frontend_bound_bw_cpu_supports_uops),
+ .column_headers = PERFMON_STRINGS ("% 4 UOPS", "% 3 UOPS", "% 2 UOPS",
+ "% 1 UOPS", "% 0 UOPS"),
+ .footer =
+ "For more information, see the Intel(R) 64 and IA-32 Architectures\n"
+ "Optimization Reference Manual section on the Front End.",
+};
diff --git a/src/plugins/perfmon/intel/bundle/frontend_bound_lat.c b/src/plugins/perfmon/intel/bundle/frontend_bound_lat.c
new file mode 100644
index 00000000000..aea2149663f
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/frontend_bound_lat.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+/* Multipliers applied to the MS_SWITCHES and BACLEARS event counts in the
+ * formatter (presumably a cycle penalty per event - confirm). */
+static const int MS_Switches_Cost = 3;
+static const int BA_Clear_Cost = 10;
+
+/* Positions of the counters inside perfmon_node_stats_t.value[]; the
+ * order mirrors the .events[] list of the td-frontend-lat bundle. */
+enum
+{
+  ICACHE_MISS = 0,
+  DSB_SWITCHES = 1,
+  RESTEER = 2,
+  MS_SWITCHES = 3,
+  BACLEARS = 4,
+  THREAD = 5,
+};
+
+/*
+ * Format one column of the td-frontend-lat bundle for a graph node.
+ *
+ * args: perfmon_node_stats_t * (per-node counters), int (column index).
+ * Column 0 is THREAD cycles per packet; columns 1-4 are front-end
+ * latency event counts (some weighted by a fixed cost) expressed as a
+ * percentage of THREAD cycles.
+ * Nothing is appended when the node processed no packets.
+ */
+static u8 *
+format_intel_frontend_bound_lat (u8 *s, va_list *args)
+{
+  perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+  int row = va_arg (*args, int);
+  f64 sv = 0;
+  f64 cycles = ss->value[THREAD];
+
+  if (!ss->n_packets)
+    return s;
+
+  if (!row)
+    {
+      /* use the f64 cycle count: integer division would drop the
+       * fraction before "%.0f" gets a chance to round it */
+      sv = cycles / ss->n_packets;
+      return format (s, "%.0f", sv);
+    }
+
+  /* with no cycles counted the ratios would be inf/nan; report 0 */
+  if (cycles > 0)
+    {
+      switch (row)
+        {
+        case 1:
+          sv = ss->value[ICACHE_MISS] / cycles;
+          break;
+        case 2:
+          sv = ss->value[DSB_SWITCHES] / cycles;
+          break;
+        case 3:
+          sv = (ss->value[RESTEER] + (ss->value[BACLEARS] * BA_Clear_Cost)) /
+               cycles;
+          break;
+        case 4:
+          sv = (ss->value[MS_SWITCHES] * MS_Switches_Cost) / cycles;
+          break;
+        default:
+          break;
+        }
+    }
+
+  s = format (s, "%04.1f", sv * 100);
+
+  return s;
+}
+
+/* CPU feature predicate for this bundle, paired with the NODE bundle type. */
+static perfmon_cpu_supports_t frontend_bound_lat_cpu_supports[] = {
+ { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+/* Bundle definition. The position of each event in .events[] is the index
+ * the ICACHE_MISS..THREAD enum uses to read ss->value[] in the format_fn;
+ * the five column headers correspond to rows 0..4 of the format_fn. */
+PERFMON_REGISTER_BUNDLE (intel_core_frontend_bound_lat) = {
+ .name = "td-frontend-lat",
+ .description = "Topdown FrontEnd-bound Latency - % cycles not retiring uops "
+ "due to frontend latency",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_ICACHE_16B_IFDATA_STALL, /* 0x0F */
+ .events[1] = INTEL_CORE_E_DSB2MITE_SWITCHES_PENALTY_CYCLES, /* 0x0F */
+ .events[2] = INTEL_CORE_E_INT_MISC_CLEAR_RESTEER_CYCLES, /* 0xFF */
+ .events[3] = INTEL_CORE_E_IDQ_MS_SWITCHES, /* 0x0F */
+ .events[4] = INTEL_CORE_E_BACLEARS_ANY, /* 0x0F */
+ .events[5] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P, /* FIXED */
+ .n_events = 6,
+ .format_fn = format_intel_frontend_bound_lat,
+ .cpu_supports = frontend_bound_lat_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (frontend_bound_lat_cpu_supports),
+ .column_headers = PERFMON_STRINGS ("Clocks/Packet", "% iCache Miss",
+ "% DSB Switch", "% Branch Resteer",
+ "% MS Switch"),
+ .footer =
+ "For more information, see the Intel(R) 64 and IA-32 Architectures\n"
+ "Optimization Reference Manual on the Front End.",
+};
diff --git a/src/plugins/perfmon/intel/bundle/iio_bw.c b/src/plugins/perfmon/intel/bundle/iio_bw.c
new file mode 100644
index 00000000000..f746ca2c906
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/iio_bw.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2021 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/uncore.h>
+#include <vlib/pci/pci.h>
+#include <vppinfra/format.h>
+#include <linux/limits.h>
+#include <fcntl.h>
+#include <math.h>
+
+/* One record per matching mesh2iio PCI device: the socket and SAD ids
+ * decoded from its SAD control register, plus the IIO unit id looked up
+ * from the SAD id. */
+typedef struct
+{
+ u8 socket_id;
+ u8 sad_id;
+ u8 iio_unit_id;
+} iio_uncore_sad_t;
+/* Index of an iio_uncore_sad_t element within its pool. */
+typedef u32 index_t;
+
+/* Directory under which per-device PCI config space files are opened. */
+static const char *procfs_pci_path = "/proc/bus/pci";
+
+/* PCI ids of the device whose config space holds the SAD control register,
+ * and the offset of that register. */
+#define PCM_INTEL_PCI_VENDOR_ID 0x8086
+#define SNR_ICX_SAD_CONTROL_CFG_OFFSET 0x3F4
+#define SNR_ICX_MESH2IIO_MMAP_DID 0x09A2
+
+/* Lookup table indexed by sad_id, yielding the IIO unit id; 6 entries. */
+static const u8 icx_sad_to_pmu_id_mapping[] = { 5, 0, 1, 2, 3, 4 };
+
+/* Text appended after the device table in the bundle footer. */
+static const char *iio_bw_footer_message =
+ "* this bundle currently only measures x8 and x16 PCIe devices on Port #0\n"
+ "or Port #2. Please see the \"Intel® Xeon® Processor Scalable Memory\n"
+ "Family Uncore Performance Monitoring Reference Manual(336274)\"\n"
+ "Section 2.4 for more information.";
+
+/*
+ * Read the 32-bit SAD control register from the PCI config space of the
+ * device at 'addr', through its /proc/bus/pci node.
+ *
+ * Returns the register value, or 0xFFFFFFFF when the node cannot be
+ * opened or fewer than 4 bytes can be read.
+ */
+static u32
+get_sad_ctrl_cfg (vlib_pci_addr_t *addr)
+{
+  int fd;
+  u32 value = (u32) ~0;
+  /* format() vectors carry no terminator; "%c", 0 appends the NUL that
+   * open() needs */
+  u8 *dev_node_name =
+    format (0, "%s/%02x/%02x.%x%c", procfs_pci_path, addr->bus, addr->slot,
+            addr->function, 0);
+
+  /* the register is only read, so read-only access is sufficient */
+  fd = open ((char *) dev_node_name, O_RDONLY);
+  vec_free (dev_node_name);
+  if (fd < 0)
+    return (u32) ~0;
+
+  /*
+   * Compare as signed: pread() returns -1 on error, which an unsigned
+   * comparison against sizeof would treat as a very large byte count.
+   */
+  if (pread (fd, &value, sizeof (value), SNR_ICX_SAD_CONTROL_CFG_OFFSET) !=
+      (ssize_t) sizeof (value))
+    value = (u32) ~0;
+
+  close (fd);
+
+  return value;
+}
+
+/*
+ * Build the PCI-bus -> SAD record mapping.
+ *
+ * Walks all PCI devices; for every Intel mesh2iio device whose SAD
+ * control register can be read, a record is added to a pool and the
+ * device's bus number is hashed to that record's pool index.
+ *
+ * ph: receives the hash (bus number -> pool index).
+ * pp: receives the pool of iio_uncore_sad_t records.
+ * The caller owns both and frees them. Always returns 0.
+ */
+static u64
+get_bus_to_sad_mappings (vlib_main_t *vm, index_t **ph, iio_uncore_sad_t **pp)
+{
+  index_t *h = 0;
+  iio_uncore_sad_t *p = 0, *e = 0;
+  vlib_pci_addr_t *addr = 0, *addrs;
+
+  addrs = vlib_pci_get_all_dev_addrs ();
+
+  vec_foreach (addr, addrs)
+    {
+      vlib_pci_device_info_t *d;
+
+      d = vlib_pci_get_device_info (vm, addr, 0);
+      if (!d)
+        continue;
+
+      if (d->vendor_id == PCM_INTEL_PCI_VENDOR_ID &&
+          d->device_id == SNR_ICX_MESH2IIO_MMAP_DID)
+        {
+          u32 sad_ctrl_cfg = get_sad_ctrl_cfg (addr);
+          u8 sad_id = (sad_ctrl_cfg >> 4) & 0x7;
+
+          /*
+           * The 3-bit sad_id can be 0..7 but the mapping table is
+           * shorter; skip ids that would index past its end.
+           */
+          if (sad_ctrl_cfg != 0xFFFFFFFF &&
+              sad_id < ARRAY_LEN (icx_sad_to_pmu_id_mapping))
+            {
+              pool_get_zero (p, e);
+
+              e->socket_id = (sad_ctrl_cfg & 0xf);
+              e->sad_id = sad_id;
+              e->iio_unit_id = icx_sad_to_pmu_id_mapping[sad_id];
+
+              hash_set (h, addr->bus, e - p);
+            }
+        }
+
+      vlib_pci_free_device_info (d);
+    }
+
+  vec_free (addrs);
+
+  *ph = h;
+  *pp = p;
+
+  return 0;
+}
+
+/*
+ * Format the IIO stack of a PCI device as "IIO<socket>/<unit>".
+ *
+ * va: iio_uncore_sad_t * (record pool), index_t * (bus -> pool index
+ * hash), vlib_pci_addr_t * (device address).
+ * Appends nothing when the device's root bus cannot be determined, and
+ * an error marker when the root bus is not present in the hash.
+ */
+u8 *
+format_stack_socket (u8 *s, va_list *va)
+{
+  iio_uncore_sad_t *pool = va_arg (*va, iio_uncore_sad_t *);
+  index_t *bus_hash = va_arg (*va, index_t *);
+  vlib_pci_addr_t *addr = va_arg (*va, vlib_pci_addr_t *);
+  vlib_pci_addr_t root_bus;
+  iio_uncore_sad_t *entry;
+  clib_error_t *err;
+  uword *slot;
+
+  err = vlib_pci_get_device_root_bus (addr, &root_bus);
+  if (err)
+    {
+      clib_error_free (err);
+      return s;
+    }
+
+  slot = hash_get (bus_hash, root_bus.bus);
+  if (!slot)
+    return format (s, "[ERR: hash lookup for bus '%u' failed]", root_bus.bus);
+
+  entry = pool_elt_at_index (pool, (index_t) slot[0]);
+
+  return format (s, "IIO%u/%u", entry->socket_id, entry->iio_unit_id);
+}
+
+/*
+ * Bundle init hook: build the footer text listing every Ethernet-class
+ * PCI device with its IIO stack, port, address, ids, link speed, driver
+ * and product name, followed by the static footer message.
+ *
+ * The resulting NUL-terminated string is stored in b->footer.
+ * Always returns 0.
+ */
+static clib_error_t *
+init_intel_uncore_iio_bw (vlib_main_t *vm, struct perfmon_bundle *b)
+{
+  index_t *h = 0;
+  iio_uncore_sad_t *p = 0;
+  vlib_pci_addr_t *addr = 0, *addrs;
+  u8 *s = 0;
+
+  get_bus_to_sad_mappings (vm, &h, &p);
+
+  s = format (0, "%-10s%-5s%-13s%-12s%-14s%-16s%s\n", "Stack", "Port",
+              "Address", "VID:PID", "Link Speed", "Driver", "Product Name");
+
+  addrs = vlib_pci_get_all_dev_addrs ();
+
+  vec_foreach (addr, addrs)
+    {
+      vlib_pci_device_info_t *d;
+
+      d = vlib_pci_get_device_info (vm, addr, 0);
+      if (!d)
+        continue;
+
+      /* only Ethernet devices are listed; the info record is released on
+       * every path, including for devices that are skipped */
+      if (d->device_class == PCI_CLASS_NETWORK_ETHERNET)
+        s = format (
+          s, "%-10U%-5U%-13U%04x:%04x %-14U%-16s%v\n", format_stack_socket, p,
+          h, addr, format_vlib_pci_link_port, &d->config, format_vlib_pci_addr,
+          addr, d->vendor_id, d->device_id, format_vlib_pci_link_speed, d,
+          d->driver_name ? (char *) d->driver_name : "", d->product_name);
+
+      vlib_pci_free_device_info (d);
+    }
+
+  /* footer is used as a C string, so terminate the vector explicitly */
+  b->footer = (char *) format (s, "\n%s%c", iio_bw_footer_message, 0);
+
+  vec_free (addrs);
+  pool_free (p);
+  hash_free (h);
+
+  return 0;
+}
+
+/*
+ * Format one column of the iio-bandwidth bundles.
+ *
+ * args: perfmon_reading_t * (counter reading), int (column index).
+ * Column 0 is the running time in seconds. Any other column is counter
+ * (col - 1) multiplied by 4 and divided by the running time, printed
+ * with an M or K suffix above 1e6 / 1e3; nothing is appended for these
+ * columns when time_running is 0.
+ */
+static u8 *
+format_intel_uncore_iio_bw (u8 *s, va_list *args)
+{
+  perfmon_reading_t *r = va_arg (*args, perfmon_reading_t *);
+  int col = va_arg (*args, int);
+  f64 seconds = r->time_running * 1e-9;
+  f64 rate;
+
+  if (col == 0)
+    return format (s, "%9.2f", seconds);
+
+  if (!r->time_running)
+    return s;
+
+  /* NOTE(review): the factor 4 presumably converts event counts to
+   * bytes - confirm against the uncore event definition */
+  rate = r->value[col - 1] * 4 / seconds;
+
+  if (rate > 1.0e6)
+    return format (s, "%9.0fM", rate * 1e-6);
+
+  if (rate > 1.0e3)
+    return format (s, "%9.0fK", rate * 1e-3);
+
+  return format (s, "%9.0f ", rate);
+}
+
+/*
+ * This bundle is currently only supported and tested on Intel Icelake.
+ */
+/* Non-zero when the CPU reports avx512_bitalg but not movdir64b. */
+static int
+is_icelake ()
+{
+  if (!clib_cpu_supports_avx512_bitalg ())
+    return 0;
+
+  return !clib_cpu_supports_movdir64b ();
+}
+
+/* CPU predicate shared by both iio-bandwidth bundles, paired with the
+ * SYSTEM bundle type. */
+static perfmon_cpu_supports_t iio_bw_cpu_supports[] = {
+  { is_icelake, PERFMON_BUNDLE_TYPE_SYSTEM }
+};
+
+/*
+ * PCI-side bandwidth bundle. All four events are the "data requested OF
+ * the CPU" variants, so that this bundle measures the PCI-initiated
+ * direction and does not duplicate the iio-bandwidth-cpu bundle (which
+ * uses the BY_CPU variants). Event order matches the column headers
+ * after "RunTime": Rd/P0, Wr/P0, Rd/P2, Wr/P2.
+ */
+PERFMON_REGISTER_BUNDLE (intel_uncore_iio_bw_pci) = {
+  .name = "iio-bandwidth-pci",
+  .description = "pci iio memory reads and writes per iio stack *",
+  .source = "intel-uncore",
+  .events[0] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_OF_CPU_PART0_RD,
+  .events[1] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_OF_CPU_PART0_WR,
+  .events[2] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_OF_CPU_PART2_RD,
+  .events[3] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_OF_CPU_PART2_WR,
+  .n_events = 4,
+  .cpu_supports = iio_bw_cpu_supports,
+  .n_cpu_supports = ARRAY_LEN (iio_bw_cpu_supports),
+  .format_fn = format_intel_uncore_iio_bw,
+  .init_fn = init_intel_uncore_iio_bw,
+  .column_headers = PERFMON_STRINGS ("RunTime", "PCIe Rd/P0", "PCIe Wr/P0",
+                                     "PCIe Rd/P2", "PCIe Wr/P2")
+};
+
+/* CPU-side bandwidth bundle: the four BY_CPU events, in the order of the
+ * column headers after "RunTime" (Rd/P0, Wr/P0, Rd/P2, Wr/P2). It shares
+ * the format and init functions with the iio-bandwidth-pci bundle. */
+PERFMON_REGISTER_BUNDLE (intel_uncore_iio_bw_cpu) = {
+ .name = "iio-bandwidth-cpu",
+ .description = "cpu iio memory reads and writes per iio stack *",
+ .source = "intel-uncore",
+ .events[0] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART0_RD,
+ .events[1] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART0_WR,
+ .events[2] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART2_RD,
+ .events[3] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART2_WR,
+ .n_events = 4,
+ .cpu_supports = iio_bw_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (iio_bw_cpu_supports),
+ .format_fn = format_intel_uncore_iio_bw,
+ .init_fn = init_intel_uncore_iio_bw,
+ .column_headers = PERFMON_STRINGS ("RunTime", "CPU Rd/P0", "CPU Wr/P0",
+ "CPU Rd/P2", "CPU Wr/P2")
+};
diff --git a/src/plugins/perfmon/intel/bundle/membw_bound.c b/src/plugins/perfmon/intel/bundle/membw_bound.c
deleted file mode 100644
index 2e4b4aa57bb..00000000000
--- a/src/plugins/perfmon/intel/bundle/membw_bound.c
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2021 Intel and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <perfmon/perfmon.h>
-#include <perfmon/intel/core.h>
-
-static u8 *
-format_intel_membw_bound (u8 *s, va_list *args)
-{
- perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
- int row = va_arg (*args, int);
- f64 sv = 0;
-
- if (!ss->n_packets)
- return s;
-
- sv = ss->value[row] / ss->n_packets;
-
- s = format (s, "%5.0f", sv);
-
- return s;
-}
-
-static perfmon_cpu_supports_t membw_bound_cpu_supports[] = {
- { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
-};
-
-PERFMON_REGISTER_BUNDLE (intel_core_membw_bound) = {
- .name = "membw-bound",
- .description = "memory bandwidth boundedness",
- .source = "intel-core",
- .events[0] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P, /* FIXED */
- .events[1] = INTEL_CORE_E_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE, /*CMask: 0xFF*/
- .events[2] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_MEM_ANY, /*CMask: 0xFF*/
- .events[3] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L1D_MISS, /*CMask: 0xF*/
- .events[4] = INTEL_CORE_E_L1D_PEND_MISS_FB_FULL, /*CMask: 0xF*/
- .events[5] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L3_MISS, /*CMask: 0xF*/
- .events[6] = INTEL_CORE_E_SQ_MISC_SQ_FULL, /*CMask: 0xF*/
- .n_events = 7,
- .format_fn = format_intel_membw_bound,
- .cpu_supports = membw_bound_cpu_supports,
- .n_cpu_supports = ARRAY_LEN (membw_bound_cpu_supports),
- .column_headers = PERFMON_STRINGS ("Cycles/Packet", "Cycles Stall/Packet",
- "Mem Stall/Packet",
- "L1D Miss Stall/Packet", "FB Full/Packet",
- "L3 Miss Stall/Packet", "SQ Full/Packet"),
-};
diff --git a/src/plugins/perfmon/intel/bundle/topdown_icelake.c b/src/plugins/perfmon/intel/bundle/topdown_icelake.c
new file mode 100644
index 00000000000..a3392e52f0a
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/topdown_icelake.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+/* Non-zero when the CPU reports avx512_bitalg but not movdir64b. */
+static int
+is_icelake ()
+{
+  if (!clib_cpu_supports_avx512_bitalg ())
+    return 0;
+
+  return !clib_cpu_supports_movdir64b ();
+}
+
+/* CPU predicate for the Icelake topdown bundle, paired with the THREAD
+ * bundle type. */
+static perfmon_cpu_supports_t topdown_lvl2_cpu_supports_icx[] = {
+ { is_icelake, PERFMON_BUNDLE_TYPE_THREAD }
+};
+
+/* Extract byte 'i' (0 = least significant) of 'm' as an f64. Arguments
+ * and the expansion are parenthesised so expressions can be passed. */
+#define GET_METRIC(m, i) ((f64) (((m) >> ((i) * 8)) & 0xff))
+
+/* Positions of the counters inside perfmon_reading_t.value[]; the order
+ * mirrors the .events[] list of the Icelake topdown bundle. */
+enum
+{
+  TD_SLOTS = 0,
+  STALLS_MEM_ANY = 1,
+  STALLS_TOTAL = 2,
+  BOUND_ON_STORES = 3,
+  RECOVERY_CYCLES = 4,
+  UOP_DROPPING = 5,
+  UOP_NOT_DELIVERED = 6,
+  TD_RETIRING = 7,
+  TD_BAD_SPEC = 8,
+  TD_FE_BOUND = 9,
+  TD_BE_BOUND = 10,
+};
+
+/* (STALLS_MEM_ANY + BOUND_ON_STORES) / (STALLS_TOTAL + BOUND_ON_STORES). */
+static_always_inline f64
+memory_bound_fraction (perfmon_reading_t *ss)
+{
+  f64 all_stalls = (f64) (ss->value[STALLS_TOTAL] + ss->value[BOUND_ON_STORES]);
+
+  return (ss->value[STALLS_MEM_ANY] + ss->value[BOUND_ON_STORES]) / all_stalls;
+}
+
+/* Sum of the four level-1 topdown metric counters. */
+static_always_inline f64
+perf_metrics_sum (perfmon_reading_t *ss)
+{
+  f64 total = ss->value[TD_RETIRING] + ss->value[TD_BAD_SPEC] +
+              ss->value[TD_FE_BOUND] + ss->value[TD_BE_BOUND];
+
+  return total;
+}
+
+/* Retiring counter as a fraction of the level-1 metric sum. */
+static_always_inline f64
+retiring (perfmon_reading_t *ss)
+{
+  f64 total = perf_metrics_sum (ss);
+
+  return ss->value[TD_RETIRING] / total;
+}
+
+/* Bad-speculation counter as a fraction of the level-1 metric sum. */
+static_always_inline f64
+bad_speculation (perfmon_reading_t *ss)
+{
+  f64 total = perf_metrics_sum (ss);
+
+  return ss->value[TD_BAD_SPEC] / total;
+}
+
+/* Frontend-bound fraction of the level-1 metric sum, reduced by the
+ * UOP_DROPPING fraction of the same sum. */
+static_always_inline f64
+frontend_bound (perfmon_reading_t *ss)
+{
+  f64 total = perf_metrics_sum (ss);
+  f64 fe_share = ss->value[TD_FE_BOUND] / total;
+  f64 dropped_share = ss->value[UOP_DROPPING] / total;
+
+  return fe_share - dropped_share;
+}
+
+/* Backend-bound fraction of the level-1 metric sum, increased by
+ * 5 * RECOVERY_CYCLES as a fraction of the same sum. */
+static_always_inline f64
+backend_bound (perfmon_reading_t *ss)
+{
+  f64 total = perf_metrics_sum (ss);
+  f64 be_share = ss->value[TD_BE_BOUND] / total;
+  f64 recovery_share = (5 * ss->value[RECOVERY_CYCLES]) / total;
+
+  return be_share + recovery_share;
+}
+
+/*
+ * (5 * UOP_NOT_DELIVERED - UOP_DROPPING) / TD_SLOTS.
+ *
+ * The subtraction is done in f64: in unsigned arithmetic it would wrap
+ * to a huge value whenever UOP_DROPPING exceeds 5 * UOP_NOT_DELIVERED.
+ */
+static_always_inline f64
+fetch_latency (perfmon_reading_t *ss)
+{
+  f64 not_delivered = 5 * (f64) ss->value[UOP_NOT_DELIVERED];
+  f64 dropped = (f64) ss->value[UOP_DROPPING];
+
+  return (not_delivered - dropped) / (f64) ss->value[TD_SLOTS];
+}
+
+/* Frontend-bound fraction not attributed to fetch latency, floored at 0. */
+static_always_inline f64
+fetch_bandwidth (perfmon_reading_t *ss)
+{
+  f64 fe = frontend_bound (ss);
+  f64 latency = fetch_latency (ss);
+
+  return clib_max (0, fe - latency);
+}
+
+/* Backend-bound fraction scaled by the memory-bound fraction. */
+static_always_inline f64
+memory_bound (perfmon_reading_t *ss)
+{
+  f64 be = backend_bound (ss);
+  f64 mem_fraction = memory_bound_fraction (ss);
+
+  return be * mem_fraction;
+}
+
+/* Backend-bound fraction not attributed to memory. */
+static_always_inline f64
+core_bound (perfmon_reading_t *ss)
+{
+  f64 be = backend_bound (ss);
+  f64 mem = memory_bound (ss);
+
+  return be - mem;
+}
+
+/*
+ * Format one column of the Icelake topdown bundle.
+ *
+ * args: perfmon_reading_t * (counter reading), int (column index 0..7,
+ * in column-header order). The selected ratio is printed as a
+ * percentage; an unknown column index prints 0.
+ */
+static u8 *
+format_topdown_lvl2_icx (u8 *s, va_list *args)
+{
+  perfmon_reading_t *reading = va_arg (*args, perfmon_reading_t *);
+  u64 column = va_arg (*args, int);
+  f64 fraction;
+
+  if (column == 0)
+    fraction = retiring (reading);
+  else if (column == 1)
+    fraction = bad_speculation (reading);
+  else if (column == 2)
+    fraction = frontend_bound (reading);
+  else if (column == 3)
+    fraction = backend_bound (reading);
+  else if (column == 4)
+    fraction = fetch_latency (reading);
+  else if (column == 5)
+    fraction = fetch_bandwidth (reading);
+  else if (column == 6)
+    fraction = memory_bound (reading);
+  else if (column == 7)
+    fraction = core_bound (reading);
+  else
+    fraction = 0;
+
+  return format (s, "%f", fraction * 100);
+}
+
+/* Bundle definition. The position of each event in .events[] is the index
+ * the TD_SLOTS..TD_BE_BOUND enum uses to read ss->value[]; the eight
+ * column headers correspond to indices 0..7 of the format_fn and are
+ * expanded in the footer. */
+PERFMON_REGISTER_BUNDLE (topdown_lvl2_metric_icx) = {
+ .name = "topdown",
+ .description = "Top-down Microarchitecture Analysis Level 1 & 2",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
+ .events[1] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_MEM_ANY,
+ .events[2] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_TOTAL,
+ .events[3] = INTEL_CORE_E_EXE_ACTIVITY_BOUND_ON_STORES,
+ .events[4] = INTEL_CORE_E_INT_MISC_RECOVERY_CYCLES,
+ .events[5] = INTEL_CORE_E_INT_MISC_UOP_DROPPING,
+ .events[6] = INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CORE,
+ .events[7] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC,
+ .events[8] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC,
+ .events[9] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC,
+ .events[10] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC,
+ .n_events = 11,
+ .cpu_supports = topdown_lvl2_cpu_supports_icx,
+ .n_cpu_supports = ARRAY_LEN (topdown_lvl2_cpu_supports_icx),
+ .format_fn = format_topdown_lvl2_icx,
+ .column_headers = PERFMON_STRINGS ("% RT", "% BS", "% FE", "% BE", "% FE.FL",
+ "% FE.FB", "% BE.MB", "% BE.CB"),
+ .footer = "Retiring (RT), Bad Speculation (BS),\n"
+ " FrontEnd bound (FE), BackEnd bound (BE),\n"
+ " Fetch Latency (FL), Fetch Bandwidth (FB),\n"
+ " Memory Bound (MB), Core Bound (CB)",
+};
diff --git a/src/plugins/perfmon/intel/bundle/topdown_metrics.c b/src/plugins/perfmon/intel/bundle/topdown_metrics.c
index 6913781be90..a464dfe1c88 100644
--- a/src/plugins/perfmon/intel/bundle/topdown_metrics.c
+++ b/src/plugins/perfmon/intel/bundle/topdown_metrics.c
@@ -14,16 +14,17 @@
*/
#include <vnet/vnet.h>
+#include <vppinfra/math.h>
#include <perfmon/perfmon.h>
#include <perfmon/intel/core.h>
#define GET_METRIC(m, i) (((m) >> (i * 8)) & 0xff)
#define GET_RATIO(m, i) (((m) >> (i * 32)) & 0xffffffff)
-#define RDPMC_FIXED_SLOTS (1 << 30) /* fixed slots */
-#define RDPMC_L1_METRICS (1 << 29) /* l1 metric counters */
+#define RDPMC_SLOTS (1 << 30) /* fixed slots */
+#define RDPMC_METRICS (1 << 29) /* l1 & l2 metric counters */
#define FIXED_COUNTER_SLOTS 3
-#define METRIC_COUNTER_TOPDOWN_L1 0
+#define METRIC_COUNTER_TOPDOWN_L1_L2 0
typedef enum
{
@@ -31,7 +32,16 @@ typedef enum
TOPDOWN_E_BAD_SPEC,
TOPDOWN_E_FE_BOUND,
TOPDOWN_E_BE_BOUND,
-} topdown_lvl1_t;
+ TOPDOWN_E_HEAVYOPS,
+ TOPDOWN_E_LIGHTOPS,
+ TOPDOWN_E_BMISPRED,
+ TOPDOWN_E_MCHCLEAR,
+ TOPDOWN_E_FETCHLAT,
+ TOPDOWN_E_FETCH_BW,
+ TOPDOWN_E_MEMBOUND,
+ TOPDOWN_E_CORBOUND,
+ TOPDOWN_E_MAX,
+} topdown_e_t;
enum
{
@@ -39,11 +49,11 @@ enum
TOPDOWN_E_RDPMC_METRICS,
};
-typedef f64 (topdown_lvl1_parse_fn_t) (void *, topdown_lvl1_t);
+typedef f64 (topdown_lvl1_parse_fn_t) (void *, topdown_e_t);
/* Parse thread level states from perfmon_reading */
static_always_inline f64
-topdown_lvl1_perf_reading (void *ps, topdown_lvl1_t e)
+topdown_lvl1_perf_reading (void *ps, topdown_e_t e)
{
perfmon_reading_t *ss = (perfmon_reading_t *) ps;
@@ -52,7 +62,7 @@ topdown_lvl1_perf_reading (void *ps, topdown_lvl1_t e)
}
static_always_inline f64
-topdown_lvl1_rdpmc_metric (void *ps, topdown_lvl1_t e)
+topdown_lvl1_rdpmc_metric (void *ps, topdown_e_t e)
{
perfmon_node_stats_t *ss = (perfmon_node_stats_t *) ps;
f64 slots_t0 =
@@ -69,8 +79,87 @@ topdown_lvl1_rdpmc_metric (void *ps, topdown_lvl1_t e)
return (slots_t1 / slots_delta) * 100;
}
+/*
+ * Map a topdown_e_t value to the slot of perfmon_reading_t.value[] that
+ * topdown_lvl2_perf_reading () samples for it.  Each pair of level-2 entries
+ * shares one slot (5..8); the four level-1 entries all map to slot 0.
+ */
+#define TO_LVL2_PERF_IDX(e)                                                   \
+  ({                                                                          \
+    const u8 lvl2_slot[TOPDOWN_E_MAX] = {                                     \
+      0, 0, 0, 0, 5, 5, 6, 6, 7, 7, 8, 8                                      \
+    };                                                                        \
+    lvl2_slot[e];                                                             \
+  })
+
+/*
+ * Parse thread level stats from perfmon_reading.
+ *
+ * ps: pointer to a perfmon_reading_t.  value[0] is the denominator,
+ *     value[TO_LVL2_PERF_IDX (e)] is the sampled level-2 reading and the slot
+ *     four below it is treated as that reading's level-1 parent.
+ * e:  metric to report.
+ *
+ * Returns the metric as a percentage of value[0].  Level-1 metrics are
+ * delegated to topdown_lvl1_perf_reading ().
+ */
+static_always_inline f64
+topdown_lvl2_perf_reading (void *ps, topdown_e_t e)
+{
+  perfmon_reading_t *ss = (perfmon_reading_t *) ps;
+  u8 l2_idx;
+  f64 share;
+
+  /* If it is an L1 metric, call L1 format */
+  if (TOPDOWN_E_BE_BOUND >= e)
+    {
+      return topdown_lvl1_perf_reading (ps, e);
+    }
+
+  l2_idx = TO_LVL2_PERF_IDX (e);
+  share = (f64) ss->value[l2_idx];
+
+  /* Odd metrics are not sampled; they are inferred as the L1 parent minus
+   * the sampled (even) L2 sibling.  The difference is taken in floating
+   * point and its magnitude kept, as topdown_lvl2_rdpmc_metric () does, so a
+   * sibling reading that exceeds its parent cannot wrap around as it would
+   * in unsigned arithmetic. */
+  if (e & 0x1)
+    {
+      share = fabs ((f64) ss->value[l2_idx - 4] - share);
+    }
+
+  return share / ss->value[0] * 100;
+}
+
+/*
+ * Map a topdown_e_t value to the metric byte position that
+ * topdown_lvl2_rdpmc_metric () hands to the level-1 rdpmc helper.  Each pair
+ * of level-2 entries shares one byte (4..7); level-1 entries map to byte 0.
+ */
+#define TO_LVL2_METRIC_BYTE(e)                                                \
+  ({                                                                          \
+    const u8 lvl2_byte[TOPDOWN_E_MAX] = {                                     \
+      0, 0, 0, 0, 4, 4, 5, 5, 6, 6, 7, 7                                      \
+    };                                                                        \
+    lvl2_byte[e];                                                             \
+  })
+
+/*
+ * Map a level-2 topdown_e_t value to the level-1 topdown_e_t value it is a
+ * component of.  The four level-1 entries have no reference and hold
+ * (u8) -1.
+ */
+#define TO_LVL1_REF(e)                                                        \
+  ({                                                                          \
+    const u8 lvl1_parent[TOPDOWN_E_MAX] = {                                   \
+      (u8) -1,            (u8) -1,                                            \
+      (u8) -1,            (u8) -1,                                            \
+      TOPDOWN_E_RETIRING, TOPDOWN_E_RETIRING,                                 \
+      TOPDOWN_E_BAD_SPEC, TOPDOWN_E_BAD_SPEC,                                 \
+      TOPDOWN_E_FE_BOUND, TOPDOWN_E_FE_BOUND,                                 \
+      TOPDOWN_E_BE_BOUND, TOPDOWN_E_BE_BOUND                                  \
+    };                                                                        \
+    lvl1_parent[e];                                                           \
+  })
+
+/*
+ * Parse a level-1 or level-2 metric from the rdpmc node stats.
+ *
+ * Level-1 metrics are delegated to topdown_lvl1_rdpmc_metric ().  For level-2
+ * metrics that helper is called with the metric byte position in place of a
+ * topdown_e_t.  Odd-numbered metrics are measured against their level-1
+ * reference; even-numbered ones against zero.  The magnitude of the
+ * difference is returned.
+ */
+static_always_inline f64
+topdown_lvl2_rdpmc_metric (void *ps, topdown_e_t e)
+{
+  f64 l1_ref, l2_raw;
+
+  /* Level-1 entries need no further work */
+  if (e <= TOPDOWN_E_BE_BOUND)
+    return topdown_lvl1_rdpmc_metric (ps, e);
+
+  /* Only the odd (inferred) metrics are offset by their L1 reference */
+  l1_ref = (e & 0x1) ? topdown_lvl1_rdpmc_metric (ps, TO_LVL1_REF (e)) : 0;
+
+  /* Raw L2 value, read through the L1 helper by byte position */
+  l2_raw = topdown_lvl1_rdpmc_metric (ps, TO_LVL2_METRIC_BYTE (e));
+
+  return fabs (l1_ref - l2_raw);
+}
+
static u8 *
-format_topdown_lvl1 (u8 *s, va_list *args)
+format_topdown_lvl2 (u8 *s, va_list *args)
{
void *ps = va_arg (*args, void *);
u64 idx = va_arg (*args, int);
@@ -78,58 +167,49 @@ format_topdown_lvl1 (u8 *s, va_list *args)
f64 sv = 0;
topdown_lvl1_parse_fn_t *parse_fn,
- *parse_fns[PERFMON_BUNDLE_TYPE_MAX] = { 0, topdown_lvl1_rdpmc_metric,
- topdown_lvl1_perf_reading, 0 };
+ *parse_fns[PERFMON_BUNDLE_TYPE_MAX] = { 0, topdown_lvl2_rdpmc_metric,
+ topdown_lvl2_perf_reading, 0 };
+
parse_fn = parse_fns[type];
ASSERT (parse_fn);
- switch (idx)
- {
- case 0:
- sv =
- parse_fn (ps, TOPDOWN_E_BAD_SPEC) + parse_fn (ps, TOPDOWN_E_RETIRING);
- break;
- case 1:
- sv =
- parse_fn (ps, TOPDOWN_E_BE_BOUND) + parse_fn (ps, TOPDOWN_E_FE_BOUND);
- break;
- default:
- sv = parse_fn (ps, (topdown_lvl1_t) idx - 2);
- break;
- }
-
+ sv = parse_fn (ps, (topdown_e_t) idx);
s = format (s, "%f", sv);
return s;
}
-static perfmon_cpu_supports_t topdown_lvl1_cpu_supports[] = {
- /* Intel SNR supports papi/thread only */
- { clib_cpu_supports_movdiri, PERFMON_BUNDLE_TYPE_THREAD },
- /* Intel ICX supports papi/thread or rdpmc/node */
- { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE }
+static perfmon_cpu_supports_t topdown_lvl2_cpu_supports[] = {
+ /* Intel SPR, matched through the AVX512-FP16 feature check, supports
+    papi/thread or rdpmc/node */
+ { clib_cpu_supports_avx512_fp16, PERFMON_BUNDLE_TYPE_NODE_OR_THREAD }
};
-PERFMON_REGISTER_BUNDLE (topdown_lvl1_metric) = {
- .name = "topdown-level1",
- .description = "Top-down Microarchitecture Analysis Level 1",
+/*
+ * Level 1 & 2 top-down bundle.  events[1..4] are the level-1 metrics and
+ * events[5..8] the four sampled level-2 metrics; the remaining level-2
+ * columns are derived by the parse functions behind format_topdown_lvl2.
+ */
+PERFMON_REGISTER_BUNDLE (topdown_lvl2_metric) = {
+ .name = "topdown",
+ .description = "Top-down Microarchitecture Analysis Level 1 & 2",
.source = "intel-core",
- .offset_type = PERFMON_OFFSET_TYPE_METRICS,
.events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
.events[1] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC,
.events[2] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC,
.events[3] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC,
.events[4] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC,
- .n_events = 5,
- .metrics[0] = RDPMC_FIXED_SLOTS | FIXED_COUNTER_SLOTS,
- .metrics[1] = RDPMC_L1_METRICS | METRIC_COUNTER_TOPDOWN_L1,
- .n_metrics = 2,
- .cpu_supports = topdown_lvl1_cpu_supports,
- .n_cpu_supports = ARRAY_LEN (topdown_lvl1_cpu_supports),
- .format_fn = format_topdown_lvl1,
- .column_headers = PERFMON_STRINGS ("% NS", "% ST", "% NS.RT", "% NS.BS",
- "% ST.FE", "% ST.BE"),
- .footer = "Not Stalled (NS),STalled (ST),\n"
- " Retiring (RT), Bad Speculation (BS),\n"
- " FrontEnd bound (FE), BackEnd bound (BE)",
+ .events[5] = INTEL_CORE_E_TOPDOWN_L2_HEAVYOPS_METRIC,
+ .events[6] = INTEL_CORE_E_TOPDOWN_L2_BMISPRED_METRIC,
+ .events[7] = INTEL_CORE_E_TOPDOWN_L2_FETCHLAT_METRIC,
+ .events[8] = INTEL_CORE_E_TOPDOWN_L2_MEMBOUND_METRIC,
+ .n_events = 9,
+ .preserve_samples = 0x1FF,
+ .cpu_supports = topdown_lvl2_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (topdown_lvl2_cpu_supports),
+ .format_fn = format_topdown_lvl2,
+ .column_headers = PERFMON_STRINGS ("% RT", "% BS", "% FE", "% BE", "% RT.HO",
+ "% RT.LO", "% BS.BM", "% BS.MC",
+ "% FE.FL", "% FE.FB", "% BE.MB",
+ "% BE.CB"),
+ .footer = "Retiring (RT), Bad Speculation (BS),\n"
+ " FrontEnd bound (FE), BackEnd bound (BE),\n"
+ " Light Operations (LO), Heavy Operations (HO),\n"
+ " Branch Misprediction (BM), Machine Clears (MC),\n"
+ " Fetch Latency (FL), Fetch Bandwidth (FB),\n"
+ " Memory Bound (MB), Core Bound (CB)",
};
diff --git a/src/plugins/perfmon/intel/bundle/topdown_tremont.c b/src/plugins/perfmon/intel/bundle/topdown_tremont.c
new file mode 100644
index 00000000000..b2626eb0480
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/topdown_tremont.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2021 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+/*
+ * Level-1 top-down categories, in the order format_topdown_lvl1 () uses to
+ * index the readings.  TOPDOWN_E_MAX is the number of categories.
+ */
+typedef enum
+{
+  TOPDOWN_E_RETIRING = 0,
+  TOPDOWN_E_BAD_SPEC = 1,
+  TOPDOWN_E_FE_BOUND = 2,
+  TOPDOWN_E_BE_BOUND = 3,
+  TOPDOWN_E_MAX = 4,
+} topdown_lvl1_t;
+
+/*
+ * Format one column of the Tremont level-1 top-down bundle.
+ *
+ * va_args: a perfmon_reading_t * whose first TOPDOWN_E_MAX values are indexed
+ *          by topdown_lvl1_t, followed by the int column index.
+ *
+ * Column 0 is retiring + bad speculation, column 1 is frontend + backend
+ * bound, and columns 2 onwards are the individual categories.  Each is
+ * printed as a percentage of the sum of the four readings.
+ *
+ * Returns the vector s with the formatted value appended.
+ */
+static u8 *
+format_topdown_lvl1 (u8 *s, va_list *args)
+{
+  perfmon_reading_t *ss = va_arg (*args, perfmon_reading_t *);
+  u64 idx = va_arg (*args, int);
+  f64 sv = 0;
+  u64 total = 0;
+
+  for (int i = 0; i < TOPDOWN_E_MAX; i++)
+    total += ss->value[i];
+
+  /* Nothing was counted: report 0 instead of dividing by zero, which would
+   * print NaN in every column. */
+  if (total == 0)
+    return format (s, "%f", sv);
+
+  switch (idx)
+    {
+    case 0:
+      sv = (f64) ss->value[TOPDOWN_E_RETIRING] + ss->value[TOPDOWN_E_BAD_SPEC];
+      break;
+    case 1:
+      sv = (f64) ss->value[TOPDOWN_E_FE_BOUND] + ss->value[TOPDOWN_E_BE_BOUND];
+      break;
+    default:
+      /* columns 2.. map onto topdown_lvl1_t 0.. */
+      sv = (f64) ss->value[idx - 2];
+      break;
+    }
+
+  sv = (sv / total) * 100;
+  s = format (s, "%f", sv);
+  return s;
+}
+
+/*
+ * CPU check used to gate the Tremont bundle: returns non-zero when the CPU
+ * reports MOVDIR64B support but no AVX2 support.
+ */
+static int
+is_tremont (void)
+{
+  return clib_cpu_supports_movdir64b () && !clib_cpu_supports_avx2 ();
+}
+
+static perfmon_cpu_supports_t topdown_lvl1_cpu_supports[] = {
+ { is_tremont, PERFMON_BUNDLE_TYPE_THREAD } /* single entry: is_tremont () paired with the per-thread bundle type */
+};
+
+PERFMON_REGISTER_BUNDLE (topdown_lvl1_tremont) = { /* level-1 top-down bundle built from the Tremont-specific events */
+ .name = "topdown-level1",
+ .description = "Top-down Microarchitecture Analysis Level 1",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_TREMONT, /* events listed in topdown_lvl1_t order */
+ .events[1] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_TREMONT,
+ .events[2] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_TREMONT,
+ .events[3] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_TREMONT,
+ .n_events = 4, /* NOTE(review): presumably reading i corresponds to events[i]; confirm against the perfmon core */
+ .cpu_supports = topdown_lvl1_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (topdown_lvl1_cpu_supports),
+ .format_fn = format_topdown_lvl1, /* columns 0-1 are sums of two categories, 2-5 the categories themselves */
+ .column_headers = PERFMON_STRINGS ("% NS", "% ST", "% NS.RT", "% NS.BS",
+ "% ST.FE", "% ST.BE"),
+ .footer = "Not Stalled (NS),STalled (ST),\n"
+ " Retiring (RT), Bad Speculation (BS),\n"
+ " FrontEnd bound (FE), BackEnd bound (BE)",
+};