aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/perfmon
diff options
context:
space:
mode:
authorRay Kinsella <mdr@ashroe.eu>2022-01-28 08:56:57 +0000
committerDamjan Marion <dmarion@me.com>2022-01-30 14:43:34 +0000
commitfe85d872358a7c7874803ac428b2067f66c2a5d8 (patch)
tree2f95458b6d4689339d5ec27f976a02e07a5bbbd0 /src/plugins/perfmon
parent1f2070a0fe41d2fe947aeed2f101ea7acc5d973a (diff)
perfmon: topdown backend bound core bundle
Add a bundle to measure topdown backend-bound core cycles; it indicates whether any given execution port has contention. Type: improvement Signed-off-by: Ray Kinsella <mdr@ashroe.eu> Change-Id: I37d1b38c101ac42d51c10fa4452b822d34b729c9
Diffstat (limited to 'src/plugins/perfmon')
-rw-r--r--src/plugins/perfmon/CMakeLists.txt1
-rw-r--r--src/plugins/perfmon/intel/bundle/backend_bound_core.c100
-rw-r--r--src/plugins/perfmon/intel/core.h17
3 files changed, 118 insertions, 0 deletions
diff --git a/src/plugins/perfmon/CMakeLists.txt b/src/plugins/perfmon/CMakeLists.txt
index e262984b610..05c280f64ac 100644
--- a/src/plugins/perfmon/CMakeLists.txt
+++ b/src/plugins/perfmon/CMakeLists.txt
@@ -24,6 +24,7 @@ add_vpp_plugin(perfmon
intel/core.c
intel/uncore.c
intel/bundle/backend_bound_mem.c
+ intel/bundle/backend_bound_core.c
intel/bundle/inst_and_clock.c
intel/bundle/load_blocks.c
intel/bundle/mem_bw.c
diff --git a/src/plugins/perfmon/intel/bundle/backend_bound_core.c b/src/plugins/perfmon/intel/bundle/backend_bound_core.c
new file mode 100644
index 00000000000..16905235119
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/backend_bound_core.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+enum /* indices into perfmon_node_stats_t.value[] used by the formatter; each value equals the .events[] slot of the matching event in this file's bundle */
+{
+ PORT0 = 0, /* UOPS_DISPATCHED.PORT_0 */
+ PORT1 = 1, /* UOPS_DISPATCHED.PORT_1 */
+ PORT5 = 2, /* UOPS_DISPATCHED.PORT_5 */
+ PORT6 = 3, /* UOPS_DISPATCHED.PORT_6 */
+ PORT2_3 = 4, /* UOPS_DISPATCHED.PORT_2_3, shown as "%Load" */
+ PORT4_9 = 5, /* UOPS_DISPATCHED.PORT_4_9, part of "%Store" */
+ PORT7_8 = 6, /* UOPS_DISPATCHED.PORT_7_8, part of "%Store" */
+ DISTRIBUTED = 7, /* CPU_CLK_UNHALTED.DISTRIBUTED, the divisor for every percentage row */
+};
+
+/*
+ * Format one cell of the "td-backend-core" bundle for a single node.
+ *
+ * Variadic arguments, in order:
+ *   perfmon_node_stats_t *  counters collected for the node
+ *   int                     row to render (0-6, matching column_headers)
+ *
+ * Row 0 renders distributed core clocks per packet. Rows 1-6 render uops
+ * dispatched per distributed clock as a percentage for port 0, port 1,
+ * port 5, port 6, the load ports (2 and 3) and the store ports (4, 9, 7
+ * and 8). Counters that aggregate several ports are divided by the number
+ * of ports so every row is a per-port average.
+ *
+ * Returns s unchanged when the node has processed no packets; otherwise
+ * returns s with the formatted value appended.
+ */
+static u8 *
+format_intel_backend_bound_core (u8 *s, va_list *args)
+{
+  perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+  int row = va_arg (*args, int);
+  f64 clocks, uops = 0, n_ports = 1, sv = 0;
+
+  if (!ss->n_packets)
+    return s;
+
+  clocks = (f64) ss->value[DISTRIBUTED];
+
+  /* Divide in floating point so the ratio is rounded by "%.0f" instead of
+   * being truncated by an integer division beforehand. */
+  if (0 == row)
+    return format (s, "%.0f", clocks / (f64) ss->n_packets);
+
+  switch (row)
+    {
+    case 1:
+      uops = (f64) ss->value[PORT0];
+      break;
+    case 2:
+      uops = (f64) ss->value[PORT1];
+      break;
+    case 3:
+      uops = (f64) ss->value[PORT5];
+      break;
+    case 4:
+      uops = (f64) ss->value[PORT6];
+      break;
+    case 5:
+      /* one counter covers ports 2 and 3 */
+      uops = (f64) ss->value[PORT2_3];
+      n_ports = 2;
+      break;
+    case 6:
+      /* two counters cover ports 4, 9, 7 and 8 */
+      uops = (f64) ss->value[PORT4_9] + (f64) ss->value[PORT7_8];
+      n_ports = 4;
+      break;
+    default:
+      /* unknown row: fall through with uops == 0, rendered as 00.0 */
+      break;
+    }
+
+  /* A zero clock count would turn the ratio into inf/NaN; report 0%. */
+  if (clocks > 0)
+    sv = uops / (n_ports * clocks);
+
+  sv = clib_max (sv * 100, 0);
+  s = format (s, "%04.1f", sv);
+
+  return s;
+}
+
+static perfmon_cpu_supports_t backend_bound_core_cpu_supports[] = { /* CPU checks handed to the bundle through .cpu_supports */
+ { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE }, /* NOTE(review): AVX512 BITALG is presumably a proxy for cores that implement these port events - confirm */
+};
+
+/*
+ * "td-backend-core" bundle: seven UOPS_DISPATCHED port counters plus the
+ * distributed core clock, rendered by format_intel_backend_bound_core.
+ * Event slots are addressed through the enum at the top of this file so
+ * the slot numbering and the formatter cannot drift apart.
+ */
+PERFMON_REGISTER_BUNDLE (intel_core_backend_bound_core) = {
+  .name = "td-backend-core",
+  .description = "Topdown BackEnd-bound Core - % cycles core resources busy",
+  .source = "intel-core",
+  .format_fn = format_intel_backend_bound_core,
+  .column_headers = PERFMON_STRINGS ("Clocks/Packet", "%Port0", "%Port1",
+				     "%Port5", "%Port6", "%Load", "%Store"),
+  .cpu_supports = backend_bound_core_cpu_supports,
+  .n_cpu_supports = ARRAY_LEN (backend_bound_core_cpu_supports),
+  .n_events = 8,
+  .events[PORT0] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_0,		  /* 0xFF */
+  .events[PORT1] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_1,		  /* 0xFF */
+  .events[PORT5] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_5,		  /* 0xFF */
+  .events[PORT6] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_6,		  /* 0xFF */
+  .events[PORT2_3] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_2_3,	  /* 0xFF */
+  .events[PORT4_9] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_4_9,	  /* 0xFF */
+  .events[PORT7_8] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_7_8,	  /* 0xFF */
+  .events[DISTRIBUTED] = INTEL_CORE_E_CPU_CLK_UNHALTED_DISTRIBUTED, /* 0xFF */
+};
diff --git a/src/plugins/perfmon/intel/core.h b/src/plugins/perfmon/intel/core.h
index 31daf273517..971dc3465fa 100644
--- a/src/plugins/perfmon/intel/core.h
+++ b/src/plugins/perfmon/intel/core.h
@@ -149,6 +149,20 @@
_ (0x9C, 0x01, 0, 0, 0, 0x00, IDQ_UOPS_NOT_DELIVERED, CORE, \
"Uops not delivered to Resource Allocation Table (RAT) per thread when " \
"backend of the machine is not stalled") \
+ _ (0xA1, 0x01, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_0, \
+ "Number of uops executed on port 0") \
+ _ (0xA1, 0x02, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_1, \
+ "Number of uops executed on port 1") \
+ _ (0xA1, 0x04, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_2_3, \
+ "Number of uops executed on port 2 and 3") \
+ _ (0xA1, 0x10, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_4_9, \
+ "Number of uops executed on port 4 and 9") \
+ _ (0xA1, 0x20, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_5, \
+ "Number of uops executed on port 5") \
+ _ (0xA1, 0x40, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_6, \
+ "Number of uops executed on port 6") \
+ _ (0xA1, 0x80, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_7_8, \
+ "Number of uops executed on port 7 and 8") \
_ (0xA2, 0x08, 0, 0, 0, 0x00, RESOURCE_STALLS, SB, \
"Counts allocation stall cycles caused by the store buffer (SB) being " \
"full. This counts cycles that the pipeline back-end blocked uop " \
@@ -230,6 +244,9 @@
"Counts the total number when the front end is resteered, mainly when " \
"the BPU cannot provide a correct prediction and this is corrected by " \
"other branch handling mechanisms at the front end.") \
+ _ (0xEC, 0x02, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, DISTRIBUTED, \
+ "Cycle counts are evenly distributed between active threads in the " \
+ " Core") \
_ (0xF0, 0x40, 0, 0, 0, 0x00, L2_TRANS, L2_WB, \
"L2 writebacks that access L2 cache") \
_ (0xF1, 0x1F, 0, 0, 0, 0x00, L2_LINES_IN, ALL, \