aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/perfmon/intel
diff options
context:
space:
mode:
author: Ray Kinsella <mdr@ashroe.eu> 2021-06-09 11:35:26 +0100
committer: Damjan Marion <dmarion@me.com> 2021-09-08 14:30:03 +0000
commit: 710bdef43c2bfe6a6fd208edd080c75f06c5dfb7 (patch)
tree: b5cd741b43ec616818a1f0607b8c6008773139a1 /src/plugins/perfmon/intel
parent: c3cb2075deef7cd35401f9d2f00f3aeb23ed16d5 (diff)
perfmon: add membw-bound bundle
Added memory bandwidth boundedness bundle, closely related to cache-hierarchy. This bundle works on ICX only, due to an ICX specific counter. Type: improvement Signed-off-by: Ray Kinsella <mdr@ashroe.eu> Change-Id: Id385bd5f4e645ac020774e311c623afb64b79b1e
Diffstat (limited to 'src/plugins/perfmon/intel')
-rw-r--r-- src/plugins/perfmon/intel/bundle/membw_bound.c 59
-rw-r--r-- src/plugins/perfmon/intel/core.h 19
2 files changed, 78 insertions, 0 deletions
diff --git a/src/plugins/perfmon/intel/bundle/membw_bound.c b/src/plugins/perfmon/intel/bundle/membw_bound.c
new file mode 100644
index 00000000000..2e4b4aa57bb
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/membw_bound.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2021 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+/*
+ * Format one cell of the membw-bound table: the counter selected by the
+ * row index, averaged per packet and printed with "%5.0f".
+ *
+ * Variadic arguments, in order:
+ *   perfmon_node_stats_t *  - stats record to read from
+ *   int                     - index into the record's value[] array
+ *
+ * Returns s, extended by the formatted value.  When the record has no
+ * packets, s is returned untouched, which also avoids dividing by zero.
+ */
+static u8 *
+format_intel_membw_bound (u8 *s, va_list *args)
+{
+  perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+  int row = va_arg (*args, int);
+  f64 sv;
+
+  if (!ss->n_packets)
+    return s;
+
+  /*
+   * Divide in floating point.  NOTE(review): value[] and n_packets look like
+   * integer counters (confirm against their declaration); without the casts
+   * the quotient would be truncated before it is stored in an f64.
+   */
+  sv = (f64) ss->value[row] / (f64) ss->n_packets;
+
+  return format (s, "%5.0f", sv);
+}
+
+static perfmon_cpu_supports_t membw_bound_cpu_supports[] = { /* CPU gate
+   table handed to the membw-bound bundle via .cpu_supports. */
+ { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE }, /* Single
+   entry: a CPU-feature check paired with the node bundle type.
+   NOTE(review): the commit message says this bundle is ICX-only; the
+   avx512_bitalg check is presumably the proxy for that - confirm. */
+};
+
+/*
+ * Bundle registration for "membw-bound" on the "intel-core" source.
+ * Seven events are collected; the seven column headers are listed in the
+ * same order as the events, and every cell is rendered by
+ * format_intel_membw_bound.
+ */
+PERFMON_REGISTER_BUNDLE (intel_core_membw_bound) = {
+  .name = "membw-bound",
+  .description = "memory bandwidth boundedness",
+  .source = "intel-core",
+
+  /* CPU gating */
+  .cpu_supports = membw_bound_cpu_supports,
+  .n_cpu_supports = ARRAY_LEN (membw_bound_cpu_supports),
+
+  /* Events; trailing tags are the original author's annotations. */
+  .n_events = 7,
+  .events[0] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P,	      /* FIXED */
+  .events[1] = INTEL_CORE_E_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE, /* CMask: 0xFF */
+  .events[2] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_MEM_ANY,    /* CMask: 0xFF */
+  .events[3] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L1D_MISS,   /* CMask: 0xF */
+  .events[4] = INTEL_CORE_E_L1D_PEND_MISS_FB_FULL,	      /* CMask: 0xF */
+  .events[5] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L3_MISS,    /* CMask: 0xF */
+  .events[6] = INTEL_CORE_E_SQ_MISC_SQ_FULL,		      /* CMask: 0xF */
+
+  /* Presentation: one header per event, same order as above. */
+  .column_headers = PERFMON_STRINGS ("Cycles/Packet",
+				     "Cycles Stall/Packet",
+				     "Mem Stall/Packet",
+				     "L1D Miss Stall/Packet",
+				     "FB Full/Packet",
+				     "L3 Miss Stall/Packet",
+				     "SQ Full/Packet"),
+  .format_fn = format_intel_membw_bound,
+};
diff --git a/src/plugins/perfmon/intel/core.h b/src/plugins/perfmon/intel/core.h
index a6a5269d772..0e29022bfdf 100644
--- a/src/plugins/perfmon/intel/core.h
+++ b/src/plugins/perfmon/intel/core.h
@@ -105,6 +105,22 @@
_ (0x9C, 0x01, 0, 0, 0, 0x00, IDQ_UOPS_NOT_DELIVERED, CORE, \
"Uops not delivered to Resource Allocation Table (RAT) per thread when " \
"backend of the machine is not stalled") \
+ _ (0xA2, 0x08, 0, 0, 0, 0x00, RESOURCE_STALLS, SB, \
+ "Counts allocation stall cycles caused by the store buffer (SB) being " \
+ "full. This counts cycles that the pipeline back-end blocked uop " \
+ "delivery " \
+ "from the front-end.") \
+ _ (0xA3, 0x04, 0, 0, 0, 0x04, CYCLE_ACTIVITY, CYCLES_NO_EXECUTE, \
+ "This event counts cycles during which no instructions were executed in" \
+ " the execution stage of the pipeline.") \
+ _ (0xA3, 0x05, 0, 0, 0, 0x05, CYCLE_ACTIVITY, STALLS_L2_MISS, \
+ "Execution stalls while L2 cache miss demand load is outstanding") \
+ _ (0xA3, 0x06, 0, 0, 0, 0x06, CYCLE_ACTIVITY, STALLS_L3_MISS, \
+ "Execution stalls while L3 cache miss demand load is outstanding") \
+ _ (0xA3, 0x0C, 0, 0, 0, 0x0C, CYCLE_ACTIVITY, STALLS_L1D_MISS, \
+ "Execution stalls while L1 cache miss demand load is outstanding") \
+ _ (0xA3, 0x14, 0, 0, 0, 0x14, CYCLE_ACTIVITY, STALLS_MEM_ANY, \
+ "Execution stalls while memory subsystem has an outstanding load.") \
_ (0xC0, 0x00, 0, 0, 0, 0x00, INST_RETIRED, ANY_P, \
"Number of instructions retired. General Counter - architectural event") \
_ (0xC2, 0x02, 0, 0, 0, 0x00, UOPS_RETIRED, RETIRE_SLOTS, \
@@ -162,6 +178,9 @@
"L2 writebacks that access L2 cache") \
_ (0xF1, 0x1F, 0, 0, 0, 0x00, L2_LINES_IN, ALL, \
"L2 cache lines filling L2") \
+ _ (0xF4, 0x04, 0, 0, 0, 0x00, SQ_MISC, SQ_FULL, \
+ "Counts the cycles for which the thread is active and the superQ cannot" \
+ " take any more entries.") \
_ (0xFE, 0x02, 0, 0, 0, 0x00, IDI_MISC, WB_UPGRADE, \
"Counts number of cache lines that are allocated and written back to L3" \
" with the intention that they are more likely to be reused shortly") \