diff options
author | Ray Kinsella <mdr@ashroe.eu> | 2021-06-09 11:35:26 +0100 |
---|---|---|
committer | Damjan Marion <dmarion@me.com> | 2021-09-08 14:30:03 +0000 |
commit | 710bdef43c2bfe6a6fd208edd080c75f06c5dfb7 (patch) | |
tree | b5cd741b43ec616818a1f0607b8c6008773139a1 /src/plugins | |
parent | c3cb2075deef7cd35401f9d2f00f3aeb23ed16d5 (diff) |
perfmon: add membw-bound bundle
Added memory bandwidth boundedness bundle, closely related to cache-hierarchy.
This bundle works on ICX only, due to an ICX specific counter.
Type: improvement
Signed-off-by: Ray Kinsella <mdr@ashroe.eu>
Change-Id: Id385bd5f4e645ac020774e311c623afb64b79b1e
Diffstat (limited to 'src/plugins')
-rw-r--r-- | src/plugins/perfmon/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/bundle/membw_bound.c | 59 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/core.h | 19 | ||||
-rw-r--r-- | src/plugins/perfmon/perfmon.h | 2 |
4 files changed, 80 insertions, 1 deletions
diff --git a/src/plugins/perfmon/CMakeLists.txt b/src/plugins/perfmon/CMakeLists.txt
index af0bd3c3f51..6b8e7c80044 100644
--- a/src/plugins/perfmon/CMakeLists.txt
+++ b/src/plugins/perfmon/CMakeLists.txt
@@ -24,6 +24,7 @@ add_vpp_plugin(perfmon
   table.c
   intel/core.c
   intel/uncore.c
+  intel/bundle/membw_bound.c
   intel/bundle/inst_and_clock.c
   intel/bundle/load_blocks.c
   intel/bundle/mem_bw.c
diff --git a/src/plugins/perfmon/intel/bundle/membw_bound.c b/src/plugins/perfmon/intel/bundle/membw_bound.c
new file mode 100644
index 00000000000..2e4b4aa57bb
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/membw_bound.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2021 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+/*
+ * Format one per-node cell: the counter selected by "row", averaged per
+ * packet. Leaves "s" unchanged when the node has seen no packets.
+ */
+static u8 *
+format_intel_membw_bound (u8 *s, va_list *args)
+{
+  perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+  int row = va_arg (*args, int);
+
+  if (!ss->n_packets)
+    return s;
+
+  /* cast so the division is done in floating point, not truncated */
+  return format (s, "%5.0f", (f64) ss->value[row] / ss->n_packets);
+}
+
+static perfmon_cpu_supports_t membw_bound_cpu_supports[] = {
+  { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+PERFMON_REGISTER_BUNDLE (intel_core_membw_bound) = {
+  .name = "membw-bound",
+  .description = "memory bandwidth boundedness",
+  .source = "intel-core",
+  .events[0] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P, /* FIXED */
+  .events[1] = INTEL_CORE_E_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE, /*CMask: 0xFF*/
+  .events[2] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_MEM_ANY, /*CMask: 0xFF*/
+  .events[3] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L1D_MISS, /*CMask: 0xF*/
+  .events[4] = INTEL_CORE_E_L1D_PEND_MISS_FB_FULL, /*CMask: 0xF*/
+  .events[5] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L3_MISS, /*CMask: 0xF*/
+  .events[6] = INTEL_CORE_E_SQ_MISC_SQ_FULL, /*CMask: 0xF*/
+  .n_events = 7,
+  .format_fn = format_intel_membw_bound,
+  .cpu_supports = membw_bound_cpu_supports,
+  .n_cpu_supports = ARRAY_LEN (membw_bound_cpu_supports),
+  .column_headers = PERFMON_STRINGS ("Cycles/Packet", "Cycles Stall/Packet",
+                                     "Mem Stall/Packet",
+                                     "L1D Miss Stall/Packet", "FB Full/Packet",
+                                     "L3 Miss Stall/Packet", "SQ Full/Packet"),
+};
diff --git a/src/plugins/perfmon/intel/core.h b/src/plugins/perfmon/intel/core.h
index a6a5269d772..0e29022bfdf 100644
--- a/src/plugins/perfmon/intel/core.h
+++ b/src/plugins/perfmon/intel/core.h
@@ -105,6 +105,22 @@
   _ (0x9C, 0x01, 0, 0, 0, 0x00, IDQ_UOPS_NOT_DELIVERED, CORE, \
      "Uops not delivered to Resource Allocation Table (RAT) per thread when " \
      "backend of the machine is not stalled") \
+  _ (0xA2, 0x08, 0, 0, 0, 0x00, RESOURCE_STALLS, SB, \
+     "Counts allocation stall cycles caused by the store buffer (SB) being " \
+     "full. This counts cycles that the pipeline back-end blocked uop " \
+     "delivery " \
+     "from the front-end.") \
+  _ (0xA3, 0x04, 0, 0, 0, 0x04, CYCLE_ACTIVITY, CYCLES_NO_EXECUTE, \
+     "This event counts cycles during which no instructions were executed in" \
+     " the execution stage of the pipeline.") \
+  _ (0xA3, 0x05, 0, 0, 0, 0x05, CYCLE_ACTIVITY, STALLS_L2_MISS, \
+     "Execution stalls while L2 cache miss demand load is outstanding") \
+  _ (0xA3, 0x06, 0, 0, 0, 0x06, CYCLE_ACTIVITY, STALLS_L3_MISS, \
+     "Execution stalls while L3 cache miss demand load is outstanding") \
+  _ (0xA3, 0x0C, 0, 0, 0, 0x0C, CYCLE_ACTIVITY, STALLS_L1D_MISS, \
+     "Execution stalls while L1 cache miss demand load is outstanding") \
+  _ (0xA3, 0x14, 0, 0, 0, 0x14, CYCLE_ACTIVITY, STALLS_MEM_ANY, \
+     "Execution stalls while memory subsystem has an outstanding load.") \
   _ (0xC0, 0x00, 0, 0, 0, 0x00, INST_RETIRED, ANY_P, \
      "Number of instructions retired. General Counter - architectural event") \
   _ (0xC2, 0x02, 0, 0, 0, 0x00, UOPS_RETIRED, RETIRE_SLOTS, \
@@ -162,6 +178,9 @@
      "L2 writebacks that access L2 cache") \
   _ (0xF1, 0x1F, 0, 0, 0, 0x00, L2_LINES_IN, ALL, \
      "L2 cache lines filling L2") \
+  _ (0xF4, 0x04, 0, 0, 0, 0x00, SQ_MISC, SQ_FULL, \
+     "Counts the cycles for which the thread is active and the superQ cannot " \
+     "take any more entries.") \
   _ (0xFE, 0x02, 0, 0, 0, 0x00, IDI_MISC, WB_UPGRADE, \
      "Counts number of cache lines that are allocated and written back to L3" \
      " with the intention that they are more likely to be reused shortly") \
diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h
index 967b92e0e4a..ffcf2fd5e46 100644
--- a/src/plugins/perfmon/perfmon.h
+++ b/src/plugins/perfmon/perfmon.h
@@ -23,7 +23,7 @@
 #include <vppinfra/cpu.h>
 #include <vlib/vlib.h>
 
-#define PERF_MAX_EVENTS 7 /* 3 fixed and 4 programmable */
+#define PERF_MAX_EVENTS 8 /* 4 fixed and 8 programmable on ICX */
 
 typedef enum
 {