diff options
author | Ray Kinsella <mdr@ashroe.eu> | 2022-01-28 08:56:57 +0000 |
---|---|---|
committer | Damjan Marion <dmarion@me.com> | 2022-01-30 14:43:34 +0000 |
commit | fe85d872358a7c7874803ac428b2067f66c2a5d8 (patch) | |
tree | 2f95458b6d4689339d5ec27f976a02e07a5bbbd0 /src/plugins | |
parent | 1f2070a0fe41d2fe947aeed2f101ea7acc5d973a (diff) |
perfmon: topdown backend bound core bundle
Add a bundle to measure topdown backend-bound core cycles; it indicates whether
any given execution port has contention.
Type: improvement
Signed-off-by: Ray Kinsella <mdr@ashroe.eu>
Change-Id: I37d1b38c101ac42d51c10fa4452b822d34b729c9
Diffstat (limited to 'src/plugins')
-rw-r--r-- | src/plugins/perfmon/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/bundle/backend_bound_core.c | 100 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/core.h | 17 |
3 files changed, 118 insertions, 0 deletions
diff --git a/src/plugins/perfmon/CMakeLists.txt b/src/plugins/perfmon/CMakeLists.txt
index e262984b610..05c280f64ac 100644
--- a/src/plugins/perfmon/CMakeLists.txt
+++ b/src/plugins/perfmon/CMakeLists.txt
@@ -24,6 +24,7 @@ add_vpp_plugin(perfmon
     intel/core.c
     intel/uncore.c
     intel/bundle/backend_bound_mem.c
+    intel/bundle/backend_bound_core.c
     intel/bundle/inst_and_clock.c
     intel/bundle/load_blocks.c
     intel/bundle/mem_bw.c
diff --git a/src/plugins/perfmon/intel/bundle/backend_bound_core.c b/src/plugins/perfmon/intel/bundle/backend_bound_core.c
new file mode 100644
index 00000000000..16905235119
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/backend_bound_core.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+enum
+{
+  PORT0 = 0,
+  PORT1 = 1,
+  PORT5 = 2,
+  PORT6 = 3,
+  PORT2_3 = 4,
+  PORT4_9 = 5,
+  PORT7_8 = 6,
+  DISTRIBUTED = 7,
+};
+
+static u8 *
+format_intel_backend_bound_core (u8 *s, va_list *args)
+{
+  perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+  int row = va_arg (*args, int);
+  f64 sv = 0;
+
+  if (!ss->n_packets)
+    return s;
+
+  if (0 == row)
+    {
+      sv = ss->value[DISTRIBUTED] / ss->n_packets;
+
+      s = format (s, "%.0f", sv);
+      return s;
+    }
+
+  switch (row)
+    {
+    case 1:
+      sv = ss->value[PORT0] / (f64) ss->value[DISTRIBUTED];
+      break;
+    case 2:
+      sv = ss->value[PORT1] / (f64) ss->value[DISTRIBUTED];
+      break;
+    case 3:
+      sv = ss->value[PORT5] / (f64) ss->value[DISTRIBUTED];
+      break;
+    case 4:
+      sv = ss->value[PORT6] / (f64) ss->value[DISTRIBUTED];
+      break;
+    case 5:
+      sv = (ss->value[PORT2_3]) / (f64) (2 * ss->value[DISTRIBUTED]);
+      break;
+    case 6:
+      sv = (ss->value[PORT4_9] + ss->value[PORT7_8]) /
+	   (f64) (4 * ss->value[DISTRIBUTED]);
+      break;
+    }
+
+  sv = clib_max (sv * 100, 0);
+  s = format (s, "%04.1f", sv);
+
+  return s;
+}
+
+static perfmon_cpu_supports_t backend_bound_core_cpu_supports[] = {
+  { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+PERFMON_REGISTER_BUNDLE (intel_core_backend_bound_core) = {
+  .name = "td-backend-core",
+  .description = "Topdown BackEnd-bound Core - % cycles core resources busy",
+  .source = "intel-core",
+  .events[0] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_0, /* 0xFF */
+  .events[1] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_1, /* 0xFF */
+  .events[2] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_5, /* 0xFF */
+  .events[3] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_6, /* 0xFF */
+  .events[4] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_2_3, /* 0xFF */
+  .events[5] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_4_9, /* 0xFF */
+  .events[6] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_7_8, /* 0xFF */
+  .events[7] = INTEL_CORE_E_CPU_CLK_UNHALTED_DISTRIBUTED, /* 0xFF */
+  .n_events = 8,
+  .format_fn = format_intel_backend_bound_core,
+  .cpu_supports = backend_bound_core_cpu_supports,
+  .n_cpu_supports = ARRAY_LEN (backend_bound_core_cpu_supports),
+  .column_headers = PERFMON_STRINGS ("Clocks/Packet", "%Port0", "%Port1",
+				     "%Port5", "%Port6", "%Load", "%Store"),
+};
diff --git a/src/plugins/perfmon/intel/core.h b/src/plugins/perfmon/intel/core.h
index 31daf273517..971dc3465fa 100644
--- a/src/plugins/perfmon/intel/core.h
+++ b/src/plugins/perfmon/intel/core.h
@@ -149,6 +149,20 @@
   _ (0x9C, 0x01, 0, 0, 0, 0x00, IDQ_UOPS_NOT_DELIVERED, CORE, \
      "Uops not delivered to Resource Allocation Table (RAT) per thread when " \
      "backend of the machine is not stalled") \
+  _ (0xA1, 0x01, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_0, \
+     "Number of uops executed on port 0") \
+  _ (0xA1, 0x02, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_1, \
+     "Number of uops executed on port 1") \
+  _ (0xA1, 0x04, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_2_3, \
+     "Number of uops executed on port 2 and 3") \
+  _ (0xA1, 0x10, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_4_9, \
+     "Number of uops executed on port 4 and 9") \
+  _ (0xA1, 0x20, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_5, \
+     "Number of uops executed on port 5") \
+  _ (0xA1, 0x40, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_6, \
+     "Number of uops executed on port 6") \
+  _ (0xA1, 0x80, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_7_8, \
+     "Number of uops executed on port 7 and 8") \
   _ (0xA2, 0x08, 0, 0, 0, 0x00, RESOURCE_STALLS, SB, \
      "Counts allocation stall cycles caused by the store buffer (SB) being " \
      "full. This counts cycles that the pipeline back-end blocked uop " \
@@ -230,6 +244,9 @@
      "Counts the total number when the front end is resteered, mainly when " \
      "the BPU cannot provide a correct prediction and this is corrected by " \
      "other branch handling mechanisms at the front end.") \
+  _ (0xEC, 0x02, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, DISTRIBUTED, \
+     "Cycle counts are evenly distributed between active threads in the " \
+     " Core") \
   _ (0xF0, 0x40, 0, 0, 0, 0x00, L2_TRANS, L2_WB, \
      "L2 writebacks that access L2 cache") \
   _ (0xF1, 0x1F, 0, 0, 0, 0x00, L2_LINES_IN, ALL, \