diff options
Diffstat (limited to 'src/plugins/perfmon/intel/bundle')
-rw-r--r-- | src/plugins/perfmon/intel/bundle/backend_bound_core.c | 100 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/bundle/backend_bound_mem.c | 102 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/bundle/frontend_bound_bw_src.c | 90 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c | 89 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/bundle/frontend_bound_lat.c | 99 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/bundle/iio_bw.c | 263 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/bundle/membw_bound.c | 59 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/bundle/topdown_icelake.c | 176 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/bundle/topdown_metrics.c | 172 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/bundle/topdown_tremont.c | 85 |
10 files changed, 1130 insertions, 105 deletions
diff --git a/src/plugins/perfmon/intel/bundle/backend_bound_core.c b/src/plugins/perfmon/intel/bundle/backend_bound_core.c new file mode 100644 index 00000000000..16905235119 --- /dev/null +++ b/src/plugins/perfmon/intel/bundle/backend_bound_core.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2022 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <perfmon/perfmon.h> +#include <perfmon/intel/core.h> + +enum +{ + PORT0 = 0, + PORT1 = 1, + PORT5 = 2, + PORT6 = 3, + PORT2_3 = 4, + PORT4_9 = 5, + PORT7_8 = 6, + DISTRIBUTED = 7, +}; + +static u8 * +format_intel_backend_bound_core (u8 *s, va_list *args) +{ + perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *); + int row = va_arg (*args, int); + f64 sv = 0; + + if (!ss->n_packets) + return s; + + if (0 == row) + { + sv = ss->value[DISTRIBUTED] / ss->n_packets; + + s = format (s, "%.0f", sv); + return s; + } + + switch (row) + { + case 1: + sv = ss->value[PORT0] / (f64) ss->value[DISTRIBUTED]; + break; + case 2: + sv = ss->value[PORT1] / (f64) ss->value[DISTRIBUTED]; + break; + case 3: + sv = ss->value[PORT5] / (f64) ss->value[DISTRIBUTED]; + break; + case 4: + sv = ss->value[PORT6] / (f64) ss->value[DISTRIBUTED]; + break; + case 5: + sv = (ss->value[PORT2_3]) / (f64) (2 * ss->value[DISTRIBUTED]); + break; + case 6: + sv = (ss->value[PORT4_9] + ss->value[PORT7_8]) / + (f64) (4 * ss->value[DISTRIBUTED]); + break; + } + + sv = clib_max (sv * 100, 0); + s = format (s, 
"%04.1f", sv); + + return s; +} + +static perfmon_cpu_supports_t backend_bound_core_cpu_supports[] = { + { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE }, +}; + +PERFMON_REGISTER_BUNDLE (intel_core_backend_bound_core) = { + .name = "td-backend-core", + .description = "Topdown BackEnd-bound Core - % cycles core resources busy", + .source = "intel-core", + .events[0] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_0, /* 0xFF */ + .events[1] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_1, /* 0xFF */ + .events[2] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_5, /* 0xFF */ + .events[3] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_6, /* 0xFF */ + .events[4] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_2_3, /* 0xFF */ + .events[5] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_4_9, /* 0xFF */ + .events[6] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_7_8, /* 0xFF */ + .events[7] = INTEL_CORE_E_CPU_CLK_UNHALTED_DISTRIBUTED, /* 0xFF */ + .n_events = 8, + .format_fn = format_intel_backend_bound_core, + .cpu_supports = backend_bound_core_cpu_supports, + .n_cpu_supports = ARRAY_LEN (backend_bound_core_cpu_supports), + .column_headers = PERFMON_STRINGS ("Clocks/Packet", "%Port0", "%Port1", + "%Port5", "%Port6", "%Load", "%Store"), +}; diff --git a/src/plugins/perfmon/intel/bundle/backend_bound_mem.c b/src/plugins/perfmon/intel/bundle/backend_bound_mem.c new file mode 100644 index 00000000000..ccf1ed12153 --- /dev/null +++ b/src/plugins/perfmon/intel/bundle/backend_bound_mem.c @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2021 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <perfmon/perfmon.h> +#include <perfmon/intel/core.h> + +enum +{ + STALLS_L1D_MISS = 0, + STALLS_L2_MISS = 1, + STALLS_L3_MISS = 2, + STALLS_MEM_ANY = 3, + STALLS_TOTAL = 4, + BOUND_ON_STORES = 5, + FB_FULL = 6, + THREAD = 7, +}; + +static u8 * +format_intel_backend_bound_mem (u8 *s, va_list *args) +{ + perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *); + int row = va_arg (*args, int); + f64 sv = 0; + + if (!ss->n_packets) + return s; + + if (0 == row) + { + sv = ss->value[THREAD] / ss->n_packets; + + s = format (s, "%.0f", sv); + return s; + } + + switch (row) + { + case 1: + sv = ss->value[BOUND_ON_STORES]; + break; + case 2: + sv = ss->value[STALLS_MEM_ANY] - ss->value[STALLS_L1D_MISS]; + break; + case 3: + sv = ss->value[FB_FULL]; + break; + case 4: + sv = ss->value[STALLS_L1D_MISS] - ss->value[STALLS_L2_MISS]; + break; + case 5: + sv = ss->value[STALLS_L2_MISS] - ss->value[STALLS_L3_MISS]; + break; + case 6: + sv = ss->value[STALLS_L3_MISS]; + break; + } + + sv = clib_max ((sv / ss->value[THREAD]) * 100, 0); + + s = format (s, "%04.1f", sv); + + return s; +} + +static perfmon_cpu_supports_t backend_bound_mem_cpu_supports[] = { + { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE }, +}; + +PERFMON_REGISTER_BUNDLE (intel_core_backend_bound_mem) = { + .name = "td-backend-mem", + .description = "Topdown BackEnd-bound Memory - % cycles not retiring " + "instructions due to memory stalls", + .source = "intel-core", + .events[0] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L1D_MISS, /* 0x0F */ + .events[1] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L2_MISS, /* 0x0F */ + .events[2] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L3_MISS, /* 0x0F */ + .events[3] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_MEM_ANY, /* 0xFF */ + .events[4] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_TOTAL, /* 0xFF */ + .events[5] = INTEL_CORE_E_EXE_ACTIVITY_BOUND_ON_STORES, /* 
0xFF */ + .events[6] = INTEL_CORE_E_L1D_PEND_MISS_FB_FULL, /* 0x0F */ + .events[7] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P, /* 0xFF */ + .n_events = 8, + .format_fn = format_intel_backend_bound_mem, + .cpu_supports = backend_bound_mem_cpu_supports, + .n_cpu_supports = ARRAY_LEN (backend_bound_mem_cpu_supports), + .column_headers = PERFMON_STRINGS ("Clocks/Packet", "%Store Bound", + "%L1 Bound", "%FB Full", "%L2 Bound", + "%L3 Bound", "%DRAM Bound"), +}; diff --git a/src/plugins/perfmon/intel/bundle/frontend_bound_bw_src.c b/src/plugins/perfmon/intel/bundle/frontend_bound_bw_src.c new file mode 100644 index 00000000000..3db4ca9c0f3 --- /dev/null +++ b/src/plugins/perfmon/intel/bundle/frontend_bound_bw_src.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2022 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <perfmon/perfmon.h> +#include <perfmon/intel/core.h> + +enum +{ + DSB_UOPS, + MS_UOPS, + MITE_UOPS, + LSD_UOPS, +}; + +static u8 * +format_intel_frontend_bound_bw_src (u8 *s, va_list *args) +{ + perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *); + int row = va_arg (*args, int); + f64 sv = 0; + f64 uops = ss->value[DSB_UOPS] + ss->value[MS_UOPS] + ss->value[MITE_UOPS] + + ss->value[LSD_UOPS]; + + if (!ss->n_packets) + return s; + + if (row == 0) + { + sv = uops / ss->n_packets; + s = format (s, "%.0f", sv); + + return s; + } + + switch (row) + { + case 1: + sv = (ss->value[DSB_UOPS] / uops) * 100; + break; + case 2: + sv = (ss->value[MS_UOPS] / uops) * 100; + break; + case 3: + sv = (ss->value[MITE_UOPS] / uops) * 100; + break; + case 4: + sv = (ss->value[LSD_UOPS] / uops) * 100; + break; + } + + s = format (s, "%04.1f", sv); + + return s; +} + +static perfmon_cpu_supports_t frontend_bound_bw_cpu_supports_src[] = { + { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE }, +}; + +PERFMON_REGISTER_BUNDLE (intel_core_frontend_bound_bw_src) = { + .name = "td-frontend-bw-src", + .description = + "Topdown FrontEnd-bound BandWidth - % uops from each uop fetch source", + .source = "intel-core", + .events[0] = INTEL_CORE_E_IDQ_DSB_UOPS, /* 0x0F */ + .events[1] = INTEL_CORE_E_IDQ_MS_UOPS, /* 0x0F */ + .events[2] = INTEL_CORE_E_IDQ_MITE_UOPS, /* 0x0F */ + .events[3] = INTEL_CORE_E_LSD_UOPS, /* 0x0F */ + .n_events = 4, + .format_fn = format_intel_frontend_bound_bw_src, + .cpu_supports = frontend_bound_bw_cpu_supports_src, + .n_cpu_supports = ARRAY_LEN (frontend_bound_bw_cpu_supports_src), + .column_headers = PERFMON_STRINGS ("UOPs/PKT", "% DSB UOPS", "% MS UOPS", + "% MITE UOPS", "% LSD UOPS"), + .footer = + "For more information, see the Intel(R) 64 and IA-32 Architectures\n" + "Optimization Reference Manual section on the Front End.", +}; diff --git a/src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c 
b/src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c new file mode 100644 index 00000000000..6bf08af8154 --- /dev/null +++ b/src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2022 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <perfmon/perfmon.h> +#include <perfmon/intel/core.h> + +enum +{ + THREAD_P, + THREE_UOP, + TWO_UOP, + ONE_UOP, + NO_UOP, + FOUR_UOP, +}; + +static u8 * +format_intel_frontend_bound_bw_uops (u8 *s, va_list *args) +{ + perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *); + int row = va_arg (*args, int); + f64 sv = 0; + f64 cycles = ss->value[THREAD_P]; + + switch (row) + { + case 0: + sv = (ss->value[FOUR_UOP] / cycles) * 100; + break; + case 1: + sv = ((ss->value[THREE_UOP] - ss->value[TWO_UOP]) / cycles) * 100; + break; + case 2: + sv = ((ss->value[TWO_UOP] - ss->value[ONE_UOP]) / cycles) * 100; + break; + case 3: + sv = ((ss->value[ONE_UOP] - ss->value[NO_UOP]) / cycles) * 100; + break; + case 4: + sv = (ss->value[NO_UOP] / cycles) * 100; + break; + } + + s = format (s, "%04.1f", sv); + + return s; +} + +static perfmon_cpu_supports_t frontend_bound_bw_cpu_supports_uops[] = { + { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE }, +}; + +PERFMON_REGISTER_BUNDLE (intel_core_frontend_bound_bw_uops) = { + .name = "td-frontend-bw-uops", + .description = "Topdown FrontEnd-bound BandWidth - distribution of " + "uops 
delivered to frontend", + .source = "intel-core", + .events[0] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P, /* 0x0F */ + .events[1] = + INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_3_UOP_DELIV_CORE, /* 0xFF */ + .events[2] = + INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_2_UOP_DELIV_CORE, /* 0xFF */ + .events[3] = + INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_1_UOP_DELIV_CORE, /* 0xFF */ + .events[4] = + INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_0_UOP_DELIV_CORE, /* 0xFF */ + .events[5] = INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_FE_WAS_OK, /* 0xFF */ + .n_events = 6, + .format_fn = format_intel_frontend_bound_bw_uops, + .cpu_supports = frontend_bound_bw_cpu_supports_uops, + .n_cpu_supports = ARRAY_LEN (frontend_bound_bw_cpu_supports_uops), + .column_headers = PERFMON_STRINGS ("% 4 UOPS", "% 3 UOPS", "% 2 UOPS", + "% 1 UOPS", "% 0 UOPS"), + .footer = + "For more information, see the Intel(R) 64 and IA-32 Architectures\n" + "Optimization Reference Manual section on the Front End.", +}; diff --git a/src/plugins/perfmon/intel/bundle/frontend_bound_lat.c b/src/plugins/perfmon/intel/bundle/frontend_bound_lat.c new file mode 100644 index 00000000000..aea2149663f --- /dev/null +++ b/src/plugins/perfmon/intel/bundle/frontend_bound_lat.c @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2022 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <perfmon/perfmon.h> +#include <perfmon/intel/core.h> + +static const int MS_Switches_Cost = 3; +static const int BA_Clear_Cost = 10; + +enum +{ + ICACHE_MISS, + DSB_SWITCHES, + RESTEER, + MS_SWITCHES, + BACLEARS, + THREAD, +}; + +static u8 * +format_intel_frontend_bound_lat (u8 *s, va_list *args) +{ + perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *); + int row = va_arg (*args, int); + f64 sv = 0; + f64 cycles = ss->value[THREAD]; + + if (!ss->n_packets) + return s; + + if (!row) + { + sv = ss->value[THREAD] / ss->n_packets; + + s = format (s, "%.0f", sv); + + return s; + } + + switch (row) + { + case 1: + sv = ss->value[ICACHE_MISS] / cycles; + break; + case 2: + sv = ss->value[DSB_SWITCHES] / cycles; + break; + case 3: + sv = + (ss->value[RESTEER] + (ss->value[BACLEARS] * BA_Clear_Cost)) / cycles; + break; + case 4: + sv = (ss->value[MS_SWITCHES] * MS_Switches_Cost) / cycles; + break; + } + + s = format (s, "%04.1f", sv * 100); + + return s; +} + +static perfmon_cpu_supports_t frontend_bound_lat_cpu_supports[] = { + { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE }, +}; + +PERFMON_REGISTER_BUNDLE (intel_core_frontend_bound_lat) = { + .name = "td-frontend-lat", + .description = "Topdown FrontEnd-bound Latency - % cycles not retiring uops " + "due to frontend latency", + .source = "intel-core", + .events[0] = INTEL_CORE_E_ICACHE_16B_IFDATA_STALL, /* 0x0F */ + .events[1] = INTEL_CORE_E_DSB2MITE_SWITCHES_PENALTY_CYCLES, /* 0x0F */ + .events[2] = INTEL_CORE_E_INT_MISC_CLEAR_RESTEER_CYCLES, /* 0xFF */ + .events[3] = INTEL_CORE_E_IDQ_MS_SWITCHES, /* 0x0F */ + .events[4] = INTEL_CORE_E_BACLEARS_ANY, /* 0x0F */ + .events[5] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P, /* FIXED */ + .n_events = 6, + .format_fn = format_intel_frontend_bound_lat, + .cpu_supports = frontend_bound_lat_cpu_supports, + .n_cpu_supports = ARRAY_LEN (frontend_bound_lat_cpu_supports), + .column_headers = PERFMON_STRINGS ("Clocks/Packet", "% iCache Miss", + 
"% DSB Switch", "% Branch Resteer", + "% MS Switch"), + .footer = + "For more information, see the Intel(R) 64 and IA-32 Architectures\n" + "Optimization Reference Manual on the Front End.", +}; diff --git a/src/plugins/perfmon/intel/bundle/iio_bw.c b/src/plugins/perfmon/intel/bundle/iio_bw.c new file mode 100644 index 00000000000..f746ca2c906 --- /dev/null +++ b/src/plugins/perfmon/intel/bundle/iio_bw.c @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2021 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <perfmon/perfmon.h> +#include <perfmon/intel/uncore.h> +#include <vlib/pci/pci.h> +#include <vppinfra/format.h> +#include <linux/limits.h> +#include <fcntl.h> +#include <math.h> + +typedef struct +{ + u8 socket_id; + u8 sad_id; + u8 iio_unit_id; +} iio_uncore_sad_t; +typedef u32 index_t; + +static const char *procfs_pci_path = "/proc/bus/pci"; + +#define PCM_INTEL_PCI_VENDOR_ID 0x8086 +#define SNR_ICX_SAD_CONTROL_CFG_OFFSET 0x3F4 +#define SNR_ICX_MESH2IIO_MMAP_DID 0x09A2 + +static const u8 icx_sad_to_pmu_id_mapping[] = { 5, 0, 1, 2, 3, 4 }; + +static const char *iio_bw_footer_message = + "* this bundle currently only measures x8 and x16 PCIe devices on Port #0\n" + "or Port #2. 
Please see the \"Intel® Xeon® Processor Scalable Memory\n" + "Family Uncore Performance Monitoring Reference Manual(336274)\"\n" + "Section 2.4 for more information."; + +static u32 +get_sad_ctrl_cfg (vlib_pci_addr_t *addr) +{ + int fd = 0; + u32 value; + u8 *dev_node_name = format (0, "%s/%02x/%02x.%x", procfs_pci_path, addr->bus, + addr->slot, addr->function); + + fd = open ((char *) dev_node_name, O_RDWR); + if (fd < 0) + return -1; + + if (pread (fd, &value, sizeof (u32), SNR_ICX_SAD_CONTROL_CFG_OFFSET) < + sizeof (u32)) + value = -1; + + close (fd); + + return value; +} + +static u64 +get_bus_to_sad_mappings (vlib_main_t *vm, index_t **ph, iio_uncore_sad_t **pp) +{ + index_t *h = 0; + iio_uncore_sad_t *p = 0, *e = 0; + vlib_pci_addr_t *addr = 0, *addrs; + + addrs = vlib_pci_get_all_dev_addrs (); + + vec_foreach (addr, addrs) + { + vlib_pci_device_info_t *d; + d = vlib_pci_get_device_info (vm, addr, 0); + + if (!d) + continue; + + if (d->vendor_id == PCM_INTEL_PCI_VENDOR_ID && + d->device_id == SNR_ICX_MESH2IIO_MMAP_DID) + { + + u32 sad_ctrl_cfg = get_sad_ctrl_cfg (addr); + if (sad_ctrl_cfg == 0xFFFFFFFF) + { + vlib_pci_free_device_info (d); + continue; + } + + pool_get_zero (p, e); + + e->socket_id = (sad_ctrl_cfg & 0xf); + e->sad_id = (sad_ctrl_cfg >> 4) & 0x7; + e->iio_unit_id = icx_sad_to_pmu_id_mapping[e->sad_id]; + + hash_set (h, addr->bus, e - p); + } + + vlib_pci_free_device_info (d); + } + + vec_free (addrs); + + *ph = h; + *pp = p; + + return 0; +} + +u8 * +format_stack_socket (u8 *s, va_list *va) +{ + iio_uncore_sad_t *e, *p = va_arg (*va, iio_uncore_sad_t *); + index_t *h = va_arg (*va, index_t *); + vlib_pci_addr_t root_bus, *addr = va_arg (*va, vlib_pci_addr_t *); + clib_error_t *err = vlib_pci_get_device_root_bus (addr, &root_bus); + if (err) + { + clib_error_free (err); + return s; + } + + uword *pu = hash_get (h, root_bus.bus); + if (pu) + { + e = pool_elt_at_index (p, (index_t) pu[0]); + + s = format (s, "IIO%u/%u", e->socket_id, 
e->iio_unit_id); + } + else + { + s = format (s, "[ERR: hash lookup for bus '%u' failed]", root_bus.bus); + } + return s; +} + +static clib_error_t * +init_intel_uncore_iio_bw (vlib_main_t *vm, struct perfmon_bundle *b) +{ + index_t *h = 0; + iio_uncore_sad_t *p = 0; + vlib_pci_addr_t *addr = 0, *addrs; + u8 *s = 0; + + get_bus_to_sad_mappings (vm, &h, &p); + + s = format (0, "%-10s%-5s%-13s%-12s%-14s%-16s%s\n", "Stack", "Port", + "Address", "VID:PID", "Link Speed", "Driver", "Product Name"); + + addrs = vlib_pci_get_all_dev_addrs (); + + vec_foreach (addr, addrs) + { + vlib_pci_device_info_t *d; + d = vlib_pci_get_device_info (vm, addr, 0); + + if (!d) + continue; + + if (d->device_class != PCI_CLASS_NETWORK_ETHERNET) + continue; + + s = format ( + s, "%-10U%-5U%-13U%04x:%04x %-14U%-16s%v\n", format_stack_socket, p, + h, addr, format_vlib_pci_link_port, &d->config, format_vlib_pci_addr, + addr, d->vendor_id, d->device_id, format_vlib_pci_link_speed, d, + d->driver_name ? (char *) d->driver_name : "", d->product_name); + + vlib_pci_free_device_info (d); + } + + b->footer = (char *) format (s, "\n%s", iio_bw_footer_message); + + vec_free (addrs); + pool_free (p); + hash_free (h); + + return 0; +} + +static u8 * +format_intel_uncore_iio_bw (u8 *s, va_list *args) +{ + perfmon_reading_t *r = va_arg (*args, perfmon_reading_t *); + int col = va_arg (*args, int); + f64 tr = r->time_running * 1e-9; + f64 value = 0; + + switch (col) + { + case 0: + s = format (s, "%9.2f", tr); + break; + default: + if (r->time_running) + { + value = r->value[col - 1] * 4 / tr; + + if (value > 1.0e6) + s = format (s, "%9.0fM", value * 1e-6); + else if (value > 1.0e3) + s = format (s, "%9.0fK", value * 1e-3); + else + s = format (s, "%9.0f ", value); + } + + break; + } + + return s; +} + +/* + * This bundle is currently only supported and tested on Intel Icelake. 
+ */ +static int +is_icelake () +{ + return clib_cpu_supports_avx512_bitalg () && !clib_cpu_supports_movdir64b (); +} + +static perfmon_cpu_supports_t iio_bw_cpu_supports[] = { + { is_icelake, PERFMON_BUNDLE_TYPE_SYSTEM } +}; + +PERFMON_REGISTER_BUNDLE (intel_uncore_iio_bw_pci) = { + .name = "iio-bandwidth-pci", + .description = "pci iio memory reads and writes per iio stack *", + .source = "intel-uncore", + .events[0] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_OF_CPU_PART0_RD, + .events[1] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART0_WR, + .events[2] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART2_RD, + .events[3] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART2_WR, + .n_events = 4, + .cpu_supports = iio_bw_cpu_supports, + .n_cpu_supports = ARRAY_LEN (iio_bw_cpu_supports), + .format_fn = format_intel_uncore_iio_bw, + .init_fn = init_intel_uncore_iio_bw, + .column_headers = PERFMON_STRINGS ("RunTime", "PCIe Rd/P0", "PCIe Wr/P0", + "PCIe Rd/P2", "PCIe Wr/P2") +}; + +PERFMON_REGISTER_BUNDLE (intel_uncore_iio_bw_cpu) = { + .name = "iio-bandwidth-cpu", + .description = "cpu iio memory reads and writes per iio stack *", + .source = "intel-uncore", + .events[0] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART0_RD, + .events[1] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART0_WR, + .events[2] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART2_RD, + .events[3] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART2_WR, + .n_events = 4, + .cpu_supports = iio_bw_cpu_supports, + .n_cpu_supports = ARRAY_LEN (iio_bw_cpu_supports), + .format_fn = format_intel_uncore_iio_bw, + .init_fn = init_intel_uncore_iio_bw, + .column_headers = PERFMON_STRINGS ("RunTime", "CPU Rd/P0", "CPU Wr/P0", + "CPU Rd/P2", "CPU Wr/P2") +}; diff --git a/src/plugins/perfmon/intel/bundle/membw_bound.c b/src/plugins/perfmon/intel/bundle/membw_bound.c deleted file mode 100644 index 2e4b4aa57bb..00000000000 --- a/src/plugins/perfmon/intel/bundle/membw_bound.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * 
Copyright (c) 2021 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <perfmon/perfmon.h> -#include <perfmon/intel/core.h> - -static u8 * -format_intel_membw_bound (u8 *s, va_list *args) -{ - perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *); - int row = va_arg (*args, int); - f64 sv = 0; - - if (!ss->n_packets) - return s; - - sv = ss->value[row] / ss->n_packets; - - s = format (s, "%5.0f", sv); - - return s; -} - -static perfmon_cpu_supports_t membw_bound_cpu_supports[] = { - { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE }, -}; - -PERFMON_REGISTER_BUNDLE (intel_core_membw_bound) = { - .name = "membw-bound", - .description = "memory bandwidth boundedness", - .source = "intel-core", - .events[0] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P, /* FIXED */ - .events[1] = INTEL_CORE_E_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE, /*CMask: 0xFF*/ - .events[2] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_MEM_ANY, /*CMask: 0xFF*/ - .events[3] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L1D_MISS, /*CMask: 0xF*/ - .events[4] = INTEL_CORE_E_L1D_PEND_MISS_FB_FULL, /*CMask: 0xF*/ - .events[5] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L3_MISS, /*CMask: 0xF*/ - .events[6] = INTEL_CORE_E_SQ_MISC_SQ_FULL, /*CMask: 0xF*/ - .n_events = 7, - .format_fn = format_intel_membw_bound, - .cpu_supports = membw_bound_cpu_supports, - .n_cpu_supports = ARRAY_LEN (membw_bound_cpu_supports), - .column_headers = PERFMON_STRINGS ("Cycles/Packet", "Cycles 
Stall/Packet", - "Mem Stall/Packet", - "L1D Miss Stall/Packet", "FB Full/Packet", - "L3 Miss Stall/Packet", "SQ Full/Packet"), -}; diff --git a/src/plugins/perfmon/intel/bundle/topdown_icelake.c b/src/plugins/perfmon/intel/bundle/topdown_icelake.c new file mode 100644 index 00000000000..a3392e52f0a --- /dev/null +++ b/src/plugins/perfmon/intel/bundle/topdown_icelake.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2022 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <perfmon/perfmon.h> +#include <perfmon/intel/core.h> + +static int +is_icelake () +{ + return clib_cpu_supports_avx512_bitalg () && !clib_cpu_supports_movdir64b (); +} + +static perfmon_cpu_supports_t topdown_lvl2_cpu_supports_icx[] = { + { is_icelake, PERFMON_BUNDLE_TYPE_THREAD } +}; + +#define GET_METRIC(m, i) (f64) (((m) >> (i * 8)) & 0xff) + +enum +{ + TD_SLOTS = 0, + STALLS_MEM_ANY, + STALLS_TOTAL, + BOUND_ON_STORES, + RECOVERY_CYCLES, + UOP_DROPPING, + UOP_NOT_DELIVERED, + TD_RETIRING, + TD_BAD_SPEC, + TD_FE_BOUND, + TD_BE_BOUND, +}; + +static_always_inline f64 +memory_bound_fraction (perfmon_reading_t *ss) +{ + return (ss->value[STALLS_MEM_ANY] + ss->value[BOUND_ON_STORES]) / + (f64) (ss->value[STALLS_TOTAL] + ss->value[BOUND_ON_STORES]); +} + +static_always_inline f64 +perf_metrics_sum (perfmon_reading_t *ss) +{ + return ss->value[TD_RETIRING] + ss->value[TD_BAD_SPEC] + + ss->value[TD_FE_BOUND] + ss->value[TD_BE_BOUND]; +} + +static_always_inline f64 +retiring (perfmon_reading_t *ss) +{ + return ss->value[TD_RETIRING] / perf_metrics_sum (ss); +} + +static_always_inline f64 +bad_speculation (perfmon_reading_t *ss) +{ + return ss->value[TD_BAD_SPEC] / perf_metrics_sum (ss); +} + +static_always_inline f64 +frontend_bound (perfmon_reading_t *ss) +{ + return (ss->value[TD_FE_BOUND] / perf_metrics_sum (ss)) - + (ss->value[UOP_DROPPING] / perf_metrics_sum (ss)); +} + +static_always_inline f64 +backend_bound (perfmon_reading_t *ss) +{ + return (ss->value[TD_BE_BOUND] / perf_metrics_sum (ss)) + + ((5 * ss->value[RECOVERY_CYCLES]) / perf_metrics_sum (ss)); +} + +static_always_inline f64 +fetch_latency (perfmon_reading_t *ss) +{ + f64 r = ((5 * ss->value[UOP_NOT_DELIVERED] - ss->value[UOP_DROPPING]) / + (f64) ss->value[TD_SLOTS]); + return r; +} + +static_always_inline f64 +fetch_bandwidth (perfmon_reading_t *ss) +{ + return clib_max (0, frontend_bound (ss) - fetch_latency (ss)); +} + +static_always_inline f64 +memory_bound (perfmon_reading_t *ss) +{ + 
return backend_bound (ss) * memory_bound_fraction (ss); +} + +static_always_inline f64 +core_bound (perfmon_reading_t *ss) +{ + return backend_bound (ss) - memory_bound (ss); +} + +static u8 * +format_topdown_lvl2_icx (u8 *s, va_list *args) +{ + perfmon_reading_t *ss = va_arg (*args, perfmon_reading_t *); + u64 idx = va_arg (*args, int); + f64 sv = 0; + + switch (idx) + { + case 0: + sv = retiring (ss); + break; + case 1: + sv = bad_speculation (ss); + break; + case 2: + sv = frontend_bound (ss); + break; + case 3: + sv = backend_bound (ss); + break; + case 4: + sv = fetch_latency (ss); + break; + case 5: + sv = fetch_bandwidth (ss); + break; + case 6: + sv = memory_bound (ss); + break; + case 7: + sv = core_bound (ss); + break; + } + + s = format (s, "%f", sv * 100); + + return s; +} + +PERFMON_REGISTER_BUNDLE (topdown_lvl2_metric_icx) = { + .name = "topdown", + .description = "Top-down Microarchitecture Analysis Level 1 & 2", + .source = "intel-core", + .events[0] = INTEL_CORE_E_TOPDOWN_SLOTS, + .events[1] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_MEM_ANY, + .events[2] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_TOTAL, + .events[3] = INTEL_CORE_E_EXE_ACTIVITY_BOUND_ON_STORES, + .events[4] = INTEL_CORE_E_INT_MISC_RECOVERY_CYCLES, + .events[5] = INTEL_CORE_E_INT_MISC_UOP_DROPPING, + .events[6] = INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CORE, + .events[7] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC, + .events[8] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC, + .events[9] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC, + .events[10] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC, + .n_events = 11, + .cpu_supports = topdown_lvl2_cpu_supports_icx, + .n_cpu_supports = ARRAY_LEN (topdown_lvl2_cpu_supports_icx), + .format_fn = format_topdown_lvl2_icx, + .column_headers = PERFMON_STRINGS ("% RT", "% BS", "% FE", "% BE", "% FE.FL", + "% FE.FB", "% BE.MB", "% BE.CB"), + .footer = "Retiring (RT), Bad Speculation (BS),\n" + " FrontEnd bound (FE), BackEnd bound (BE),\n" + " Fetch Latency (FL), Fetch Bandwidth 
(FB),\n" + " Memory Bound (MB), Core Bound (CB)", +}; diff --git a/src/plugins/perfmon/intel/bundle/topdown_metrics.c b/src/plugins/perfmon/intel/bundle/topdown_metrics.c index 6913781be90..a464dfe1c88 100644 --- a/src/plugins/perfmon/intel/bundle/topdown_metrics.c +++ b/src/plugins/perfmon/intel/bundle/topdown_metrics.c @@ -14,16 +14,17 @@ */ #include <vnet/vnet.h> +#include <vppinfra/math.h> #include <perfmon/perfmon.h> #include <perfmon/intel/core.h> #define GET_METRIC(m, i) (((m) >> (i * 8)) & 0xff) #define GET_RATIO(m, i) (((m) >> (i * 32)) & 0xffffffff) -#define RDPMC_FIXED_SLOTS (1 << 30) /* fixed slots */ -#define RDPMC_L1_METRICS (1 << 29) /* l1 metric counters */ +#define RDPMC_SLOTS (1 << 30) /* fixed slots */ +#define RDPMC_METRICS (1 << 29) /* l1 & l2 metric counters */ #define FIXED_COUNTER_SLOTS 3 -#define METRIC_COUNTER_TOPDOWN_L1 0 +#define METRIC_COUNTER_TOPDOWN_L1_L2 0 typedef enum { @@ -31,7 +32,16 @@ typedef enum TOPDOWN_E_BAD_SPEC, TOPDOWN_E_FE_BOUND, TOPDOWN_E_BE_BOUND, -} topdown_lvl1_t; + TOPDOWN_E_HEAVYOPS, + TOPDOWN_E_LIGHTOPS, + TOPDOWN_E_BMISPRED, + TOPDOWN_E_MCHCLEAR, + TOPDOWN_E_FETCHLAT, + TOPDOWN_E_FETCH_BW, + TOPDOWN_E_MEMBOUND, + TOPDOWN_E_CORBOUND, + TOPDOWN_E_MAX, +} topdown_e_t; enum { @@ -39,11 +49,11 @@ enum TOPDOWN_E_RDPMC_METRICS, }; -typedef f64 (topdown_lvl1_parse_fn_t) (void *, topdown_lvl1_t); +typedef f64 (topdown_lvl1_parse_fn_t) (void *, topdown_e_t); /* Parse thread level states from perfmon_reading */ static_always_inline f64 -topdown_lvl1_perf_reading (void *ps, topdown_lvl1_t e) +topdown_lvl1_perf_reading (void *ps, topdown_e_t e) { perfmon_reading_t *ss = (perfmon_reading_t *) ps; @@ -52,7 +62,7 @@ topdown_lvl1_perf_reading (void *ps, topdown_lvl1_t e) } static_always_inline f64 -topdown_lvl1_rdpmc_metric (void *ps, topdown_lvl1_t e) +topdown_lvl1_rdpmc_metric (void *ps, topdown_e_t e) { perfmon_node_stats_t *ss = (perfmon_node_stats_t *) ps; f64 slots_t0 = @@ -69,8 +79,87 @@ topdown_lvl1_rdpmc_metric (void *ps, 
topdown_lvl1_t e) return (slots_t1 / slots_delta) * 100; } +/* Convert the TopDown enum to the perf reading index */ +#define TO_LVL2_PERF_IDX(e) \ + ({ \ + u8 to_idx[TOPDOWN_E_MAX] = { 0, 0, 0, 0, 5, 5, 6, 6, 7, 7, 8, 8 }; \ + to_idx[e]; \ + }) + +/* Parse thread level stats from perfmon_reading */ +static_always_inline f64 +topdown_lvl2_perf_reading (void *ps, topdown_e_t e) +{ + perfmon_reading_t *ss = (perfmon_reading_t *) ps; + u64 value = ss->value[TO_LVL2_PERF_IDX (e)]; + + /* If it is an L1 metric, call L1 format */ + if (TOPDOWN_E_BE_BOUND >= e) + { + return topdown_lvl1_perf_reading (ps, e); + } + + /* all the odd metrics, are inferred from even and L1 metrics */ + if (e & 0x1) + { + topdown_e_t e1 = TO_LVL2_PERF_IDX (e) - 4; + value = ss->value[e1] - value; + } + + return (f64) value / ss->value[0] * 100; +} + +/* Convert the TopDown enum to the rdpmc metric byte position */ +#define TO_LVL2_METRIC_BYTE(e) \ + ({ \ + u8 to_metric[TOPDOWN_E_MAX] = { 0, 0, 0, 0, 4, 4, 5, 5, 6, 6, 7, 7 }; \ + to_metric[e]; \ + }) + +/* Convert the TopDown L2 enum to the reference TopDown L1 enum */ +#define TO_LVL1_REF(e) \ + ({ \ + u8 to_lvl1[TOPDOWN_E_MAX] = { -1, \ + -1, \ + -1, \ + -1, \ + TOPDOWN_E_RETIRING, \ + TOPDOWN_E_RETIRING, \ + TOPDOWN_E_BAD_SPEC, \ + TOPDOWN_E_BAD_SPEC, \ + TOPDOWN_E_FE_BOUND, \ + TOPDOWN_E_FE_BOUND, \ + TOPDOWN_E_BE_BOUND, \ + TOPDOWN_E_BE_BOUND }; \ + to_lvl1[e]; \ + }) + +static_always_inline f64 +topdown_lvl2_rdpmc_metric (void *ps, topdown_e_t e) +{ + f64 r, l1_value = 0; + + /* If it is an L1 metric, call L1 format */ + if (TOPDOWN_E_BE_BOUND >= e) + { + return topdown_lvl1_rdpmc_metric (ps, e); + } + + /* all the odd metrics, are inferred from even and L1 metrics */ + if (e & 0x1) + { + /* get the L1 reference metric */ + l1_value = topdown_lvl1_rdpmc_metric (ps, TO_LVL1_REF (e)); + } + + /* calculate the l2 metric */ + r = + fabs (l1_value - topdown_lvl1_rdpmc_metric (ps, TO_LVL2_METRIC_BYTE (e))); + return r; +} + static u8 * 
-format_topdown_lvl1 (u8 *s, va_list *args) +format_topdown_lvl2 (u8 *s, va_list *args) { void *ps = va_arg (*args, void *); u64 idx = va_arg (*args, int); @@ -78,58 +167,49 @@ format_topdown_lvl1 (u8 *s, va_list *args) f64 sv = 0; topdown_lvl1_parse_fn_t *parse_fn, - *parse_fns[PERFMON_BUNDLE_TYPE_MAX] = { 0, topdown_lvl1_rdpmc_metric, - topdown_lvl1_perf_reading, 0 }; + *parse_fns[PERFMON_BUNDLE_TYPE_MAX] = { 0, topdown_lvl2_rdpmc_metric, + topdown_lvl2_perf_reading, 0 }; + parse_fn = parse_fns[type]; ASSERT (parse_fn); - switch (idx) - { - case 0: - sv = - parse_fn (ps, TOPDOWN_E_BAD_SPEC) + parse_fn (ps, TOPDOWN_E_RETIRING); - break; - case 1: - sv = - parse_fn (ps, TOPDOWN_E_BE_BOUND) + parse_fn (ps, TOPDOWN_E_FE_BOUND); - break; - default: - sv = parse_fn (ps, (topdown_lvl1_t) idx - 2); - break; - } - + sv = parse_fn (ps, (topdown_e_t) idx); s = format (s, "%f", sv); return s; } -static perfmon_cpu_supports_t topdown_lvl1_cpu_supports[] = { - /* Intel SNR supports papi/thread only */ - { clib_cpu_supports_movdiri, PERFMON_BUNDLE_TYPE_THREAD }, - /* Intel ICX supports papi/thread or rdpmc/node */ - { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE } +static perfmon_cpu_supports_t topdown_lvl2_cpu_supports[] = { + /* Intel SPR supports papi/thread or rdpmc/node */ + { clib_cpu_supports_avx512_fp16, PERFMON_BUNDLE_TYPE_NODE_OR_THREAD } }; -PERFMON_REGISTER_BUNDLE (topdown_lvl1_metric) = { - .name = "topdown-level1", - .description = "Top-down Microarchitecture Analysis Level 1", +PERFMON_REGISTER_BUNDLE (topdown_lvl2_metric) = { + .name = "topdown", + .description = "Top-down Microarchitecture Analysis Level 1 & 2", .source = "intel-core", - .offset_type = PERFMON_OFFSET_TYPE_METRICS, .events[0] = INTEL_CORE_E_TOPDOWN_SLOTS, .events[1] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC, .events[2] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC, .events[3] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC, .events[4] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC, - .n_events = 
5, - .metrics[0] = RDPMC_FIXED_SLOTS | FIXED_COUNTER_SLOTS, - .metrics[1] = RDPMC_L1_METRICS | METRIC_COUNTER_TOPDOWN_L1, - .n_metrics = 2, - .cpu_supports = topdown_lvl1_cpu_supports, - .n_cpu_supports = ARRAY_LEN (topdown_lvl1_cpu_supports), - .format_fn = format_topdown_lvl1, - .column_headers = PERFMON_STRINGS ("% NS", "% ST", "% NS.RT", "% NS.BS", - "% ST.FE", "% ST.BE"), - .footer = "Not Stalled (NS),STalled (ST),\n" - " Retiring (RT), Bad Speculation (BS),\n" - " FrontEnd bound (FE), BackEnd bound (BE)", + .events[5] = INTEL_CORE_E_TOPDOWN_L2_HEAVYOPS_METRIC, + .events[6] = INTEL_CORE_E_TOPDOWN_L2_BMISPRED_METRIC, + .events[7] = INTEL_CORE_E_TOPDOWN_L2_FETCHLAT_METRIC, + .events[8] = INTEL_CORE_E_TOPDOWN_L2_MEMBOUND_METRIC, + .n_events = 9, + .preserve_samples = 0x1FF, + .cpu_supports = topdown_lvl2_cpu_supports, + .n_cpu_supports = ARRAY_LEN (topdown_lvl2_cpu_supports), + .format_fn = format_topdown_lvl2, + .column_headers = PERFMON_STRINGS ("% RT", "% BS", "% FE", "% BE", "% RT.HO", + "% RT.LO", "% BS.BM", "% BS.MC", + "% FE.FL", "% FE.FB", "% BE.MB", + "% BE.CB"), + .footer = "Retiring (RT), Bad Speculation (BS),\n" + " FrontEnd bound (FE), BackEnd bound (BE),\n" + " Light Operations (LO), Heavy Operations (HO),\n" + " Branch Misprediction (BM), Machine Clears (MC),\n" + " Fetch Latency (FL), Fetch Bandwidth (FB),\n" + " Memory Bound (MB), Core Bound (CB)", }; diff --git a/src/plugins/perfmon/intel/bundle/topdown_tremont.c b/src/plugins/perfmon/intel/bundle/topdown_tremont.c new file mode 100644 index 00000000000..b2626eb0480 --- /dev/null +++ b/src/plugins/perfmon/intel/bundle/topdown_tremont.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2021 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <perfmon/perfmon.h> +#include <perfmon/intel/core.h> + +typedef enum +{ + TOPDOWN_E_RETIRING = 0, + TOPDOWN_E_BAD_SPEC, + TOPDOWN_E_FE_BOUND, + TOPDOWN_E_BE_BOUND, + TOPDOWN_E_MAX, +} topdown_lvl1_t; + +static u8 * +format_topdown_lvl1 (u8 *s, va_list *args) +{ + perfmon_reading_t *ss = va_arg (*args, perfmon_reading_t *); + u64 idx = va_arg (*args, int); + f64 sv = 0; + u64 total = 0; + + for (int i = 0; i < TOPDOWN_E_MAX; i++) + total += ss->value[i]; + + switch (idx) + { + case 0: + sv = (f64) ss->value[TOPDOWN_E_RETIRING] + ss->value[TOPDOWN_E_BAD_SPEC]; + break; + case 1: + sv = (f64) ss->value[TOPDOWN_E_FE_BOUND] + ss->value[TOPDOWN_E_BE_BOUND]; + break; + default: + sv = (f64) ss->value[idx - 2]; + break; + } + + sv = (sv / total) * 100; + s = format (s, "%f", sv); + return s; +} + +static int +is_tremont () +{ + return clib_cpu_supports_movdir64b () && !clib_cpu_supports_avx2 (); +} + +static perfmon_cpu_supports_t topdown_lvl1_cpu_supports[] = { + { is_tremont, PERFMON_BUNDLE_TYPE_THREAD } +}; + +PERFMON_REGISTER_BUNDLE (topdown_lvl1_tremont) = { + .name = "topdown-level1", + .description = "Top-down Microarchitecture Analysis Level 1", + .source = "intel-core", + .events[0] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_TREMONT, + .events[1] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_TREMONT, + .events[2] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_TREMONT, + .events[3] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_TREMONT, + .n_events = 4, + .cpu_supports = topdown_lvl1_cpu_supports, + .n_cpu_supports = ARRAY_LEN 
(topdown_lvl1_cpu_supports), + .format_fn = format_topdown_lvl1, + .column_headers = PERFMON_STRINGS ("% NS", "% ST", "% NS.RT", "% NS.BS", + "% ST.FE", "% ST.BE"), + .footer = "Not Stalled (NS),STalled (ST),\n" + " Retiring (RT), Bad Speculation (BS),\n" + " FrontEnd bound (FE), BackEnd bound (BE)", +}; |