diff options
author | Damjan Marion <damarion@cisco.com> | 2020-11-27 20:15:17 +0100 |
---|---|---|
committer | Florin Coras <florin.coras@gmail.com> | 2020-12-18 17:20:28 +0000 |
commit | 8b60fb0fe6e29aac1847c0b381c0f84165b27b61 (patch) | |
tree | 0d805a148109bad0906a3570b2d22767ec50f95d /src/plugins/perfmon/intel | |
parent | f5b27cbcc7cae5279aac512f805be73591f58eaa (diff) |
perfmon: new perfmon plugin
Type: feature
Change-Id: I2c14f82393d11fc05c6d229f5c58603ab5c0f14d
Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/plugins/perfmon/intel')
-rw-r--r-- | src/plugins/perfmon/intel/bundle/inst_and_clock.c | 72 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/bundle/load_blocks.c | 74 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/bundle/mem_bw.c | 73 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/core.c | 94 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/core.h | 169 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/uncore.c | 201 | ||||
-rw-r--r-- | src/plugins/perfmon/intel/uncore.h | 53 |
7 files changed, 736 insertions, 0 deletions
diff --git a/src/plugins/perfmon/intel/bundle/inst_and_clock.c b/src/plugins/perfmon/intel/bundle/inst_and_clock.c
new file mode 100644
index 00000000000..e08d21a3fa5
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/inst_and_clock.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+/*
+ * Format one cell of the "inst-and-clock" bundle for one node.
+ *
+ * Varargs: perfmon_node_stats_t * with the node counters, then an int
+ * selecting the column (same order as .column_headers below). value[0] is
+ * used as the instruction count and value[1] as the clock count, matching
+ * .events[0] and .events[1]. Ratios are computed without a zero-divisor
+ * check; an unknown column appends nothing.
+ */
+static u8 *
+format_inst_and_clock (u8 *s, va_list *args)
+{
+  perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+  int row = va_arg (*args, int);
+
+  switch (row)
+    {
+    case 0:
+      s = format (s, "%lu", ns->n_calls);
+      break;
+    case 1:
+      s = format (s, "%lu", ns->n_packets);
+      break;
+    case 2:
+      s = format (s, "%.2f", (f64) ns->n_packets / ns->n_calls);
+      break;
+    case 3:
+      s = format (s, "%.2f", (f64) ns->value[1] / ns->n_packets);
+      break;
+    case 4:
+      s = format (s, "%.2f", (f64) ns->value[0] / ns->n_packets);
+      break;
+    case 5:
+      s = format (s, "%.2f", (f64) ns->value[0] / ns->value[1]);
+      break;
+    }
+  return s;
+}
+
+PERFMON_REGISTER_BUNDLE (inst_and_clock) = {
+  .name = "inst-and-clock",
+  .description = "instructions/packet, cycles/packet and IPC",
+  .source = "intel-core",
+  .type = PERFMON_BUNDLE_TYPE_NODE,
+  .events[0] = INTEL_CORE_E_INST_RETIRED_ANY_P,
+  .events[1] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P,
+  .events[2] = INTEL_CORE_E_CPU_CLK_UNHALTED_REF_TSC,
+  .n_events = 3,
+  .format_fn = format_inst_and_clock,
+  .column_headers = PERFMON_STRINGS ("Calls", "Packets", "Packets/Call",
+                                     "Clocks/Packet", "Instructions/Packet",
+                                     "IPC"),
+};
diff --git a/src/plugins/perfmon/intel/bundle/load_blocks.c b/src/plugins/perfmon/intel/bundle/load_blocks.c
new file mode 100644
index 00000000000..d02ef3a0555
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/load_blocks.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+/*
+ * Format one cell of the "load-blocks" bundle for one node.
+ *
+ * Varargs: perfmon_node_stats_t * with the node counters, then an int
+ * selecting the column (same order as .column_headers below). Columns 2-4
+ * print value[0..2], i.e. .events[0..2], divided by the number of node
+ * calls; an unknown column appends nothing.
+ */
+static u8 *
+format_load_blocks (u8 *s, va_list *args)
+{
+  perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+  int row = va_arg (*args, int);
+
+  switch (row)
+    {
+    case 0:
+      s = format (s, "%12lu", ns->n_calls);
+      break;
+    case 1:
+      s = format (s, "%12lu", ns->n_packets);
+      break;
+    case 2:
+      s = format (s, "%9.2f", (f64) ns->value[0] / ns->n_calls);
+      break;
+    case 3:
+      s = format (s, "%9.2f", (f64) ns->value[1] / ns->n_calls);
+      break;
+    case 4:
+      s = format (s, "%9.2f", (f64) ns->value[2] / ns->n_calls);
+      break;
+    }
+  return s;
+}
+
+PERFMON_REGISTER_BUNDLE (load_blocks) = {
+  .name = "load-blocks",
+  .description = "load operations blocked due to various uarch reasons",
+  .source = "intel-core",
+  .type = PERFMON_BUNDLE_TYPE_NODE,
+  .events[0] = INTEL_CORE_E_LD_BLOCKS_STORE_FORWARD,
+  .events[1] = INTEL_CORE_E_LD_BLOCKS_NO_SR,
+  .events[2] = INTEL_CORE_E_LD_BLOCKS_PARTIAL_ADDRESS_ALIAS,
+  .n_events = 3,
+  .format_fn = format_load_blocks,
+  .column_headers = PERFMON_STRINGS ("Calls", "Packets", "[1]", "[2]", "[3]"),
+  .footer = "Per node call statistics:\n"
+            "[1] Loads blocked due to overlapping with a preceding store that "
+            "cannot be forwarded.\n"
+            "[2] The number of times that split load operations are "
+            "temporarily blocked because\n"
+            " all resources for handling the split accesses are in use\n"
+            "[3] False dependencies in Memory Order Buffer (MOB) due to "
+            "partial compare on address.\n",
+};
diff --git a/src/plugins/perfmon/intel/bundle/mem_bw.c b/src/plugins/perfmon/intel/bundle/mem_bw.c
new file mode 100644
index 00000000000..672dbb0677d
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/mem_bw.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/uncore.h>
+
+/*
+ * Format one cell of the "memory-bandwidth" bundle for one reading.
+ *
+ * Varargs: perfmon_reading_t * and an int column index (same order as
+ * .column_headers below). time_running is scaled by 1e-9 (presumably
+ * nanoseconds to seconds) and each CAS count is multiplied by 64 * 1e-6
+ * (presumably 64 bytes per CAS, reported in MB). Rate columns are left
+ * empty when time_running is zero so that no division by zero occurs.
+ */
+static u8 *
+format_intel_uncore_imc_bw (u8 *s, va_list *args)
+{
+  perfmon_reading_t *r = va_arg (*args, perfmon_reading_t *);
+  int col = va_arg (*args, int);
+  f64 tr = r->time_running * 1e-9;
+
+  switch (col)
+    {
+    case 0:
+      s = format (s, "%9.2f", tr);
+      break;
+    case 1:
+      if (r->time_running)
+        s = format (s, "%9.2f", (f64) r->value[0] * 64 * 1e-6 / tr);
+      break;
+    case 2:
+      if (r->time_running)
+        s = format (s, "%9.2f", (f64) r->value[1] * 64 * 1e-6 / tr);
+      break;
+    case 3:
+      if (r->time_running)
+        s = format (s, "%9.2f",
+                    (f64) (r->value[0] + r->value[1]) * 64 * 1e-6 / tr);
+      break;
+    default:
+      break;
+    }
+
+  return s;
+}
+
+PERFMON_REGISTER_BUNDLE (intel_uncore_imc_bw) = {
+  .name = "memory-bandwidth",
+  .description = "memory reads and writes per memory controller channel",
+  .source = "intel-uncore",
+  .type = PERFMON_BUNDLE_TYPE_SYSTEM,
+  .events[0] = INTEL_UNCORE_E_IMC_UNC_M_CAS_COUNT_RD,
+  .events[1] = INTEL_UNCORE_E_IMC_UNC_M_CAS_COUNT_WR,
+  .n_events = 2,
+  .format_fn = format_intel_uncore_imc_bw,
+  .column_headers = PERFMON_STRINGS ("RunTime", "Reads (MB/s)",
+                                     "Writes (MB/s)", "Total (MB/s)"),
+};
diff --git a/src/plugins/perfmon/intel/core.c b/src/plugins/perfmon/intel/core.c
new file mode 100644
index 00000000000..b4267e8170c
--- /dev/null
+++ b/src/plugins/perfmon/intel/core.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+#include <linux/perf_event.h>
+
+/* Event table indexed by perf_intel_core_event_t; built from core.h. */
+static perfmon_event_t events[] = {
+#define _(event, umask, edge, any, inv, cmask, n, suffix, desc) \
+  [INTEL_CORE_E_##n##_##suffix] = { \
+    .type = PERF_TYPE_RAW, \
+    .config = PERF_INTEL_CODE (event, umask, edge, any, inv, cmask), \
+    .name = #n "." #suffix, \
+    .description = desc, \
+  },
+
+  foreach_perf_intel_core_event
+#undef _
+};
+
+/*
+ * Format a raw event config word.
+ *
+ * Varargs: u64 config. Event code (bits 0-7) and umask (bits 8-15) are
+ * always printed; edge (bit 18), pc (bit 19), any (bit 21), inv (bit 23)
+ * and cmask (bits 24-31) are printed only when non-zero.
+ */
+u8 *
+format_intel_core_config (u8 *s, va_list *args)
+{
+  u64 config = va_arg (*args, u64);
+  u8 v;
+
+  /* %x takes an int-sized argument, so narrow the u64 fields explicitly */
+  s = format (s, "event=0x%02x, umask=0x%02x", (u32) (config & 0xff),
+              (u32) ((config >> 8) & 0xff));
+
+  if ((v = (config >> 18) & 1))
+    s = format (s, ", edge=%u", v);
+
+  if ((v = (config >> 19) & 1))
+    s = format (s, ", pc=%u", v);
+
+  if ((v = (config >> 21) & 1))
+    s = format (s, ", any=%u", v);
+
+  if ((v = (config >> 23) & 1))
+    s = format (s, ", inv=%u", v);
+
+  if ((v = (config >> 24) & 0xff))
+    s = format (s, ", cmask=0x%02x", v);
+
+  return s;
+}
+
+/*
+ * Source init: succeed only when CPUID leaf 0 reports the "GenuineIntel"
+ * vendor string (EBX, EDX, ECX hold "Genu", "ineI", "ntel").
+ */
+static clib_error_t *
+intel_core_init (vlib_main_t *vm, perfmon_source_t *src)
+{
+  u32 eax, ebx, ecx, edx;
+  if (__get_cpuid (0, &eax, &ebx, &ecx, &edx) == 0)
+    return clib_error_return (0, "unknown CPU (missing cpuid)");
+
+  // GenuineIntel
+  if (ebx != 0x756e6547 || ecx != 0x6c65746e || edx != 0x49656e69)
+    return clib_error_return (0, "not a IA-32 CPU");
+  return 0;
+}
+
+PERFMON_REGISTER_SOURCE (intel_core) = {
+  .name = "intel-core",
+  .description = "intel arch core events",
+  .events = events,
+  .n_events = ARRAY_LEN (events),
+  .init_fn = intel_core_init,
+  .format_config = format_intel_core_config,
+};
diff --git a/src/plugins/perfmon/intel/core.h b/src/plugins/perfmon/intel/core.h
new file mode 100644
index 00000000000..d7fd9e03538
--- /dev/null
+++ b/src/plugins/perfmon/intel/core.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __perfmon_intel_h
+#define __perfmon_intel_h
+
+/* Pack event select fields into a raw config word: event in the low bits,
+ * umask from bit 8, edge at bit 18, any at bit 21, inv at bit 23 and
+ * cmask from bit 24. */
+#define PERF_INTEL_CODE(event, umask, edge, any, inv, cmask) \
+  ((event) | (umask) << 8 | (edge) << 18 | (any) << 21 | (inv) << 23 | \
+   (cmask) << 24)
+
+/* EventCode, UMask, EdgeDetect, AnyThread, Invert, CounterMask,
+ * name, suffix, description */
+#define foreach_perf_intel_core_event \
+  _ (0x00, 0x02, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, THREAD, \
+     "Core cycles when the thread is not in halt state") \
+  _ (0x00, 0x03, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, REF_TSC, \
+     "Reference cycles when the core is not in halt state.") \
+  _ (0x03, 0x02, 0, 0, 0, 0x00, LD_BLOCKS, STORE_FORWARD, \
+     "Loads blocked due to overlapping with a preceding store that cannot be" \
+     " forwarded.") \
+  _ (0x03, 0x08, 0, 0, 0, 0x00, LD_BLOCKS, NO_SR, \
+     "The number of times that split load operations are temporarily " \
+     "blocked " \
+     "because all resources for handling the split accesses are in use.") \
+  _ (0x07, 0x01, 0, 0, 0, 0x00, LD_BLOCKS_PARTIAL, ADDRESS_ALIAS, \
+     "False dependencies in MOB due to partial compare on address.") \
+  _ (0x08, 0x01, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, MISS_CAUSES_A_WALK, \
+     "Load misses in all DTLB levels that cause page walks") \
+  _ (0x08, 0x02, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, WALK_COMPLETED_4K, \
+     "Page walk completed due to a demand data load to a 4K page") \
+  _ (0x08, 0x04, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, WALK_COMPLETED_2M_4M, \
+     "Page walk completed due to a demand data load to a 2M/4M page") \
+  _ (0x08, 0x08, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, WALK_COMPLETED_1G, \
+     "Page walk completed due to a demand data load to a 1G page") \
+  _ (0x08, 0x0E, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, WALK_COMPLETED, \
+     "Load miss in all TLB levels causes a page walk that completes. (All " \
+     "page sizes)") \
+  _ (0x08, 0x10, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, WALK_PENDING, \
+     "Counts 1 per cycle for each PMH that is busy with a page walk for a " \
+     "load. EPT page walk duration are excluded in Skylake.") \
+  _ (0x08, 0x20, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, STLB_HIT, \
+     "Loads that miss the DTLB and hit the STLB.") \
+  _ (0x0D, 0x01, 0, 0, 0, 0x00, INT_MISC, RECOVERY_CYCLES, \
+     "Core cycles the allocator was stalled due to recovery from earlier " \
+     "clear event for this thread (e.g. misprediction or memory nuke)") \
+  _ (0x0E, 0x01, 0, 0, 0, 0x00, UOPS_ISSUED, ANY, \
+     "Uops that Resource Allocation Table (RAT) issues to Reservation " \
+     "Station (RS)") \
+  _ (0x28, 0x07, 0, 0, 0, 0x00, CORE_POWER, LVL0_TURBO_LICENSE, \
+     "Core cycles where the core was running in a manner where Turbo may be " \
+     "clipped to the Non-AVX turbo schedule.") \
+  _ (0x28, 0x18, 0, 0, 0, 0x00, CORE_POWER, LVL1_TURBO_LICENSE, \
+     "Core cycles where the core was running in a manner where Turbo may be " \
+     "clipped to the AVX2 turbo schedule.") \
+  _ (0x28, 0x20, 0, 0, 0, 0x00, CORE_POWER, LVL2_TURBO_LICENSE, \
+     "Core cycles where the core was running in a manner where Turbo may be " \
+     "clipped to the AVX512 turbo schedule.") \
+  _ (0x28, 0x40, 0, 0, 0, 0x00, CORE_POWER, THROTTLE, \
+     "Core cycles the core was throttled due to a pending power level " \
+     "request.") \
+  _ (0x3C, 0x00, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, THREAD_P, \
+     "Thread cycles when thread is not in halt state") \
+  _ (0x3C, 0x00, 0, 1, 0, 0x00, CPU_CLK_UNHALTED, THREAD_P_ANY, \
+     "Core cycles when at least one thread on the physical core is not in " \
+     "halt state.") \
+  _ (0x3C, 0x00, 1, 0, 0, 0x01, CPU_CLK_UNHALTED, RING0_TRANS, \
+     "Counts when there is a transition from ring 1, 2 or 3 to ring 0.") \
+  _ (0x48, 0x01, 0, 0, 0, 0x01, L1D_PEND_MISS, PENDING_CYCLES, \
+     "Cycles with L1D load Misses outstanding.") \
+  _ (0x48, 0x01, 0, 0, 0, 0x00, L1D_PEND_MISS, PENDING, \
+     "L1D miss outstandings duration in cycles") \
+  _ (0x48, 0x02, 0, 0, 0, 0x00, L1D_PEND_MISS, FB_FULL, \
+     "Number of times a request needed a FB entry but there was no entry " \
+     "available for it. That is the FB unavailability was dominant reason " \
+     "for blocking the request. A request includes cacheable/uncacheable " \
+     "demands that is load, store or SW prefetch.") \
+  _ (0x51, 0x01, 0, 0, 0, 0x00, L1D, REPLACEMENT, \
+     "L1D data line replacements") \
+  _ (0x51, 0x04, 0, 0, 0, 0x00, L1D, M_EVICT, "L1D data line evictions") \
+  _ (0x83, 0x02, 0, 0, 0, 0x00, ICACHE_64B, IFTAG_MISS, \
+     "Instruction fetch tag lookups that miss in the instruction cache " \
+     "(L1I). Counts at 64-byte cache-line granularity.") \
+  _ (0x9C, 0x01, 0, 0, 0, 0x00, IDQ_UOPS_NOT_DELIVERED, CORE, \
+     "Uops not delivered to Resource Allocation Table (RAT) per thread when " \
+     "backend of the machine is not stalled") \
+  _ (0xC0, 0x00, 0, 0, 0, 0x00, INST_RETIRED, ANY_P, \
+     "Number of instructions retired. General Counter - architectural event") \
+  _ (0xC2, 0x02, 0, 0, 0, 0x00, UOPS_RETIRED, RETIRE_SLOTS, \
+     "Retirement slots used.") \
+  _ (0xD0, 0x81, 0, 0, 0, 0x00, MEM_INST_RETIRED, ALL_LOADS, \
+     "All retired load instructions.") \
+  _ (0xD0, 0x82, 0, 0, 0, 0x00, MEM_INST_RETIRED, ALL_STORES, \
+     "All retired store instructions.") \
+  _ (0xD1, 0x01, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L1_HIT, \
+     "Retired load instructions with L1 cache hits as data sources") \
+  _ (0xD1, 0x02, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L2_HIT, \
+     "Retired load instructions with L2 cache hits as data sources") \
+  _ (0xD1, 0x04, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L3_HIT, \
+     "Retired load instructions with L3 cache hits as data sources") \
+  _ (0xD1, 0x08, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L1_MISS, \
+     "Retired load instructions missed L1 cache as data sources") \
+  _ (0xD1, 0x10, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L2_MISS, \
+     "Retired load instructions missed L2 cache as data sources") \
+  _ (0xD1, 0x20, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L3_MISS, \
+     "Retired load instructions missed L3 cache as data sources") \
+  _ (0xD1, 0x40, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, FB_HIT, \
+     "Retired load instructions which data sources were load missed L1 but " \
+     "hit FB due to preceding miss to the same cache line with data not " \
+     "ready") \
+  _ (0xD2, 0x01, 0, 0, 0, 0x00, MEM_LOAD_L3_HIT_RETIRED, XSNP_MISS, \
+     "Retired load instructions which data sources were L3 hit and cross-" \
+     "core snoop missed in on-pkg core cache.") \
+  _ (0xD2, 0x02, 0, 0, 0, 0x00, MEM_LOAD_L3_HIT_RETIRED, XSNP_HIT, \
+     "Retired load instructions which data sources were L3 and cross-core " \
+     "snoop hits in on-pkg core cache") \
+  _ (0xD2, 0x04, 0, 0, 0, 0x00, MEM_LOAD_L3_HIT_RETIRED, XSNP_HITM, \
+     "Retired load instructions which data sources were HitM responses from " \
+     "shared L3") \
+  _ (0xD2, 0x08, 0, 0, 0, 0x00, MEM_LOAD_L3_HIT_RETIRED, XSNP_NONE, \
+     "Retired load instructions which data sources were hits in L3 without " \
+     "snoops required") \
+  _ (0xD3, 0x01, 0, 0, 0, 0x00, MEM_LOAD_L3_MISS_RETIRED, LOCAL_DRAM, \
+     "Retired load instructions which data sources missed L3 but serviced " \
+     "from local dram") \
+  _ (0xD3, 0x02, 0, 0, 0, 0x00, MEM_LOAD_L3_MISS_RETIRED, REMOTE_DRAM, \
+     "Retired load instructions which data sources missed L3 but serviced " \
+     "from remote dram") \
+  _ (0xD3, 0x04, 0, 0, 0, 0x00, MEM_LOAD_L3_MISS_RETIRED, REMOTE_HITM, \
+     "Retired load instructions whose data sources was remote HITM") \
+  _ (0xD3, 0x08, 0, 0, 0, 0x00, MEM_LOAD_L3_MISS_RETIRED, REMOTE_FWD, \
+     "Retired load instructions whose data sources was forwarded from a " \
+     "remote cache") \
+  _ (0xF0, 0x40, 0, 0, 0, 0x00, L2_TRANS, L2_WB, \
+     "L2 writebacks that access L2 cache") \
+  _ (0xF1, 0x1F, 0, 0, 0, 0x00, L2_LINES_IN, ALL, \
+     "L2 cache lines filling L2") \
+  _ (0xFE, 0x02, 0, 0, 0, 0x00, IDI_MISC, WB_UPGRADE, \
+     "Counts number of cache lines that are allocated and written back to L3" \
+     " with the intention that they are more likely to be reused shortly") \
+  _ (0xFE, 0x04, 0, 0, 0, 0x00, IDI_MISC, WB_DOWNGRADE, \
+     "Counts number of cache lines that are dropped and not written back to " \
+     "L3 as they are deemed to be less likely to be reused shortly")
+
+/* Event indices, in table order; INTEL_CORE_N_EVENTS is the count. */
+typedef enum
+{
+#define _(event, umask, edge, any, inv, cmask, name, suffix, desc) \
+  INTEL_CORE_E_##name##_##suffix,
+  foreach_perf_intel_core_event
+#undef _
+    INTEL_CORE_N_EVENTS,
+} perf_intel_core_event_t;
+
+#endif
diff --git a/src/plugins/perfmon/intel/uncore.c b/src/plugins/perfmon/intel/uncore.c
new file mode 100644
index 00000000000..e8939cb67c9
--- /dev/null
+++ b/src/plugins/perfmon/intel/uncore.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+#include <perfmon/intel/uncore.h>
+
+VLIB_REGISTER_LOG_CLASS (if_intel_uncore_log, static) = {
+  .class_name = "perfmon",
+  .subclass_name = "intel-uncore",
+};
+
+#define log_debug(fmt, ...) \
+  vlib_log_debug (if_intel_uncore_log.class, fmt, __VA_ARGS__)
+#define log_warn(fmt, ...) \
+  vlib_log_warn (if_intel_uncore_log.class, fmt, __VA_ARGS__)
+#define log_err(fmt, ...) \
+  vlib_log_err (if_intel_uncore_log.class, fmt, __VA_ARGS__)
+
+#define PERF_INTEL_CODE(event, umask, edge, any, inv, cmask) \
+  ((event) | (umask) << 8 | (edge) << 18 | (any) << 21 | (inv) << 23 | \
+   (cmask) << 24)
+
+/* Event table indexed by perfmon_intel_uncore_event_index_t. */
+static perfmon_event_t intel_uncore_events[] = {
+#define _(unit, event, umask, n, suffix, desc) \
+  [INTEL_UNCORE_E_##unit##_##n##_##suffix] = { \
+    .config = (event) | (umask) << 8, \
+    .name = #n "." #suffix, \
+    .description = desc, \
+    .type_from_instance = 1, \
+    .instance_type = INTEL_UNCORE_UNIT_##unit, \
+  },
+
+  foreach_intel_uncore_event
+#undef _
+};
+
+/* vec_sort_with_function comparator: order instances by name. */
+static int
+intel_uncore_instance_name_cmp (void *v1, void *v2)
+{
+  perfmon_instance_t *i1 = v1;
+  perfmon_instance_t *i2 = v2;
+  return strcmp (i1->name, i2->name);
+}
+
+/*
+ * Discover all instances of one uncore unit type through sysfs.
+ *
+ * Probes <base_path>_<name>_<i> for i = 0, 1, ... until the "type" or
+ * "cpumask" file cannot be read, and appends one perfmon_instance_t to
+ * src->instances_by_type[u] for every CPU listed in each cpumask. fmt
+ * builds the instance name from socket_by_cpu_id[cpu] and i.
+ */
+static void
+intel_uncore_add_unit (perfmon_source_t *src, intel_uncore_unit_type_t u,
+                       char *name, char *type_str, char *fmt,
+                       int *socket_by_cpu_id)
+{
+  static char *base_path = "/sys/bus/event_source/devices/uncore";
+  clib_error_t *err;
+  clib_bitmap_t *cpumask = 0;
+  perfmon_instance_t *in;
+  perfmon_instance_type_t *it;
+  u8 *s = 0;
+  int i = 0, j;
+  u32 perf_type;
+
+  vec_validate (src->instances_by_type, u);
+  it = vec_elt_at_index (src->instances_by_type, u);
+  it->name = type_str;
+
+  while (1)
+    {
+      s = format (s, "%s_%s_%u/type%c", base_path, name, i, 0);
+      if ((err = clib_sysfs_read ((char *) s, "%u", &perf_type)))
+        break;
+      vec_reset_length (s);
+
+      s = format (s, "%s_%s_%u/cpumask%c", base_path, name, i, 0);
+      if ((err = clib_sysfs_read ((char *) s, "%U", unformat_bitmap_list,
+                                  &cpumask)))
+        break;
+      vec_reset_length (s);
+
+      clib_bitmap_foreach (j, cpumask)
+        {
+          vec_add2 (it->instances, in, 1);
+          in->type = perf_type;
+          in->cpu = j;
+          in->pid = -1;
+          in->name = (char *) format (0, fmt, socket_by_cpu_id[j], i);
+          vec_terminate_c_string (in->name);
+          log_debug ("found %s %s", type_str, in->name);
+        }
+      /* unformat_bitmap_list hands back a new bitmap on every read */
+      clib_bitmap_free (cpumask);
+      i++;
+    };
+  clib_error_free (err);
+  clib_bitmap_free (cpumask);
+  vec_free (s);
+}
+
+/*
+ * Source init: map every CPU to its NUMA node from sysfs, then discover
+ * the instances of each uncore unit type. Fails when the topology cannot
+ * be read or when no uncore unit is found.
+ */
+static clib_error_t *
+intel_uncore_init (vlib_main_t *vm, perfmon_source_t *src)
+{
+  clib_error_t *err = 0;
+  clib_bitmap_t *node_bitmap = 0, *cpumask = 0;
+  int *numa_by_cpu_id = 0;
+  u32 i, j;
+  u8 *s = 0;
+
+  if ((err = clib_sysfs_read ("/sys/devices/system/node/has_cpu", "%U",
+                              unformat_bitmap_list, &node_bitmap)))
+    {
+      clib_error_free (err);
+      return clib_error_return (0, "failed to discover numa topology");
+    }
+
+  clib_bitmap_foreach (i, node_bitmap)
+    {
+      s = format (s, "/sys/devices/system/node/node%u/cpulist%c", i, 0);
+      if ((err = clib_sysfs_read ((char *) s, "%U", unformat_bitmap_list,
+                                  &cpumask)))
+        {
+          clib_error_free (err);
+          err = clib_error_return (0, "failed to discover numa topology");
+          goto done;
+        }
+
+      clib_bitmap_foreach (j, cpumask)
+        {
+          vec_validate_init_empty (numa_by_cpu_id, j, -1);
+          numa_by_cpu_id[j] = i;
+        }
+      clib_bitmap_free (cpumask);
+      vec_reset_length (s);
+    }
+
+#define _(t, n, name, fmt) \
+  intel_uncore_add_unit (src, INTEL_UNCORE_UNIT_##t, n, name, fmt, \
+                         numa_by_cpu_id);
+  foreach_intel_uncore_unit_type;
+#undef _
+
+  for (i = 0, j = 0; i < vec_len (src->instances_by_type); i++)
+    {
+      perfmon_instance_type_t *it;
+
+      it = vec_elt_at_index (src->instances_by_type, i);
+      vec_sort_with_function (it->instances, intel_uncore_instance_name_cmp);
+      j += vec_len (it->instances);
+    }
+
+  if (j == 0)
+    {
+      vec_free (src->instances_by_type);
+      /* leave through done: so the temporary vectors are released */
+      err = clib_error_return (0, "no uncore units found");
+      goto done;
+    }
+
+done:
+  vec_free (s);
+  clib_bitmap_free (cpumask);
+  clib_bitmap_free (node_bitmap);
+  vec_free (numa_by_cpu_id);
+  return err;
+}
+
+format_function_t format_intel_core_config;
+
+PERFMON_REGISTER_SOURCE (intel_uncore) = {
+  .name = "intel-uncore",
+  .description = "intel uncore events",
+  .events = intel_uncore_events,
+  .n_events = INTEL_UNCORE_N_EVENTS,
+  .init_fn = intel_uncore_init,
+  .format_config = format_intel_core_config,
+};
diff --git a/src/plugins/perfmon/intel/uncore.h b/src/plugins/perfmon/intel/uncore.h
new file mode 100644
index 00000000000..03227d6069c
--- /dev/null
+++ b/src/plugins/perfmon/intel/uncore.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __perfmon_intel_uncore_h__
+#define __perfmon_intel_uncore_h__
+
+/* Type, sysfs device name, description, instance name format */
+#define foreach_intel_uncore_unit_type \
+  _ (IMC, "imc", "integrated Memory Controller (iMC)", "iMC%u/%u") \
+  _ (UPI, "upi", "Ultra Path Interconnect (UPI)", "UPI%u/%u")
+
+typedef enum
+{
+#define _(t, n, name, fmt) INTEL_UNCORE_UNIT_##t,
+  foreach_intel_uncore_unit_type
+#undef _
+    INTEL_UNCORE_N_UNITS,
+} intel_uncore_unit_type_t;
+
+#define PERF_INTEL_CODE(event, umask, edge, any, inv, cmask) \
+  ((event) | (umask) << 8 | (edge) << 18 | (any) << 21 | (inv) << 23 | \
+   (cmask) << 24)
+
+/* Type, EventCode, UMask, name, suffix, description */
+#define foreach_intel_uncore_event \
+  _ (IMC, 0x04, 0x03, UNC_M_CAS_COUNT, RD, \
+     "All DRAM Read CAS Commands issued (including underfills)") \
+  _ (IMC, 0x04, 0x0c, UNC_M_CAS_COUNT, WR, \
+     "All DRAM Write CAS commands issued") \
+  _ (IMC, 0x04, 0x0f, UNC_M_CAS_COUNT, ALL, "All DRAM CAS commands issued")
+
+typedef enum
+{
+#define _(unit, event, umask, name, suffix, desc) \
+  INTEL_UNCORE_E_##unit##_##name##_##suffix,
+  foreach_intel_uncore_event
+#undef _
+    INTEL_UNCORE_N_EVENTS,
+} perfmon_intel_uncore_event_index_t;
+
+#endif