aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/perfmon/intel/core.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/perfmon/intel/core.h')
-rw-r--r--src/plugins/perfmon/intel/core.h120
1 files changed, 105 insertions, 15 deletions
diff --git a/src/plugins/perfmon/intel/core.h b/src/plugins/perfmon/intel/core.h
index 0e29022bfdf..b2b0434acb3 100644
--- a/src/plugins/perfmon/intel/core.h
+++ b/src/plugins/perfmon/intel/core.h
@@ -16,19 +16,15 @@
#ifndef __perfmon_intel_h
#define __perfmon_intel_h
+u8 intel_bundle_supported (perfmon_bundle_t *b);
+
#define PERF_INTEL_CODE(event, umask, edge, any, inv, cmask) \
((event) | (umask) << 8 | (edge) << 18 | (any) << 21 | (inv) << 23 | \
(cmask) << 24)
/* EventCode, UMask, EdgeDetect, AnyThread, Invert, CounterMask
* counter_unit, name, suffix, description */
-#define foreach_perf_intel_core_event \
- _ (0x00, 0x02, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, THREAD, \
- "Core cycles when the thread is not in halt state") \
- _ (0x00, 0x03, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, REF_TSC, \
- "Reference cycles when the core is not in halt state.") \
- _ (0x00, 0x04, 0, 0, 0, 0x00, TOPDOWN, SLOTS, \
- "TMA slots available for an unhalted logical processor.") \
+#define foreach_perf_intel_peusdo_event \
_ (0x00, 0x80, 0, 0, 0, 0x00, TOPDOWN, L1_RETIRING_METRIC, \
"TMA retiring slots for an unhalted logical processor.") \
_ (0x00, 0x81, 0, 0, 0, 0x00, TOPDOWN, L1_BAD_SPEC_METRIC, \
@@ -37,6 +33,36 @@
"TMA fe bound slots for an unhalted logical processor.") \
_ (0x00, 0x83, 0, 0, 0, 0x00, TOPDOWN, L1_BE_BOUND_METRIC, \
"TMA be bound slots for an unhalted logical processor.") \
+ _ (0x00, 0x84, 0, 0, 0, 0x00, TOPDOWN, L2_HEAVYOPS_METRIC, \
+ "TMA heavy operations for an unhalted logical processor.") \
+ _ (0x00, 0x85, 0, 0, 0, 0x00, TOPDOWN, L2_BMISPRED_METRIC, \
+    "TMA branch misprediction slots for an unhalted logical processor.")   \
+ _ (0x00, 0x86, 0, 0, 0, 0x00, TOPDOWN, L2_FETCHLAT_METRIC, \
+ "TMA fetch latency slots for an unhalted logical processor.") \
+ _ (0x00, 0x87, 0, 0, 0, 0x00, TOPDOWN, L2_MEMBOUND_METRIC, \
+ "TMA mem bound slots for an unhalted logical processor.")
+
+/* EventCode, UMask, EdgeDetect, AnyThread, Invert, CounterMask
+ * counter_unit, name, suffix, description */
+#define foreach_perf_intel_tremont_event \
+ _ (0xc2, 0x00, 0, 0, 0, 0x00, TOPDOWN, L1_RETIRING_TREMONT, \
+ "TMA retiring slots for an unhalted logical processor.") \
+ _ (0x71, 0x00, 0, 0, 0, 0x00, TOPDOWN, L1_FE_BOUND_TREMONT, \
+ "TMA fe bound slots for an unhalted logical processor.") \
+ _ (0x73, 0x06, 0, 0, 0, 0x00, TOPDOWN, L1_BAD_SPEC_TREMONT, \
+    "TMA bad spec slots for an unhalted logical processor.")                \
+ _ (0x74, 0x00, 0, 0, 0, 0x00, TOPDOWN, L1_BE_BOUND_TREMONT, \
+ "TMA be bound slots for an unhalted logical processor.")
+
+/* EventCode, UMask, EdgeDetect, AnyThread, Invert, CounterMask
+ * counter_unit, name, suffix, description */
+#define foreach_perf_intel_core_event \
+ _ (0x00, 0x02, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, THREAD, \
+ "Core cycles when the thread is not in halt state") \
+ _ (0x00, 0x03, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, REF_TSC, \
+ "Reference cycles when the core is not in halt state.") \
+ _ (0x00, 0x04, 0, 0, 0, 0x00, TOPDOWN, SLOTS, \
+ "TMA slots available for an unhalted logical processor.") \
_ (0x03, 0x02, 0, 0, 0, 0x00, LD_BLOCKS, STORE_FORWARD, \
"Loads blocked due to overlapping with a preceding store that cannot be" \
" forwarded.") \
@@ -65,6 +91,12 @@
_ (0x0D, 0x01, 0, 0, 0, 0x00, INT_MISC, RECOVERY_CYCLES, \
"Core cycles the allocator was stalled due to recovery from earlier " \
"clear event for this thread (e.g. misprediction or memory nuke)") \
+ _ (0x0D, 0x10, 0, 0, 0, 0x00, INT_MISC, UOP_DROPPING, \
+ "Estimated number of Top-down Microarchitecture Analysis slots that got" \
+ " dropped due to non front-end reasons") \
+ _ (0x0D, 0x80, 0, 0, 0, 0x00, INT_MISC, CLEAR_RESTEER_CYCLES, \
+ "Counts cycles after recovery from a branch misprediction or machine" \
+ " clear till the first uop is issued from the resteered path.") \
_ (0x0E, 0x01, 0, 0, 0, 0x00, UOPS_ISSUED, ANY, \
"Uops that Resource Allocation Table (RAT) issues to Reservation " \
"Station (RS)") \
@@ -99,20 +131,61 @@
_ (0x51, 0x01, 0, 0, 0, 0x00, L1D, REPLACEMENT, \
"L1D data line replacements") \
_ (0x51, 0x04, 0, 0, 0, 0x00, L1D, M_EVICT, "L1D data line evictions") \
+ _ (0x79, 0x04, 0, 0, 0, 0x00, IDQ, MITE_UOPS, \
+ "Counts the number of uops delivered to Instruction Decode Queue (IDQ) " \
+ "from the MITE path.") \
+ _ (0x79, 0x08, 0, 0, 0, 0x00, IDQ, DSB_UOPS, \
+ "Counts the number of uops delivered to Instruction Decode Queue (IDQ) " \
+ "from the Decode Stream Buffer (DSB) path.") \
+ _ (0x79, 0x30, 0, 0, 0, 0x00, IDQ, MS_UOPS, \
+ "Counts the number of uops delivered to Instruction Decode Queue (IDQ) " \
+ "from the Microcode Sequencer (MS) path.") \
+ _ (0x79, 0x30, 1, 0, 0, 0x01, IDQ, MS_SWITCHES, \
+ "Number of switches from DSB or MITE to the MS") \
+ _ ( \
+ 0x80, 0x04, 0, 0, 0, 0x00, ICACHE_16B, IFDATA_STALL, \
+ "Cycles where a code fetch is stalled due to L1 instruction cache miss.") \
+ _ (0x83, 0x04, 0, 0, 0, 0x00, ICACHE_64B, IFTAG_STALL, \
+ "Cycles where a code fetch is stalled due to L1 instruction cache tag " \
+ "miss.") \
_ (0x83, 0x02, 0, 0, 0, 0x00, ICACHE_64B, IFTAG_MISS, \
"Instruction fetch tag lookups that miss in the instruction cache " \
"(L1I). Counts at 64-byte cache-line granularity.") \
- _ (0x9C, 0x01, 0, 0, 0, 0x00, IDQ_UOPS_NOT_DELIVERED, CORE, \
+ _ (0x9C, 0x01, 0, 0, 0, 0x05, IDQ_UOPS_NOT_DELIVERED, CORE, \
"Uops not delivered to Resource Allocation Table (RAT) per thread when " \
"backend of the machine is not stalled") \
+ _ (0x9C, 0x01, 0, 0, 1, 0x01, IDQ_UOPS_NOT_DELIVERED, CYCLES_FE_WAS_OK, \
+ "Cycles with 4 uops delivered by the front end or Resource Allocation " \
+ "Table (RAT) was stalling FE.") \
+ _ (0x9C, 0x01, 0, 0, 0, 0x01, IDQ_UOPS_NOT_DELIVERED_CYCLES_3_UOP_DELIV, \
+ CORE, "Cycles with 3 uops delivered by the front end.") \
+ _ (0x9C, 0x01, 0, 0, 0, 0x02, IDQ_UOPS_NOT_DELIVERED_CYCLES_2_UOP_DELIV, \
+ CORE, "Cycles with 2 uops delivered by the front end.") \
+ _ (0x9C, 0x01, 0, 0, 0, 0x03, IDQ_UOPS_NOT_DELIVERED_CYCLES_1_UOP_DELIV, \
+ CORE, "Cycles with 1 uops delivered by the front end.") \
+ _ (0x9C, 0x01, 0, 0, 0, 0x04, IDQ_UOPS_NOT_DELIVERED_CYCLES_0_UOP_DELIV, \
+ CORE, "Cycles with 0 uops delivered by the front end.") \
+ _ (0xA1, 0x01, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_0, \
+ "Number of uops executed on port 0") \
+ _ (0xA1, 0x02, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_1, \
+ "Number of uops executed on port 1") \
+ _ (0xA1, 0x04, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_2_3, \
+ "Number of uops executed on port 2 and 3") \
+ _ (0xA1, 0x10, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_4_9, \
+ "Number of uops executed on port 4 and 9") \
+ _ (0xA1, 0x20, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_5, \
+ "Number of uops executed on port 5") \
+ _ (0xA1, 0x40, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_6, \
+ "Number of uops executed on port 6") \
+ _ (0xA1, 0x80, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_7_8, \
+ "Number of uops executed on port 7 and 8") \
_ (0xA2, 0x08, 0, 0, 0, 0x00, RESOURCE_STALLS, SB, \
"Counts allocation stall cycles caused by the store buffer (SB) being " \
"full. This counts cycles that the pipeline back-end blocked uop " \
"delivery" \
"from the front-end.") \
- _ (0xA3, 0x04, 0, 0, 0, 0x04, CYCLE_ACTIVITY, CYCLES_NO_EXECUTE, \
- "This event counts cycles during which no instructions were executed in" \
- " the execution stage of the pipeline.") \
+ _ (0xA3, 0x04, 0, 0, 0, 0x04, CYCLE_ACTIVITY, STALLS_TOTAL, \
+ "Total execution stalls.") \
_ (0xA3, 0x05, 0, 0, 0, 0x05, CYCLE_ACTIVITY, STALLS_L2_MISS, \
"Execution stalls while L2 cache miss demand load is outstanding") \
_ (0xA3, 0x06, 0, 0, 0, 0x06, CYCLE_ACTIVITY, STALLS_L3_MISS, \
@@ -121,6 +194,17 @@
"Execution stalls while L1 cache miss demand load is outstanding") \
_ (0xA3, 0x14, 0, 0, 0, 0x14, CYCLE_ACTIVITY, STALLS_MEM_ANY, \
"Execution stalls while memory subsystem has an outstanding load.") \
+ _ (0xA6, 0x40, 0, 0, 0, 0x02, EXE_ACTIVITY, BOUND_ON_STORES, \
+ "Cycles where the Store Buffer was full and no loads caused an " \
+ "execution stall.") \
+ _ (0xA8, 0x01, 0, 0, 0, 0x00, LSD, UOPS, \
+ "Counts the number of uops delivered to the back-end by the LSD" \
+ " (Loop Stream Detector)") \
+ _ (0xAB, 0x02, 0, 0, 0, 0x00, DSB2MITE_SWITCHES, PENALTY_CYCLES, \
+ "This event counts fetch penalty cycles when a transition occurs from" \
+ " DSB to MITE.") \
+ _ (0xB1, 0x01, 0, 0, 0, 0x00, UOPS_EXECUTED, THREAD, \
+ "Counts the number of uops to be executed per-thread each cycle.") \
_ (0xC0, 0x00, 0, 0, 0, 0x00, INST_RETIRED, ANY_P, \
"Number of instructions retired. General Counter - architectural event") \
_ (0xC2, 0x02, 0, 0, 0, 0x00, UOPS_RETIRED, RETIRE_SLOTS, \
@@ -131,8 +215,6 @@
"All mispredicted macro branch instructions retired.") \
_ (0xC4, 0x20, 0, 0, 0, 0x00, BR_INST_RETIRED, NEAR_TAKEN, \
"Taken branch instructions retired.") \
- _ (0xD0, 0x81, 0, 0, 0, 0x00, MEM_INST_RETIRED, ALL_LOADS, \
- "All retired load instructions.") \
_ (0xD0, 0x82, 0, 0, 0, 0x00, MEM_INST_RETIRED, ALL_STORES, \
"All retired store instructions.") \
_ (0xD1, 0x01, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L1_HIT, \
@@ -174,6 +256,13 @@
_ (0xD3, 0x08, 0, 0, 0, 0x00, MEM_LOAD_L3_MISS_RETIRED, REMOTE_FWD, \
"Retired load instructions whose data sources was forwarded from a " \
"remote cache") \
+ _ (0xE6, 0x01, 0, 0, 0, 0x00, BACLEARS, ANY, \
+ "Counts the total number when the front end is resteered, mainly when " \
+ "the BPU cannot provide a correct prediction and this is corrected by " \
+ "other branch handling mechanisms at the front end.") \
+ _ (0xEC, 0x02, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, DISTRIBUTED, \
+ "Cycle counts are evenly distributed between active threads in the " \
+ "Core") \
_ (0xF0, 0x40, 0, 0, 0, 0x00, L2_TRANS, L2_WB, \
"L2 writebacks that access L2 cache") \
_ (0xF1, 0x1F, 0, 0, 0, 0x00, L2_LINES_IN, ALL, \
@@ -192,9 +281,10 @@ typedef enum
{
#define _(event, umask, edge, any, inv, cmask, name, suffix, desc) \
INTEL_CORE_E_##name##_##suffix,
- foreach_perf_intel_core_event
+ foreach_perf_intel_core_event foreach_perf_intel_peusdo_event
+ foreach_perf_intel_tremont_event
#undef _
- INTEL_CORE_N_EVENTS,
+ INTEL_CORE_N_EVENTS,
} perf_intel_core_event_t;
#endif