/*
 * Copyright (c) 2020 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 #ifndef __perfmon_intel_h
17 #define __perfmon_intel_h
19 u8 intel_bundle_supported (perfmon_bundle_t *b);
/* Encode an event in the IA32_PERFEVTSELx MSR layout:
 * event select [7:0], unit mask [15:8], edge detect [18], any-thread [21],
 * invert [23], counter mask [31:24]. */
#define PERF_INTEL_CODE(event, umask, edge, any, inv, cmask)                  \
  ((event) | (umask) << 8 | (edge) << 18 | (any) << 21 | (inv) << 23 |        \
   (cmask) << 24)
/* EventCode, UMask, EdgeDetect, AnyThread, Invert, CounterMask
 * counter_unit, name, suffix, description
 * Pseudo events derived from the fixed TOPDOWN metrics. ("peusdo" spelling
 * is kept: the macro name is part of the public interface.) */
#define foreach_perf_intel_peusdo_event                                       \
  _ (0x00, 0x80, 0, 0, 0, 0x00, TOPDOWN, L1_RETIRING_METRIC,                  \
     "TMA retiring slots for an unhalted logical processor.")                 \
  _ (0x00, 0x81, 0, 0, 0, 0x00, TOPDOWN, L1_BAD_SPEC_METRIC,                  \
     "TMA bad spec slots for an unhalted logical processor.")                 \
  _ (0x00, 0x82, 0, 0, 0, 0x00, TOPDOWN, L1_FE_BOUND_METRIC,                  \
     "TMA fe bound slots for an unhalted logical processor.")                 \
  _ (0x00, 0x83, 0, 0, 0, 0x00, TOPDOWN, L1_BE_BOUND_METRIC,                  \
     "TMA be bound slots for an unhalted logical processor.")                 \
  _ (0x00, 0x84, 0, 0, 0, 0x00, TOPDOWN, L2_HEAVYOPS_METRIC,                  \
     "TMA heavy operations for an unhalted logical processor.")               \
  _ (0x00, 0x85, 0, 0, 0, 0x00, TOPDOWN, L2_BMISPRED_METRIC,                  \
     "TMA branch misprediction slots for an unhalted logical processor.")     \
  _ (0x00, 0x86, 0, 0, 0, 0x00, TOPDOWN, L2_FETCHLAT_METRIC,                  \
     "TMA fetch latency slots for an unhalted logical processor.")            \
  _ (0x00, 0x87, 0, 0, 0, 0x00, TOPDOWN, L2_MEMBOUND_METRIC,                  \
     "TMA mem bound slots for an unhalted logical processor.")
/* EventCode, UMask, EdgeDetect, AnyThread, Invert, CounterMask
 * counter_unit, name, suffix, description
 * Tremont (Atom) encodings of the level-1 TOPDOWN metrics. */
#define foreach_perf_intel_tremont_event                                      \
  _ (0xc2, 0x00, 0, 0, 0, 0x00, TOPDOWN, L1_RETIRING_TREMONT,                 \
     "TMA retiring slots for an unhalted logical processor.")                 \
  _ (0x71, 0x00, 0, 0, 0, 0x00, TOPDOWN, L1_FE_BOUND_TREMONT,                 \
     "TMA fe bound slots for an unhalted logical processor.")                 \
  _ (0x73, 0x06, 0, 0, 0, 0x00, TOPDOWN, L1_BAD_SPEC_TREMONT,                 \
     "TMA bad spec slots for an unhalted logical processor.")                 \
  _ (0x74, 0x00, 0, 0, 0, 0x00, TOPDOWN, L1_BE_BOUND_TREMONT,                 \
     "TMA be bound slots for an unhalted logical processor.")
/* EventCode, UMask, EdgeDetect, AnyThread, Invert, CounterMask
 * counter_unit, name, suffix, description
 * Big-core (Skylake/Icelake class) general-purpose counter events.
 * NOTE(review): several truncated description strings were reconstructed;
 * wording should be confirmed against the Intel perfmon event database. */
#define foreach_perf_intel_core_event                                         \
  _ (0x00, 0x02, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, THREAD,                     \
     "Core cycles when the thread is not in halt state")                      \
  _ (0x00, 0x03, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, REF_TSC,                    \
     "Reference cycles when the core is not in halt state.")                  \
  _ (0x00, 0x04, 0, 0, 0, 0x00, TOPDOWN, SLOTS,                               \
     "TMA slots available for an unhalted logical processor.")                \
  _ (0x03, 0x02, 0, 0, 0, 0x00, LD_BLOCKS, STORE_FORWARD,                     \
     "Loads blocked due to overlapping with a preceding store that cannot be" \
     " forwarded.")                                                           \
  _ (0x03, 0x08, 0, 0, 0, 0x00, LD_BLOCKS, NO_SR,                             \
     "The number of times that split load operations are temporarily "        \
     "suspended because all resources for handling the split accesses are "   \
     "in use.")                                                               \
  _ (0x07, 0x01, 0, 0, 0, 0x00, LD_BLOCKS_PARTIAL, ADDRESS_ALIAS,             \
     "False dependencies in MOB due to partial compare on address.")          \
  _ (0x08, 0x01, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, MISS_CAUSES_A_WALK,         \
     "Load misses in all DTLB levels that cause page walks")                  \
  _ (0x08, 0x02, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, WALK_COMPLETED_4K,          \
     "Page walk completed due to a demand data load to a 4K page")            \
  _ (0x08, 0x04, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, WALK_COMPLETED_2M_4M,       \
     "Page walk completed due to a demand data load to a 2M/4M page")         \
  _ (0x08, 0x08, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, WALK_COMPLETED_1G,          \
     "Page walk completed due to a demand data load to a 1G page")            \
  _ (0x08, 0x0E, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, WALK_COMPLETED,             \
     "Load miss in all TLB levels causes a page walk that completes. (All "   \
     "page sizes)")                                                           \
  _ (0x08, 0x10, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, WALK_PENDING,               \
     "Counts 1 per cycle for each PMH that is busy with a page walk for a "   \
     "load. EPT page walk duration are excluded in Skylake.")                 \
  _ (0x08, 0x20, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, STLB_HIT,                   \
     "Loads that miss the DTLB and hit the STLB.")                            \
  _ (0x0D, 0x01, 0, 0, 0, 0x00, INT_MISC, RECOVERY_CYCLES,                    \
     "Core cycles the allocator was stalled due to recovery from earlier "    \
     "clear event for this thread (e.g. misprediction or memory nuke)")       \
  _ (0x0D, 0x10, 0, 0, 0, 0x00, INT_MISC, UOP_DROPPING,                       \
     "Estimated number of Top-down Microarchitecture Analysis slots that got" \
     " dropped due to non front-end reasons")                                 \
  _ (0x0D, 0x80, 0, 0, 0, 0x00, INT_MISC, CLEAR_RESTEER_CYCLES,               \
     "Counts cycles after recovery from a branch misprediction or machine "   \
     "clear till the first uop is issued from the resteered path.")           \
  _ (0x0E, 0x01, 0, 0, 0, 0x00, UOPS_ISSUED, ANY,                             \
     "Uops that Resource Allocation Table (RAT) issues to Reservation "       \
     "Station (RS)")                                                          \
  _ (0x28, 0x07, 0, 0, 0, 0x00, CORE_POWER, LVL0_TURBO_LICENSE,               \
     "Core cycles where the core was running in a manner where Turbo may be " \
     "clipped to the Non-AVX turbo schedule.")                                \
  _ (0x28, 0x18, 0, 0, 0, 0x00, CORE_POWER, LVL1_TURBO_LICENSE,               \
     "Core cycles where the core was running in a manner where Turbo may be " \
     "clipped to the AVX2 turbo schedule.")                                   \
  _ (0x28, 0x20, 0, 0, 0, 0x00, CORE_POWER, LVL2_TURBO_LICENSE,               \
     "Core cycles where the core was running in a manner where Turbo may be " \
     "clipped to the AVX512 turbo schedule.")                                 \
  _ (0x28, 0x40, 0, 0, 0, 0x00, CORE_POWER, THROTTLE,                         \
     "Core cycles the core was throttled due to a pending power level "       \
     "request.")                                                              \
  _ (0x3C, 0x00, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, THREAD_P,                   \
     "Thread cycles when thread is not in halt state")                        \
  _ (0x3C, 0x00, 0, 1, 0, 0x00, CPU_CLK_UNHALTED, THREAD_P_ANY,               \
     "Core cycles when at least one thread on the physical core is not in "   \
     "halt state.")                                                           \
  _ (0x3C, 0x00, 1, 0, 0, 0x01, CPU_CLK_UNHALTED, RING0_TRANS,                \
     "Counts when there is a transition from ring 1, 2 or 3 to ring 0.")      \
  _ (0x48, 0x01, 0, 0, 0, 0x01, L1D_PEND_MISS, PENDING_CYCLES,                \
     "Cycles with L1D load Misses outstanding.")                              \
  _ (0x48, 0x01, 0, 0, 0, 0x00, L1D_PEND_MISS, PENDING,                       \
     "L1D miss outstandings duration in cycles")                              \
  _ (0x48, 0x02, 0, 0, 0, 0x00, L1D_PEND_MISS, FB_FULL,                       \
     "Number of times a request needed a FB entry but there was no entry "    \
     "available for it. That is the FB unavailability was dominant reason "   \
     "for blocking the request. A request includes cacheable/uncacheable "    \
     "demands that is load, store or SW prefetch.")                           \
  _ (0x51, 0x01, 0, 0, 0, 0x00, L1D, REPLACEMENT,                             \
     "L1D data line replacements")                                            \
  _ (0x51, 0x04, 0, 0, 0, 0x00, L1D, M_EVICT, "L1D data line evictions")      \
  _ (0x79, 0x04, 0, 0, 0, 0x00, IDQ, MITE_UOPS,                               \
     "Counts the number of uops delivered to Instruction Decode Queue (IDQ) " \
     "from the MITE path.")                                                   \
  _ (0x79, 0x08, 0, 0, 0, 0x00, IDQ, DSB_UOPS,                                \
     "Counts the number of uops delivered to Instruction Decode Queue (IDQ) " \
     "from the Decode Stream Buffer (DSB) path.")                             \
  _ (0x79, 0x30, 0, 0, 0, 0x00, IDQ, MS_UOPS,                                 \
     "Counts the number of uops delivered to Instruction Decode Queue (IDQ) " \
     "from the Microcode Sequencer (MS) path.")                               \
  _ (0x79, 0x30, 1, 0, 0, 0x01, IDQ, MS_SWITCHES,                             \
     "Number of switches from DSB or MITE to the MS")                         \
  _ (0x80, 0x04, 0, 0, 0, 0x00, ICACHE_16B, IFDATA_STALL,                     \
     "Cycles where a code fetch is stalled due to L1 instruction cache "      \
     "miss.")                                                                 \
  _ (0x83, 0x04, 0, 0, 0, 0x00, ICACHE_64B, IFTAG_STALL,                      \
     "Cycles where a code fetch is stalled due to L1 instruction cache tag "  \
     "miss.")                                                                 \
  _ (0x83, 0x02, 0, 0, 0, 0x00, ICACHE_64B, IFTAG_MISS,                       \
     "Instruction fetch tag lookups that miss in the instruction cache "      \
     "(L1I). Counts at 64-byte cache-line granularity.")                      \
  _ (0x9C, 0x01, 0, 0, 0, 0x05, IDQ_UOPS_NOT_DELIVERED, CORE,                 \
     "Uops not delivered to Resource Allocation Table (RAT) per thread when " \
     "backend of the machine is not stalled")                                 \
  _ (0x9C, 0x01, 0, 0, 1, 0x01, IDQ_UOPS_NOT_DELIVERED, CYCLES_FE_WAS_OK,     \
     "Cycles with 4 uops delivered by the front end or Resource Allocation "  \
     "Table (RAT) was stalling FE.")                                          \
  _ (0x9C, 0x01, 0, 0, 0, 0x01, IDQ_UOPS_NOT_DELIVERED_CYCLES_3_UOP_DELIV,    \
     CORE, "Cycles with 3 uops delivered by the front end.")                  \
  _ (0x9C, 0x01, 0, 0, 0, 0x02, IDQ_UOPS_NOT_DELIVERED_CYCLES_2_UOP_DELIV,    \
     CORE, "Cycles with 2 uops delivered by the front end.")                  \
  _ (0x9C, 0x01, 0, 0, 0, 0x03, IDQ_UOPS_NOT_DELIVERED_CYCLES_1_UOP_DELIV,    \
     CORE, "Cycles with 1 uops delivered by the front end.")                  \
  _ (0x9C, 0x01, 0, 0, 0, 0x04, IDQ_UOPS_NOT_DELIVERED_CYCLES_0_UOP_DELIV,    \
     CORE, "Cycles with 0 uops delivered by the front end.")                  \
  _ (0xA1, 0x01, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_0,                      \
     "Number of uops executed on port 0")                                     \
  _ (0xA1, 0x02, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_1,                      \
     "Number of uops executed on port 1")                                     \
  _ (0xA1, 0x04, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_2_3,                    \
     "Number of uops executed on port 2 and 3")                               \
  _ (0xA1, 0x10, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_4_9,                    \
     "Number of uops executed on port 4 and 9")                               \
  _ (0xA1, 0x20, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_5,                      \
     "Number of uops executed on port 5")                                     \
  _ (0xA1, 0x40, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_6,                      \
     "Number of uops executed on port 6")                                     \
  _ (0xA1, 0x80, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_7_8,                    \
     "Number of uops executed on port 7 and 8")                               \
  _ (0xA2, 0x08, 0, 0, 0, 0x00, RESOURCE_STALLS, SB,                          \
     "Counts allocation stall cycles caused by the store buffer (SB) being "  \
     "full. This counts cycles that the pipeline back-end blocked uop "       \
     "delivery from the front-end.")                                          \
  _ (0xA3, 0x04, 0, 0, 0, 0x04, CYCLE_ACTIVITY, STALLS_TOTAL,                 \
     "Total execution stalls.")                                               \
  _ (0xA3, 0x05, 0, 0, 0, 0x05, CYCLE_ACTIVITY, STALLS_L2_MISS,               \
     "Execution stalls while L2 cache miss demand load is outstanding")       \
  _ (0xA3, 0x06, 0, 0, 0, 0x06, CYCLE_ACTIVITY, STALLS_L3_MISS,               \
     "Execution stalls while L3 cache miss demand load is outstanding")       \
  _ (0xA3, 0x0C, 0, 0, 0, 0x0C, CYCLE_ACTIVITY, STALLS_L1D_MISS,              \
     "Execution stalls while L1 cache miss demand load is outstanding")       \
  _ (0xA3, 0x14, 0, 0, 0, 0x14, CYCLE_ACTIVITY, STALLS_MEM_ANY,               \
     "Execution stalls while memory subsystem has an outstanding load.")      \
  _ (0xA6, 0x40, 0, 0, 0, 0x02, EXE_ACTIVITY, BOUND_ON_STORES,                \
     "Cycles where the Store Buffer was full and no loads caused an "         \
     "execution stall.")                                                      \
  _ (0xA8, 0x01, 0, 0, 0, 0x00, LSD, UOPS,                                    \
     "Counts the number of uops delivered to the back-end by the LSD "        \
     "(Loop Stream Detector)")                                                \
  _ (0xAB, 0x02, 0, 0, 0, 0x00, DSB2MITE_SWITCHES, PENALTY_CYCLES,            \
     "This event counts fetch penalty cycles when a transition occurs from "  \
     "the Decode Stream Buffer (DSB) to the legacy decode pipeline (MITE).")  \
  _ (0xB1, 0x01, 0, 0, 0, 0x00, UOPS_EXECUTED, THREAD,                        \
     "Counts the number of uops to be executed per-thread each cycle.")       \
  _ (0xC0, 0x00, 0, 0, 0, 0x00, INST_RETIRED, ANY_P,                          \
     "Number of instructions retired. General Counter - architectural event") \
  _ (0xC2, 0x02, 0, 0, 0, 0x00, UOPS_RETIRED, RETIRE_SLOTS,                   \
     "Retirement slots used.")                                                \
  _ (0xC4, 0x00, 0, 0, 0, 0x00, BR_INST_RETIRED, ALL_BRANCHES,                \
     "Counts all (macro) branch instructions retired.")                       \
  _ (0xC5, 0x00, 0, 0, 0, 0x00, BR_MISP_RETIRED, ALL_BRANCHES,                \
     "All mispredicted macro branch instructions retired.")                   \
  _ (0xC4, 0x20, 0, 0, 0, 0x00, BR_INST_RETIRED, NEAR_TAKEN,                  \
     "Taken branch instructions retired.")                                    \
  _ (0xD0, 0x82, 0, 0, 0, 0x00, MEM_INST_RETIRED, ALL_STORES,                 \
     "All retired store instructions.")                                       \
  _ (0xD1, 0x01, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L1_HIT,                     \
     "Retired load instructions with L1 cache hits as data sources")          \
  _ (0xD1, 0x02, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L2_HIT,                     \
     "Retired load instructions with L2 cache hits as data sources")          \
  _ (0xD1, 0x04, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L3_HIT,                     \
     "Retired load instructions with L3 cache hits as data sources")          \
  _ (0xD1, 0x08, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L1_MISS,                    \
     "Retired load instructions missed L1 cache as data sources")             \
  _ (0xD1, 0x10, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L2_MISS,                    \
     "Retired load instructions missed L2 cache as data sources")             \
  _ (0xD1, 0x20, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L3_MISS,                    \
     "Retired load instructions missed L3 cache as data sources")             \
  _ (0xD1, 0x40, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, FB_HIT,                     \
     "Retired load instructions which data sources were load missed L1 but "  \
     "hit FB due to preceding miss to the same cache line with data not "     \
     "ready.")                                                                \
  _ (0xD2, 0x01, 0, 0, 0, 0x00, MEM_LOAD_L3_HIT_RETIRED, XSNP_MISS,           \
     "Retired load instructions which data sources were L3 hit and cross-"    \
     "core snoop missed in on-pkg core cache.")                               \
  _ (0xD2, 0x02, 0, 0, 0, 0x00, MEM_LOAD_L3_HIT_RETIRED, XSNP_HIT,            \
     "Retired load instructions which data sources were L3 and cross-core "   \
     "snoop hits in on-pkg core cache")                                       \
  _ (0xD2, 0x04, 0, 0, 0, 0x00, MEM_LOAD_L3_HIT_RETIRED, XSNP_HITM,           \
     "Retired load instructions which data sources were HitM responses from " \
     "shared L3.")                                                            \
  _ (0xD2, 0x08, 0, 0, 0, 0x00, MEM_LOAD_L3_HIT_RETIRED, XSNP_NONE,           \
     "Retired load instructions which data sources were hits in L3 without "  \
     "snoops required.")                                                      \
  _ (0xD3, 0x01, 0, 0, 0, 0x00, MEM_LOAD_L3_MISS_RETIRED, LOCAL_DRAM,         \
     "Retired load instructions which data sources missed L3 but serviced "   \
     "from local dram")                                                       \
  _ (0xD3, 0x02, 0, 0, 0, 0x00, MEM_LOAD_L3_MISS_RETIRED, REMOTE_DRAM,        \
     "Retired load instructions which data sources missed L3 but serviced "   \
     "from remote dram")                                                      \
  _ (0xD3, 0x04, 0, 0, 0, 0x00, MEM_LOAD_L3_MISS_RETIRED, REMOTE_HITM,        \
     "Retired load instructions whose data sources was remote HITM")          \
  _ (0xD3, 0x08, 0, 0, 0, 0x00, MEM_LOAD_L3_MISS_RETIRED, REMOTE_FWD,         \
     "Retired load instructions whose data sources was forwarded from a "     \
     "remote cache")                                                          \
  _ (0xE6, 0x01, 0, 0, 0, 0x00, BACLEARS, ANY,                                \
     "Counts the total number when the front end is resteered, mainly when "  \
     "the BPU cannot provide a correct prediction and this is corrected by "  \
     "other branch handling mechanisms at the front end.")                    \
  _ (0xEC, 0x02, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, DISTRIBUTED,                \
     "Cycle counts are evenly distributed between active threads in the "     \
     "Core.")                                                                 \
  _ (0xF0, 0x40, 0, 0, 0, 0x00, L2_TRANS, L2_WB,                              \
     "L2 writebacks that access L2 cache")                                    \
  _ (0xF1, 0x1F, 0, 0, 0, 0x00, L2_LINES_IN, ALL,                             \
     "L2 cache lines filling L2")                                             \
  _ (0xF4, 0x04, 0, 0, 0, 0x00, SQ_MISC, SQ_FULL,                             \
     "Counts the cycles for which the thread is active and the superQ cannot" \
     " take any more entries.")                                               \
  _ (0xFE, 0x02, 0, 0, 0, 0x00, IDI_MISC, WB_UPGRADE,                         \
     "Counts number of cache lines that are allocated and written back to L3" \
     " with the intention that they are more likely to be reused shortly")    \
  _ (0xFE, 0x04, 0, 0, 0, 0x00, IDI_MISC, WB_DOWNGRADE,                       \
     "Counts number of cache lines that are dropped and not written back to " \
     "L3 as they are deemed to be less likely to be reused shortly")
282 #define _(event, umask, edge, any, inv, cmask, name, suffix, desc) \
283 INTEL_CORE_E_##name##_##suffix,
284 foreach_perf_intel_core_event foreach_perf_intel_peusdo_event
285 foreach_perf_intel_tremont_event
288 } perf_intel_core_event_t;