2 * Copyright (c) 2021 Intel and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <vnet/vnet.h>
17 #include <vppinfra/math.h>
18 #include <perfmon/perfmon.h>
19 #include <perfmon/intel/core.h>
/*
 * Field extractors.  Both macro arguments are fully parenthesized so that
 * an expression such as `n + 1` can be passed as the field index.
 */
/* byte i (0 = least significant) of the value m */
#define GET_METRIC(m, i) (((m) >> ((i) * 8)) & 0xff)
/* 32-bit field i (0 = least significant) of the value m */
#define GET_RATIO(m, i) (((m) >> ((i) * 32)) & 0xffffffff)

/* selector bits OR-ed into the bundle's .metrics[] entries */
#define RDPMC_SLOTS (1 << 30)	/* fixed slots */
#define RDPMC_METRICS (1 << 29) /* l1 & l2 metric counters */

/* counter indices combined with the selector bits above */
#define FIXED_COUNTER_SLOTS 3
#define METRIC_COUNTER_TOPDOWN_L1_L2 0
31 TOPDOWN_E_RETIRING = 0,
48 TOPDOWN_E_RDPMC_SLOTS = 0,
49 TOPDOWN_E_RDPMC_METRICS,
52 typedef f64 (topdown_lvl1_parse_fn_t) (void *, topdown_e_t);
54 /* Parse thread level states from perfmon_reading */
55 static_always_inline f64
56 topdown_lvl1_perf_reading (void *ps, topdown_e_t e)
58 perfmon_reading_t *ss = (perfmon_reading_t *) ps;
60 /* slots are at value[0], everthing else follows at +1 */
61 return ((f64) ss->value[e + 1] / ss->value[0]) * 100;
64 static_always_inline f64
65 topdown_lvl1_rdpmc_metric (void *ps, topdown_e_t e)
67 perfmon_node_stats_t *ss = (perfmon_node_stats_t *) ps;
69 ss->t[0].value[TOPDOWN_E_RDPMC_SLOTS] *
70 ((f64) GET_METRIC (ss->t[0].value[TOPDOWN_E_RDPMC_METRICS], e) / 0xff);
72 ss->t[1].value[TOPDOWN_E_RDPMC_SLOTS] *
73 ((f64) GET_METRIC (ss->t[1].value[TOPDOWN_E_RDPMC_METRICS], e) / 0xff);
74 u64 slots_delta = ss->t[1].value[TOPDOWN_E_RDPMC_SLOTS] -
75 ss->t[0].value[TOPDOWN_E_RDPMC_SLOTS];
77 slots_t1 = slots_t1 - slots_t0;
79 return (slots_t1 / slots_delta) * 100;
83 format_topdown_lvl1 (u8 *s, va_list *args)
85 void *ps = va_arg (*args, void *);
86 u64 idx = va_arg (*args, int);
87 perfmon_bundle_type_t type = va_arg (*args, perfmon_bundle_type_t);
90 topdown_lvl1_parse_fn_t *parse_fn,
91 *parse_fns[PERFMON_BUNDLE_TYPE_MAX] = { 0, topdown_lvl1_rdpmc_metric,
92 topdown_lvl1_perf_reading, 0 };
93 parse_fn = parse_fns[type];
100 parse_fn (ps, TOPDOWN_E_BAD_SPEC) + parse_fn (ps, TOPDOWN_E_RETIRING);
104 parse_fn (ps, TOPDOWN_E_BE_BOUND) + parse_fn (ps, TOPDOWN_E_FE_BOUND);
107 sv = parse_fn (ps, (topdown_e_t) idx - 2);
111 s = format (s, "%f", sv);
/*
 * CPU requirements of the level 1 bundle: the visible entry pairs the
 * clib_cpu_supports_avx512_bitalg check with the NODE_OR_THREAD bundle type.
 * The table is referenced by the "topdown-level1" registration.
 */
116 static perfmon_cpu_supports_t topdown_lvl1_cpu_supports[] = {
117 /* Intel ICX supports papi/thread or rdpmc/node */
118 { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE_OR_THREAD }
/*
 * Registration of the "topdown-level1" bundle for the "intel-core" source.
 *
 * events[]   the slots event followed by the four L1 metric events
 *            (retiring, bad speculation, frontend bound, backend bound)
 * metrics[]  the slots selector and the L1/L2 metrics selector
 *
 * Output has six columns rendered by format_topdown_lvl1; the footer spells
 * out the abbreviations used in the column headers.
 */
121 PERFMON_REGISTER_BUNDLE (topdown_lvl1_metric) = {
122 .name = "topdown-level1",
123 .description = "Top-down Microarchitecture Analysis Level 1",
124 .source = "intel-core",
125 .offset_type = PERFMON_OFFSET_TYPE_METRICS,
126 .events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
127 .events[1] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC,
128 .events[2] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC,
129 .events[3] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC,
130 .events[4] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC,
132 .metrics[0] = RDPMC_SLOTS | FIXED_COUNTER_SLOTS,
133 .metrics[1] = RDPMC_METRICS | METRIC_COUNTER_TOPDOWN_L1_L2,
135 .cpu_supports = topdown_lvl1_cpu_supports,
136 .n_cpu_supports = ARRAY_LEN (topdown_lvl1_cpu_supports),
137 .format_fn = format_topdown_lvl1,
/* column order matches the index handled by format_topdown_lvl1 */
138 .column_headers = PERFMON_STRINGS ("% NS", "% ST", "% NS.RT", "% NS.BS",
139 "% ST.FE", "% ST.BE"),
140 .footer = "Not Stalled (NS),STalled (ST),\n"
141 " Retiring (RT), Bad Speculation (BS),\n"
142 " FrontEnd bound (FE), BackEnd bound (BE)",
145 /* Convert the TopDown enum to the perf reading index */
146 #define TO_LVL2_PERF_IDX(e) \
148 u8 to_idx[TOPDOWN_E_MAX] = { 0, 0, 0, 0, 5, 5, 6, 6, 7, 7, 8, 8 }; \
152 /* Parse thread level stats from perfmon_reading */
153 static_always_inline f64
154 topdown_lvl2_perf_reading (void *ps, topdown_e_t e)
156 perfmon_reading_t *ss = (perfmon_reading_t *) ps;
157 u64 value = ss->value[TO_LVL2_PERF_IDX (e)];
159 /* If it is an L1 metric, call L1 format */
160 if (TOPDOWN_E_BE_BOUND >= e)
162 return topdown_lvl1_perf_reading (ps, e);
165 /* all the odd metrics, are inferred from even and L1 metrics */
168 topdown_e_t e1 = TO_LVL2_PERF_IDX (e) - 4;
169 value = ss->value[e1] - value;
172 return (f64) value / ss->value[0] * 100;
/*
 * Map a top-down enum value to the byte position of its rdpmc metric.
 * The four L1 values map to 0; after that each pair of consecutive enum
 * values shares one byte, numbered 4 through 7.
 */
#define TO_LVL2_METRIC_BYTE(e)                                                \
  ({                                                                          \
    u8 metric_byte_of[TOPDOWN_E_MAX] = { 0, 0, 0, 0, 4, 4,                    \
					 5, 5, 6, 6, 7, 7 };                  \
    metric_byte_of[e];                                                        \
  })
/*
 * Map a top-down L2 enum value to the L1 enum value it belongs to.
 * The four leading entries (the L1 values themselves) have no reference and
 * hold -1; each following pair of L2 values shares one L1 parent.
 */
#define TO_LVL1_REF(e)                                                        \
  ({                                                                          \
    u8 lvl1_of[TOPDOWN_E_MAX] = { -1, -1, -1, -1,                             \
				  TOPDOWN_E_RETIRING, TOPDOWN_E_RETIRING,     \
				  TOPDOWN_E_BAD_SPEC, TOPDOWN_E_BAD_SPEC,     \
				  TOPDOWN_E_FE_BOUND, TOPDOWN_E_FE_BOUND,     \
				  TOPDOWN_E_BE_BOUND, TOPDOWN_E_BE_BOUND };   \
    lvl1_of[e];                                                               \
  })
200 static_always_inline f64
201 topdown_lvl2_rdpmc_metric (void *ps, topdown_e_t e)
205 /* If it is an L1 metric, call L1 format */
206 if (TOPDOWN_E_BE_BOUND >= e)
208 return topdown_lvl1_rdpmc_metric (ps, e);
211 /* all the odd metrics, are inferred from even and L1 metrics */
214 /* get the L1 reference metric */
215 l1_value = topdown_lvl1_rdpmc_metric (ps, TO_LVL1_REF (e));
218 /* calculate the l2 metric */
220 fabs (l1_value - topdown_lvl1_rdpmc_metric (ps, TO_LVL2_METRIC_BYTE (e)));
225 format_topdown_lvl2 (u8 *s, va_list *args)
227 void *ps = va_arg (*args, void *);
228 u64 idx = va_arg (*args, int);
229 perfmon_bundle_type_t type = va_arg (*args, perfmon_bundle_type_t);
232 topdown_lvl1_parse_fn_t *parse_fn,
233 *parse_fns[PERFMON_BUNDLE_TYPE_MAX] = { 0, topdown_lvl2_rdpmc_metric,
234 topdown_lvl2_perf_reading, 0 };
236 parse_fn = parse_fns[type];
239 sv = parse_fn (ps, (topdown_e_t) idx);
240 s = format (s, "%f", sv);
/*
 * CPU requirements of the level 2 bundle: the visible entry pairs the
 * clib_cpu_supports_avx512_fp16 check with the NODE_OR_THREAD bundle type.
 * The table is referenced by the "topdown-level2" registration.
 */
245 static perfmon_cpu_supports_t topdown_lvl2_cpu_supports[] = {
246 /* Intel SPR supports papi/thread or rdpmc/node */
247 { clib_cpu_supports_avx512_fp16, PERFMON_BUNDLE_TYPE_NODE_OR_THREAD }
/*
 * Registration of the "topdown-level2" bundle for the "intel-core" source.
 *
 * events[]   the slots event, the four L1 metric events, then four L2
 *            metric events (heavy ops, branch mispredict, fetch latency,
 *            memory bound)
 * metrics[]  the slots selector and the L1/L2 metrics selector
 *
 * Columns are rendered by format_topdown_lvl2; the footer spells out the
 * abbreviations used in the column headers.
 */
250 PERFMON_REGISTER_BUNDLE (topdown_lvl2_metric) = {
251 .name = "topdown-level2",
252 .description = "Top-down Microarchitecture Analysis Level 2",
253 .source = "intel-core",
254 .offset_type = PERFMON_OFFSET_TYPE_METRICS,
255 .events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
256 .events[1] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC,
257 .events[2] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC,
258 .events[3] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC,
259 .events[4] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC,
260 .events[5] = INTEL_CORE_E_TOPDOWN_L2_HEAVYOPS_METRIC,
261 .events[6] = INTEL_CORE_E_TOPDOWN_L2_BMISPRED_METRIC,
262 .events[7] = INTEL_CORE_E_TOPDOWN_L2_FETCHLAT_METRIC,
263 .events[8] = INTEL_CORE_E_TOPDOWN_L2_MEMBOUND_METRIC,
265 .metrics[0] = RDPMC_SLOTS | FIXED_COUNTER_SLOTS,
266 .metrics[1] = RDPMC_METRICS | METRIC_COUNTER_TOPDOWN_L1_L2,
268 .cpu_supports = topdown_lvl2_cpu_supports,
269 .n_cpu_supports = ARRAY_LEN (topdown_lvl2_cpu_supports),
270 .format_fn = format_topdown_lvl2,
/* column order matches the index handled by format_topdown_lvl2 */
271 .column_headers = PERFMON_STRINGS ("% RT", "% BS", "% FE", "% BE", "% RT.HO",
272 "% RT.LO", "% BS.BM", "% BS.MC",
273 "% FE.FL", "% FE.FB", "% BE.MB",
275 .footer = "Retiring (RT), Bad Speculation (BS),\n"
276 " FrontEnd bound (FE), BackEnd bound (BE),\n"
277 " Light Operations (LO), Heavy Operations (HO),\n"
278 " Branch Misprediction (BM), Machine Clears (MC),\n"
279 " Fetch Latency (FL), Fetch Bandwidth (FB),\n"
280 " Memory Bound (MB), Core Bound (CB)",