2 * Copyright (c) 2022 Intel and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
15 #include <perfmon/perfmon.h>
16 #include <perfmon/intel/core.h>
/*
 * Yields true only when clib_cpu_supports_avx512_bitalg () reports support
 * and clib_cpu_supports_movdir64b () does not.
 * NOTE(review): the enclosing function header is not visible in this view;
 * presumably this is the body of is_icelake (), the predicate stored in
 * topdown_lvl2_cpu_supports_icx - confirm.
 */
21 return clib_cpu_supports_avx512_bitalg () && !clib_cpu_supports_movdir64b ();
/*
 * CPU-support table for this bundle: a single entry pairing the is_icelake
 * predicate with PERFMON_BUNDLE_TYPE_THREAD. The bundle registration points
 * its .cpu_supports field at this array and sizes it with ARRAY_LEN ().
 */
24 static perfmon_cpu_supports_t topdown_lvl2_cpu_supports_icx[] = {
25 { is_icelake, PERFMON_BUNDLE_TYPE_THREAD }
/*
 * Extract byte number i (0 = least significant byte) from the packed word m
 * and yield it as an f64 in the range 0..255.
 *
 * Both arguments and the whole expansion are parenthesised so that compound
 * arguments such as GET_METRIC (w, a + b) shift by (a + b) * 8 rather than
 * a + b * 8, and so the result composes safely inside larger expressions.
 */
#define GET_METRIC(m, i) ((f64) (((m) >> ((i) * 8)) & 0xff))
45 static_always_inline f64
46 memory_bound_fraction (perfmon_reading_t *ss)
48 return (ss->value[STALLS_MEM_ANY] + ss->value[BOUND_ON_STORES]) /
49 (f64) (ss->value[STALLS_TOTAL] + ss->value[BOUND_ON_STORES]);
52 static_always_inline f64
53 perf_metrics_sum (perfmon_reading_t *ss)
55 return ss->value[TD_RETIRING] + ss->value[TD_BAD_SPEC] +
56 ss->value[TD_FE_BOUND] + ss->value[TD_BE_BOUND];
59 static_always_inline f64
60 retiring (perfmon_reading_t *ss)
62 return ss->value[TD_RETIRING] / perf_metrics_sum (ss);
65 static_always_inline f64
66 bad_speculation (perfmon_reading_t *ss)
68 return ss->value[TD_BAD_SPEC] / perf_metrics_sum (ss);
71 static_always_inline f64
72 frontend_bound (perfmon_reading_t *ss)
74 return (ss->value[TD_FE_BOUND] / perf_metrics_sum (ss)) -
75 (ss->value[UOP_DROPPING] / perf_metrics_sum (ss));
78 static_always_inline f64
79 backend_bound (perfmon_reading_t *ss)
81 return (ss->value[TD_BE_BOUND] / perf_metrics_sum (ss)) +
82 ((5 * ss->value[RECOVERY_CYCLES]) / perf_metrics_sum (ss));
/*
 * Computes (5 * UOP_NOT_DELIVERED - UOP_DROPPING) divided by TD_SLOTS for
 * the reading ss and stores the quotient in r as an f64.
 * NOTE(review): the statement that returns from this function is not visible
 * in this view, so how r is turned into the result cannot be confirmed here.
 * NOTE(review): the subtraction happens before the conversion to f64; if
 * value[] is an unsigned type it would wrap whenever UOP_DROPPING exceeds
 * 5 * UOP_NOT_DELIVERED - confirm against the perfmon_reading_t definition.
 */
85 static_always_inline f64
86 fetch_latency (perfmon_reading_t *ss)
88 f64 r = ((5 * ss->value[UOP_NOT_DELIVERED] - ss->value[UOP_DROPPING]) /
89 (f64) ss->value[TD_SLOTS]);
93 static_always_inline f64
94 fetch_bandwidth (perfmon_reading_t *ss)
96 return clib_max (0, frontend_bound (ss) - fetch_latency (ss));
99 static_always_inline f64
100 memory_bound (perfmon_reading_t *ss)
102 return backend_bound (ss) * memory_bound_fraction (ss);
105 static_always_inline f64
106 core_bound (perfmon_reading_t *ss)
108 return backend_bound (ss) - memory_bound (ss);
/*
 * Format callback installed as the bundle's .format_fn. It appends one
 * metric, scaled by 100 and rendered with "%f", to the vector s.
 * NOTE(review): the return type, the declaration of sv, the statements that
 * select between the assignments below, and the return statement are not
 * visible in this view - presumably idx picks one helper per column in
 * column_headers order; confirm against the full file.
 */
112 format_topdown_lvl2_icx (u8 *s, va_list *args)
/* Variadic arguments, in order: the reading to format, then an int index. */
114 perfmon_reading_t *ss = va_arg (*args, perfmon_reading_t *);
115 u64 idx = va_arg (*args, int);
/* Each assignment stores the value of one metric helper for ss in sv. */
124 sv = bad_speculation (ss);
127 sv = frontend_bound (ss);
130 sv = backend_bound (ss);
133 sv = fetch_latency (ss);
136 sv = fetch_bandwidth (ss);
139 sv = memory_bound (ss);
142 sv = core_bound (ss);
/* The helpers yield ratios; multiply by 100 before printing. */
146 s = format (s, "%f", sv * 100);
/*
 * Registers the topdown_lvl2_metric_icx bundle: its description, event
 * source, the list of events to collect, the CPU-support table, the format
 * callback, and the column headers and footer text.
 * NOTE(review): some initializer fields and the closing of the initializer
 * are not visible in this view.
 */
151 PERFMON_REGISTER_BUNDLE (topdown_lvl2_metric_icx) = {
153 .description = "Top-down Microarchitecture Analysis Level 1 & 2",
154 .source = "intel-core",
/*
 * Events 0-6 are the slots, stall, recovery and uop counters.
 * NOTE(review): the helpers read these through named value[] indices whose
 * definitions are not visible here - confirm the ordering matches.
 */
155 .events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
156 .events[1] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_MEM_ANY,
157 .events[2] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_TOTAL,
158 .events[3] = INTEL_CORE_E_EXE_ACTIVITY_BOUND_ON_STORES,
159 .events[4] = INTEL_CORE_E_INT_MISC_RECOVERY_CYCLES,
160 .events[5] = INTEL_CORE_E_INT_MISC_UOP_DROPPING,
161 .events[6] = INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CORE,
/* Events 7-10 are the four level-1 top-down metric events. */
162 .events[7] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC,
163 .events[8] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC,
164 .events[9] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC,
165 .events[10] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC,
/* CPU gating table and its length. */
167 .cpu_supports = topdown_lvl2_cpu_supports_icx,
168 .n_cpu_supports = ARRAY_LEN (topdown_lvl2_cpu_supports_icx),
169 .format_fn = format_topdown_lvl2_icx,
/* Eight columns; the footer spells out each abbreviation. */
170 .column_headers = PERFMON_STRINGS ("% RT", "% BS", "% FE", "% BE", "% FE.FL",
171 "% FE.FB", "% BE.MB", "% BE.CB"),
172 .footer = "Retiring (RT), Bad Speculation (BS),\n"
173 " FrontEnd bound (FE), BackEnd bound (BE),\n"
174 " Fetch Latency (FL), Fetch Bandwidth (FB),\n"
175 " Memory Bound (MB), Core Bound (CB)",