From: Ray Kinsella Date: Thu, 27 Jan 2022 09:55:02 +0000 (+0000) Subject: perfmon: topdown level 1 and 2 for icx X-Git-Tag: v22.10-rc0~441 X-Git-Url: https://gerrit.fd.io/r/gitweb?a=commitdiff_plain;h=9d0c638b0fa28b9aebd9e3c0c0bdf98361d50a50;p=vpp.git perfmon: topdown level 1 and 2 for icx Topdown level 1 and 2 for Intel Ice Lake (ICX). Limiting topdown support to THREAD for the moment on Ice Lake, as NODE support is still unreliable. Also removing Topdown Level 1 from Sapphire Rapids onwards, as Topdown Level 2 also shows Level 1 on Sapphire, and it reduces the overall number of bundles. Type: improvement Signed-off-by: Ray Kinsella Change-Id: Iaa68b711dc8b6fb1090880b411debadb3c37f8bc --- diff --git a/src/plugins/perfmon/CMakeLists.txt b/src/plugins/perfmon/CMakeLists.txt index 05c280f64ac..66894b1c5d0 100644 --- a/src/plugins/perfmon/CMakeLists.txt +++ b/src/plugins/perfmon/CMakeLists.txt @@ -32,6 +32,7 @@ add_vpp_plugin(perfmon intel/bundle/branch_mispred.c intel/bundle/power_license.c intel/bundle/topdown_metrics.c + intel/bundle/topdown_icelake.c intel/bundle/topdown_tremont.c intel/bundle/frontend_bound_bw.c intel/bundle/frontend_bound_lat.c diff --git a/src/plugins/perfmon/intel/bundle/topdown_icelake.c b/src/plugins/perfmon/intel/bundle/topdown_icelake.c new file mode 100644 index 00000000000..a3392e52f0a --- /dev/null +++ b/src/plugins/perfmon/intel/bundle/topdown_icelake.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2022 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include + +static int +is_icelake () +{ + return clib_cpu_supports_avx512_bitalg () && !clib_cpu_supports_movdir64b (); +} + +static perfmon_cpu_supports_t topdown_lvl2_cpu_supports_icx[] = { + { is_icelake, PERFMON_BUNDLE_TYPE_THREAD } +}; + +#define GET_METRIC(m, i) (f64) (((m) >> (i * 8)) & 0xff) + +enum +{ + TD_SLOTS = 0, + STALLS_MEM_ANY, + STALLS_TOTAL, + BOUND_ON_STORES, + RECOVERY_CYCLES, + UOP_DROPPING, + UOP_NOT_DELIVERED, + TD_RETIRING, + TD_BAD_SPEC, + TD_FE_BOUND, + TD_BE_BOUND, +}; + +static_always_inline f64 +memory_bound_fraction (perfmon_reading_t *ss) +{ + return (ss->value[STALLS_MEM_ANY] + ss->value[BOUND_ON_STORES]) / + (f64) (ss->value[STALLS_TOTAL] + ss->value[BOUND_ON_STORES]); +} + +static_always_inline f64 +perf_metrics_sum (perfmon_reading_t *ss) +{ + return ss->value[TD_RETIRING] + ss->value[TD_BAD_SPEC] + + ss->value[TD_FE_BOUND] + ss->value[TD_BE_BOUND]; +} + +static_always_inline f64 +retiring (perfmon_reading_t *ss) +{ + return ss->value[TD_RETIRING] / perf_metrics_sum (ss); +} + +static_always_inline f64 +bad_speculation (perfmon_reading_t *ss) +{ + return ss->value[TD_BAD_SPEC] / perf_metrics_sum (ss); +} + +static_always_inline f64 +frontend_bound (perfmon_reading_t *ss) +{ + return (ss->value[TD_FE_BOUND] / perf_metrics_sum (ss)) - + (ss->value[UOP_DROPPING] / perf_metrics_sum (ss)); +} + +static_always_inline f64 +backend_bound (perfmon_reading_t *ss) +{ + return (ss->value[TD_BE_BOUND] / perf_metrics_sum (ss)) + + ((5 * ss->value[RECOVERY_CYCLES]) / perf_metrics_sum (ss)); +} + +static_always_inline f64 +fetch_latency (perfmon_reading_t *ss) +{ + f64 r = ((5 * ss->value[UOP_NOT_DELIVERED] - ss->value[UOP_DROPPING]) / + (f64) ss->value[TD_SLOTS]); + return r; +} + +static_always_inline f64 +fetch_bandwidth (perfmon_reading_t *ss) +{ + return clib_max (0, frontend_bound (ss) - fetch_latency (ss)); +} + 
+static_always_inline f64 +memory_bound (perfmon_reading_t *ss) +{ + return backend_bound (ss) * memory_bound_fraction (ss); +} + +static_always_inline f64 +core_bound (perfmon_reading_t *ss) +{ + return backend_bound (ss) - memory_bound (ss); +} + +static u8 * +format_topdown_lvl2_icx (u8 *s, va_list *args) +{ + perfmon_reading_t *ss = va_arg (*args, perfmon_reading_t *); + u64 idx = va_arg (*args, int); + f64 sv = 0; + + switch (idx) + { + case 0: + sv = retiring (ss); + break; + case 1: + sv = bad_speculation (ss); + break; + case 2: + sv = frontend_bound (ss); + break; + case 3: + sv = backend_bound (ss); + break; + case 4: + sv = fetch_latency (ss); + break; + case 5: + sv = fetch_bandwidth (ss); + break; + case 6: + sv = memory_bound (ss); + break; + case 7: + sv = core_bound (ss); + break; + } + + s = format (s, "%f", sv * 100); + + return s; +} + +PERFMON_REGISTER_BUNDLE (topdown_lvl2_metric_icx) = { + .name = "topdown", + .description = "Top-down Microarchitecture Analysis Level 1 & 2", + .source = "intel-core", + .events[0] = INTEL_CORE_E_TOPDOWN_SLOTS, + .events[1] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_MEM_ANY, + .events[2] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_TOTAL, + .events[3] = INTEL_CORE_E_EXE_ACTIVITY_BOUND_ON_STORES, + .events[4] = INTEL_CORE_E_INT_MISC_RECOVERY_CYCLES, + .events[5] = INTEL_CORE_E_INT_MISC_UOP_DROPPING, + .events[6] = INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CORE, + .events[7] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC, + .events[8] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC, + .events[9] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC, + .events[10] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC, + .n_events = 11, + .cpu_supports = topdown_lvl2_cpu_supports_icx, + .n_cpu_supports = ARRAY_LEN (topdown_lvl2_cpu_supports_icx), + .format_fn = format_topdown_lvl2_icx, + .column_headers = PERFMON_STRINGS ("% RT", "% BS", "% FE", "% BE", "% FE.FL", + "% FE.FB", "% BE.MB", "% BE.CB"), + .footer = "Retiring (RT), Bad Speculation (BS),\n" + " FrontEnd bound 
(FE), BackEnd bound (BE),\n" + " Fetch Latency (FL), Fetch Bandwidth (FB),\n" + " Memory Bound (MB), Core Bound (CB)", +}; diff --git a/src/plugins/perfmon/intel/bundle/topdown_metrics.c b/src/plugins/perfmon/intel/bundle/topdown_metrics.c index a28c4e7ca52..a464dfe1c88 100644 --- a/src/plugins/perfmon/intel/bundle/topdown_metrics.c +++ b/src/plugins/perfmon/intel/bundle/topdown_metrics.c @@ -79,66 +79,6 @@ topdown_lvl1_rdpmc_metric (void *ps, topdown_e_t e) return (slots_t1 / slots_delta) * 100; } -static u8 * -format_topdown_lvl1 (u8 *s, va_list *args) -{ - void *ps = va_arg (*args, void *); - u64 idx = va_arg (*args, int); - perfmon_bundle_type_t type = va_arg (*args, perfmon_bundle_type_t); - f64 sv = 0; - - topdown_lvl1_parse_fn_t *parse_fn, - *parse_fns[PERFMON_BUNDLE_TYPE_MAX] = { 0, topdown_lvl1_rdpmc_metric, - topdown_lvl1_perf_reading, 0 }; - parse_fn = parse_fns[type]; - ASSERT (parse_fn); - - switch (idx) - { - case 0: - sv = - parse_fn (ps, TOPDOWN_E_BAD_SPEC) + parse_fn (ps, TOPDOWN_E_RETIRING); - break; - case 1: - sv = - parse_fn (ps, TOPDOWN_E_BE_BOUND) + parse_fn (ps, TOPDOWN_E_FE_BOUND); - break; - default: - sv = parse_fn (ps, (topdown_e_t) idx - 2); - break; - } - - s = format (s, "%f", sv); - - return s; -} - -static perfmon_cpu_supports_t topdown_lvl1_cpu_supports[] = { - /* Intel ICX supports papi/thread or rdpmc/node */ - { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE_OR_THREAD } -}; - -PERFMON_REGISTER_BUNDLE (topdown_lvl1_metric) = { - .name = "topdown-level1", - .description = "Top-down Microarchitecture Analysis Level 1", - .source = "intel-core", - .events[0] = INTEL_CORE_E_TOPDOWN_SLOTS, - .events[1] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC, - .events[2] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC, - .events[3] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC, - .events[4] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC, - .n_events = 5, - .preserve_samples = 0x1F, - .cpu_supports = topdown_lvl1_cpu_supports, - .n_cpu_supports = 
ARRAY_LEN (topdown_lvl1_cpu_supports), - .format_fn = format_topdown_lvl1, - .column_headers = PERFMON_STRINGS ("% NS", "% ST", "% NS.RT", "% NS.BS", - "% ST.FE", "% ST.BE"), - .footer = "Not Stalled (NS),STalled (ST),\n" - " Retiring (RT), Bad Speculation (BS),\n" - " FrontEnd bound (FE), BackEnd bound (BE)", -}; - /* Convert the TopDown enum to the perf reading index */ #define TO_LVL2_PERF_IDX(e) \ ({ \ @@ -245,8 +185,8 @@ static perfmon_cpu_supports_t topdown_lvl2_cpu_supports[] = { }; PERFMON_REGISTER_BUNDLE (topdown_lvl2_metric) = { - .name = "topdown-level2", - .description = "Top-down Microarchitecture Analysis Level 2", + .name = "topdown", + .description = "Top-down Microarchitecture Analysis Level 1 & 2", .source = "intel-core", .events[0] = INTEL_CORE_E_TOPDOWN_SLOTS, .events[1] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC, diff --git a/src/plugins/perfmon/intel/core.h b/src/plugins/perfmon/intel/core.h index 971dc3465fa..108331674db 100644 --- a/src/plugins/perfmon/intel/core.h +++ b/src/plugins/perfmon/intel/core.h @@ -146,7 +146,10 @@ _ (0x83, 0x04, 0, 0, 0, 0x00, ICACHE_64B, IFTAG_STALL, \ "Cycles where a code fetch is stalled due to L1 instruction cache tag " \ "miss.") \ - _ (0x9C, 0x01, 0, 0, 0, 0x00, IDQ_UOPS_NOT_DELIVERED, CORE, \ + _ (0x83, 0x02, 0, 0, 0, 0x00, ICACHE_64B, IFTAG_MISS, \ + "Instruction fetch tag lookups that miss in the instruction cache " \ + "(L1I). Counts at 64-byte cache-line granularity.") \ + _ (0x9C, 0x01, 0, 0, 0, 0x05, IDQ_UOPS_NOT_DELIVERED, CORE, \ "Uops not delivered to Resource Allocation Table (RAT) per thread when " \ "backend of the machine is not stalled") \ _ (0xA1, 0x01, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_0, \