perfmon: show distribution of uops delivered to frontend 70/35370/4
author Ray Kinsella <mdr@ashroe.eu>
Fri, 18 Feb 2022 04:17:56 +0000 (04:17 +0000)
committer Damjan Marion <dmarion@me.com>
Fri, 18 Feb 2022 14:50:07 +0000 (14:50 +0000)
Break down the distribution of uops delivered to the frontend.
This correlates directly with the source of the uops.

Type: improvement

Signed-off-by: Ray Kinsella <mdr@ashroe.eu>
Change-Id: I93a57dbe56dfa0f378527844aa4e63f45a548e55

src/plugins/perfmon/CMakeLists.txt
src/plugins/perfmon/intel/bundle/frontend_bound_bw_src.c [moved from src/plugins/perfmon/intel/bundle/frontend_bound_bw.c with 84% similarity]
src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c [new file with mode: 0644]
src/plugins/perfmon/intel/core.h

index 66894b1..65c8e4b 100644 (file)
@@ -34,7 +34,8 @@ add_vpp_plugin(perfmon
   intel/bundle/topdown_metrics.c
   intel/bundle/topdown_icelake.c
   intel/bundle/topdown_tremont.c
-  intel/bundle/frontend_bound_bw.c
+  intel/bundle/frontend_bound_bw_src.c
+  intel/bundle/frontend_bound_bw_uops.c
   intel/bundle/frontend_bound_lat.c
   intel/bundle/iio_bw.c
 
@@ -25,7 +25,7 @@ enum
 };
 
 static u8 *
-format_intel_frontend_bound_bw (u8 *s, va_list *args)
+format_intel_frontend_bound_bw_src (u8 *s, va_list *args)
 {
   perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
   int row = va_arg (*args, int);
@@ -65,12 +65,12 @@ format_intel_frontend_bound_bw (u8 *s, va_list *args)
   return s;
 }
 
-static perfmon_cpu_supports_t frontend_bound_bw_cpu_supports[] = {
+static perfmon_cpu_supports_t frontend_bound_bw_cpu_supports_src[] = {
   { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
 };
 
-PERFMON_REGISTER_BUNDLE (intel_core_frontend_bound_bw) = {
-  .name = "td-frontend-bw",
+PERFMON_REGISTER_BUNDLE (intel_core_frontend_bound_bw_src) = {
+  .name = "td-frontend-bw-src",
   .description =
     "Topdown FrontEnd-bound BandWidth - % uops from each uop fetch source",
   .source = "intel-core",
@@ -79,9 +79,9 @@ PERFMON_REGISTER_BUNDLE (intel_core_frontend_bound_bw) = {
   .events[2] = INTEL_CORE_E_IDQ_MITE_UOPS, /* 0x0F */
   .events[3] = INTEL_CORE_E_LSD_UOPS,     /* 0x0F */
   .n_events = 4,
-  .format_fn = format_intel_frontend_bound_bw,
-  .cpu_supports = frontend_bound_bw_cpu_supports,
-  .n_cpu_supports = ARRAY_LEN (frontend_bound_bw_cpu_supports),
+  .format_fn = format_intel_frontend_bound_bw_src,
+  .cpu_supports = frontend_bound_bw_cpu_supports_src,
+  .n_cpu_supports = ARRAY_LEN (frontend_bound_bw_cpu_supports_src),
   .column_headers = PERFMON_STRINGS ("UOPs/PKT", "% DSB UOPS", "% MS UOPS",
                                     "% MITE UOPS", "% LSD UOPS"),
   .footer =
diff --git a/src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c b/src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c
new file mode 100644 (file)
index 0000000..6bf08af
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+enum
+{
+  THREAD_P,
+  THREE_UOP,
+  TWO_UOP,
+  ONE_UOP,
+  NO_UOP,
+  FOUR_UOP,
+};
+
+static u8 *
+format_intel_frontend_bound_bw_uops (u8 *s, va_list *args)
+{
+  perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+  int row = va_arg (*args, int);
+  f64 sv = 0;
+  f64 cycles = ss->value[THREAD_P];
+
+  switch (row)
+    {
+    case 0:
+      sv = (ss->value[FOUR_UOP] / cycles) * 100;
+      break;
+    case 1:
+      sv = ((ss->value[THREE_UOP] - ss->value[TWO_UOP]) / cycles) * 100;
+      break;
+    case 2:
+      sv = ((ss->value[TWO_UOP] - ss->value[ONE_UOP]) / cycles) * 100;
+      break;
+    case 3:
+      sv = ((ss->value[ONE_UOP] - ss->value[NO_UOP]) / cycles) * 100;
+      break;
+    case 4:
+      sv = (ss->value[NO_UOP] / cycles) * 100;
+      break;
+    }
+
+  s = format (s, "%04.1f", sv);
+
+  return s;
+}
+
+static perfmon_cpu_supports_t frontend_bound_bw_cpu_supports_uops[] = {
+  { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+PERFMON_REGISTER_BUNDLE (intel_core_frontend_bound_bw_uops) = {
+  .name = "td-frontend-bw-uops",
+  .description = "Topdown FrontEnd-bound BandWidth - distribution of "
+                "uops delivered to frontend",
+  .source = "intel-core",
+  .events[0] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P, /* 0x0F */
+  .events[1] =
+    INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_3_UOP_DELIV_CORE, /* 0xFF */
+  .events[2] =
+    INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_2_UOP_DELIV_CORE, /* 0xFF */
+  .events[3] =
+    INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_1_UOP_DELIV_CORE, /* 0xFF */
+  .events[4] =
+    INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_0_UOP_DELIV_CORE,     /* 0xFF */
+  .events[5] = INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_FE_WAS_OK, /* 0xFF */
+  .n_events = 6,
+  .format_fn = format_intel_frontend_bound_bw_uops,
+  .cpu_supports = frontend_bound_bw_cpu_supports_uops,
+  .n_cpu_supports = ARRAY_LEN (frontend_bound_bw_cpu_supports_uops),
+  .column_headers = PERFMON_STRINGS ("% 4 UOPS", "% 3 UOPS", "% 2 UOPS",
+                                    "% 1 UOPS", "% 0 UOPS"),
+  .footer =
+    "For more information, see the Intel(R) 64 and IA-32 Architectures\n"
+    "Optimization Reference Manual section on the Front End.",
+};
index 1083316..98ab9e5 100644 (file)
   _ (0x9C, 0x01, 0, 0, 0, 0x05, IDQ_UOPS_NOT_DELIVERED, CORE,                 \
      "Uops not delivered to Resource Allocation Table (RAT) per thread when " \
      "backend of the machine is not stalled")                                 \
+  _ (0x9C, 0x01, 0, 0, 1, 0x01, IDQ_UOPS_NOT_DELIVERED, CYCLES_FE_WAS_OK,     \
+     "Cycles with 4 uops delivered by the front end or Resource Allocation "  \
+     "Table (RAT) was stalling FE.x")                                         \
+  _ (0x9C, 0x01, 0, 0, 0, 0x01, IDQ_UOPS_NOT_DELIVERED_CYCLES_3_UOP_DELIV,    \
+     CORE, "Cycles with 3 uops delivered by the front end.")                  \
+  _ (0x9C, 0x01, 0, 0, 0, 0x02, IDQ_UOPS_NOT_DELIVERED_CYCLES_2_UOP_DELIV,    \
+     CORE, "Cycles with 2 uops delivered by the front end.")                  \
+  _ (0x9C, 0x01, 0, 0, 0, 0x03, IDQ_UOPS_NOT_DELIVERED_CYCLES_1_UOP_DELIV,    \
+     CORE, "Cycles with 1 uops delivered by the front end.")                  \
+  _ (0x9C, 0x01, 0, 0, 0, 0x04, IDQ_UOPS_NOT_DELIVERED_CYCLES_0_UOP_DELIV,    \
+     CORE, "Cycles with 0 uops delivered by the front end.")                  \
   _ (0xA1, 0x01, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_0,                      \
      "Number of uops executed on port 0")                                     \
   _ (0xA1, 0x02, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_1,                      \