perfmon: new perfmon plugin

author Damjan Marion <damarion@cisco.com>

Fri, 27 Nov 2020 19:15:17 +0000 (20:15 +0100)

committer Florin Coras <florin.coras@gmail.com>

Fri, 18 Dec 2020 17:20:28 +0000 (17:20 +0000)
author Damjan Marion <damarion@cisco.com>
Fri, 27 Nov 2020 19:15:17 +0000 (20:15 +0100)
committer Florin Coras <florin.coras@gmail.com>
Fri, 18 Dec 2020 17:20:28 +0000 (17:20 +0000)
diff --git a/MAINTAINERS b/MAINTAINERS

index 19a5763..a3d582f 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -461,6 +461,11 @@ I: marvell
  M:     Damjan Marion <damarion@cisco.com>
  F:     src/plugins/marvell/
  
+Plugin - performance counter
+I:     perfmon
+M:     Damjan Marion <damarion@cisco.com>
+F:     src/plugins/perfmon/
+
  Plugin - PPPoE
  I:     pppoe
  M:     Hongjun Ni <hongjun.ni@intel.com>
diff --git a/src/plugins/perfmon/CMakeLists.txt b/src/plugins/perfmon/CMakeLists.txt

new file mode 100644 (file)

index 0000000..c0d39a3
--- /dev/null
+++ b/src/plugins/perfmon/CMakeLists.txt
@@ -0,0 +1,30 @@
+# Copyright (c) 2020 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
+  return()
+endif()
+
+add_vpp_plugin(perfmon
+  SOURCES
+  cli.c
+  dispatch_wrapper.c
+  linux.c
+  perfmon.c
+  table.c
+  intel/core.c
+  intel/uncore.c
+  intel/bundle/inst_and_clock.c
+  intel/bundle/load_blocks.c
+  intel/bundle/mem_bw.c
+)
diff --git a/src/plugins/perfmon/cli.c b/src/plugins/perfmon/cli.c

new file mode 100644 (file)

index 0000000..cb7debe
--- /dev/null
+++ b/src/plugins/perfmon/cli.c
@@ -0,0 +1,437 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/table.h>
+
+/* unformat helper: consumes one "%s" token and looks it up in
+ * perfmon_main.bundle_by_name. On a hit, the bundle pointer is stored
+ * through the perfmon_bundle_t ** taken from args and 1 is returned;
+ * otherwise 0 is returned and the output pointer is left untouched. */
+uword
+unformat_perfmon_bundle_name (unformat_input_t *input, va_list *args)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  perfmon_bundle_t **result = va_arg (*args, perfmon_bundle_t **);
+  u8 *name = 0;
+  uword *entry;
+  uword found = 0;
+
+  if (!unformat (input, "%s", &name))
+    return 0;
+
+  entry = hash_get_mem (pm->bundle_by_name, name);
+  if (entry != 0)
+    {
+      *result = (perfmon_bundle_t *) entry[0];
+      found = 1;
+    }
+
+  /* the parsed token is only needed for the lookup */
+  vec_free (name);
+  return found;
+}
+
+/* unformat helper: consumes one "%s" token and looks it up in
+ * perfmon_main.source_by_name. On a hit, the source pointer is stored
+ * through the perfmon_source_t ** taken from args and 1 is returned;
+ * otherwise 0 is returned and the output pointer is left untouched. */
+uword
+unformat_perfmon_source_name (unformat_input_t *input, va_list *args)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  perfmon_source_t **result = va_arg (*args, perfmon_source_t **);
+  u8 *name = 0;
+  uword *entry;
+  uword found = 0;
+
+  if (!unformat (input, "%s", &name))
+    return 0;
+
+  entry = hash_get_mem (pm->source_by_name, name);
+  if (entry != 0)
+    {
+      *result = (perfmon_source_t *) entry[0];
+      found = 1;
+    }
+
+  /* the parsed token is only needed for the lookup */
+  vec_free (name);
+  return found;
+}
+
+/* format helper for a perfmon bundle.
+ * args: perfmon_bundle_t *b, int verbose.
+ *  - b == 0:  emits the column header line of the one-line listing.
+ *  - verbose: emits name, description, source and one line per event.
+ *  - else:    emits a single listing line (name, type, source, description).
+ */
+u8 *
+format_perfmon_bundle (u8 *s, va_list *args)
+{
+  perfmon_bundle_t *b = va_arg (*args, perfmon_bundle_t *);
+  int verbose = va_arg (*args, int);
+
+  const char *type_names[] = {
+    [PERFMON_BUNDLE_TYPE_NODE] = "node",
+    [PERFMON_BUNDLE_TYPE_THREAD] = "thread",
+    [PERFMON_BUNDLE_TYPE_SYSTEM] = "system",
+  };
+
+  if (b == 0)
+    return format (s, "%-20s%-10s%-20s%s", "Name", "Type", "Source",
+                  "Description");
+
+  if (!verbose)
+    return format (s, "%-20s%-10s%-20s%s", b->name, type_names[b->type],
+                  b->src->name, b->description);
+
+  s = format (s, "name: %s\n", b->name);
+  s = format (s, "description: %s\n", b->description);
+  s = format (s, "source: %s\n", b->src->name);
+
+  /* b->events[] holds indices into the source's event table */
+  for (int idx = 0; idx < b->n_events; idx++)
+    s = format (s, "event %u: %s\n", idx,
+               b->src->events[b->events[idx]].name);
+
+  return s;
+}
+
+/* CLI handler for "show perfmon bundle [<bundle-name>] [verbose]".
+ * With no bundle names, every registered bundle is listed; naming at least
+ * one bundle forces verbose output for the named bundles.
+ * Returns 0 on success or a clib error on unparsable input. */
+static clib_error_t *
+show_perfmon_bundle_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                               vlib_cli_command_t *cmd)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  perfmon_bundle_t *b = 0, **vb = 0;
+  clib_error_t *err = 0;
+  int verbose = 0;
+
+  if (unformat_user (input, unformat_line_input, line_input))
+    {
+      while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+       {
+         if (unformat (line_input, "verbose"))
+           verbose = 1;
+         else if (unformat (line_input, "%U", unformat_perfmon_bundle_name,
+                            &b))
+           vec_add (vb, &b, 1);
+         else
+           {
+             /* build the error while line_input is still valid, then
+              * leave the loop so line_input and vb are released below */
+             err = clib_error_return (0, "unknown input `%U'",
+                                      format_unformat_error, line_input);
+             break;
+           }
+       }
+      unformat_free (line_input);
+      if (err)
+       goto done;
+    }
+
+  if (vb == 0)
+    {
+      char *key;
+      hash_foreach_mem (key, b, pm->bundle_by_name, vec_add (vb, &b, 1););
+    }
+  else
+    verbose = 1;
+
+  /* header row is only meaningful for the one-line-per-bundle listing */
+  if (verbose == 0)
+    vlib_cli_output (vm, "%U\n", format_perfmon_bundle, 0, 0);
+
+  for (int i = 0; i < vec_len (vb); i++)
+    vlib_cli_output (vm, "%U\n", format_perfmon_bundle, vb[i], verbose);
+
+done:
+  vec_free (vb);
+  return err;
+}
+
+VLIB_CLI_COMMAND (show_perfmon_bundle_command, static) = {
+  .path = "show perfmon bundle",
+  .short_help = "show perfmon bundle [<bundle-name>] [verbose]",
+  .function = show_perfmon_bundle_command_fn,
+  .is_mp_safe = 1,
+};
+
+/* format helper for a perfmon source.
+ * args: perfmon_source_t *src, int verbose.
+ *  - src == 0: emits the column header line of the one-line listing.
+ *  - verbose:  emits name, description, every event (with its config and
+ *              optional description) and the instances grouped by type.
+ *  - else:     emits a single listing line (name, event count, description).
+ */
+u8 *
+format_perfmon_source (u8 *s, va_list *args)
+{
+  perfmon_source_t *src = va_arg (*args, perfmon_source_t *);
+  int verbose = va_arg (*args, int);
+
+  if (src == 0)
+    return format (s, "%-20s%-9s %s", "Name", "NumEvents", "Description");
+
+  if (verbose)
+    {
+      s = format (s, "name:        %s\n", src->name);
+      s = format (s, "description: %s\n", src->description);
+      s = format (s, "Events:\n");
+      for (int i = 0; i < src->n_events; i++)
+       {
+         perfmon_event_t *e = src->events + i;
+         s = format (s, "  %s", e->name);
+         if (src->format_config)
+           s = format (s, " (%U)\n", src->format_config, e->config);
+         else
+           /* config is handed to format_config as a 64-bit value, so it
+            * must be printed with a long conversion here as well */
+           s = format (s, " (0x%lx)\n", e->config);
+         if (e->description)
+           s = format (s, "    %s\n", e->description);
+       }
+
+      if (src->instances_by_type)
+       {
+         s = format (s, "Instances:\n");
+         for (int i = 0; i < vec_len (src->instances_by_type); i++)
+           {
+             perfmon_instance_type_t *it;
+             it = vec_elt_at_index (src->instances_by_type, i);
+             /* skip instance types that have nothing to show */
+             if (vec_len (it->instances) == 0)
+               continue;
+             s = format (s, "  %s:\n   ", it->name);
+             for (int j = 0; j < vec_len (it->instances); j++)
+               {
+                 perfmon_instance_t *in = vec_elt_at_index (it->instances, j);
+                 s = format (s, " %s", in->name);
+               }
+             s = format (s, "\n");
+           }
+       }
+    }
+  else
+    s = format (s, "%-20s%9u %s", src->name, src->n_events, src->description);
+
+  return s;
+}
+
+/* CLI handler for "show perfmon source [<source-name>] [verbose]".
+ * With no source names, every registered source is listed; naming at least
+ * one source forces verbose output for the named sources.
+ * Returns 0 on success or a clib error on unparsable input. */
+static clib_error_t *
+show_perfmon_source_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                               vlib_cli_command_t *cmd)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  perfmon_source_t *s = 0, **vs = 0;
+  clib_error_t *err = 0;
+  int verbose = 0;
+
+  if (unformat_user (input, unformat_line_input, line_input))
+    {
+      while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+       {
+         if (unformat (line_input, "verbose"))
+           verbose = 1;
+         else if (unformat (line_input, "%U", unformat_perfmon_source_name,
+                            &s))
+           vec_add (vs, &s, 1);
+         else
+           {
+             /* build the error while line_input is still valid, then
+              * leave the loop so line_input and vs are released below */
+             err = clib_error_return (0, "unknown input `%U'",
+                                      format_unformat_error, line_input);
+             break;
+           }
+       }
+      unformat_free (line_input);
+      if (err)
+       goto done;
+    }
+
+  if (vs == 0)
+    {
+      char *key;
+      hash_foreach_mem (key, s, pm->source_by_name, vec_add (vs, &s, 1););
+    }
+  else
+    verbose = 1;
+
+  /* header row is only meaningful for the one-line-per-source listing */
+  if (verbose == 0)
+    vlib_cli_output (vm, "%U\n", format_perfmon_source, 0, 0);
+
+  for (int i = 0; i < vec_len (vs); i++)
+    vlib_cli_output (vm, "%U\n", format_perfmon_source, vs[i], verbose);
+
+done:
+  vec_free (vs);
+  return err;
+}
+
+VLIB_CLI_COMMAND (show_perfmon_source_command, static) = {
+  .path = "show perfmon source",
+  .short_help = "show perfmon source [<source-name>] [verbose]",
+  .function = show_perfmon_source_command_fn,
+  .is_mp_safe = 1,
+};
+
+/* CLI handler for "show perfmon active-bundle": prints the verbose
+ * description of pm->active_bundle, or returns an error when no bundle
+ * has been selected yet. */
+static clib_error_t *
+show_perfmon_active_bundle_command_fn (vlib_main_t *vm,
+                                      unformat_input_t *input,
+                                      vlib_cli_command_t *cmd)
+{
+  perfmon_main_t *pm = &perfmon_main;
+
+  /* format_perfmon_bundle treats a null bundle as a request for the listing
+   * header row, which would be misleading output for this command */
+  if (pm->active_bundle == 0)
+    return clib_error_return (0, "no bundle selected");
+
+  vlib_cli_output (vm, "%U\n", format_perfmon_bundle, pm->active_bundle, 1);
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (show_perfmon_active_bundle_command, static) = {
+  .path = "show perfmon active-bundle",
+  .short_help = "show perfmon active-bundle",
+  .function = show_perfmon_active_bundle_command_fn,
+  .is_mp_safe = 1,
+};
+
+/* CLI handler for "show perfmon statistics".
+ * Reads one counter group per instance of the active instance type from
+ * pm->group_fds, then renders a table with one row per column header of the
+ * active bundle. For node bundles, each node with a non-zero call count gets
+ * its own column following the column that carries the instance name.
+ * Returns 0 on success, or a clib error when no bundle is selected, there
+ * are no instances, or read() fails. */
+static clib_error_t *
+show_perfmon_stats_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                              vlib_cli_command_t *cmd)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  perfmon_bundle_t *b = pm->active_bundle;
+  clib_error_t *err = 0;
+  table_t table = {}, *t = &table;
+  u32 n_instances;
+  perfmon_reading_t *r, *readings = 0;
+  perfmon_instance_type_t *it = pm->active_instance_type;
+  perfmon_instance_t *in;
+  int n_row = 0;
+
+  if (b == 0)
+    return clib_error_return (0, "no bundle selected");
+
+  n_instances = vec_len (it->instances);
+
+  /* n_instances is unsigned, so "n_instances - 1" below would wrap around
+   * and request an enormous vector if there were no instances */
+  if (n_instances == 0)
+    return clib_error_return (0, "no instances");
+
+  vec_validate (readings, n_instances - 1);
+
+  for (u32 i = 0; i < n_instances; i++)
+    {
+      r = vec_elt_at_index (readings, i);
+
+      /* each group read returns 3 leading u64 fields plus one per event */
+      if (read (pm->group_fds[i], r, (b->n_events + 3) * sizeof (u64)) == -1)
+       {
+         err = clib_error_return_unix (0, "read");
+         goto done;
+       }
+    }
+
+  table_format_title (t, "%s", b->description);
+
+  table_add_header_col (t, 0);
+  table_add_header_row (t, 0);
+
+  /* column_headers is a null-terminated list; each entry labels one row */
+  if (b->column_headers)
+    {
+      char **hdr = b->column_headers;
+      while (hdr[0])
+       table_format_cell (t, -1, n_row++, "%s", hdr++[0]);
+    }
+
+  int col = 0;
+  for (u32 i = 0; i < n_instances; i++)
+    {
+      in = vec_elt_at_index (it->instances, i);
+      r = vec_elt_at_index (readings, i);
+      table_format_cell (t, col, -1, "%s", in->name);
+      if (b->type == PERFMON_BUNDLE_TYPE_NODE)
+       {
+         perfmon_thread_runtime_t *tr;
+         tr = vec_elt_at_index (pm->thread_runtimes, i);
+         for (int j = 0; j < tr->n_nodes; j++)
+           if (tr->node_stats[j].n_calls)
+             {
+               perfmon_node_stats_t ns;
+               table_format_cell (t, ++col, -1, "%U", format_vlib_node_name,
+                                  vm, j);
+               table_set_cell_align (t, col, -1, TTAA_RIGHT);
+               table_set_cell_fg_color (t, col, -1, TTAC_CYAN);
+               /* format from a local snapshot of the node's counters */
+               clib_memcpy_fast (&ns, tr->node_stats + j, sizeof (ns));
+               for (int row = 0; row < n_row; row++)
+                 table_format_cell (t, col, row, "%U", b->format_fn, &ns,
+                                    row);
+             }
+       }
+      else
+       {
+         /* for non-node bundles col advances once per instance */
+         for (int row = 0; row < n_row; row++)
+           table_format_cell (t, col, row, "%U", b->format_fn, r, row);
+       }
+      col++;
+    }
+
+  vlib_cli_output (vm, "%U\n", format_table, t);
+  table_free (t);
+
+  if (b->footer)
+    vlib_cli_output (vm, "\n%s\n", b->footer);
+
+done:
+  vec_free (readings);
+  return err;
+}
+
+VLIB_CLI_COMMAND (show_perfmon_stats_command, static) = {
+  .path = "show perfmon statistics",
+  .short_help = "show perfmon statistics",
+  .function = show_perfmon_stats_command_fn,
+  .is_mp_safe = 1,
+};
+
+/* CLI handler for "set perfmon bundle <bundle-name>".
+ * Parses the bundle name and hands the bundle to perfmon_set(). Fails when
+ * no (known) bundle name is given or while pm->is_running is set.
+ * Returns perfmon_set()'s result, or a clib error for the cases above. */
+static clib_error_t *
+set_perfmon_bundle_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                              vlib_cli_command_t *cmd)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  perfmon_bundle_t *b = 0;
+  clib_error_t *err = 0;
+
+  if (unformat_user (input, unformat_line_input, line_input) == 0)
+    return clib_error_return (0, "please specify bundle name");
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%U", unformat_perfmon_bundle_name, &b))
+       ;
+      else
+       {
+         /* build the error while line_input is still valid, then leave
+          * the loop so line_input is released below */
+         err = clib_error_return (0, "unknown input '%U'",
+                                  format_unformat_error, line_input);
+         break;
+       }
+    }
+  unformat_free (line_input);
+
+  if (err)
+    return err;
+
+  if (b == 0)
+    return clib_error_return (0, "please specify bundle name");
+
+  if (pm->is_running)
+    return clib_error_return (0, "please stop first");
+
+  return perfmon_set (vm, b);
+}
+
+VLIB_CLI_COMMAND (set_perfmon_bundle_command, static) = {
+  .path = "set perfmon bundle",
+  .short_help = "set perfmon bundle <bundle-name>",
+  .function = set_perfmon_bundle_command_fn,
+  .is_mp_safe = 1,
+};
+
+/* CLI handler for "perfmon reset": calls perfmon_reset() and always
+ * reports success (the result of perfmon_reset, if any, is not used). */
+static clib_error_t *
+perfmon_reset_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                         vlib_cli_command_t *cmd)
+{
+  perfmon_reset (vm);
+  return 0;
+}
+
+VLIB_CLI_COMMAND (perfmon_reset_command, static) = {
+  .path = "perfmon reset",
+  .short_help = "perfmon reset",
+  .function = perfmon_reset_command_fn,
+  .is_mp_safe = 1,
+};
+
+/* CLI handler for "perfmon start": forwards to perfmon_start() and
+ * returns its error, if any. The command takes no arguments. */
+static clib_error_t *
+perfmon_start_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                         vlib_cli_command_t *cmd)
+{
+  return perfmon_start (vm);
+}
+
+VLIB_CLI_COMMAND (perfmon_start_command, static) = {
+  .path = "perfmon start",
+  .short_help = "perfmon start",
+  .function = perfmon_start_command_fn,
+  .is_mp_safe = 1,
+};
+
+/* CLI handler for "perfmon stop": forwards to perfmon_stop() and
+ * returns its error, if any. The command takes no arguments. */
+static clib_error_t *
+perfmon_stop_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                        vlib_cli_command_t *cmd)
+{
+  return perfmon_stop (vm);
+}
+
+VLIB_CLI_COMMAND (perfmon_stop_command, static) = {
+  .path = "perfmon stop",
+  .short_help = "perfmon stop",
+  .function = perfmon_stop_command_fn,
+  .is_mp_safe = 1,
+};
diff --git a/src/plugins/perfmon/dispatch_wrapper.c b/src/plugins/perfmon/dispatch_wrapper.c

new file mode 100644 (file)

index 0000000..4ae9c77
--- /dev/null
+++ b/src/plugins/perfmon/dispatch_wrapper.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <linux/limits.h>
+#include <sys/ioctl.h>
+
+#include <perfmon/perfmon.h>
+
+/* Read performance counters with _rdpmc: counters[i] receives the value
+ * selected by pmc_index[i] for i = n_counters - 1 down to 0.
+ * The switch deliberately falls through from the highest slot to slot 0.
+ * Any n_counters outside 1..6 (including 0 and values above 7) lands on the
+ * default label and reads all seven slots, so both arrays must then have at
+ * least seven valid entries. */
+static_always_inline void
+perfmon_read_pmcs (u64 *counters, int *pmc_index, u8 n_counters)
+{
+  switch (n_counters)
+    {
+    default:
+    case 7:
+      counters[6] = _rdpmc (pmc_index[6]);
+      /* fall through */
+    case 6:
+      counters[5] = _rdpmc (pmc_index[5]);
+      /* fall through */
+    case 5:
+      counters[4] = _rdpmc (pmc_index[4]);
+      /* fall through */
+    case 4:
+      counters[3] = _rdpmc (pmc_index[3]);
+      /* fall through */
+    case 3:
+      counters[2] = _rdpmc (pmc_index[2]);
+      /* fall through */
+    case 2:
+      counters[1] = _rdpmc (pmc_index[1]);
+      /* fall through */
+    case 1:
+      counters[0] = _rdpmc (pmc_index[0]);
+      break;
+    }
+}
+
+/* Compute the value later passed to _rdpmc for event slot i of thread
+ * runtime tr: the sum of the index and offset fields of tr->mmap_pages[i].
+ * NOTE(review): mmap_pages presumably point at perf_event mmap pages; the
+ * meaning of index + offset as a counter selector should be confirmed
+ * against the perf_event_open documentation. */
+static_always_inline int
+perfmon_calc_pmc_index (perfmon_thread_runtime_t *tr, u8 i)
+{
+  return (int) (tr->mmap_pages[i]->index + tr->mmap_pages[i]->offset);
+}
+
+/* Wrapper around a node's dispatch function that samples performance
+ * counters immediately before and after the call and accumulates the
+ * deltas into the per-thread, per-node statistics.
+ *
+ * vm/node/frame are passed unchanged to node->function.
+ * Returns whatever node->function returned. Statistics are only updated
+ * when that return value is non-zero; it is added to n_packets. */
+uword
+perfmon_dispatch_wrapper (vlib_main_t *vm, vlib_node_runtime_t *node,
+                         vlib_frame_t *frame)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  perfmon_thread_runtime_t *rt =
+    vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
+  perfmon_node_stats_t *s =
+    vec_elt_at_index (rt->node_stats, node->node_index);
+  u8 n_events = rt->n_events;
+  int pmc_index[PERF_MAX_EVENTS];
+  u64 before[PERF_MAX_EVENTS];
+  u64 after[PERF_MAX_EVENTS];
+  uword rv;
+
+  /* the stats entry is written after the node function returns */
+  clib_prefetch_load (s);
+
+  /* resolve counter selectors for slots n_events - 1 .. 0; the cases
+   * deliberately fall through, and any n_events outside 1..6 takes the
+   * default label and resolves all seven slots */
+  switch (n_events)
+    {
+    default:
+    case 7:
+      pmc_index[6] = perfmon_calc_pmc_index (rt, 6);
+      /* fall through */
+    case 6:
+      pmc_index[5] = perfmon_calc_pmc_index (rt, 5);
+      /* fall through */
+    case 5:
+      pmc_index[4] = perfmon_calc_pmc_index (rt, 4);
+      /* fall through */
+    case 4:
+      pmc_index[3] = perfmon_calc_pmc_index (rt, 3);
+      /* fall through */
+    case 3:
+      pmc_index[2] = perfmon_calc_pmc_index (rt, 2);
+      /* fall through */
+    case 2:
+      pmc_index[1] = perfmon_calc_pmc_index (rt, 1);
+      /* fall through */
+    case 1:
+      pmc_index[0] = perfmon_calc_pmc_index (rt, 0);
+      break;
+    }
+
+  perfmon_read_pmcs (before, pmc_index, n_events);
+  rv = node->function (vm, node, frame);
+  perfmon_read_pmcs (after, pmc_index, n_events);
+
+  /* calls that return 0 are not counted at all */
+  if (rv == 0)
+    return rv;
+
+  s->n_calls += 1;
+  s->n_packets += rv;
+  for (int i = 0; i < n_events; i++)
+    s->value[i] += after[i] - before[i];
+
+  return rv;
+}
diff --git a/src/plugins/perfmon/intel/bundle/inst_and_clock.c b/src/plugins/perfmon/intel/bundle/inst_and_clock.c

new file mode 100644 (file)

index 0000000..e08d21a
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/inst_and_clock.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+/* format_fn of the "inst-and-clock" bundle.
+ * args: perfmon_node_stats_t *ns, int row. The row number selects which
+ * statistic to print and follows the order of column_headers below:
+ *   0 Calls, 1 Packets, 2 Packets/Call, 3 Clocks/Packet,
+ *   4 Instructions/Packet, 5 IPC.
+ * value[0] / value[1] are read as instructions / clocks, matching
+ * events[0] / events[1] of the bundle registration.
+ * Rows outside 0..5 leave s unchanged. */
+static u8 *
+format_inst_and_clock (u8 *s, va_list *args)
+{
+  perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+  int row = va_arg (*args, int);
+
+  switch (row)
+    {
+    case 0:
+      s = format (s, "%lu", ns->n_calls);
+      break;
+    case 1:
+      s = format (s, "%lu", ns->n_packets);
+      break;
+    case 2:
+      s = format (s, "%.2f", (f64) ns->n_packets / ns->n_calls);
+      break;
+    case 3:
+      s = format (s, "%.2f", (f64) ns->value[1] / ns->n_packets);
+      break;
+    case 4:
+      s = format (s, "%.2f", (f64) ns->value[0] / ns->n_packets);
+      break;
+    case 5:
+      s = format (s, "%.2f", (f64) ns->value[0] / ns->value[1]);
+      break;
+    }
+  return s;
+}
+
+/* Node-level bundle on the "intel-core" source. Three events are collected;
+ * the formatter above only uses the first two. */
+PERFMON_REGISTER_BUNDLE (inst_and_clock) = {
+  .name = "inst-and-clock",
+  .description = "instructions/packet, cycles/packet and IPC",
+  .source = "intel-core",
+  .type = PERFMON_BUNDLE_TYPE_NODE,
+  .events[0] = INTEL_CORE_E_INST_RETIRED_ANY_P,
+  .events[1] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P,
+  .events[2] = INTEL_CORE_E_CPU_CLK_UNHALTED_REF_TSC,
+  .n_events = 3,
+  .format_fn = format_inst_and_clock,
+  .column_headers = PERFMON_STRINGS ("Calls", "Packets", "Packets/Call",
+                                    "Clocks/Packet", "Instructions/Packet",
+                                    "IPC"),
+};
diff --git a/src/plugins/perfmon/intel/bundle/load_blocks.c b/src/plugins/perfmon/intel/bundle/load_blocks.c

new file mode 100644 (file)

index 0000000..d02ef3a
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/load_blocks.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+/* format_fn of the "load-blocks" bundle.
+ * args: perfmon_node_stats_t *ns, int row. The row number selects which
+ * statistic to print and follows the order of column_headers below:
+ *   0 Calls, 1 Packets, 2..4 value[0..2] divided by the number of calls
+ *   (labelled [1]..[3] and explained in the bundle footer).
+ * Rows outside 0..4 leave s unchanged. */
+static u8 *
+format_load_blocks (u8 *s, va_list *args)
+{
+  perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+  int row = va_arg (*args, int);
+
+  switch (row)
+    {
+    case 0:
+      s = format (s, "%12lu", ns->n_calls);
+      break;
+    case 1:
+      s = format (s, "%12lu", ns->n_packets);
+      break;
+    case 2:
+      s = format (s, "%9.2f", (f64) ns->value[0] / ns->n_calls);
+      break;
+    case 3:
+      s = format (s, "%9.2f", (f64) ns->value[1] / ns->n_calls);
+      break;
+    case 4:
+      s = format (s, "%9.2f", (f64) ns->value[2] / ns->n_calls);
+      break;
+    }
+  return s;
+}
+
+/* Node-level bundle on the "intel-core" source; the footer documents the
+ * meaning of the [1]..[3] rows produced by format_load_blocks. */
+PERFMON_REGISTER_BUNDLE (load_blocks) = {
+  .name = "load-blocks",
+  .description = "load operations blocked due to various uarch reasons",
+  .source = "intel-core",
+  .type = PERFMON_BUNDLE_TYPE_NODE,
+  .events[0] = INTEL_CORE_E_LD_BLOCKS_STORE_FORWARD,
+  .events[1] = INTEL_CORE_E_LD_BLOCKS_NO_SR,
+  .events[2] = INTEL_CORE_E_LD_BLOCKS_PARTIAL_ADDRESS_ALIAS,
+  .n_events = 3,
+  .format_fn = format_load_blocks,
+  .column_headers = PERFMON_STRINGS ("Calls", "Packets", "[1]", "[2]", "[3]"),
+  .footer = "Per node call statistics:\n"
+           "[1] Loads blocked due to overlapping with a preceding store that "
+           "cannot be forwarded.\n"
+           "[2] The number of times that split load operations are "
+           "temporarily blocked because\n"
+           "    all resources for handling the split accesses are in use\n"
+           "[3] False dependencies in Memory Order Buffer (MOB) due to "
+           "partial compare on address.\n",
+};
diff --git a/src/plugins/perfmon/intel/bundle/mem_bw.c b/src/plugins/perfmon/intel/bundle/mem_bw.c

new file mode 100644 (file)

index 0000000..672dbb0
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/mem_bw.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/uncore.h>
+
+/* format_fn of the "memory-bandwidth" bundle.
+ * args: perfmon_reading_t *r, int col. Despite its name, col is the row
+ * index chosen by the caller and follows the order of column_headers below:
+ *   0 RunTime, 1 Reads (MB/s), 2 Writes (MB/s), 3 Total (MB/s).
+ * Rates are computed as count * 64 * 1e-6 / tr and are left blank when
+ * time_running is zero, which avoids dividing by zero.
+ * NOTE(review): the 1e-9 factor suggests time_running is in nanoseconds and
+ * the factor 64 suggests 64 bytes per counted CAS operation - confirm. */
+static u8 *
+format_intel_uncore_imc_bw (u8 *s, va_list *args)
+{
+  perfmon_reading_t *r = va_arg (*args, perfmon_reading_t *);
+  int col = va_arg (*args, int);
+  f64 tr = r->time_running * 1e-9;
+
+  switch (col)
+    {
+    case 0:
+      s = format (s, "%9.2f", tr);
+      break;
+    case 1:
+      if (r->time_running)
+       s = format (s, "%9.2f", (f64) r->value[0] * 64 * 1e-6 / tr);
+      break;
+    case 2:
+      if (r->time_running)
+       s = format (s, "%9.2f", (f64) r->value[1] * 64 * 1e-6 / tr);
+      break;
+    case 3:
+      if (r->time_running)
+       s = format (s, "%9.2f",
+                   (f64) (r->value[0] + r->value[1]) * 64 * 1e-6 / tr);
+      break;
+    default:
+      break;
+    }
+
+  return s;
+}
+
+/* System-level bundle on the "intel-uncore" source: value[0] is the read
+ * CAS count and value[1] the write CAS count, per the events order. */
+PERFMON_REGISTER_BUNDLE (intel_uncore_imc_bw) = {
+  .name = "memory-bandwidth",
+  .description = "memory reads and writes per memory controller channel",
+  .source = "intel-uncore",
+  .type = PERFMON_BUNDLE_TYPE_SYSTEM,
+  .events[0] = INTEL_UNCORE_E_IMC_UNC_M_CAS_COUNT_RD,
+  .events[1] = INTEL_UNCORE_E_IMC_UNC_M_CAS_COUNT_WR,
+  .n_events = 2,
+  .format_fn = format_intel_uncore_imc_bw,
+  .column_headers = PERFMON_STRINGS ("RunTime", "Reads (MB/s)",
+                                    "Writes (MB/s)", "Total (MB/s)"),
+};
diff --git a/src/plugins/perfmon/intel/core.c b/src/plugins/perfmon/intel/core.c

new file mode 100644 (file)

index 0000000..b4267e8
--- /dev/null
+++ b/src/plugins/perfmon/intel/core.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+#include <linux/perf_event.h>
+
+/* Event table of the "intel-core" source. One entry is generated per line
+ * of foreach_perf_intel_core_event, indexed by INTEL_CORE_E_<n>_<suffix>:
+ * a raw perf event whose config is built by PERF_INTEL_CODE and whose name
+ * is "<n>.<suffix>". */
+static perfmon_event_t events[] = {
+#define _(event, umask, edge, any, inv, cmask, n, suffix, desc)               \
+  [INTEL_CORE_E_##n##_##suffix] = {                                           \
+    .type = PERF_TYPE_RAW,                                                    \
+    .config = PERF_INTEL_CODE (event, umask, edge, any, inv, cmask),          \
+    .name = #n "." #suffix,                                                   \
+    .description = desc,                                                      \
+  },
+
+  foreach_perf_intel_core_event
+#undef _
+};
+
+/* format helper that decodes a raw intel-core event config.
+ * args: u64 config. Always prints the event code (bits 0-7) and the umask
+ * (bits 8-15); edge (bit 18), pc (bit 19), any (bit 21), inv (bit 23) and
+ * cmask (bits 24-31) are only printed when non-zero. */
+u8 *
+format_intel_core_config (u8 *s, va_list *args)
+{
+  u64 config = va_arg (*args, u64);
+  u8 v;
+
+  /* %x consumes an int-sized argument, so the 64-bit sub-expressions are
+   * narrowed explicitly instead of being passed as u64 */
+  s = format (s, "event=0x%02x, umask=0x%02x", (u32) (config & 0xff),
+             (u32) ((config >> 8) & 0xff));
+
+  if ((v = (config >> 18) & 1))
+    s = format (s, ", edge=%u", v);
+
+  if ((v = (config >> 19) & 1))
+    s = format (s, ", pc=%u", v);
+
+  if ((v = (config >> 21) & 1))
+    s = format (s, ", any=%u", v);
+
+  if ((v = (config >> 23) & 1))
+    s = format (s, ", inv=%u", v);
+
+  if ((v = (config >> 24) & 0xff))
+    s = format (s, ", cmask=0x%02x", v);
+
+  return s;
+}
+
+/* init_fn of the "intel-core" source: accepts only CPUs whose CPUID leaf 0
+ * vendor string is "GenuineIntel".
+ * Returns 0 on success, or a clib error when CPUID leaf 0 is unavailable
+ * or the vendor string does not match. src is not used. */
+static clib_error_t *
+intel_core_init (vlib_main_t *vm, perfmon_source_t *src)
+{
+  u32 eax, ebx, ecx, edx;
+  if (__get_cpuid (0, &eax, &ebx, &ecx, &edx) == 0)
+    return clib_error_return (0, "unknown CPU (missing cpuid)");
+
+  /* vendor string "GenuineIntel" as little-endian ASCII words:
+   * ebx = "Genu", edx = "ineI", ecx = "ntel" */
+  if (ebx != 0x756e6547 || ecx != 0x6c65746e || edx != 0x49656e69)
+    return clib_error_return (0, "not a IA-32 CPU");
+  return 0;
+}
+
+/* Registers the "intel-core" source with the event table defined in this
+ * file, the CPU vendor check above and the raw-config formatter. */
+PERFMON_REGISTER_SOURCE (intel_core) = {
+  .name = "intel-core",
+  .description = "intel arch core events",
+  .events = events,
+  .n_events = ARRAY_LEN (events),
+  .init_fn = intel_core_init,
+  .format_config = format_intel_core_config,
+};
diff --git a/src/plugins/perfmon/intel/core.h b/src/plugins/perfmon/intel/core.h

new file mode 100644 (file)

index 0000000..d7fd9e0
--- /dev/null
+++ b/src/plugins/perfmon/intel/core.h
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __perfmon_intel_h
+#define __perfmon_intel_h
+
+/* Builds an event config word: event code in the low bits, umask shifted to
+ * bit 8, edge to bit 18, any to bit 21, inv to bit 23 and cmask to bit 24.
+ * The same token sequence is also defined in perfmon/intel/uncore.h and
+ * perfmon/intel/uncore.c; keep the copies identical.
+ * NOTE(review): arguments are shifted with their own type, so an int cmask
+ * of 0x80 or more would shift into the sign bit - confirm before adding
+ * such events. */
+#define PERF_INTEL_CODE(event, umask, edge, any, inv, cmask)                  \
+  ((event) | (umask) << 8 | (edge) << 18 | (any) << 21 | (inv) << 23 |        \
+   (cmask) << 24)
+
+/* List of Intel core events. Columns of each entry:
+ * EventCode, UMask, EdgeDetect, AnyThread, Invert, CounterMask,
+ * name, suffix, description */
+#define foreach_perf_intel_core_event                                         \
+  _ (0x00, 0x02, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, THREAD,                     \
+     "Core cycles when the thread is not in halt state")                      \
+  _ (0x00, 0x03, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, REF_TSC,                    \
+     "Reference cycles when the core is not in halt state.")                  \
+  _ (0x03, 0x02, 0, 0, 0, 0x00, LD_BLOCKS, STORE_FORWARD,                     \
+     "Loads blocked due to overlapping with a preceding store that cannot be" \
+     " forwarded.")                                                           \
+  _ (0x03, 0x08, 0, 0, 0, 0x00, LD_BLOCKS, NO_SR,                             \
+     "The number of times that split load operations are temporarily "        \
+     "blocked "                                                               \
+     "because all resources for handling the split accesses are in use.")     \
+  _ (0x07, 0x01, 0, 0, 0, 0x00, LD_BLOCKS_PARTIAL, ADDRESS_ALIAS,             \
+     "False dependencies in MOB due to partial compare on address.")          \
+  _ (0x08, 0x01, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, MISS_CAUSES_A_WALK,         \
+     "Load misses in all DTLB levels that cause page walks")                  \
+  _ (0x08, 0x02, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, WALK_COMPLETED_4K,          \
+     "Page walk completed due to a demand data load to a 4K page")            \
+  _ (0x08, 0x04, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, WALK_COMPLETED_2M_4M,       \
+     "Page walk completed due to a demand data load to a 2M/4M page")         \
+  _ (0x08, 0x08, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, WALK_COMPLETED_1G,          \
+     "Page walk completed due to a demand data load to a 1G page")            \
+  _ (0x08, 0x0E, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, WALK_COMPLETED,             \
+     "Load miss in all TLB levels causes a page walk that completes. (All "   \
+     "page sizes)")                                                           \
+  _ (0x08, 0x10, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, WALK_PENDING,               \
+     "Counts 1 per cycle for each PMH that is busy with a page walk for a "   \
+     "load. EPT page walk duration are excluded in Skylake.")                 \
+  _ (0x08, 0x20, 0, 0, 0, 0x00, DTLB_LOAD_MISSES, STLB_HIT,                   \
+     "Loads that miss the DTLB and hit the STLB.")                            \
+  _ (0x0D, 0x01, 0, 0, 0, 0x00, INT_MISC, RECOVERY_CYCLES,                    \
+     "Core cycles the allocator was stalled due to recovery from earlier "    \
+     "clear event for this thread (e.g. misprediction or memory nuke)")       \
+  _ (0x0E, 0x01, 0, 0, 0, 0x00, UOPS_ISSUED, ANY,                             \
+     "Uops that Resource Allocation Table (RAT) issues to Reservation "       \
+     "Station (RS)")                                                          \
+  _ (0x28, 0x07, 0, 0, 0, 0x00, CORE_POWER, LVL0_TURBO_LICENSE,               \
+     "Core cycles where the core was running in a manner where Turbo may be " \
+     "clipped to the Non-AVX turbo schedule.")                                \
+  _ (0x28, 0x18, 0, 0, 0, 0x00, CORE_POWER, LVL1_TURBO_LICENSE,               \
+     "Core cycles where the core was running in a manner where Turbo may be " \
+     "clipped to the AVX2 turbo schedule.")                                   \
+  _ (0x28, 0x20, 0, 0, 0, 0x00, CORE_POWER, LVL2_TURBO_LICENSE,               \
+     "Core cycles where the core was running in a manner where Turbo may be " \
+     "clipped to the AVX512 turbo schedule.")                                 \
+  _ (0x28, 0x40, 0, 0, 0, 0x00, CORE_POWER, THROTTLE,                         \
+     "Core cycles the core was throttled due to a pending power level "       \
+     "request.")                                                              \
+  _ (0x3C, 0x00, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, THREAD_P,                   \
+     "Thread cycles when thread is not in halt state")                        \
+  _ (0x3C, 0x00, 0, 1, 0, 0x00, CPU_CLK_UNHALTED, THREAD_P_ANY,               \
+     "Core cycles when at least one thread on the physical core is not in "   \
+     "halt state.")                                                           \
+  _ (0x3C, 0x00, 1, 0, 0, 0x01, CPU_CLK_UNHALTED, RING0_TRANS,                \
+     "Counts when there is a transition from ring 1, 2 or 3 to ring 0.")      \
+  _ (0x48, 0x01, 0, 0, 0, 0x01, L1D_PEND_MISS, PENDING_CYCLES,                \
+     "Cycles with L1D load Misses outstanding.")                              \
+  _ (0x48, 0x01, 0, 0, 0, 0x00, L1D_PEND_MISS, PENDING,                       \
+     "L1D miss outstandings duration in cycles")                              \
+  _ (0x48, 0x02, 0, 0, 0, 0x00, L1D_PEND_MISS, FB_FULL,                       \
+     "Number of times a request needed a FB entry but there was no entry "    \
+     "available for it. That is the FB unavailability was dominant reason "   \
+     "for blocking the request. A request includes cacheable/uncacheable "    \
+     "demands that is load, store or SW prefetch.")                           \
+  _ (0x51, 0x01, 0, 0, 0, 0x00, L1D, REPLACEMENT,                             \
+     "L1D data line replacements")                                            \
+  _ (0x51, 0x04, 0, 0, 0, 0x00, L1D, M_EVICT, "L1D data line evictions")      \
+  _ (0x83, 0x02, 0, 0, 0, 0x00, ICACHE_64B, IFTAG_MISS,                       \
+     "Instruction fetch tag lookups that miss in the instruction cache "      \
+     "(L1I). Counts at 64-byte cache-line granularity.")                      \
+  _ (0x9C, 0x01, 0, 0, 0, 0x00, IDQ_UOPS_NOT_DELIVERED, CORE,                 \
+     "Uops not delivered to Resource Allocation Table (RAT) per thread when " \
+     "backend of the machine is not stalled")                                 \
+  _ (0xC0, 0x00, 0, 0, 0, 0x00, INST_RETIRED, ANY_P,                          \
+     "Number of instructions retired. General Counter - architectural event") \
+  _ (0xC2, 0x02, 0, 0, 0, 0x00, UOPS_RETIRED, RETIRE_SLOTS,                   \
+     "Retirement slots used.")                                                \
+  _ (0xD0, 0x81, 0, 0, 0, 0x00, MEM_INST_RETIRED, ALL_LOADS,                  \
+     "All retired load instructions.")                                        \
+  _ (0xD0, 0x82, 0, 0, 0, 0x00, MEM_INST_RETIRED, ALL_STORES,                 \
+     "All retired store instructions.")                                       \
+  _ (0xD1, 0x01, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L1_HIT,                     \
+     "Retired load instructions with L1 cache hits as data sources")          \
+  _ (0xD1, 0x02, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L2_HIT,                     \
+     "Retired load instructions with L2 cache hits as data sources")          \
+  _ (0xD1, 0x04, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L3_HIT,                     \
+     "Retired load instructions with L3 cache hits as data sources")          \
+  _ (0xD1, 0x08, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L1_MISS,                    \
+     "Retired load instructions missed L1 cache as data sources")             \
+  _ (0xD1, 0x10, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L2_MISS,                    \
+     "Retired load instructions missed L2 cache as data sources")             \
+  _ (0xD1, 0x20, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L3_MISS,                    \
+     "Retired load instructions missed L3 cache as data sources")             \
+  _ (0xD1, 0x40, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, FB_HIT,                     \
+     "Retired load instructions which data sources were load missed L1 but "  \
+     "hit FB due to preceding miss to the same cache line with data not "     \
+     "ready")                                                                 \
+  _ (0xD2, 0x01, 0, 0, 0, 0x00, MEM_LOAD_L3_HIT_RETIRED, XSNP_MISS,           \
+     "Retired load instructions which data sources were L3 hit and cross-"    \
+     "core snoop missed in on-pkg core cache.")                               \
+  _ (0xD2, 0x02, 0, 0, 0, 0x00, MEM_LOAD_L3_HIT_RETIRED, XSNP_HIT,            \
+     "Retired load instructions which data sources were L3 and cross-core "   \
+     "snoop hits in on-pkg core cache")                                       \
+  _ (0xD2, 0x04, 0, 0, 0, 0x00, MEM_LOAD_L3_HIT_RETIRED, XSNP_HITM,           \
+     "Retired load instructions which data sources were HitM responses from " \
+     "shared L3")                                                             \
+  _ (0xD2, 0x08, 0, 0, 0, 0x00, MEM_LOAD_L3_HIT_RETIRED, XSNP_NONE,           \
+     "Retired load instructions which data sources were hits in L3 without "  \
+     "snoops required")                                                       \
+  _ (0xD3, 0x01, 0, 0, 0, 0x00, MEM_LOAD_L3_MISS_RETIRED, LOCAL_DRAM,         \
+     "Retired load instructions which data sources missed L3 but serviced "   \
+     "from local dram")                                                       \
+  _ (0xD3, 0x02, 0, 0, 0, 0x00, MEM_LOAD_L3_MISS_RETIRED, REMOTE_DRAM,        \
+     "Retired load instructions which data sources missed L3 but serviced "   \
+     "from remote dram")                                                      \
+  _ (0xD3, 0x04, 0, 0, 0, 0x00, MEM_LOAD_L3_MISS_RETIRED, REMOTE_HITM,        \
+     "Retired load instructions whose data sources was remote HITM")          \
+  _ (0xD3, 0x08, 0, 0, 0, 0x00, MEM_LOAD_L3_MISS_RETIRED, REMOTE_FWD,         \
+     "Retired load instructions whose data sources was forwarded from a "     \
+     "remote cache")                                                          \
+  _ (0xF0, 0x40, 0, 0, 0, 0x00, L2_TRANS, L2_WB,                              \
+     "L2 writebacks that access L2 cache")                                    \
+  _ (0xF1, 0x1F, 0, 0, 0, 0x00, L2_LINES_IN, ALL,                             \
+     "L2 cache lines filling L2")                                             \
+  _ (0xFE, 0x02, 0, 0, 0, 0x00, IDI_MISC, WB_UPGRADE,                         \
+     "Counts number of cache lines that are allocated and written back to L3" \
+     " with the intention that they are more likely to be reused shortly")    \
+  _ (0xFE, 0x04, 0, 0, 0, 0x00, IDI_MISC, WB_DOWNGRADE,                       \
+     "Counts number of cache lines that are dropped and not written back to " \
+     "L3 as they are deemed to be less likely to be reused shortly")
+
+/* One INTEL_CORE_E_<name>_<suffix> value per entry of
+ * foreach_perf_intel_core_event, in list order; INTEL_CORE_N_EVENTS is the
+ * number of entries. */
+typedef enum
+{
+#define _(event, umask, edge, any, inv, cmask, name, suffix, desc)            \
+  INTEL_CORE_E_##name##_##suffix,
+  foreach_perf_intel_core_event
+#undef _
+    INTEL_CORE_N_EVENTS,
+} perf_intel_core_event_t;
+
+#endif
diff --git a/src/plugins/perfmon/intel/uncore.c b/src/plugins/perfmon/intel/uncore.c

new file mode 100644 (file)

index 0000000..e8939cb
--- /dev/null
+++ b/src/plugins/perfmon/intel/uncore.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+#include <perfmon/intel/uncore.h>
+
+/* Log class "perfmon", subclass "intel-uncore", used by this file. */
+VLIB_REGISTER_LOG_CLASS (if_intel_uncore_log, static) = {
+  .class_name = "perfmon",
+  .subclass_name = "intel-uncore",
+};
+
+/* Logging shorthands bound to the class above. __VA_ARGS__ is used without
+ * "##", so each call needs at least one argument after the format string. */
+#define log_debug(fmt, ...)                                                   \
+  vlib_log_debug (if_intel_uncore_log.class, fmt, __VA_ARGS__)
+#define log_warn(fmt, ...)                                                    \
+  vlib_log_warn (if_intel_uncore_log.class, fmt, __VA_ARGS__)
+#define log_err(fmt, ...)                                                     \
+  vlib_log_err (if_intel_uncore_log.class, fmt, __VA_ARGS__)
+
+/* Same token sequence as the PERF_INTEL_CODE definitions in
+ * perfmon/intel/core.h and perfmon/intel/uncore.h, both included above; the
+ * copies must stay identical. Nothing visible in this file uses it, as the
+ * event table below builds its config as (event) | (umask) << 8. */
+#define PERF_INTEL_CODE(event, umask, edge, any, inv, cmask)                  \
+  ((event) | (umask) << 8 | (edge) << 18 | (any) << 21 | (inv) << 23 |        \
+   (cmask) << 24)
+
+/* Event table of the "intel-uncore" source, generated from
+ * foreach_intel_uncore_event. Entries are stored at index
+ * INTEL_UNCORE_E_<unit>_<name>_<suffix> and named "<name>.<suffix>". They
+ * set type_from_instance, so the perf type is taken from the instance the
+ * event is opened on, and instance_type selects the unit type. */
+static perfmon_event_t intel_uncore_events[] = {
+#define _(unit, event, umask, n, suffix, desc)                                \
+  [INTEL_UNCORE_E_##unit##_##n##_##suffix] = {                                \
+    .config = (event) | (umask) << 8,                                         \
+    .name = #n "." #suffix,                                                   \
+    .description = desc,                                                      \
+    .type_from_instance = 1,                                                  \
+    .instance_type = INTEL_UNCORE_UNIT_##unit,                                \
+  },
+
+  foreach_intel_uncore_event
+#undef _
+};
+
+/* Sort callback: orders two perfmon_instance_t elements by their name
+ * strings, using strcmp() ordering. */
+static int
+intel_uncore_instance_name_cmp (void *v1, void *v2)
+{
+  const char *lhs = ((perfmon_instance_t *) v1)->name;
+  const char *rhs = ((perfmon_instance_t *) v2)->name;
+
+  return strcmp (lhs, rhs);
+}
+
+/*
+ * Discover all units of one uncore type and add their instances to 'src'.
+ *
+ * Probes /sys/bus/event_source/devices/uncore_<name>_<i> for i = 0, 1, ...
+ * and stops at the first index whose 'type' or 'cpumask' file cannot be
+ * read. For every CPU in a unit's cpumask one perfmon_instance_t is appended
+ * to src->instances_by_type[u], carrying the perf type read from sysfs, that
+ * CPU and pid -1.
+ *
+ * src              - source whose instances_by_type vector is extended
+ * u                - unit type, index into src->instances_by_type
+ * name             - unit name as used in the sysfs path
+ * type_str         - name stored for this instance type
+ * fmt              - format string for the instance name; it receives the
+ *                    socket of the CPU and the unit index, in that order
+ * socket_by_cpu_id - vector indexed by CPU id giving the socket id
+ */
+static void
+intel_uncore_add_unit (perfmon_source_t *src, intel_uncore_unit_type_t u,
+                      char *name, char *type_str, char *fmt,
+                      int *socket_by_cpu_id)
+{
+  static char *base_path = "/sys/bus/event_source/devices/uncore";
+  clib_error_t *err;
+  clib_bitmap_t *cpumask = 0;
+  perfmon_instance_t *in;
+  perfmon_instance_type_t *it;
+  u8 *s = 0;
+  int i = 0, j;
+  u32 perf_type;
+
+  vec_validate (src->instances_by_type, u);
+  it = vec_elt_at_index (src->instances_by_type, u);
+  it->name = type_str;
+
+  while (1)
+    {
+      s = format (s, "%s_%s_%u/type%c", base_path, name, i, 0);
+      if ((err = clib_sysfs_read ((char *) s, "%u", &perf_type)))
+       break;
+      vec_reset_length (s);
+
+      s = format (s, "%s_%s_%u/cpumask%c", base_path, name, i, 0);
+      if ((err = clib_sysfs_read ((char *) s, "%U", unformat_bitmap_list,
+                                 &cpumask)))
+       break;
+      vec_reset_length (s);
+
+      clib_bitmap_foreach (j, cpumask)
+       {
+         /* never index past the end of the cpu -> socket map; unknown
+          * CPUs get -1, the same value the caller uses for holes */
+         int socket = -1;
+         if (j < vec_len (socket_by_cpu_id))
+           socket = socket_by_cpu_id[j];
+
+         vec_add2 (it->instances, in, 1);
+         in->type = perf_type;
+         in->cpu = j;
+         in->pid = -1;
+         in->name = (char *) format (0, fmt, socket, i);
+         vec_terminate_c_string (in->name);
+         log_debug ("found %s %s", type_str, in->name);
+       }
+
+      /* the next read stores a fresh bitmap in 'cpumask', so release this
+       * one now instead of leaking one bitmap per unit */
+      clib_bitmap_free (cpumask);
+      i++;
+    };
+
+  /* the loop is only left through a failed read, so err is set here */
+  clib_error_free (err);
+  clib_bitmap_free (cpumask);
+  vec_free (s);
+}
+
+/*
+ * Init function of the "intel-uncore" source.
+ *
+ * Builds a CPU id -> numa node map from /sys/devices/system/node/has_cpu and
+ * the per-node cpulist files, then calls intel_uncore_add_unit() for every
+ * entry of foreach_intel_uncore_unit_type and sorts the instances of each
+ * type by name.
+ *
+ * Returns 0 on success, or an error when the numa topology cannot be read
+ * or no uncore instance was found. All temporary vectors and bitmaps are
+ * released on every path that allocated them.
+ */
+static clib_error_t *
+intel_uncore_init (vlib_main_t *vm, perfmon_source_t *src)
+{
+  clib_error_t *err = 0;
+  clib_bitmap_t *node_bitmap = 0, *cpumask = 0;
+  int *numa_by_cpu_id = 0;
+  u32 i, j, n_instances = 0;
+  u8 *s = 0;
+
+  if ((err = clib_sysfs_read ("/sys/devices/system/node/has_cpu", "%U",
+                             unformat_bitmap_list, &node_bitmap)))
+    {
+      clib_error_free (err);
+      return clib_error_return (0, "failed to discover numa topology");
+    }
+
+  clib_bitmap_foreach (i, node_bitmap)
+    {
+      s = format (s, "/sys/devices/system/node/node%u/cpulist%c", i, 0);
+      if ((err = clib_sysfs_read ((char *) s, "%U", unformat_bitmap_list,
+                                 &cpumask)))
+       {
+         clib_error_free (err);
+         err = clib_error_return (0, "failed to discover numa topology");
+         goto done;
+       }
+
+      clib_bitmap_foreach (j, cpumask)
+       {
+         /* CPU ids not listed by any node keep the -1 filler */
+         vec_validate_init_empty (numa_by_cpu_id, j, -1);
+         numa_by_cpu_id[j] = i;
+       }
+      clib_bitmap_free (cpumask);
+      vec_reset_length (s);
+    }
+
+#define _(t, n, name, fmt)                                                    \
+  intel_uncore_add_unit (src, INTEL_UNCORE_UNIT_##t, n, name, fmt,            \
+                        numa_by_cpu_id);
+  foreach_intel_uncore_unit_type;
+#undef _
+
+  for (i = 0; i < vec_len (src->instances_by_type); i++)
+    {
+      perfmon_instance_type_t *it;
+
+      it = vec_elt_at_index (src->instances_by_type, i);
+      vec_sort_with_function (it->instances, intel_uncore_instance_name_cmp);
+      n_instances += vec_len (it->instances);
+    }
+
+  if (n_instances == 0)
+    {
+      /* fall through to the common cleanup instead of returning early,
+       * which used to leak s, node_bitmap and numa_by_cpu_id */
+      vec_free (src->instances_by_type);
+      err = clib_error_return (0, "no uncore units found");
+    }
+
+done:
+  vec_free (s);
+  clib_bitmap_free (cpumask);
+  clib_bitmap_free (node_bitmap);
+  vec_free (numa_by_cpu_id);
+  return err;
+}
+
+/* Declared here for the registration below; this file does not define it. */
+format_function_t format_intel_core_config;
+
+/* Registers the "intel-uncore" source: exposes intel_uncore_events[], uses
+ * intel_uncore_init as its init function and prints event config words with
+ * format_intel_core_config. */
+PERFMON_REGISTER_SOURCE (intel_uncore) = {
+  .name = "intel-uncore",
+  .description = "intel uncore events",
+  .events = intel_uncore_events,
+  .n_events = INTEL_UNCORE_N_EVENTS,
+  .init_fn = intel_uncore_init,
+  .format_config = format_intel_core_config,
+};
diff --git a/src/plugins/perfmon/intel/uncore.h b/src/plugins/perfmon/intel/uncore.h

new file mode 100644 (file)

index 0000000..03227d6
--- /dev/null
+++ b/src/plugins/perfmon/intel/uncore.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __perfmon_intel_uncore_h__
+#define __perfmon_intel_uncore_h__
+
+/* List of uncore unit types. Columns of each entry:
+ * type id, unit name, type description, instance name format string.
+ * Each "%u/%u" format string has two unsigned conversions. */
+#define foreach_intel_uncore_unit_type                                        \
+  _ (IMC, "imc", "integrated Memory Controller (iMC)", "iMC%u/%u")            \
+  _ (UPI, "upi", "Ultra Path Interconnect (UPI)", "UPI%u/%u")
+
+/* One INTEL_UNCORE_UNIT_<type> value per entry of
+ * foreach_intel_uncore_unit_type, in list order; INTEL_UNCORE_N_UNITS is
+ * the number of entries. */
+typedef enum
+{
+#define _(t, n, name, fmt) INTEL_UNCORE_UNIT_##t,
+  foreach_intel_uncore_unit_type
+#undef _
+    INTEL_UNCORE_N_UNITS,
+} intel_uncore_unit_type_t;
+
+/* Builds an event config word: event code in the low bits, umask shifted to
+ * bit 8, edge to bit 18, any to bit 21, inv to bit 23 and cmask to bit 24.
+ * The same token sequence is also defined in perfmon/intel/core.h; keep the
+ * copies identical. */
+#define PERF_INTEL_CODE(event, umask, edge, any, inv, cmask)                  \
+  ((event) | (umask) << 8 | (edge) << 18 | (any) << 21 | (inv) << 23 |        \
+   (cmask) << 24)
+
+/* List of uncore events. Columns of each entry:
+ * unit type, EventCode, UMask, name, suffix, description */
+#define foreach_intel_uncore_event                                            \
+  _ (IMC, 0x04, 0x03, UNC_M_CAS_COUNT, RD,                                    \
+     "All DRAM Read CAS Commands issued (including underfills)")              \
+  _ (IMC, 0x04, 0x0c, UNC_M_CAS_COUNT, WR,                                    \
+     "All DRAM Write CAS commands issued")                                    \
+  _ (IMC, 0x04, 0x0f, UNC_M_CAS_COUNT, ALL, "All DRAM CAS commands issued")
+
+/* One INTEL_UNCORE_E_<unit>_<name>_<suffix> value per entry of
+ * foreach_intel_uncore_event, in list order; INTEL_UNCORE_N_EVENTS is the
+ * number of entries. */
+typedef enum
+{
+#define _(unit, event, umask, name, suffix, desc)                             \
+  INTEL_UNCORE_E_##unit##_##name##_##suffix,
+  foreach_intel_uncore_event
+#undef _
+    INTEL_UNCORE_N_EVENTS,
+} perfmon_intel_uncore_event_index_t;
+
+#endif
diff --git a/src/plugins/perfmon/linux.c b/src/plugins/perfmon/linux.c

new file mode 100644 (file)

index 0000000..3715267
--- /dev/null
+++ b/src/plugins/perfmon/linux.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vpp/app/version.h>
+#include <linux/limits.h>
+#include <sys/ioctl.h>
+
+#include <perfmon/perfmon.h>
+
+#include <linux/perf_event.h>
+
+/* List of kernel software counters. Columns of each entry: identifier
+ * (also pasted onto PERF_COUNT_SW_ to form the perf config value) and the
+ * event name string. */
+#define foreach_perf_sw_counter                                               \
+  _ (CONTEXT_SWITCHES, "context-switches")                                    \
+  _ (PAGE_FAULTS_MIN, "page-faults-minor")                                    \
+  _ (PAGE_FAULTS_MAJ, "page-faults-major")
+
+/* One enumerator per entry of foreach_perf_sw_counter, in list order; the
+ * values index the events[] table below. */
+typedef enum
+{
+#define _(n, s) n,
+  foreach_perf_sw_counter
+#undef _
+} linux_sw_events;
+
+/* Event table of the "linux" source: one PERF_TYPE_SOFTWARE event per entry
+ * of foreach_perf_sw_counter, with config PERF_COUNT_SW_<identifier>. */
+static perfmon_event_t events[] = {
+#define _(n, s)                                                               \
+  [n] = { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##n, .name = s },
+  foreach_perf_sw_counter
+#undef _
+};
+
+/* Registers the "linux" source with the software events above. It sets no
+ * init_fn and no format_config. */
+PERFMON_REGISTER_SOURCE (linux) = {
+  .name = "linux",
+  .description = "Linux kernel performance counters",
+  .events = events,
+  .n_events = ARRAY_LEN (events),
+};
+
+/*
+ * Format function for the "context-switches" bundle.
+ *
+ * Arguments: perfmon_reading_t * and an int column index. Column 0 prints
+ * time_running scaled by 1e-9, column 1 prints value[0] divided by that
+ * time (nothing is printed when time_running is 0). Other columns leave
+ * 's' untouched.
+ */
+static u8 *
+format_context_switches (u8 *s, va_list *args)
+{
+  perfmon_reading_t *reading = va_arg (*args, perfmon_reading_t *);
+  int column = va_arg (*args, int);
+  f64 run_time = (f64) reading->time_running * 1e-9;
+
+  if (column == 0)
+    return format (s, "%9.2f", run_time);
+
+  if (column == 1 && reading->time_running)
+    return format (s, "%9.2f", (f64) reading->value[0] / run_time);
+
+  return s;
+}
+
+/* Per-thread bundle on the "linux" source with a single event,
+ * CONTEXT_SWITCHES, formatted by format_context_switches into the two
+ * columns named below. */
+PERFMON_REGISTER_BUNDLE (context_switches) = {
+  .name = "context-switches",
+  .description = "per-thread context switches",
+  .source = "linux",
+  .type = PERFMON_BUNDLE_TYPE_THREAD,
+  .events[0] = CONTEXT_SWITCHES,
+  .n_events = 1,
+  .format_fn = format_context_switches,
+  .column_headers = PERFMON_STRINGS ("RunTime", "ContextSwitches/Sec"),
+};
+
+/*
+ * Format function for the "page-faults" bundle.
+ *
+ * Arguments: perfmon_reading_t * and an int column index. Column 0 prints
+ * time_running scaled by 1e-9; columns 1 and 2 print value[0] and value[1]
+ * divided by that time (nothing is printed when time_running is 0). Other
+ * columns leave 's' untouched.
+ */
+static u8 *
+format_page_faults (u8 *s, va_list *args)
+{
+  perfmon_reading_t *reading = va_arg (*args, perfmon_reading_t *);
+  int column = va_arg (*args, int);
+  f64 run_time = (f64) reading->time_running * 1e-9;
+
+  if (column == 0)
+    return format (s, "%9.2f", run_time);
+
+  /* columns 1 and 2 map onto counter values 0 and 1 */
+  if ((column == 1 || column == 2) && reading->time_running)
+    return format (s, "%9.2f", (f64) reading->value[column - 1] / run_time);
+
+  return s;
+}
+
+/* Per-thread bundle on the "linux" source with two events, PAGE_FAULTS_MIN
+ * and PAGE_FAULTS_MAJ, formatted by format_page_faults into the three
+ * columns named below. */
+PERFMON_REGISTER_BUNDLE (page_faults) = {
+  .name = "page-faults",
+  .description = "per-thread page faults",
+  .source = "linux",
+  .type = PERFMON_BUNDLE_TYPE_THREAD,
+  .events[0] = PAGE_FAULTS_MIN,
+  .events[1] = PAGE_FAULTS_MAJ,
+  .n_events = 2,
+  .format_fn = format_page_faults,
+  .column_headers = PERFMON_STRINGS ("RunTime", "MinorPageFaults/Sec",
+                                    "MajorPageFaults/Sec"),
+};
diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c

new file mode 100644 (file)

index 0000000..07a4ae6
--- /dev/null
+++ b/src/plugins/perfmon/perfmon.c
@@ -0,0 +1,336 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <linux/limits.h>
+#include <sys/ioctl.h>
+
+#include <perfmon/perfmon.h>
+
+/* Global plugin state, used by all functions in this file. */
+perfmon_main_t perfmon_main;
+
+/* Plugin registration record: build version and description string. */
+VLIB_PLUGIN_REGISTER () = {
+  .version = VPP_BUILD_VER,
+  .description = "Performance Monitor",
+};
+
+/* Log class "perfmon" used by this file. */
+VLIB_REGISTER_LOG_CLASS (if_default_log, static) = {
+  .class_name = "perfmon",
+};
+
+/* Logging shorthands bound to the class above. __VA_ARGS__ is used without
+ * "##", so each call needs at least one argument after the format string. */
+#define log_debug(fmt, ...)                                                   \
+  vlib_log_debug (if_default_log.class, fmt, __VA_ARGS__)
+#define log_warn(fmt, ...)                                                    \
+  vlib_log_warn (if_default_log.class, fmt, __VA_ARGS__)
+#define log_err(fmt, ...) vlib_log_err (if_default_log.class, fmt, __VA_ARGS__)
+
+/*
+ * Tear down the current perfmon configuration.
+ *
+ * Removes the node dispatch wrapper from all vlib mains when collection is
+ * running, closes every fd recorded in fds_to_close, frees the group fd
+ * vector and the default instance type (including instance names), unmaps
+ * the per-event mmap pages and frees the per-thread runtimes. Afterwards
+ * is_running, active_instance_type and active_bundle are cleared.
+ */
+void
+perfmon_reset (vlib_main_t *vm)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  uword page_size = clib_mem_get_page_size ();
+
+  if (pm->is_running)
+    for (int i = 0; i < vec_len (vlib_mains); i++)
+      vlib_node_set_dispatch_wrapper (vlib_mains[i], 0);
+
+  for (int i = 0; i < vec_len (pm->fds_to_close); i++)
+    close (pm->fds_to_close[i]);
+  vec_free (pm->fds_to_close);
+  vec_free (pm->group_fds);
+  if (pm->default_instance_type)
+    {
+      perfmon_instance_type_t *it = pm->default_instance_type;
+      for (int i = 0; i < vec_len (it->instances); i++)
+       vec_free (it->instances[i].name);
+      vec_free (it->instances);
+      vec_free (pm->default_instance_type);
+    }
+
+  for (int i = 0; i < vec_len (pm->thread_runtimes); i++)
+    {
+      perfmon_thread_runtime_t *tr = vec_elt_at_index (pm->thread_runtimes, i);
+      vec_free (tr->node_stats);
+      for (int j = 0; j < PERF_MAX_EVENTS; j++)
+       {
+         /* unmap the page of event j itself, not the mmap_pages array;
+          * a slot may hold MAP_FAILED after a failed mmap in perfmon_set */
+         if (tr->mmap_pages[j] && tr->mmap_pages[j] != MAP_FAILED)
+           munmap (tr->mmap_pages[j], page_size);
+       }
+    }
+  vec_free (pm->thread_runtimes);
+
+  pm->is_running = 0;
+  pm->active_instance_type = 0;
+  pm->active_bundle = 0;
+}
+
+/*
+ * Make bundle 'b' the active bundle and open its perf events.
+ *
+ * The previous configuration is torn down with perfmon_reset() first. When
+ * the bundle's source has no instances_by_type, a default instance type with
+ * one instance per vlib main (worker cpu id and lwp) is created; otherwise
+ * the instance type is taken from the bundle's first event. For every
+ * instance all bundle events are opened as one perf event group, created
+ * disabled; the first fd of a group becomes its group fd. For node bundles
+ * each event fd is additionally mmap()ed read-only and per-thread node
+ * statistics are allocated.
+ *
+ * Returns 0 on success. On failure the error is logged, everything is
+ * released again through perfmon_reset() and the error is returned.
+ */
+clib_error_t *
+perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
+{
+  clib_error_t *err = 0;
+  perfmon_main_t *pm = &perfmon_main;
+  perfmon_source_t *s;
+  int is_node = 0;
+  int n_nodes = vec_len (vm->node_main.nodes);
+  uword page_size = clib_mem_get_page_size ();
+  u32 instance_type = 0;
+  perfmon_event_t *e;
+  perfmon_instance_type_t *it = 0;
+
+  perfmon_reset (vm);
+
+  s = b->src;
+  ASSERT (b->n_events);
+
+  if (b->type == PERFMON_BUNDLE_TYPE_NODE)
+    is_node = 1;
+
+  if (s->instances_by_type == 0)
+    {
+      vec_add2 (pm->default_instance_type, it, 1);
+      it->name = is_node ? "Thread/Node" : "Thread";
+      for (int i = 0; i < vec_len (vlib_mains); i++)
+       {
+         vlib_worker_thread_t *w = vlib_worker_threads + i;
+         perfmon_instance_t *in;
+         vec_add2 (it->instances, in, 1);
+         in->cpu = w->cpu_id;
+         in->pid = w->lwp;
+         in->name = (char *) format (0, "%s (%u)%c", w->name, i, 0);
+       }
+      if (is_node)
+       vec_validate (pm->thread_runtimes, vec_len (vlib_mains) - 1);
+    }
+  else
+    {
+      e = s->events + b->events[0];
+
+      if (e->type_from_instance)
+       {
+         /* all events of the bundle must target the same instance type */
+         instance_type = e->instance_type;
+         for (int i = 1; i < b->n_events; i++)
+           {
+             e = s->events + b->events[i];
+             ASSERT (e->type_from_instance == 1 &&
+                     e->instance_type == instance_type);
+           }
+       }
+      it = vec_elt_at_index (s->instances_by_type, instance_type);
+    }
+
+  pm->active_instance_type = it;
+
+  for (int i = 0; i < vec_len (it->instances); i++)
+    {
+      perfmon_instance_t *in = vec_elt_at_index (it->instances, i);
+
+      vec_validate (pm->group_fds, i);
+      pm->group_fds[i] = -1;
+
+      for (int j = 0; j < b->n_events; j++)
+       {
+         int fd;
+         e = s->events + b->events[j];
+         struct perf_event_attr pe = {
+           .size = sizeof (struct perf_event_attr),
+           .type = e->type_from_instance ? in->type : e->type,
+           .config = e->config,
+           .exclude_kernel = e->exclude_kernel,
+           .read_format =
+             (PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED |
+              PERF_FORMAT_TOTAL_TIME_RUNNING),
+           .disabled = 1,
+         };
+
+         /* pe.config is 64 bits wide, hence the 'l' length modifier */
+         log_debug ("perf_event_open pe.type=%u pe.config=0x%lx pid=%d "
+                    "cpu=%d group_fd=%d",
+                    pe.type, pe.config, in->pid, in->cpu, pm->group_fds[i]);
+         fd = syscall (__NR_perf_event_open, &pe, in->pid, in->cpu,
+                       pm->group_fds[i], 0);
+
+         if (fd == -1)
+           {
+             err = clib_error_return_unix (0, "perf_event_open");
+             goto error;
+           }
+
+         vec_add1 (pm->fds_to_close, fd);
+
+         /* first event opened for an instance leads the group */
+         if (pm->group_fds[i] == -1)
+           pm->group_fds[i] = fd;
+
+         if (is_node)
+           {
+             perfmon_thread_runtime_t *tr;
+             tr = vec_elt_at_index (pm->thread_runtimes, i);
+             tr->mmap_pages[j] =
+               mmap (0, page_size, PROT_READ, MAP_SHARED, fd, 0);
+
+             if (tr->mmap_pages[j] == MAP_FAILED)
+               {
+                 err = clib_error_return_unix (0, "mmap");
+                 /* leave no MAP_FAILED behind for perfmon_reset() */
+                 tr->mmap_pages[j] = 0;
+                 goto error;
+               }
+           }
+       }
+
+      if (is_node)
+       {
+         perfmon_thread_runtime_t *rt;
+         rt = vec_elt_at_index (pm->thread_runtimes, i);
+         rt->n_events = b->n_events;
+         rt->n_nodes = n_nodes;
+         vec_validate_aligned (rt->node_stats, n_nodes - 1,
+                               CLIB_CACHE_LINE_BYTES);
+       }
+    }
+
+  pm->active_bundle = b;
+
+error:
+  if (err)
+    {
+      log_err ("%U", format_clib_error, err);
+      perfmon_reset (vm);
+    }
+  return err;
+}
+
+/*
+ * Start collection for the configured bundle.
+ *
+ * Enables every perf event group and, for node bundles, installs
+ * perfmon_dispatch_wrapper on all vlib mains.
+ *
+ * Returns 0 on success, or an error when no bundle is configured,
+ * collection is already running, or enabling a group fails (in which case
+ * the whole configuration is reset).
+ */
+clib_error_t *
+perfmon_start (vlib_main_t *vm)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  int n_groups = vec_len (pm->group_fds);
+
+  if (n_groups == 0)
+    return clib_error_return (0, "no bundle configured");
+
+  if (pm->is_running == 1)
+    return clib_error_return (0, "already running");
+
+  for (int i = 0; i < n_groups; i++)
+    {
+      if (ioctl (pm->group_fds[i], PERF_EVENT_IOC_ENABLE,
+                PERF_IOC_FLAG_GROUP) == -1)
+       {
+         /* capture errno before perfmon_reset() runs close()/munmap(),
+          * which may overwrite it */
+         clib_error_t *err =
+           clib_error_return_unix (0, "ioctl(PERF_EVENT_IOC_ENABLE)");
+         perfmon_reset (vm);
+         return err;
+       }
+    }
+  if (pm->active_bundle->type == PERFMON_BUNDLE_TYPE_NODE)
+    {
+      for (int i = 0; i < vec_len (vlib_mains); i++)
+       vlib_node_set_dispatch_wrapper (vlib_mains[i],
+                                       perfmon_dispatch_wrapper);
+    }
+  pm->is_running = 1;
+  return 0;
+}
+
+/*
+ * Stop a running collection.
+ *
+ * For node bundles the dispatch wrapper is removed from all vlib mains
+ * first; then every perf event group is disabled.
+ *
+ * Returns 0 on success, or an error when collection is not running or
+ * disabling a group fails (in which case the whole configuration is reset).
+ */
+clib_error_t *
+perfmon_stop (vlib_main_t *vm)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  int n_groups = vec_len (pm->group_fds);
+
+  if (pm->is_running != 1)
+    return clib_error_return (0, "not running");
+
+  if (pm->active_bundle->type == PERFMON_BUNDLE_TYPE_NODE)
+    {
+      for (int i = 0; i < vec_len (vlib_mains); i++)
+       vlib_node_set_dispatch_wrapper (vlib_mains[i], 0);
+    }
+
+  for (int i = 0; i < n_groups; i++)
+    {
+      if (ioctl (pm->group_fds[i], PERF_EVENT_IOC_DISABLE,
+                PERF_IOC_FLAG_GROUP) == -1)
+       {
+         /* capture errno before perfmon_reset() runs close()/munmap(),
+          * which may overwrite it */
+         clib_error_t *err =
+           clib_error_return_unix (0, "ioctl(PERF_EVENT_IOC_DISABLE)");
+         perfmon_reset (vm);
+         return err;
+       }
+    }
+
+  pm->is_running = 0;
+  return 0;
+}
+
+/*
+ * Plugin init: build the name lookup tables for sources and bundles.
+ *
+ * Walks the pm->sources list, runs each source's optional init_fn and adds
+ * the source to source_by_name unless init fails. Then walks pm->bundles,
+ * resolves each bundle's source by name, runs the optional bundle init_fn
+ * and adds the bundle to bundle_by_name. Sources or bundles that fail are
+ * skipped with a log message; duplicate names panic.
+ *
+ * Always returns 0.
+ */
+static clib_error_t *
+perfmon_init (vlib_main_t *vm)
+{
+  perfmon_main_t *pm = &perfmon_main;
+  perfmon_source_t *s;
+  perfmon_bundle_t *b;
+
+  pm->source_by_name = hash_create_string (0, sizeof (uword));
+  for (s = pm->sources; s; s = s->next)
+    {
+      clib_error_t *err;
+      if (hash_get_mem (pm->source_by_name, s->name) != 0)
+       clib_panic ("duplicate source name '%s'", s->name);
+      if (s->init_fn && ((err = (s->init_fn) (vm, s))))
+       {
+         log_warn ("skipping source '%s' - %U", s->name, format_clib_error,
+                   err);
+         clib_error_free (err);
+         continue;
+       }
+
+      hash_set_mem (pm->source_by_name, s->name, s);
+      log_debug ("source '%s' registered", s->name);
+    }
+
+  pm->bundle_by_name = hash_create_string (0, sizeof (uword));
+  for (b = pm->bundles; b; b = b->next)
+    {
+      clib_error_t *err;
+      uword *p;
+      if (hash_get_mem (pm->bundle_by_name, b->name) != 0)
+       clib_panic ("duplicate bundle name '%s'", b->name);
+
+      /* a bundle is only usable when its source was registered above */
+      if ((p = hash_get_mem (pm->source_by_name, b->source)) == 0)
+       {
+         log_debug ("missing source '%s', skipping bundle '%s'", b->source,
+                    b->name);
+         continue;
+       }
+
+      b->src = (perfmon_source_t *) p[0];
+      if (b->init_fn && ((err = (b->init_fn) (vm, b))))
+       {
+         log_warn ("skipping bundle '%s' - %U", b->name, format_clib_error,
+                   err);
+         clib_error_free (err);
+         continue;
+       }
+
+      hash_set_mem (pm->bundle_by_name, b->name, b);
+      log_debug ("bundle '%s' registered", b->name);
+    }
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (perfmon_init);
diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h

new file mode 100644 (file)

index 0000000..61e44ea
--- /dev/null
+++ b/src/plugins/perfmon/perfmon.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __perfmon_perfmon_h
+#define __perfmon_perfmon_h
+
+#include <linux/perf_event.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+#include <vlib/vlib.h>
+
+#define PERF_MAX_EVENTS 7 /* 3 fixed and 4 programmable */
+
+typedef enum
+{
+  PERFMON_BUNDLE_TYPE_UNKNOWN,
+  PERFMON_BUNDLE_TYPE_NODE,
+  PERFMON_BUNDLE_TYPE_THREAD,
+  PERFMON_BUNDLE_TYPE_SYSTEM,
+} perfmon_bundle_type_t;
+
+typedef struct
+{
+  u32 type_from_instance : 1;
+  u32 exclude_kernel : 1;
+  union
+  {
+    u32 type;
+    u32 instance_type;
+  };
+  u64 config;
+  char *name;
+  char *description;
+} perfmon_event_t;
+
+typedef struct
+{
+  u32 type;
+  int cpu;
+  pid_t pid;
+  char *name;
+} perfmon_instance_t;
+
+typedef struct
+{
+  char *name;
+  perfmon_instance_t *instances;
+} perfmon_instance_type_t;
+
+struct perfmon_source;
+vlib_node_function_t perfmon_dispatch_wrapper;
+
+typedef clib_error_t *(perfmon_source_init_fn_t) (vlib_main_t *vm,
+                                                 struct perfmon_source *);
+typedef struct perfmon_source
+{
+  char *name;
+  char *description;
+  struct perfmon_source *next;
+  perfmon_event_t *events;
+  u32 n_events;
+  perfmon_instance_type_t *instances_by_type;
+  format_function_t *format_config;
+  perfmon_source_init_fn_t *init_fn;
+} perfmon_source_t;
+
+struct perfmon_bundle;
+typedef clib_error_t *(perfmon_bundle_init_fn_t) (vlib_main_t *vm,
+                                                 struct perfmon_bundle *);
+typedef struct perfmon_bundle /* one registered bundle; see PERFMON_REGISTER_BUNDLE */
+{
+  char *name; /* key used in perfmon_main_t.bundle_by_name */
+  char *description;
+  char *source; /* source name; perfmon_init looks it up in source_by_name */
+  char *footer;
+  perfmon_bundle_type_t type;
+  u32 events[PERF_MAX_EVENTS];
+  u32 n_events;
+
+  perfmon_bundle_init_fn_t *init_fn; /* optional; an error return makes perfmon_init skip the bundle */
+
+  char **column_headers;
+  format_function_t *format_fn;
+
+  /* do not set manually */
+  perfmon_source_t *src; /* resolved from 'source' by perfmon_init */
+  struct perfmon_bundle *next; /* list link written by PERFMON_REGISTER_BUNDLE */
+} perfmon_bundle_t;
+
+typedef struct
+{
+  u64 nr;
+  u64 time_enabled;
+  u64 time_running;
+  u64 value[PERF_MAX_EVENTS];
+} perfmon_reading_t;
+
+typedef struct
+{
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+  u64 n_calls;
+  u64 n_packets;
+  u64 value[PERF_MAX_EVENTS];
+} perfmon_node_stats_t;
+
+typedef struct
+{
+  u8 n_events;
+  u16 n_nodes;
+  perfmon_node_stats_t *node_stats;
+  struct perf_event_mmap_page *mmap_pages[PERF_MAX_EVENTS];
+} perfmon_thread_runtime_t;
+
+typedef struct /* plugin state; an instance is declared as perfmon_main */
+{
+  perfmon_thread_runtime_t *thread_runtimes;
+  perfmon_bundle_t *bundles; /* list head, prepended to by PERFMON_REGISTER_BUNDLE */
+  uword *bundle_by_name; /* string hash name -> perfmon_bundle_t *, filled by perfmon_init */
+  perfmon_source_t *sources; /* list head, prepended to by PERFMON_REGISTER_SOURCE */
+  uword *source_by_name; /* string hash name -> perfmon_source_t *, filled by perfmon_init */
+  perfmon_bundle_t *active_bundle;
+  int is_running;
+  int *group_fds;
+  int *fds_to_close;
+  perfmon_instance_type_t *default_instance_type;
+  perfmon_instance_type_t *active_instance_type;
+} perfmon_main_t;
+
+extern perfmon_main_t perfmon_main;
+
+#define PERFMON_REGISTER_SOURCE(x)                                            \
+  perfmon_source_t __perfmon_source_##x;                                      \
+  static void __clib_constructor __perfmon_source_registration_##x (void)     \
+  {                                                                           \
+    perfmon_main_t *pm = &perfmon_main;                                       \
+    __perfmon_source_##x.next = pm->sources;                                  \
+    pm->sources = &__perfmon_source_##x;                                      \
+  }                                                                           \
+  perfmon_source_t __perfmon_source_##x
+
+#define PERFMON_REGISTER_BUNDLE(x)                                            \
+  perfmon_bundle_t __perfmon_bundle_##x;                                      \
+  static void __clib_constructor __perfmon_bundle_registration_##x (void)     \
+  {                                                                           \
+    perfmon_main_t *pm = &perfmon_main;                                       \
+    __perfmon_bundle_##x.next = pm->bundles;                                  \
+    pm->bundles = &__perfmon_bundle_##x;                                      \
+  }                                                                           \
+  perfmon_bundle_t __perfmon_bundle_##x
+
+void perfmon_reset (vlib_main_t *vm);
+clib_error_t *perfmon_set (vlib_main_t *vm, perfmon_bundle_t *);
+clib_error_t *perfmon_start (vlib_main_t *vm);
+clib_error_t *perfmon_stop (vlib_main_t *vm);
+
+#define PERFMON_STRINGS(...)                                                  \
+  (char *[]) { __VA_ARGS__, 0 }
+
+#endif
diff --git a/src/plugins/perfmon/table.c b/src/plugins/perfmon/table.c

new file mode 100644 (file)

index 0000000..e3fc098
--- /dev/null
+++ b/src/plugins/perfmon/table.c
@@ -0,0 +1,273 @@
+/*
+  Copyright (c) 2020 Damjan Marion
+
+  Permission is hereby granted, free of charge, to any person obtaining
+  a copy of this software and associated documentation files (the
+  "Software"), to deal in the Software without restriction, including
+  without limitation the rights to use, copy, modify, merge, publish,
+  distribute, sublicense, and/or sell copies of the Software, and to
+  permit persons to whom the Software is furnished to do so, subject to
+  the following conditions:
+
+  The above copyright notice and this permission notice shall be
+  included in all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/format.h>
+#include "table.h"
+
+static table_text_attr_t default_title = {
+  .flags = TTAF_FG_COLOR_SET | TTAF_BOLD,
+  .fg_color = TTAC_YELLOW,
+  .align = TTAA_CENTER,
+};
+
+static table_text_attr_t default_body = {
+  .align = TTAA_RIGHT,
+};
+
+static table_text_attr_t default_header_col = {
+  .flags = TTAF_FG_COLOR_SET,
+  .fg_color = TTAC_YELLOW,
+  .align = TTAA_CENTER,
+};
+
+static table_text_attr_t default_header_row = {
+  .flags = TTAF_FG_COLOR_SET | TTAF_BOLD,
+  .fg_color = TTAC_GREEN,
+  .align = TTAA_LEFT,
+};
+
+/*
+ * Append one table cell to the output vector.
+ *
+ * t    - table; only t->no_ansi is read here
+ * s    - output vector being built
+ * c    - cell to print; when NULL only the attribute reset sequence (or
+ *        nothing, if no_ansi is set) is appended
+ * def  - default attributes; copied locally, then overridden by the cell's
+ *        own fg/bg color when the cell carries the matching *_COLOR_SET flag
+ * size - field width the cell text is padded to
+ *
+ * Returns the extended vector.
+ */
+u8 *
+format_text_cell (table_t *t, u8 *s, table_cell_t *c, table_text_attr_t *def,
+                 int size)
+{
+  table_text_attr_t _a = {}, *a = &_a;
+
+  /* 'a' points at a local and can never be NULL; it is the cell that has
+   * to be checked before it is dereferenced below */
+  if (c == 0)
+    return format (s, t->no_ansi ? "" : "\x1b[0m");
+
+  clib_memcpy (a, def, sizeof (table_text_attr_t));
+
+  if (t->no_ansi == 0)
+    {
+      /* numeric parameters of the "\x1b[...m" escape sequence */
+      int *codes = 0;
+      if (c->attr.flags & TTAF_FG_COLOR_SET)
+       {
+         a->fg_color = c->attr.fg_color;
+         a->flags |= TTAF_FG_COLOR_SET;
+       }
+
+      if (c->attr.flags & TTAF_BG_COLOR_SET)
+       {
+         a->bg_color = c->attr.bg_color;
+         a->flags |= TTAF_BG_COLOR_SET;
+       }
+
+      if (a->flags & TTAF_RESET)
+       vec_add1 (codes, 0);
+
+      if (a->flags & TTAF_BOLD)
+       vec_add1 (codes, 1);
+
+      if (a->flags & TTAF_DIM)
+       vec_add1 (codes, 2);
+
+      if (a->flags & TTAF_UNDERLINE)
+       vec_add1 (codes, 4);
+
+      if (a->flags & TTAF_FG_COLOR_SET)
+       vec_add1 (codes,
+                 (a->flags & TTAF_FG_COLOR_BRIGHT ? 90 : 30) + a->fg_color);
+
+      if (a->flags & TTAF_BG_COLOR_SET)
+       vec_add1 (codes,
+                 (a->flags & TTAF_BG_COLOR_BRIGHT ? 100 : 40) + a->bg_color);
+
+      if (codes)
+       {
+         /* codes are joined with ';' between the introducer and 'm' */
+         s = format (s, "\x1b[");
+         for (int i = 0; i < vec_len (codes); i++)
+           s = format (s, "%s%u", i ? ";" : "", codes[i]);
+         s = format (s, "m");
+         vec_free (codes);
+       }
+    }
+
+  /* build a NUL-terminated "%<flag><size>v" format string; the cell's own
+   * alignment wins unless it is TTAA_DEFAULT */
+  u8 *fmt = 0;
+  table_text_attr_align_t align = c->attr.align;
+  if (align == TTAA_DEFAULT)
+    align = a->align;
+  if (align == TTAA_LEFT)
+    fmt = format (fmt, "%%-%uv%c", size, 0);
+  else if (align == TTAA_CENTER)
+    fmt = format (fmt, "%%=%uv%c", size, 0);
+  else
+    fmt = format (fmt, "%%%uv%c", size, 0);
+  s = format (s, (char *) fmt, c->text);
+  vec_free (fmt);
+  return format (s, t->no_ansi ? "" : "\x1b[0m");
+}
+
+/*
+ * format() callback printing a whole table; takes a table_t * argument.
+ *
+ * The title is printed first, in a field as wide as the sum of all
+ * row_sizes entries. Then one output line is emitted per first-level entry
+ * of t->cells, with its cells separated by a single space.
+ */
+u8 *
+format_table (u8 *s, va_list *args)
+{
+  table_t *t = va_arg (*args, table_t *);
+  table_cell_t title_cell = { .text = t->title };
+  int table_width = 0;
+  int c, r, i;
+
+  for (i = 0; i < vec_len (t->row_sizes); i++)
+    table_width += t->row_sizes[i];
+
+  s = format_text_cell (t, s, &title_cell, &default_title, table_width);
+  s = format (s, "\n");
+
+  for (c = 0; c < vec_len (t->cells); c++)
+    {
+      int is_header_col = c < t->n_header_cols;
+
+      for (r = 0; r < vec_len (t->cells[c]); r++)
+       {
+         table_text_attr_t *def;
+
+         /* header column defaults take precedence over header row ones */
+         if (is_header_col)
+           def = &default_header_col;
+         else if (r < t->n_header_rows)
+           def = &default_header_row;
+         else
+           def = &default_body;
+
+         if (r != 0)
+           s = format (s, " ");
+         s = format_text_cell (t, s, &t->cells[c][r], def, t->row_sizes[r]);
+       }
+      s = format (s, "\n");
+    }
+
+  return s;
+}
+
+/*
+ * Format the table title from a printf-style format and arguments. The
+ * current t->title vector is handed to va_format and replaced by its result.
+ */
+void
+table_format_title (table_t *t, char *fmt, ...)
+{
+  va_list ap;
+  u8 *title = t->title;
+
+  va_start (ap, fmt);
+  title = va_format (title, fmt, &ap);
+  va_end (ap);
+
+  t->title = title;
+}
+
+/*
+ * Return the cell at position (c, r), where both indices are relative to
+ * the data area, i.e. they are offset by n_header_cols / n_header_rows
+ * (so -1 addresses the last header column or row). The table is grown as
+ * needed so that the returned cell exists.
+ */
+static table_cell_t *
+table_get_cell (table_t *t, int c, int r)
+{
+  int col = c + t->n_header_cols;
+  int row = r + t->n_header_rows;
+  int i;
+
+  /* make sure the column exists, then that every column reaches 'row' */
+  vec_validate (t->cells, col);
+  for (i = 0; i < vec_len (t->cells); i++)
+    vec_validate (t->cells[i], row);
+
+  return t->cells[col] + row;
+}
+
+/*
+ * Format text into the cell at data-relative position (c, r) and update
+ * t->row_sizes so that the entry for this row index is at least as large as
+ * the resulting text length.
+ */
+void
+table_format_cell (table_t *t, int c, int r, char *fmt, ...)
+{
+  table_cell_t *cell = table_get_cell (t, c, r);
+  int row = r + t->n_header_rows;
+  va_list ap;
+
+  va_start (ap, fmt);
+  cell->text = va_format (cell->text, fmt, &ap);
+  va_end (ap);
+
+  vec_validate (t->row_sizes, row);
+  t->row_sizes[row] = clib_max (t->row_sizes[row], vec_len (cell->text));
+}
+
+/* Set the alignment of the cell at data-relative position (c, r). */
+void
+table_set_cell_align (table_t *t, int c, int r, table_text_attr_align_t a)
+{
+  table_get_cell (t, c, r)->attr.align = a;
+}
+
+/*
+ * Set the foreground color of the cell at data-relative position (c, r)
+ * and mark it as explicitly set.
+ */
+void
+table_set_cell_fg_color (table_t *t, int c, int r, table_text_attr_color_t v)
+{
+  table_text_attr_t *attr = &table_get_cell (t, c, r)->attr;
+
+  attr->flags |= TTAF_FG_COLOR_SET;
+  attr->fg_color = v;
+}
+
+/*
+ * Set the background color of the cell at data-relative position (c, r)
+ * and mark it as explicitly set.
+ */
+void
+table_set_cell_bg_color (table_t *t, int c, int r, table_text_attr_color_t v)
+{
+  table_text_attr_t *attr = &table_get_cell (t, c, r)->attr;
+
+  attr->flags |= TTAF_BG_COLOR_SET;
+  attr->bg_color = v;
+}
+
+/*
+ * Free every vector owned by the table (cell texts, cell vectors,
+ * row_sizes, title) and zero the table_t itself.
+ */
+void
+table_free (table_t *t)
+{
+  table_cell_t **col, *cell;
+
+  vec_foreach (col, t->cells)
+    {
+      vec_foreach (cell, col[0])
+       vec_free (cell->text);
+      vec_free (col[0]);
+    }
+
+  vec_free (t->title);
+  vec_free (t->row_sizes);
+  vec_free (t->cells);
+  clib_memset (t, 0, sizeof (table_t));
+}
+
+/*
+ * Add a header column. A new entry is inserted into t->cells at the old
+ * n_header_cols index and sized to at least max (n_strings, 1, number of
+ * row_sizes entries) cells. The n_strings variadic char * arguments are
+ * then formatted into its first cells.
+ */
+void
+table_add_header_col (table_t *t, int n_strings, ...)
+{
+  va_list ap;
+  int col = t->n_header_cols++;
+  int n_rows = clib_max (n_strings, 1);
+  int i;
+
+  vec_insert (t->cells, 1, col);
+  n_rows = clib_max (vec_len (t->row_sizes), n_rows);
+  vec_validate (t->cells[col], n_rows - 1);
+
+  va_start (ap, n_strings);
+  /* n_rows is never smaller than n_strings, so every string gets a cell;
+   * -1 addresses the header column that was just added */
+  for (i = 0; i < n_strings; i++)
+    table_format_cell (t, -1, i - t->n_header_rows, "%s",
+                      va_arg (ap, char *));
+  va_end (ap);
+}
+
+/*
+ * Add a header row. n_header_rows is incremented, t->cells is grown so
+ * that there is one data column per string, and in every data column an
+ * empty cell is inserted at the new header row index and filled from the
+ * variadic char * arguments (columns beyond n_strings stay empty).
+ */
+void
+table_add_header_row (table_t *t, int n_strings, ...)
+{
+  va_list arg;
+  int c, r = t->n_header_rows++;
+
+  vec_validate (t->cells, n_strings + t->n_header_cols - 1);
+
+  va_start (arg, n_strings);
+  /* c is an absolute index into t->cells, starting past the header
+   * columns, so it must not be offset by n_header_cols a second time */
+  for (c = t->n_header_cols; c < vec_len (t->cells); c++)
+    {
+      vec_insert (t->cells[c], 1, r);
+      /* table_format_cell () expects data-relative indices; row -1 is the
+       * header row that was just added */
+      if (n_strings-- > 0)
+       table_format_cell (t, c - t->n_header_cols, -1, "%s",
+                          va_arg (arg, char *));
+    }
+  va_end (arg);
+}
diff --git a/src/plugins/perfmon/table.h b/src/plugins/perfmon/table.h

new file mode 100644 (file)

index 0000000..93102a0
--- /dev/null
+++ b/src/plugins/perfmon/table.h
@@ -0,0 +1,98 @@
+/*
+  Copyright (c) 2020 Damjan Marion
+
+  Permission is hereby granted, free of charge, to any person obtaining
+  a copy of this software and associated documentation files (the
+  "Software"), to deal in the Software without restriction, including
+  without limitation the rights to use, copy, modify, merge, publish,
+  distribute, sublicense, and/or sell copies of the Software, and to
+  permit persons to whom the Software is furnished to do so, subject to
+  the following conditions:
+
+  The above copyright notice and this permission notice shall be
+  included in all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef __table_h__
+#define __table_h__
+
+typedef enum /* bit flags stored in table_text_attr_t.flags */
+{
+  TTAF_RESET = (1 << 0), /* format_text_cell emits escape code 0 */
+  TTAF_BOLD = (1 << 1), /* escape code 1 */
+  TTAF_DIM = (1 << 2), /* escape code 2 */
+  TTAF_UNDERLINE = (1 << 3), /* escape code 4 */
+  TTAF_FG_COLOR_SET = (1 << 4), /* fg_color is used: code 30 + fg_color */
+  TTAF_BG_COLOR_SET = (1 << 5), /* bg_color is used: code 40 + bg_color */
+  TTAF_FG_COLOR_BRIGHT = (1 << 6), /* foreground base is 90 instead of 30 */
+  TTAF_BG_COLOR_BRIGHT = (1 << 7), /* background base is 100 instead of 40 */
+} table_text_attr_flags_t;
+
+typedef enum
+{
+  TTAC_BLACK = 0,
+  TTAC_RED = 1,
+  TTAC_GREEN = 2,
+  TTAC_YELLOW = 3,
+  TTAC_BLUE = 4,
+  TTAC_MAGENTA = 5,
+  TTAC_CYAN = 6,
+  TTAC_WHITE = 7,
+} table_text_attr_color_t;
+
+typedef enum /* cell text alignment, consumed by format_text_cell */
+{
+  TTAA_DEFAULT = 0, /* use the alignment of the default attributes */
+  TTAA_LEFT = 1, /* formatted with "%-<size>v" */
+  TTAA_RIGHT = 2, /* formatted with "%<size>v" */
+  TTAA_CENTER = 3, /* formatted with "%=<size>v" */
+} table_text_attr_align_t;
+
+typedef struct
+{
+  table_text_attr_flags_t flags : 16;
+  table_text_attr_color_t fg_color : 4;
+  table_text_attr_color_t bg_color : 4;
+  table_text_attr_align_t align : 4;
+} table_text_attr_t;
+
+typedef struct
+{
+  table_text_attr_t attr;
+  u8 *text;
+} table_cell_t;
+
+typedef struct /* table state; released and zeroed by table_free */
+{
+  u8 no_ansi : 1; /* when set, format_text_cell emits no escape sequences */
+  u8 *title; /* built by table_format_title, printed first by format_table */
+  table_cell_t **cells; /* indexed [c][r]; format_table prints one line per first index */
+  int *row_sizes; /* per second index: largest text length, used as field width */
+  int n_header_cols; /* offset added to the c index by table_get_cell */
+  int n_header_rows; /* offset added to the r index by table_get_cell */
+  int n_footer_cols; /* not referenced by table.c */
+} table_t;
+
+format_function_t format_table;
+
+void table_format_title (table_t *t, char *fmt, ...);
+void table_format_cell (table_t *t, int c, int r, char *fmt, ...);
+void table_set_cell_align (table_t *t, int c, int r,
+                          table_text_attr_align_t a);
+void table_set_cell_fg_color (table_t *t, int c, int r,
+                             table_text_attr_color_t v);
+void table_set_cell_bg_color (table_t *t, int c, int r,
+                             table_text_attr_color_t v);
+void table_free (table_t *t);
+void table_add_header_col (table_t *t, int n_strings, ...);
+void table_add_header_row (table_t *t, int n_strings, ...);
+
+#endif
diff --git a/src/vlib/main.c b/src/vlib/main.c

index 27cbcb0..6369f39 100644 (file)
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -1208,13 +1208,21 @@ dispatch_node (vlib_main_t * vm,
         }
        if (PREDICT_FALSE (vm->dispatch_pcap_enable))
         dispatch_pcap_trace (vm, node, frame);
-      n = node->function (vm, node, frame);
+
+      if (PREDICT_TRUE (vm->dispatch_wrapper_fn == 0))
+       n = node->function (vm, node, frame);
+      else
+       n = vm->dispatch_wrapper_fn (vm, node, frame);
      }
    else
      {
        if (PREDICT_FALSE (vm->dispatch_pcap_enable))
         dispatch_pcap_trace (vm, node, frame);
-      n = node->function (vm, node, frame);
+
+      if (PREDICT_TRUE (vm->dispatch_wrapper_fn == 0))
+       n = node->function (vm, node, frame);
+      else
+       n = vm->dispatch_wrapper_fn (vm, node, frame);
      }
  
    t = clib_cpu_time_now ();
diff --git a/src/vlib/main.h b/src/vlib/main.h

index 477211d..5f78b79 100644 (file)
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -149,6 +149,9 @@ typedef struct vlib_main_t
    /* Main loop hw / sw performance counters */
    vlib_node_runtime_perf_callback_set_t vlib_node_runtime_perf_callbacks;
  
+  /* dispatch wrapper function */
+  vlib_node_function_t *dispatch_wrapper_fn;
+
    /* Every so often we switch to the next counter. */
  #define VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE 7
  
diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h

index 33bdc79..b33f496 100644 (file)
--- a/src/vlib/node_funcs.h
+++ b/src/vlib/node_funcs.h
@@ -1247,6 +1247,15 @@ vlib_node_increment_counter (vlib_main_t * vm, u32 node_index,
  u32 vlib_process_create (vlib_main_t * vm, char *name,
                          vlib_node_function_t * f, u32 log2_n_stack_bytes);
  
+always_inline int /* returns 1 when refusing to replace a wrapper, 0 otherwise */
+vlib_node_set_dispatch_wrapper (vlib_main_t *vm, vlib_node_function_t *fn)
+{
+  if (fn && vm->dispatch_wrapper_fn) /* a wrapper is already installed */
+    return 1;
+  vm->dispatch_wrapper_fn = fn; /* fn == 0 clears the wrapper */
+  return 0;
+}
+
  #endif /* included_vlib_node_funcs_h */
  
  /*
author	Damjan Marion <damarion@cisco.com>
	Fri, 27 Nov 2020 19:15:17 +0000 (20:15 +0100)
committer	Florin Coras <florin.coras@gmail.com>
	Fri, 18 Dec 2020 17:20:28 +0000 (17:20 +0000)
MAINTAINERS		patch \| blob \| history
src/plugins/perfmon/CMakeLists.txt	[new file with mode: 0644]	patch \| blob
src/plugins/perfmon/cli.c	[new file with mode: 0644]	patch \| blob
src/plugins/perfmon/dispatch_wrapper.c	[new file with mode: 0644]	patch \| blob
src/plugins/perfmon/intel/bundle/inst_and_clock.c	[new file with mode: 0644]	patch \| blob
src/plugins/perfmon/intel/bundle/load_blocks.c	[new file with mode: 0644]	patch \| blob
src/plugins/perfmon/intel/bundle/mem_bw.c	[new file with mode: 0644]	patch \| blob
src/plugins/perfmon/intel/core.c	[new file with mode: 0644]	patch \| blob
src/plugins/perfmon/intel/core.h	[new file with mode: 0644]	patch \| blob
src/plugins/perfmon/intel/uncore.c	[new file with mode: 0644]	patch \| blob
src/plugins/perfmon/intel/uncore.h	[new file with mode: 0644]	patch \| blob
src/plugins/perfmon/linux.c	[new file with mode: 0644]	patch \| blob
src/plugins/perfmon/perfmon.c	[new file with mode: 0644]	patch \| blob
src/plugins/perfmon/perfmon.h	[new file with mode: 0644]	patch \| blob
src/plugins/perfmon/table.c	[new file with mode: 0644]	patch \| blob
src/plugins/perfmon/table.h	[new file with mode: 0644]	patch \| blob
src/vlib/main.c		patch \| blob \| history
src/vlib/main.h		patch \| blob \| history
src/vlib/node_funcs.h		patch \| blob \| history