misc: add callback hooks and refactor pmc 61/27861/4
authorTom Seidenberg <tseidenb@cisco.com>
Fri, 10 Jul 2020 15:49:03 +0000 (15:49 +0000)
committerDave Barach <openvpp@barachs.net>
Thu, 16 Jul 2020 21:44:42 +0000 (21:44 +0000)
Callbacks for monitoring and performance measurement:
- Add new callback list type, with context
- Add callbacks for API, CLI, and barrier sync
- Modify node dispatch callback to pass plugin-specific context
- Modify perfmon plugin to keep PMC samples local to the plugin
- Include process nodes in dispatch callback
- Pass dispatch function return value to callback

Type: refactor

Signed-off-by: Tom Seidenberg <tseidenb@cisco.com>
Change-Id: I28b06c58490611e08d76ff5b01b2347ba2109b22

24 files changed:
src/plugins/mdata/mdata.c
src/plugins/perfmon/CMakeLists.txt
src/plugins/perfmon/perfmon.c
src/plugins/perfmon/perfmon.h
src/plugins/perfmon/perfmon_intel.h
src/plugins/perfmon/perfmon_intel_skl.c
src/plugins/perfmon/perfmon_intel_skx.c
src/plugins/perfmon/perfmon_periodic.c
src/plugins/perfmon/perfmon_plugin.c [new file with mode: 0644]
src/vlib/cli.c
src/vlib/cli.h
src/vlib/init.h
src/vlib/main.c
src/vlib/main.h
src/vlib/node.h
src/vlib/node_funcs.h
src/vlib/threads.c
src/vlib/threads.h
src/vlibapi/api_common.h
src/vlibapi/api_shared.c
src/vnet/interface.c
src/vppinfra/CMakeLists.txt
src/vppinfra/callback.h
src/vppinfra/callback_data.h [new file with mode: 0644]

index fc5bbfb..f74564e 100644 (file)
@@ -21,6 +21,7 @@
 
 #include <vlibapi/api.h>
 #include <vlibmemory/api.h>
+#include <vppinfra/callback_data.h>
 #include <vpp/app/version.h>
 #include <stdbool.h>
 
@@ -42,9 +43,8 @@ static mdata_t mdata_none;
     before_or_after: 0 => before, 1=> after
 */
 static void
-mdata_trace_callback (vlib_main_t * vm, u64 * c0, u64 * c1,
-                     vlib_node_runtime_t * node,
-                     vlib_frame_t * frame, int before_or_after)
+mdata_trace_callback (vlib_node_runtime_perf_callback_data_t * data,
+                     vlib_node_runtime_perf_callback_args_t * args)
 {
   int i;
   mdata_main_t *mm = &mdata_main;
@@ -53,6 +53,12 @@ mdata_trace_callback (vlib_main_t * vm, u64 * c0, u64 * c1,
   u32 n_left_from;
   mdata_t *before, *modifies;
   u8 *after;
+  vlib_main_t *vm = args->vm;
+  vlib_frame_t *frame = args->frame;
+  vlib_node_runtime_t *node = args->node;
+
+  if (PREDICT_FALSE (args->call_type == VLIB_NODE_RUNTIME_PERF_RESET))
+    return;
 
   /* Input nodes don't have frames, etc. */
   if (frame == 0)
@@ -68,7 +74,7 @@ mdata_trace_callback (vlib_main_t * vm, u64 * c0, u64 * c1,
   vlib_get_buffers (vm, from, bufs, n_left_from);
   b = bufs;
 
-  if (before_or_after == 1 /* after */ )
+  if (args->call_type == VLIB_NODE_RUNTIME_PERF_AFTER)
     goto after_pass;
 
   /* Resize the per-thread "before" vector to cover the current frame */
@@ -152,11 +158,9 @@ mdata_enable_disable (mdata_main_t * mmp, int enable_disable)
       if (vlib_mains[i] == 0)
        continue;
 
-      clib_callback_enable_disable
-       (vlib_mains[i]->vlib_node_runtime_perf_counter_cbs,
-        vlib_mains[i]->vlib_node_runtime_perf_counter_cb_tmp,
-        vlib_mains[i]->worker_thread_main_loop_callback_lock,
-        (void *) mdata_trace_callback, enable_disable);
+      clib_callback_data_enable_disable
+       (&vlib_mains[i]->vlib_node_runtime_perf_callbacks,
+        mdata_trace_callback, enable_disable);
     }
 
   return rv;
index a3f045f..69e225b 100644 (file)
@@ -11,7 +11,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-add_vpp_plugin(perfmon
+add_vpp_library (perfcore
   SOURCES
   perfmon.c
   perfmon_periodic.c
@@ -32,6 +32,22 @@ add_vpp_plugin(perfmon
   perfmon_intel_wsm_ep_dp.c
   perfmon_intel_wsm_ep_sp.c
   perfmon_intel_wsm_ex.c
+
+  INSTALL_HEADERS
+  perfmon.h
+
+  LINK_LIBRARIES
+  vppinfra
+  vlib
+  vnet
+)
+
+add_vpp_plugin(perfmon
+  SOURCES
+  perfmon_plugin.c
+
+  LINK_LIBRARIES
+  perfcore
 )
 
 option(VPP_BUILD_MAPFILE_TOOL "Build perfmon mapfile utility." OFF)
index 7e276c3..525a864 100644 (file)
@@ -16,7 +16,6 @@
  */
 
 #include <vnet/vnet.h>
-#include <vnet/plugin/plugin.h>
 #include <perfmon/perfmon.h>
 #include <perfmon/perfmon_intel.h>
 
@@ -98,6 +97,7 @@ perfmon_init (vlib_main_t * vm)
   u32 cpuid;
   u8 model, stepping;
   perfmon_intel_pmc_event_t *ev;
+  int i;
 
   pm->vlib_main = vm;
   pm->vnet_main = vnet_get_main ();
@@ -109,9 +109,17 @@ perfmon_init (vlib_main_t * vm)
 
   /* Default data collection interval */
   pm->timeout_interval = 2.0;  /* seconds */
-  vec_validate (pm->pm_fds, 1);
-  vec_validate (pm->perf_event_pages, 1);
-  vec_validate (pm->rdpmc_indices, 1);
+
+  vec_validate (pm->threads, vlib_get_thread_main ()->n_vlib_mains - 1);
+  for (i = 0; i < vec_len (pm->threads); i++)
+    {
+      perfmon_thread_t *pt = clib_mem_alloc_aligned
+       (sizeof (perfmon_thread_t), CLIB_CACHE_LINE_BYTES);
+      clib_memset (pt, 0, sizeof (*pt));
+      pm->threads[i] = pt;
+      pt->pm_fds[0] = -1;
+      pt->pm_fds[1] = -1;
+    }
   pm->page_size = getpagesize ();
 
   pm->perfmon_table = 0;
@@ -147,18 +155,7 @@ perfmon_init (vlib_main_t * vm)
 
 VLIB_INIT_FUNCTION (perfmon_init);
 
-/* *INDENT-OFF* */
-VLIB_PLUGIN_REGISTER () =
-{
-  .version = VPP_BUILD_VER,
-  .description = "Performance Monitor",
-#if !defined(__x86_64__)
-  .default_disabled = 1,
-#endif
-};
-/* *INDENT-ON* */
-
-static uword
+uword
 unformat_processor_event (unformat_input_t * input, va_list * args)
 {
   perfmon_main_t *pm = va_arg (*args, perfmon_main_t *);
@@ -185,6 +182,10 @@ unformat_processor_event (unformat_input_t * input, va_list * args)
 
   pe_config |= pm->perfmon_table[idx].event_code[0];
   pe_config |= pm->perfmon_table[idx].umask << 8;
+  pe_config |= pm->perfmon_table[idx].edge << 18;
+  pe_config |= pm->perfmon_table[idx].anyt << 21;
+  pe_config |= pm->perfmon_table[idx].inv << 23;
+  pe_config |= pm->perfmon_table[idx].cmask << 24;
 
   ep->name = (char *) hp->key;
   ep->pe_type = PERF_TYPE_RAW;
index 000e3c2..c878202 100644 (file)
@@ -76,6 +76,32 @@ typedef struct
   u8 *value;
 } name_value_pair_t;
 
+typedef struct
+{
+  u64 ticks[2];
+  u64 vectors;
+} perfmon_counters_t;
+
+typedef struct
+{
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+
+  /* Current counters */
+  u64 c[2];
+
+  /* Current perf_event file descriptors, per thread */
+  int pm_fds[2];
+
+  /* mmap base of mapped struct perf_event_mmap_page */
+  u8 *perf_event_pages[2];
+
+  u32 rdpmc_indices[2];
+
+  /* vector of counters by node index */
+  perfmon_counters_t *counters;
+
+} perfmon_thread_t;
+
 typedef struct
 {
   /* API message ID base */
@@ -112,17 +138,15 @@ typedef struct
   /* Current event (index) being collected */
   u32 current_event;
   int n_active;
-  u32 **rdpmc_indices;
-  /* mmap base / size of (mapped) struct perf_event_mmap_page */
-  u8 ***perf_event_pages;
+  /* mmap size of (mapped) struct perf_event_mmap_page */
   u32 page_size;
 
-  /* Current perf_event file descriptors, per thread */
-  int **pm_fds;
-
   /* thread bitmap */
   uword *thread_bitmap;
 
+  /* per-thread data */
+  perfmon_thread_t **threads;
+
   /* Logging */
   vlib_log_class_t log_class;
 
@@ -137,6 +161,8 @@ extern perfmon_main_t perfmon_main;
 extern vlib_node_registration_t perfmon_periodic_node;
 uword *perfmon_parse_table (perfmon_main_t * pm, char *path, char *filename);
 
+uword unformat_processor_event (unformat_input_t * input, va_list * args);
+
 /* Periodic function events */
 #define PERFMON_START 1
 
index 6bb8492..4753091 100644 (file)
@@ -25,6 +25,10 @@ typedef struct
 {
   u8 event_code[2];
   u8 umask;
+  u8 cmask;
+  u8 inv;
+  u8 anyt;
+  u8 edge;
   char *event_name;
 } perfmon_intel_pmc_event_t;
 
index 726dbb4..b1c0314 100644 (file)
@@ -88,6 +88,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x0D},
    .umask = 0x01,
+   .anyt = 1,
    .event_name = "int_misc.recovery_cycles_any",
    },
   {
@@ -103,6 +104,8 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x0E},
    .umask = 0x01,
+   .cmask = 1,
+   .inv = 1,
    .event_name = "uops_issued.stall_cycles",
    },
   {
@@ -233,6 +236,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x3C},
    .umask = 0x00,
+   .anyt = 1,
    .event_name = "cpu_clk_unhalted.thread_p_any",
    },
   {
@@ -248,6 +252,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x3C},
    .umask = 0x01,
+   .anyt = 1,
    .event_name = "cpu_clk_thread_unhalted.ref_xclk_any",
    },
   {
@@ -268,6 +273,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x48},
    .umask = 0x01,
+   .cmask = 1,
    .event_name = "l1d_pend_miss.pending",
    },
   {
@@ -305,6 +311,12 @@ static perfmon_intel_pmc_event_t event_table[] = {
    .umask = 0x0E,
    .event_name = "dtlb_store_misses.walk_completed",
    },
+  {
+   .event_code = {0x49},
+   .umask = 0x10,
+   .cmask = 1,
+   .event_name = "dtlb_store_misses.walk_active",
+   },
   {
    .event_code = {0x49},
    .umask = 0x10,
@@ -403,6 +415,8 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x5E},
    .umask = 0x01,
+   .cmask = 1,
+   .inv = 1,
    .event_name = "rs_events.empty_end",
    },
   {
@@ -413,6 +427,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x60},
    .umask = 0x01,
+   .cmask = 1,
    .event_name = "offcore_requests_outstanding.cycles_with_demand_data_rd",
    },
   {
@@ -423,6 +438,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x60},
    .umask = 0x02,
+   .cmask = 1,
    .event_name = "offcore_requests_outstanding.cycles_with_demand_code_rd",
    },
   {
@@ -433,6 +449,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x60},
    .umask = 0x04,
+   .cmask = 1,
    .event_name = "offcore_requests_outstanding.cycles_with_demand_rfo",
    },
   {
@@ -443,6 +460,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x60},
    .umask = 0x08,
+   .cmask = 1,
    .event_name = "offcore_requests_outstanding.cycles_with_data_rd",
    },
   {
@@ -458,6 +476,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x79},
    .umask = 0x04,
+   .cmask = 1,
    .event_name = "idq.mite_cycles",
    },
   {
@@ -468,6 +487,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x79},
    .umask = 0x08,
+   .cmask = 1,
    .event_name = "idq.dsb_cycles",
    },
   {
@@ -478,11 +498,13 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x79},
    .umask = 0x18,
+   .cmask = 4,
    .event_name = "idq.all_dsb_cycles_4_uops",
    },
   {
    .event_code = {0x79},
    .umask = 0x18,
+   .cmask = 1,
    .event_name = "idq.all_dsb_cycles_any_uops",
    },
   {
@@ -503,11 +525,13 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x79},
    .umask = 0x30,
+   .cmask = 1,
    .event_name = "idq.ms_cycles",
    },
   {
    .event_code = {0x79},
    .umask = 0x30,
+   .edge = 1,
    .event_name = "idq.ms_switches",
    },
   {
@@ -588,26 +612,32 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x9C},
    .umask = 0x01,
+   .cmask = 4,
    .event_name = "idq_uops_not_delivered.cycles_0_uops_deliv.core",
    },
   {
    .event_code = {0x9C},
    .umask = 0x01,
+   .cmask = 3,
    .event_name = "idq_uops_not_delivered.cycles_le_1_uop_deliv.core",
    },
   {
    .event_code = {0x9C},
    .umask = 0x01,
+   .cmask = 4,
    .event_name = "idq_uops_not_delivered.cycles_le_2_uop_deliv.core",
    },
   {
    .event_code = {0x9C},
    .umask = 0x01,
+   .cmask = 1,
    .event_name = "idq_uops_not_delivered.cycles_le_3_uop_deliv.core",
    },
   {
    .event_code = {0x9C},
    .umask = 0x01,
+   .cmask = 1,
+   .inv = 1,
    .event_name = "idq_uops_not_delivered.cycles_fe_was_ok",
    },
   {
@@ -663,36 +693,43 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0xA3},
    .umask = 0x01,
+   .cmask = 1,
    .event_name = "cycle_activity.cycles_l2_miss",
    },
   {
    .event_code = {0xA3},
    .umask = 0x04,
+   .cmask = 4,
    .event_name = "cycle_activity.stalls_total",
    },
   {
    .event_code = {0xA3},
    .umask = 0x05,
+   .cmask = 5,
    .event_name = "cycle_activity.stalls_l2_miss",
    },
   {
    .event_code = {0xA3},
    .umask = 0x08,
+   .cmask = 8,
    .event_name = "cycle_activity.cycles_l1d_miss",
    },
   {
    .event_code = {0xA3},
    .umask = 0x0C,
+   .cmask = 12,
    .event_name = "cycle_activity.stalls_l1d_miss",
    },
   {
    .event_code = {0xA3},
    .umask = 0x10,
+   .cmask = 16,
    .event_name = "cycle_activity.cycles_mem_any",
    },
   {
    .event_code = {0xA3},
    .umask = 0x14,
+   .cmask = 20,
    .event_name = "cycle_activity.stalls_mem_any",
    },
   {
@@ -733,11 +770,13 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0xA8},
    .umask = 0x01,
+   .cmask = 1,
    .event_name = "lsd.cycles_active",
    },
   {
    .event_code = {0xA8},
    .umask = 0x01,
+   .cmask = 4,
    .event_name = "lsd.cycles_4_uops",
    },
   {
@@ -788,26 +827,32 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0xB1},
    .umask = 0x01,
+   .cmask = 1,
+   .inv = 1,
    .event_name = "uops_executed.stall_cycles",
    },
   {
    .event_code = {0xB1},
    .umask = 0x01,
+   .cmask = 1,
    .event_name = "uops_executed.cycles_ge_1_uop_exec",
    },
   {
    .event_code = {0xB1},
    .umask = 0x01,
+   .cmask = 2,
    .event_name = "uops_executed.cycles_ge_2_uops_exec",
    },
   {
    .event_code = {0xB1},
    .umask = 0x01,
+   .cmask = 3,
    .event_name = "uops_executed.cycles_ge_3_uops_exec",
    },
   {
    .event_code = {0xB1},
    .umask = 0x01,
+   .cmask = 4,
    .event_name = "uops_executed.cycles_ge_4_uops_exec",
    },
   {
@@ -818,26 +863,32 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0xB1},
    .umask = 0x02,
+   .cmask = 1,
    .event_name = "uops_executed.core_cycles_ge_1",
    },
   {
    .event_code = {0xB1},
    .umask = 0x02,
+   .cmask = 2,
    .event_name = "uops_executed.core_cycles_ge_2",
    },
   {
    .event_code = {0xB1},
    .umask = 0x02,
+   .cmask = 3,
    .event_name = "uops_executed.core_cycles_ge_3",
    },
   {
    .event_code = {0xB1},
    .umask = 0x02,
+   .cmask = 4,
    .event_name = "uops_executed.core_cycles_ge_4",
    },
   {
    .event_code = {0xB1},
    .umask = 0x02,
+   .cmask = 1,
+   .inv = 1,
    .event_name = "uops_executed.core_cycles_none",
    },
   {
@@ -873,6 +924,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0xC0},
    .umask = 0x01,
+   .cmask = 10,
    .event_name = "inst_retired.total_cycles_ps",
    },
   {
@@ -883,16 +935,22 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0xC2},
    .umask = 0x02,
+   .cmask = 1,
+   .inv = 1,
    .event_name = "uops_retired.stall_cycles",
    },
   {
    .event_code = {0xC2},
    .umask = 0x02,
+   .cmask = 10,
+   .inv = 1,
    .event_name = "uops_retired.total_cycles",
    },
   {
    .event_code = {0xC3},
    .umask = 0x01,
+   .cmask = 1,
+   .edge = 1,
    .event_name = "machine_clears.count",
    },
   {
@@ -1083,6 +1141,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0xCA},
    .umask = 0x1E,
+   .cmask = 1,
    .event_name = "fp_assist.any",
    },
   {
index 3991744..9de202d 100644 (file)
@@ -88,6 +88,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x0D},
    .umask = 0x01,
+   .anyt = 1,
    .event_name = "int_misc.recovery_cycles_any",
    },
   {
@@ -98,6 +99,8 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x0E},
    .umask = 0x01,
+   .cmask = 1,
+   .inv = 1,
    .event_name = "uops_issued.stall_cycles",
    },
   {
@@ -253,6 +256,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x3C},
    .umask = 0x00,
+   .anyt = 1,
    .event_name = "cpu_clk_unhalted.thread_p_any",
    },
   {
@@ -268,6 +272,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x3C},
    .umask = 0x01,
+   .anyt = 1,
    .event_name = "cpu_clk_thread_unhalted.ref_xclk_any",
    },
   {
@@ -288,6 +293,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x48},
    .umask = 0x01,
+   .cmask = 1,
    .event_name = "l1d_pend_miss.pending_cycles",
    },
   {
@@ -325,6 +331,12 @@ static perfmon_intel_pmc_event_t event_table[] = {
    .umask = 0x0E,
    .event_name = "dtlb_store_misses.walk_completed",
    },
+  {
+   .event_code = {0x49},
+   .umask = 0x10,
+   .cmask = 1,
+   .event_name = "dtlb_store_misses.walk_active",
+   },
   {
    .event_code = {0x49},
    .umask = 0x10,
@@ -418,6 +430,8 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x5E},
    .umask = 0x01,
+   .cmask = 1,
+   .inv = 1,
    .event_name = "rs_events.empty_end",
    },
   {
@@ -428,6 +442,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x60},
    .umask = 0x01,
+   .cmask = 1,
    .event_name = "offcore_requests_outstanding.cycles_with_demand_data_rd",
    },
   {
@@ -443,6 +458,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x60},
    .umask = 0x02,
+   .cmask = 1,
    .event_name = "offcore_requests_outstanding.cycles_with_demand_code_rd",
    },
   {
@@ -453,11 +469,13 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x60},
    .umask = 0x04,
+   .cmask = 1,
    .event_name = "offcore_requests_outstanding.cycles_with_demand_rfo",
    },
   {
    .event_code = {0x60},
    .umask = 0x08,
+   .cmask = 1,
    .event_name = "offcore_requests_outstanding.cycles_with_data_rd",
    },
   {
@@ -473,6 +491,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x79},
    .umask = 0x04,
+   .cmask = 1,
    .event_name = "idq.mite_cycles",
    },
   {
@@ -483,6 +502,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x79},
    .umask = 0x08,
+   .cmask = 1,
    .event_name = "idq.dsb_cycles",
    },
   {
@@ -498,11 +518,13 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x79},
    .umask = 0x18,
+   .cmask = 1,
    .event_name = "idq.all_dsb_cycles_any_uops",
    },
   {
    .event_code = {0x79},
    .umask = 0x18,
+   .cmask = 4,
    .event_name = "idq.all_dsb_cycles_4_uops",
    },
   {
@@ -523,6 +545,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x79},
    .umask = 0x30,
+   .cmask = 1,
    .event_name = "idq.ms_cycles",
    },
   {
@@ -533,6 +556,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x79},
    .umask = 0x30,
+   .edge = 1,
    .event_name = "idq.ms_switches",
    },
   {
@@ -603,26 +627,32 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0x9C},
    .umask = 0x01,
+   .cmask = 1,
+   .inv = 1,
    .event_name = "idq_uops_not_delivered.cycles_fe_was_ok",
    },
   {
    .event_code = {0x9C},
    .umask = 0x01,
+   .cmask = 1,
    .event_name = "idq_uops_not_delivered.cycles_le_3_uop_deliv.core",
    },
   {
    .event_code = {0x9C},
    .umask = 0x01,
+   .cmask = 2,
    .event_name = "idq_uops_not_delivered.cycles_le_2_uop_deliv.core",
    },
   {
    .event_code = {0x9C},
    .umask = 0x01,
+   .cmask = 3,
    .event_name = "idq_uops_not_delivered.cycles_le_1_uop_deliv.core",
    },
   {
    .event_code = {0x9C},
    .umask = 0x01,
+   .cmask = 4,
    .event_name = "idq_uops_not_delivered.cycles_0_uops_deliv.core",
    },
   {
@@ -683,36 +713,43 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0xA3},
    .umask = 0x01,
+   .cmask = 1,
    .event_name = "cycle_activity.cycles_l2_miss",
    },
   {
    .event_code = {0xA3},
    .umask = 0x04,
+   .cmask = 4,
    .event_name = "cycle_activity.stalls_total",
    },
   {
    .event_code = {0xA3},
    .umask = 0x05,
+   .cmask = 5,
    .event_name = "cycle_activity.stalls_l2_miss",
    },
   {
    .event_code = {0xA3},
    .umask = 0x08,
+   .cmask = 8,
    .event_name = "cycle_activity.cycles_l1d_miss",
    },
   {
    .event_code = {0xA3},
    .umask = 0x0C,
+   .cmask = 12,
    .event_name = "cycle_activity.stalls_l1d_miss",
    },
   {
    .event_code = {0xA3},
    .umask = 0x10,
+   .cmask = 16,
    .event_name = "cycle_activity.cycles_mem_any",
    },
   {
    .event_code = {0xA3},
    .umask = 0x14,
+   .cmask = 20,
    .event_name = "cycle_activity.stalls_mem_any",
    },
   {
@@ -753,11 +790,13 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0xA8},
    .umask = 0x01,
+   .cmask = 4,
    .event_name = "lsd.cycles_4_uops",
    },
   {
    .event_code = {0xA8},
    .umask = 0x01,
+   .cmask = 1,
    .event_name = "lsd.cycles_active",
    },
   {
@@ -803,26 +842,32 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0xB1},
    .umask = 0x01,
+   .cmask = 4,
    .event_name = "uops_executed.cycles_ge_4_uops_exec",
    },
   {
    .event_code = {0xB1},
    .umask = 0x01,
+   .cmask = 3,
    .event_name = "uops_executed.cycles_ge_3_uops_exec",
    },
   {
    .event_code = {0xB1},
    .umask = 0x01,
+   .cmask = 2,
    .event_name = "uops_executed.cycles_ge_2_uops_exec",
    },
   {
    .event_code = {0xB1},
    .umask = 0x01,
+   .cmask = 1,
    .event_name = "uops_executed.cycles_ge_1_uop_exec",
    },
   {
    .event_code = {0xB1},
    .umask = 0x01,
+   .cmask = 1,
+   .inv = 1,
    .event_name = "uops_executed.stall_cycles",
    },
   {
@@ -838,26 +883,32 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0xB1},
    .umask = 0x02,
+   .cmask = 1,
+   .inv = 1,
    .event_name = "uops_executed.core_cycles_none",
    },
   {
    .event_code = {0xB1},
    .umask = 0x02,
+   .cmask = 4,
    .event_name = "uops_executed.core_cycles_ge_4",
    },
   {
    .event_code = {0xB1},
    .umask = 0x02,
+   .cmask = 3,
    .event_name = "uops_executed.core_cycles_ge_3",
    },
   {
    .event_code = {0xB1},
    .umask = 0x02,
+   .cmask = 2,
    .event_name = "uops_executed.core_cycles_ge_2",
    },
   {
    .event_code = {0xB1},
    .umask = 0x02,
+   .cmask = 1,
    .event_name = "uops_executed.core_cycles_ge_1",
    },
   {
@@ -898,16 +949,21 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0xC0},
    .umask = 0x01,
+   .cmask = 10,
    .event_name = "inst_retired.total_cycles_ps",
    },
   {
    .event_code = {0xC2},
    .umask = 0x02,
+   .cmask = 10,
+   .inv = 1,
    .event_name = "uops_retired.total_cycles",
    },
   {
    .event_code = {0xC2},
    .umask = 0x02,
+   .cmask = 1,
+   .inv = 1,
    .event_name = "uops_retired.stall_cycles",
    },
   {
@@ -918,6 +974,8 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0xC3},
    .umask = 0x01,
+   .cmask = 1,
+   .edge = 1,
    .event_name = "machine_clears.count",
    },
   {
@@ -1118,6 +1176,7 @@ static perfmon_intel_pmc_event_t event_table[] = {
   {
    .event_code = {0xCA},
    .umask = 0x1E,
+   .cmask = 1,
    .event_name = "fp_assist.any",
    },
   {
index 37d669b..de31221 100644 (file)
@@ -33,52 +33,65 @@ perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu,
 }
 
 static void
-read_current_perf_counters (vlib_main_t * vm, u64 * c0, u64 * c1,
-                           vlib_node_runtime_t * node,
-                           vlib_frame_t * frame, int before_or_after)
+read_current_perf_counters (vlib_node_runtime_perf_callback_data_t * data,
+                           vlib_node_runtime_perf_callback_args_t * args)
 {
   int i;
-  u64 *cc;
   perfmon_main_t *pm = &perfmon_main;
-  uword my_thread_index = vm->thread_index;
+  perfmon_thread_t *pt = data->u[0].v;
+  u64 c[2] = { 0, 0 };
+  u64 *cc;
 
-  *c0 = *c1 = 0;
+  if (PREDICT_FALSE (args->call_type == VLIB_NODE_RUNTIME_PERF_RESET))
+    return;
+
+  if (args->call_type == VLIB_NODE_RUNTIME_PERF_BEFORE)
+    cc = pt->c;
+  else
+    cc = c;
 
   for (i = 0; i < pm->n_active; i++)
     {
-      cc = (i == 0) ? c0 : c1;
-      if (pm->rdpmc_indices[i][my_thread_index] != ~0)
-       *cc = clib_rdpmc ((int) pm->rdpmc_indices[i][my_thread_index]);
+      if (pt->rdpmc_indices[i] != ~0)
+       cc[i] = clib_rdpmc ((int) pt->rdpmc_indices[i]);
       else
        {
          u64 sw_value;
          int read_result;
-         if ((read_result = read (pm->pm_fds[i][my_thread_index], &sw_value,
-                                  sizeof (sw_value)) != sizeof (sw_value)))
+         if ((read_result = read (pt->pm_fds[i], &sw_value,
+                                  sizeof (sw_value))) != sizeof (sw_value))
            {
              clib_unix_warning
                ("counter read returned %d, expected %d",
                 read_result, sizeof (sw_value));
-             clib_callback_enable_disable
-               (vm->vlib_node_runtime_perf_counter_cbs,
-                vm->vlib_node_runtime_perf_counter_cb_tmp,
-                vm->worker_thread_main_loop_callback_lock,
+             clib_callback_data_enable_disable
+               (&args->vm->vlib_node_runtime_perf_callbacks,
                 read_current_perf_counters, 0 /* enable */ );
              return;
            }
-         *cc = sw_value;
+         cc[i] = sw_value;
        }
     }
+
+  if (args->call_type == VLIB_NODE_RUNTIME_PERF_AFTER)
+    {
+      u32 node_index = args->node->node_index;
+      vec_validate (pt->counters, node_index);
+      pt->counters[node_index].ticks[0] += c[0] - pt->c[0];
+      pt->counters[node_index].ticks[1] += c[1] - pt->c[1];
+      pt->counters[node_index].vectors += args->packets;
+    }
 }
 
 static void
 clear_counters (perfmon_main_t * pm)
 {
-  int i, j;
+  int j;
   vlib_main_t *vm = pm->vlib_main;
   vlib_main_t *stat_vm;
-  vlib_node_main_t *nm;
-  vlib_node_t *n;
+  perfmon_thread_t *pt;
+  u32 len;
+
 
   vlib_worker_thread_barrier_sync (vm);
 
@@ -88,26 +101,12 @@ clear_counters (perfmon_main_t * pm)
       if (stat_vm == 0)
        continue;
 
-      nm = &stat_vm->node_main;
-
-      /* Clear the node runtime perfmon counters */
-      for (i = 0; i < vec_len (nm->nodes); i++)
-       {
-         n = nm->nodes[i];
-         vlib_node_sync_stats (stat_vm, n);
-       }
+      pt = pm->threads[j];
+      len = vec_len (pt->counters);
+      if (!len)
+       continue;
 
-      /* And clear the node perfmon counters */
-      for (i = 0; i < vec_len (nm->nodes); i++)
-       {
-         n = nm->nodes[i];
-         n->stats_total.perf_counter0_ticks = 0;
-         n->stats_total.perf_counter1_ticks = 0;
-         n->stats_total.perf_counter_vectors = 0;
-         n->stats_last_clear.perf_counter0_ticks = 0;
-         n->stats_last_clear.perf_counter1_ticks = 0;
-         n->stats_last_clear.perf_counter_vectors = 0;
-       }
+      clib_memset (pt->counters, 0, len * sizeof (pt->counters[0]));
     }
   vlib_worker_thread_barrier_release (vm);
 }
@@ -121,19 +120,20 @@ enable_current_events (perfmon_main_t * pm)
   perfmon_event_config_t *c;
   vlib_main_t *vm = vlib_get_main ();
   u32 my_thread_index = vm->thread_index;
+  perfmon_thread_t *pt = pm->threads[my_thread_index];
   u32 index;
   int i, limit = 1;
   int cpu;
+  vlib_node_runtime_perf_callback_data_t cbdata = { 0 };
+  cbdata.fp = read_current_perf_counters;
+  cbdata.u[0].v = pt;
+  cbdata.u[1].v = vm;
 
   if ((pm->current_event + 1) < vec_len (pm->single_events_to_collect))
     limit = 2;
 
   for (i = 0; i < limit; i++)
     {
-      vec_validate (pm->pm_fds[i], vec_len (vlib_mains) - 1);
-      vec_validate (pm->perf_event_pages[i], vec_len (vlib_mains) - 1);
-      vec_validate (pm->rdpmc_indices[i], vec_len (vlib_mains) - 1);
-
       c = vec_elt_at_index (pm->single_events_to_collect,
                            pm->current_event + i);
 
@@ -184,8 +184,8 @@ enable_current_events (perfmon_main_t * pm)
       if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
        clib_unix_warning ("enable ioctl");
 
-      pm->perf_event_pages[i][my_thread_index] = (void *) p;
-      pm->pm_fds[i][my_thread_index] = fd;
+      pt->perf_event_pages[i] = (void *) p;
+      pt->pm_fds[i] = fd;
     }
 
   /*
@@ -194,9 +194,7 @@ enable_current_events (perfmon_main_t * pm)
    */
   for (i = 0; i < limit; i++)
     {
-      p =
-       (struct perf_event_mmap_page *)
-       pm->perf_event_pages[i][my_thread_index];
+      p = (struct perf_event_mmap_page *) pt->perf_event_pages[i];
 
       /*
        * Software event counters - and others not capable of being
@@ -208,16 +206,12 @@ enable_current_events (perfmon_main_t * pm)
       else
        index = p->index - 1;
 
-      pm->rdpmc_indices[i][my_thread_index] = index;
+      pt->rdpmc_indices[i] = index;
     }
 
   pm->n_active = i;
   /* Enable the main loop counter snapshot mechanism */
-  clib_callback_enable_disable
-    (vm->vlib_node_runtime_perf_counter_cbs,
-     vm->vlib_node_runtime_perf_counter_cb_tmp,
-     vm->worker_thread_main_loop_callback_lock,
-     read_current_perf_counters, 1 /* enable */ );
+  clib_callback_data_add (&vm->vlib_node_runtime_perf_callbacks, cbdata);
 }
 
 static void
@@ -225,35 +219,30 @@ disable_events (perfmon_main_t * pm)
 {
   vlib_main_t *vm = vlib_get_main ();
   u32 my_thread_index = vm->thread_index;
+  perfmon_thread_t *pt = pm->threads[my_thread_index];
   int i;
 
   /* Stop main loop collection */
-  clib_callback_enable_disable
-    (vm->vlib_node_runtime_perf_counter_cbs,
-     vm->vlib_node_runtime_perf_counter_cb_tmp,
-     vm->worker_thread_main_loop_callback_lock,
-     read_current_perf_counters, 0 /* enable */ );
+  clib_callback_data_remove (&vm->vlib_node_runtime_perf_callbacks,
+                            read_current_perf_counters);
 
   for (i = 0; i < pm->n_active; i++)
     {
-      if (pm->pm_fds[i][my_thread_index] == 0)
+      if (pt->pm_fds[i] == 0)
        continue;
 
-      if (ioctl (pm->pm_fds[i][my_thread_index], PERF_EVENT_IOC_DISABLE, 0) <
-         0)
+      if (ioctl (pt->pm_fds[i], PERF_EVENT_IOC_DISABLE, 0) < 0)
        clib_unix_warning ("disable ioctl");
 
-      if (pm->perf_event_pages[i][my_thread_index])
+      if (pt->perf_event_pages[i])
        {
-         if (munmap (pm->perf_event_pages[i][my_thread_index],
-                     pm->page_size) < 0)
+         if (munmap (pt->perf_event_pages[i], pm->page_size) < 0)
            clib_unix_warning ("munmap");
-         pm->perf_event_pages[i][my_thread_index] = 0;
+         pt->perf_event_pages[i] = 0;
        }
 
-      (void) close (pm->pm_fds[i][my_thread_index]);
-      pm->pm_fds[i][my_thread_index] = 0;
-
+      (void) close (pt->pm_fds[i]);
+      pt->pm_fds[i] = 0;
     }
 }
 
@@ -265,7 +254,7 @@ worker_thread_start_event (vlib_main_t * vm)
   clib_callback_enable_disable (vm->worker_thread_main_loop_callbacks,
                                vm->worker_thread_main_loop_callback_tmp,
                                vm->worker_thread_main_loop_callback_lock,
-                               worker_thread_start_event, 0 /* enable */ );
+                               worker_thread_start_event, 0 /* disable */ );
   enable_current_events (pm);
 }
 
@@ -276,7 +265,7 @@ worker_thread_stop_event (vlib_main_t * vm)
   clib_callback_enable_disable (vm->worker_thread_main_loop_callbacks,
                                vm->worker_thread_main_loop_callback_tmp,
                                vm->worker_thread_main_loop_callback_lock,
-                               worker_thread_stop_event, 0 /* enable */ );
+                               worker_thread_stop_event, 0 /* disable */ );
   disable_events (pm);
 }
 
@@ -329,14 +318,15 @@ scrape_and_clear_counters (perfmon_main_t * pm)
   vlib_main_t *vm = pm->vlib_main;
   vlib_main_t *stat_vm;
   vlib_node_main_t *nm;
-  vlib_node_t ***node_dups = 0;
-  vlib_node_t **nodes;
-  vlib_node_t *n;
+  perfmon_counters_t *ctr;
+  perfmon_counters_t *ctrs;
+  perfmon_counters_t **ctr_dups = 0;
+  perfmon_thread_t *pt;
   perfmon_capture_t *c;
   perfmon_event_config_t *current_event;
   uword *p;
   u8 *counter_name;
-  u64 vectors_this_counter;
+  u32 len;
 
   /* snapshoot the nodes, including pm counters */
   vlib_worker_thread_barrier_sync (vm);
@@ -347,31 +337,16 @@ scrape_and_clear_counters (perfmon_main_t * pm)
       if (stat_vm == 0)
        continue;
 
-      nm = &stat_vm->node_main;
-
-      for (i = 0; i < vec_len (nm->nodes); i++)
-       {
-         n = nm->nodes[i];
-         vlib_node_sync_stats (stat_vm, n);
-       }
-
-      nodes = 0;
-      vec_validate (nodes, vec_len (nm->nodes) - 1);
-      vec_add1 (node_dups, nodes);
-
-      /* Snapshoot and clear the per-node perfmon counters */
-      for (i = 0; i < vec_len (nm->nodes); i++)
+      pt = pm->threads[j];
+      len = vec_len (pt->counters);
+      ctrs = 0;
+      if (len)
        {
-         n = nm->nodes[i];
-         nodes[i] = clib_mem_alloc (sizeof (*n));
-         clib_memcpy_fast (nodes[i], n, sizeof (*n));
-         n->stats_total.perf_counter0_ticks = 0;
-         n->stats_total.perf_counter1_ticks = 0;
-         n->stats_total.perf_counter_vectors = 0;
-         n->stats_last_clear.perf_counter0_ticks = 0;
-         n->stats_last_clear.perf_counter1_ticks = 0;
-         n->stats_last_clear.perf_counter_vectors = 0;
+         vec_validate (ctrs, len - 1);
+         clib_memcpy (ctrs, pt->counters, len * sizeof (pt->counters[0]));
+         clib_memset (pt->counters, 0, len * sizeof (pt->counters[0]));
        }
+      vec_add1 (ctr_dups, ctrs);
     }
 
   vlib_worker_thread_barrier_release (vm);
@@ -382,22 +357,21 @@ scrape_and_clear_counters (perfmon_main_t * pm)
       if (stat_vm == 0)
        continue;
 
-      nodes = node_dups[j];
+      pt = pm->threads[j];
+      ctrs = ctr_dups[j];
 
-      for (i = 0; i < vec_len (nodes); i++)
+      for (i = 0; i < vec_len (ctrs); i++)
        {
          u8 *capture_name;
 
-         n = nodes[i];
+         ctr = &ctrs[i];
+         nm = &stat_vm->node_main;
 
-         if (n->stats_total.perf_counter0_ticks == 0 &&
-             n->stats_total.perf_counter1_ticks == 0)
-           goto skip_this_node;
+         if (ctr->ticks[0] == 0 && ctr->ticks[1] == 0)
+           continue;
 
          for (k = 0; k < 2; k++)
            {
-             u64 counter_value, counter_last_clear;
-
              /*
               * We collect 2 counters at once, except for the
               * last counter when the user asks for an odd number of
@@ -407,20 +381,7 @@ scrape_and_clear_counters (perfmon_main_t * pm)
                  >= vec_len (pm->single_events_to_collect))
                break;
 
-             if (k == 0)
-               {
-                 counter_value = n->stats_total.perf_counter0_ticks;
-                 counter_last_clear =
-                   n->stats_last_clear.perf_counter0_ticks;
-               }
-             else
-               {
-                 counter_value = n->stats_total.perf_counter1_ticks;
-                 counter_last_clear =
-                   n->stats_last_clear.perf_counter1_ticks;
-               }
-
-             capture_name = format (0, "t%d-%v%c", j, n->name, 0);
+             capture_name = format (0, "t%d-%v%c", j, nm->nodes[i]->name, 0);
 
              p = hash_get_mem (pm->capture_by_thread_and_node_name,
                                capture_name);
@@ -443,20 +404,15 @@ scrape_and_clear_counters (perfmon_main_t * pm)
              current_event = pm->single_events_to_collect
                + pm->current_event + k;
              counter_name = (u8 *) current_event->name;
-             vectors_this_counter = n->stats_total.perf_counter_vectors -
-               n->stats_last_clear.perf_counter_vectors;
 
              vec_add1 (c->counter_names, counter_name);
-             vec_add1 (c->counter_values,
-                       counter_value - counter_last_clear);
-             vec_add1 (c->vectors_this_counter, vectors_this_counter);
+             vec_add1 (c->counter_values, ctr->ticks[k]);
+             vec_add1 (c->vectors_this_counter, ctr->vectors);
            }
-       skip_this_node:
-         clib_mem_free (n);
        }
-      vec_free (nodes);
+      vec_free (ctrs);
     }
-  vec_free (node_dups);
+  vec_free (ctr_dups);
 }
 
 static void
@@ -492,9 +448,8 @@ handle_timeout (vlib_main_t * vm, perfmon_main_t * pm, f64 now)
       for (i = 1; i < vec_len (vlib_mains); i++)
        {
          /* Has the worker actually stopped collecting data? */
-         while (clib_callback_is_set
-                (vlib_mains[i]->worker_thread_main_loop_callbacks,
-                 vlib_mains[i]->worker_thread_main_loop_callback_lock,
+         while (clib_callback_data_is_set
+                (&vlib_mains[i]->vlib_node_runtime_perf_callbacks,
                  read_current_perf_counters))
            {
              if (vlib_time_now (vm) > deadman)
@@ -528,7 +483,7 @@ handle_timeout (vlib_main_t * vm, perfmon_main_t * pm, f64 now)
          (vlib_mains[i]->worker_thread_main_loop_callbacks,
           vlib_mains[i]->worker_thread_main_loop_callback_tmp,
           vlib_mains[i]->worker_thread_main_loop_callback_lock,
-          worker_thread_start_event, 1 /* enable */ );
+          worker_thread_start_event, 1 /* enable */ );
     }
 }
 
diff --git a/src/plugins/perfmon/perfmon_plugin.c b/src/plugins/perfmon/perfmon_plugin.c
new file mode 100644 (file)
index 0000000..1d56573
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * perfmon_plugin.c - perf monitor plugin
+ *
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () =
+{
+  .version = VPP_BUILD_VER,
+  .description = "Performance Monitor",
+#if !defined(__x86_64__)
+  .default_disabled = 1,
+#endif
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
index d14ea68..2697c0a 100644 (file)
@@ -39,6 +39,7 @@
 
 #include <vlib/vlib.h>
 #include <vlib/unix/unix.h>
+#include <vppinfra/callback.h>
 #include <vppinfra/cpu.h>
 #include <vppinfra/elog.h>
 #include <unistd.h>
@@ -563,10 +564,16 @@ vlib_cli_dispatch_sub_commands (vlib_main_t * vm,
 
              if (!c->is_mp_safe)
                vlib_worker_thread_barrier_sync (vm);
+             if (PREDICT_FALSE (vec_len (cm->perf_counter_cbs) != 0))
+               clib_call_callbacks (cm->perf_counter_cbs, cm,
+                                    c - cm->commands, 0 /* before */ );
 
              c->hit_counter++;
              c_error = c->function (vm, si, c);
 
+             if (PREDICT_FALSE (vec_len (cm->perf_counter_cbs) != 0))
+               clib_call_callbacks (cm->perf_counter_cbs, cm,
+                                    c - cm->commands, 1 /* after */ );
              if (!c->is_mp_safe)
                vlib_worker_thread_barrier_release (vm);
 
index df9ed72..0a8ef9d 100644 (file)
@@ -132,7 +132,7 @@ typedef struct vlib_cli_command_t
 
 typedef void (vlib_cli_output_function_t) (uword arg,
                                           u8 * buffer, uword buffer_bytes);
-typedef struct
+typedef struct vlib_cli_main_t
 {
   /* Vector of all known commands. */
   vlib_cli_command_t *commands;
@@ -146,6 +146,12 @@ typedef struct
   /* index vector, to sort commands, etc. */
   u32 *sort_vector;
 
+
+  /* performance counter callback */
+  void (**perf_counter_cbs)
+    (struct vlib_cli_main_t *, u32 id, int before_or_after);
+  void (**perf_counter_cbs_tmp)
+    (struct vlib_cli_main_t *, u32 id, int before_or_after);
 } vlib_cli_main_t;
 
 #ifndef CLIB_MARCH_VARIANT
index fc63801..68ac2f3 100644 (file)
@@ -317,6 +317,19 @@ static void __vlib_rm_config_function_##x (void)                \
     _error;                                                    \
   })
 
+#define vlib_call_main_loop_enter_function(vm, x)                      \
+  ({                                                                   \
+    extern vlib_init_function_t * VLIB_MAIN_LOOP_ENTER_FUNCTION_SYMBOL (x); \
+    vlib_init_function_t * _f = VLIB_MAIN_LOOP_ENTER_FUNCTION_SYMBOL (x); \
+    clib_error_t * _error = 0;                                         \
+    if (! hash_get (vm->init_functions_called, _f))                    \
+      {                                                                        \
+       hash_set1 (vm->init_functions_called, _f);                      \
+       _error = _f (vm);                                               \
+      }                                                                        \
+    _error;                                                            \
+  })
+
 /* External functions. */
 clib_error_t *vlib_call_all_init_functions (struct vlib_main_t *vm);
 clib_error_t *vlib_call_all_config_functions (struct vlib_main_t *vm,
index 8d7c6c0..cb651e4 100644 (file)
@@ -568,41 +568,29 @@ vlib_put_next_frame (vlib_main_t * vm,
 never_inline void
 vlib_node_runtime_sync_stats (vlib_main_t * vm,
                              vlib_node_runtime_t * r,
-                             uword n_calls, uword n_vectors, uword n_clocks,
-                             uword n_ticks0, uword n_ticks1)
+                             uword n_calls, uword n_vectors, uword n_clocks)
 {
   vlib_node_t *n = vlib_get_node (vm, r->node_index);
 
   n->stats_total.calls += n_calls + r->calls_since_last_overflow;
   n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow;
   n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow;
-  n->stats_total.perf_counter0_ticks += n_ticks0 +
-    r->perf_counter0_ticks_since_last_overflow;
-  n->stats_total.perf_counter1_ticks += n_ticks1 +
-    r->perf_counter1_ticks_since_last_overflow;
-  n->stats_total.perf_counter_vectors += n_vectors +
-    r->perf_counter_vectors_since_last_overflow;
   n->stats_total.max_clock = r->max_clock;
   n->stats_total.max_clock_n = r->max_clock_n;
 
   r->calls_since_last_overflow = 0;
   r->vectors_since_last_overflow = 0;
   r->clocks_since_last_overflow = 0;
-  r->perf_counter0_ticks_since_last_overflow = 0ULL;
-  r->perf_counter1_ticks_since_last_overflow = 0ULL;
-  r->perf_counter_vectors_since_last_overflow = 0ULL;
 }
 
 always_inline void __attribute__ ((unused))
 vlib_process_sync_stats (vlib_main_t * vm,
                         vlib_process_t * p,
-                        uword n_calls, uword n_vectors, uword n_clocks,
-                        uword n_ticks0, uword n_ticks1)
+                        uword n_calls, uword n_vectors, uword n_clocks)
 {
   vlib_node_runtime_t *rt = &p->node_runtime;
   vlib_node_t *n = vlib_get_node (vm, rt->node_index);
-  vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks,
-                               n_ticks0, n_ticks1);
+  vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks);
   n->stats_total.suspends += p->n_suspends;
   p->n_suspends = 0;
 }
@@ -628,7 +616,7 @@ vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n)
       vec_elt_at_index (vm->node_main.nodes_by_type[n->type],
                        n->runtime_index);
 
-  vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0, 0, 0);
+  vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0);
 
   /* Sync up runtime next frame vector counters with main node structure. */
   {
@@ -648,32 +636,21 @@ always_inline u32
 vlib_node_runtime_update_stats (vlib_main_t * vm,
                                vlib_node_runtime_t * node,
                                uword n_calls,
-                               uword n_vectors, uword n_clocks,
-                               uword n_ticks0, uword n_ticks1)
+                               uword n_vectors, uword n_clocks)
 {
   u32 ca0, ca1, v0, v1, cl0, cl1, r;
-  u32 ptick00, ptick01, ptick10, ptick11, pvec0, pvec1;
 
   cl0 = cl1 = node->clocks_since_last_overflow;
   ca0 = ca1 = node->calls_since_last_overflow;
   v0 = v1 = node->vectors_since_last_overflow;
-  ptick00 = ptick01 = node->perf_counter0_ticks_since_last_overflow;
-  ptick10 = ptick11 = node->perf_counter1_ticks_since_last_overflow;
-  pvec0 = pvec1 = node->perf_counter_vectors_since_last_overflow;
 
   ca1 = ca0 + n_calls;
   v1 = v0 + n_vectors;
   cl1 = cl0 + n_clocks;
-  ptick01 = ptick00 + n_ticks0;
-  ptick11 = ptick10 + n_ticks1;
-  pvec1 = pvec0 + n_vectors;
 
   node->calls_since_last_overflow = ca1;
   node->clocks_since_last_overflow = cl1;
   node->vectors_since_last_overflow = v1;
-  node->perf_counter0_ticks_since_last_overflow = ptick01;
-  node->perf_counter1_ticks_since_last_overflow = ptick11;
-  node->perf_counter_vectors_since_last_overflow = pvec1;
 
   node->max_clock_n = node->max_clock > n_clocks ?
     node->max_clock_n : n_vectors;
@@ -681,42 +658,25 @@ vlib_node_runtime_update_stats (vlib_main_t * vm,
 
   r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors);
 
-  if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0) || (ptick01 < ptick00)
-      || (ptick11 < ptick10) || (pvec1 < pvec0))
+  if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0))
     {
       node->calls_since_last_overflow = ca0;
       node->clocks_since_last_overflow = cl0;
       node->vectors_since_last_overflow = v0;
-      node->perf_counter0_ticks_since_last_overflow = ptick00;
-      node->perf_counter1_ticks_since_last_overflow = ptick10;
-      node->perf_counter_vectors_since_last_overflow = pvec0;
 
-      vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks,
-                                   n_ticks0, n_ticks1);
+      vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks);
     }
 
   return r;
 }
 
-always_inline void
-vlib_node_runtime_perf_counter (vlib_main_t * vm, u64 * pmc0, u64 * pmc1,
-                               vlib_node_runtime_t * node,
-                               vlib_frame_t * frame, int before_or_after)
-{
-  *pmc0 = 0;
-  *pmc1 = 0;
-  if (PREDICT_FALSE (vec_len (vm->vlib_node_runtime_perf_counter_cbs) != 0))
-    clib_call_callbacks (vm->vlib_node_runtime_perf_counter_cbs, vm, pmc0,
-                        pmc1, node, frame, before_or_after);
-}
-
 always_inline void
 vlib_process_update_stats (vlib_main_t * vm,
                           vlib_process_t * p,
                           uword n_calls, uword n_vectors, uword n_clocks)
 {
   vlib_node_runtime_update_stats (vm, &p->node_runtime,
-                                 n_calls, n_vectors, n_clocks, 0ULL, 0ULL);
+                                 n_calls, n_vectors, n_clocks);
 }
 
 static clib_error_t *
@@ -1166,7 +1126,6 @@ dispatch_node (vlib_main_t * vm,
   u64 t;
   vlib_node_main_t *nm = &vm->node_main;
   vlib_next_frame_t *nf;
-  u64 pmc_before[2], pmc_after[2], pmc_delta[2];
 
   if (CLIB_DEBUG > 0)
     {
@@ -1206,8 +1165,8 @@ dispatch_node (vlib_main_t * vm,
                             last_time_stamp, frame ? frame->n_vectors : 0,
                             /* is_after */ 0);
 
-  vlib_node_runtime_perf_counter (vm, &pmc_before[0], &pmc_before[1],
-                                 node, frame, 0 /* before */ );
+  vlib_node_runtime_perf_counter (vm, node, frame, 0, last_time_stamp,
+                                 VLIB_NODE_RUNTIME_PERF_BEFORE);
 
   /*
    * Turn this on if you run into
@@ -1237,15 +1196,8 @@ dispatch_node (vlib_main_t * vm,
 
   t = clib_cpu_time_now ();
 
-  /*
-   * To validate accounting: pmc_delta = t - pmc_before;
-   * perf ticks should equal clocks/pkt...
-   */
-  vlib_node_runtime_perf_counter (vm, &pmc_after[0], &pmc_after[1], node,
-                                 frame, 1 /* after */ );
-
-  pmc_delta[0] = pmc_after[0] - pmc_before[0];
-  pmc_delta[1] = pmc_after[1] - pmc_before[1];
+  vlib_node_runtime_perf_counter (vm, node, frame, n, t,
+                                 VLIB_NODE_RUNTIME_PERF_AFTER);
 
   vlib_elog_main_loop_event (vm, node->node_index, t, n, 1 /* is_after */ );
 
@@ -1255,9 +1207,7 @@ dispatch_node (vlib_main_t * vm,
   v = vlib_node_runtime_update_stats (vm, node,
                                      /* n_calls */ 1,
                                      /* n_vectors */ n,
-                                     /* n_clocks */ t - last_time_stamp,
-                                     pmc_delta[0] /* PMC0 */ ,
-                                     pmc_delta[1] /* PMC1 */ );
+                                     /* n_clocks */ t - last_time_stamp);
 
   /* When in interrupt mode and vector rate crosses threshold switch to
      polling mode. */
@@ -1579,6 +1529,9 @@ dispatch_process (vlib_main_t * vm,
   old_process_index = nm->current_process_index;
   nm->current_process_index = node->runtime_index;
 
+  vlib_node_runtime_perf_counter (vm, node_runtime, f, 0, last_time_stamp,
+                                 VLIB_NODE_RUNTIME_PERF_BEFORE);
+
   n_vectors = vlib_process_startup (vm, p, f);
 
   nm->current_process_index = old_process_index;
@@ -1618,6 +1571,9 @@ dispatch_process (vlib_main_t * vm,
   vlib_elog_main_loop_event (vm, node_runtime->node_index, t, is_suspend,
                             /* is_after */ 1);
 
+  vlib_node_runtime_perf_counter (vm, node_runtime, f, n_vectors, t,
+                                 VLIB_NODE_RUNTIME_PERF_AFTER);
+
   vlib_process_update_stats (vm, p,
                             /* n_calls */ !is_suspend,
                             /* n_vectors */ n_vectors,
@@ -1668,6 +1624,9 @@ dispatch_suspended_process (vlib_main_t * vm,
   /* Save away current process for suspend. */
   nm->current_process_index = node->runtime_index;
 
+  vlib_node_runtime_perf_counter (vm, node_runtime, f, 0, last_time_stamp,
+                                 VLIB_NODE_RUNTIME_PERF_BEFORE);
+
   n_vectors = vlib_process_resume (vm, p);
   t = clib_cpu_time_now ();
 
@@ -1701,6 +1660,9 @@ dispatch_suspended_process (vlib_main_t * vm,
   vlib_elog_main_loop_event (vm, node_runtime->node_index, t, !is_suspend,
                             /* is_after */ 1);
 
+  vlib_node_runtime_perf_counter (vm, node_runtime, f, n_vectors, t,
+                                 VLIB_NODE_RUNTIME_PERF_AFTER);
+
   vlib_process_update_stats (vm, p,
                             /* n_calls */ !is_suspend,
                             /* n_vectors */ n_vectors,
@@ -1831,11 +1793,14 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
              else
                frame_queue_check_counter--;
            }
-         if (PREDICT_FALSE (vec_len (vm->worker_thread_main_loop_callbacks)))
-           clib_call_callbacks (vm->worker_thread_main_loop_callbacks, vm);
        }
 
+      if (PREDICT_FALSE (vec_len (vm->worker_thread_main_loop_callbacks)))
+       clib_call_callbacks (vm->worker_thread_main_loop_callbacks, vm,
+                            cpu_time_now);
+
       /* Process pre-input nodes. */
+      cpu_time_now = clib_cpu_time_now ();
       vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
        cpu_time_now = dispatch_node (vm, n,
                                      VLIB_NODE_TYPE_PRE_INPUT,
index 2e070aa..f7a4a1c 100644 (file)
@@ -40,6 +40,7 @@
 #ifndef included_vlib_main_h
 #define included_vlib_main_h
 
+#include <vppinfra/callback_data.h>
 #include <vppinfra/elog.h>
 #include <vppinfra/format.h>
 #include <vppinfra/longjmp.h>
@@ -80,6 +81,42 @@ typedef struct
   u32 trace_filter_set_index;
 } vlib_trace_filter_t;
 
+typedef enum
+{
+  VLIB_NODE_RUNTIME_PERF_BEFORE,
+  VLIB_NODE_RUNTIME_PERF_AFTER,
+  VLIB_NODE_RUNTIME_PERF_RESET,
+} vlib_node_runtime_perf_call_type_t;
+
+typedef struct
+{
+  struct vlib_main_t *vm;
+  vlib_node_runtime_t *node;
+  vlib_frame_t *frame;
+  uword packets;
+  u64 cpu_time_now;
+  vlib_node_runtime_perf_call_type_t call_type;
+} vlib_node_runtime_perf_callback_args_t;
+
+struct vlib_node_runtime_perf_callback_data_t;
+
+typedef void (*vlib_node_runtime_perf_callback_fp_t)
+  (struct vlib_node_runtime_perf_callback_data_t * data,
+   vlib_node_runtime_perf_callback_args_t * args);
+
+typedef struct vlib_node_runtime_perf_callback_data_t
+{
+  vlib_node_runtime_perf_callback_fp_t fp;
+  union
+  {
+    void *v;
+    u64 u;
+  } u[3];
+} vlib_node_runtime_perf_callback_data_t;
+
+clib_callback_data_typedef (vlib_node_runtime_perf_callback_set_t,
+                           vlib_node_runtime_perf_callback_data_t);
+
 typedef struct vlib_main_t
 {
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -112,14 +149,8 @@ typedef struct vlib_main_t
   u32 internal_node_last_vectors_per_main_loop;
 
   /* Main loop hw / sw performance counters */
-  void (**vlib_node_runtime_perf_counter_cbs) (struct vlib_main_t *,
-                                              u64 *, u64 *,
-                                              vlib_node_runtime_t *,
-                                              vlib_frame_t *, int);
-  void (**vlib_node_runtime_perf_counter_cb_tmp) (struct vlib_main_t *,
-                                                 u64 *, u64 *,
-                                                 vlib_node_runtime_t *,
-                                                 vlib_frame_t *, int);
+  vlib_node_runtime_perf_callback_set_t vlib_node_runtime_perf_callbacks;
+
   /* Every so often we switch to the next counter. */
 #define VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE 7
 
@@ -234,9 +265,10 @@ typedef struct vlib_main_t
   u8 **argv;
 
   /* Top of (worker) dispatch loop callback */
-  void (**volatile worker_thread_main_loop_callbacks) (struct vlib_main_t *);
+  void (**volatile worker_thread_main_loop_callbacks)
+    (struct vlib_main_t *, u64 t);
   void (**volatile worker_thread_main_loop_callback_tmp)
-    (struct vlib_main_t *);
+    (struct vlib_main_t *, u64 t);
   clib_spinlock_t worker_thread_main_loop_callback_lock;
 
   /* debugging */
@@ -268,6 +300,12 @@ typedef struct vlib_main_t
   /* Earliest barrier can be closed again */
   f64 barrier_no_close_before;
 
+  /* Barrier counter callback */
+  void (**volatile barrier_perf_callbacks)
+    (struct vlib_main_t *, u64 t, int leave);
+  void (**volatile barrier_perf_callbacks_tmp)
+    (struct vlib_main_t *, u64 t, int leave);
+
   /* Need to check the frame queues */
   volatile uword check_frame_queues;
 
@@ -399,6 +437,27 @@ vlib_last_vectors_per_main_loop (vlib_main_t * vm)
   return vm->internal_node_last_vectors_per_main_loop;
 }
 
+always_inline void
+vlib_node_runtime_perf_counter (vlib_main_t * vm, vlib_node_runtime_t * node,
+                               vlib_frame_t * frame, uword n, u64 t,
+                               vlib_node_runtime_perf_call_type_t call_type)
+{
+  vlib_node_runtime_perf_callback_data_t *v =
+    clib_callback_data_check_and_get (&vm->vlib_node_runtime_perf_callbacks);
+  if (vec_len (v))
+    {
+      vlib_node_runtime_perf_callback_args_t args = {
+       .vm = vm,
+       .node = node,
+       .frame = frame,
+       .packets = n,
+       .cpu_time_now = t,
+       .call_type = call_type,
+      };
+      clib_callback_data_call_vec (v, &args);
+    }
+}
+
 always_inline void vlib_set_queue_signal_callback
   (vlib_main_t * vm, void (*fp) (vlib_main_t *))
 {
index 9c4cadd..f7155ae 100644 (file)
@@ -235,9 +235,6 @@ typedef struct
   u64 calls, vectors, clocks, suspends;
   u64 max_clock;
   u64 max_clock_n;
-  u64 perf_counter0_ticks;
-  u64 perf_counter1_ticks;
-  u64 perf_counter_vectors;
 } vlib_node_stats_t;
 
 #define foreach_vlib_node_state                                        \
@@ -484,10 +481,6 @@ typedef struct vlib_node_runtime_t
   u32 vectors_since_last_overflow;     /**< Number of vector elements
                                          processed by this node. */
 
-  u32 perf_counter0_ticks_since_last_overflow; /**< Perf counter 0 ticks */
-  u32 perf_counter1_ticks_since_last_overflow; /**< Perf counter 1 ticks */
-  u32 perf_counter_vectors_since_last_overflow;        /**< Perf counter vectors */
-
   u32 next_frame_index;                        /**< Start of next frames for this
                                          node. */
 
index 89f2123..dfeba17 100644 (file)
@@ -201,6 +201,10 @@ vlib_node_set_state (vlib_main_t * vm, u32 node_index,
       nm->input_node_counts_by_state[new_state] += 1;
     }
 
+  if (PREDICT_FALSE (r->state == VLIB_NODE_STATE_DISABLED))
+    vlib_node_runtime_perf_counter (vm, r, 0, 0, 0,
+                                   VLIB_NODE_RUNTIME_PERF_RESET);
+
   n->state = new_state;
   r->state = new_state;
 }
index a8c1a1a..4df550e 100644 (file)
@@ -700,6 +700,9 @@ start_workers (vlib_main_t * vm)
     clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
   vm->elog_main.lock[0] = 0;
 
+  clib_callback_data_init (&vm->vlib_node_runtime_perf_callbacks,
+                          &vm->worker_thread_main_loop_callback_lock);
+
   if (n_vlib_mains > 1)
     {
       /* Replace hand-crafted length-1 vector with a real vector */
@@ -734,6 +737,7 @@ start_workers (vlib_main_t * vm)
       vm->barrier_no_close_before = 0;
 
       worker_thread_index = 1;
+      clib_spinlock_init (&vm->worker_thread_main_loop_callback_lock);
 
       for (i = 0; i < vec_len (tm->registrations); i++)
        {
@@ -790,6 +794,11 @@ start_workers (vlib_main_t * vm)
              _vec_len (vm_clone->pending_rpc_requests) = 0;
              clib_memset (&vm_clone->random_buffer, 0,
                           sizeof (vm_clone->random_buffer));
+             clib_spinlock_init
+               (&vm_clone->worker_thread_main_loop_callback_lock);
+             clib_callback_data_init
+               (&vm_clone->vlib_node_runtime_perf_callbacks,
+                &vm_clone->worker_thread_main_loop_callback_lock);
 
              nm = &vlib_mains[0]->node_main;
              nm_clone = &vm_clone->node_main;
@@ -1466,6 +1475,10 @@ vlib_worker_thread_barrier_sync_int (vlib_main_t * vm, const char *func_name)
       return;
     }
 
+  if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0))
+    clib_call_callbacks (vm->barrier_perf_callbacks, vm,
+                        vm->clib_time.last_cpu_time, 0 /* enter */ );
+
   /*
    * Need data to decide if we're working hard enough to honor
    * the barrier hold-down timer.
@@ -1629,6 +1642,9 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
 
   barrier_trace_release (t_entry, t_closed_total, t_update_main);
 
+  if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0))
+    clib_call_callbacks (vm->barrier_perf_callbacks, vm,
+                        vm->clib_time.last_cpu_time, 1 /* leave */ );
 }
 
 /*
index c1188ce..e8d4169 100644 (file)
@@ -16,6 +16,7 @@
 #define included_vlib_threads_h
 
 #include <vlib/main.h>
+#include <vppinfra/callback.h>
 #include <linux/sched.h>
 
 extern vlib_main_t **vlib_mains;
@@ -400,6 +401,10 @@ vlib_worker_thread_barrier_check (void)
       u32 thread_index = vm->thread_index;
       f64 t = vlib_time_now (vm);
 
+      if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0))
+       clib_call_callbacks (vm->barrier_perf_callbacks, vm,
+                            vm->clib_time.last_cpu_time, 0 /* enter */ );
+
       if (PREDICT_FALSE (vlib_worker_threads->barrier_elog_enabled))
        {
          vlib_worker_thread_t *w = vlib_worker_threads + thread_index;
@@ -498,6 +503,10 @@ vlib_worker_thread_barrier_check (void)
          ed->thread_index = thread_index;
          ed->duration = (int) (1000000.0 * t);
        }
+
+      if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0))
+       clib_call_callbacks (vm->barrier_perf_callbacks, vm,
+                            vm->clib_time.last_cpu_time, 1 /* leave */ );
     }
 }
 
index 86b1c5a..915ddab 100644 (file)
@@ -224,7 +224,7 @@ typedef struct
 } api_version_t;
 
 /** API main structure, used by both vpp and binary API clients */
-typedef struct
+typedef struct api_main_t
 {
   /** Message handler vector  */
   void (**msg_handlers) (void *);
@@ -374,6 +374,12 @@ typedef struct
   elog_main_t *elog_main;
   int elog_trace_api_messages;
 
+  /** performance counter callback **/
+  void (**perf_counter_cbs)
+    (struct api_main_t *, u32 id, int before_or_after);
+  void (**perf_counter_cbs_tmp)
+    (struct api_main_t *, u32 id, int before_or_after);
+
 } api_main_t;
 
 extern __thread api_main_t *my_api_main;
index caad6e5..5e715d6 100644 (file)
@@ -30,6 +30,7 @@
 #include <vlib/unix/unix.h>
 #include <vlibapi/api.h>
 #include <vppinfra/elog.h>
+#include <vppinfra/callback.h>
 
 /* *INDENT-OFF* */
 api_main_t api_global_main =
@@ -493,7 +494,15 @@ msg_handler_internal (api_main_t * am,
              (*endian_fp) (the_msg);
            }
 
+         if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0))
+           clib_call_callbacks (am->perf_counter_cbs, am, id,
+                                0 /* before */ );
+
          (*am->msg_handlers[id]) (the_msg);
+
+         if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0))
+           clib_call_callbacks (am->perf_counter_cbs, am, id,
+                                1 /* after */ );
          if (!am->is_mp_safe[id])
            vl_msg_api_barrier_release ();
        }
@@ -620,8 +629,13 @@ vl_msg_api_handler_with_vm_node (api_main_t * am, svm_region_t * vlib_rp,
          endian_fp = am->msg_endian_handlers[id];
          (*endian_fp) (the_msg);
        }
+      if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0))
+       clib_call_callbacks (am->perf_counter_cbs, am, id, 0 /* before */ );
 
       (*handler) (the_msg, vm, node);
+
+      if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0))
+       clib_call_callbacks (am->perf_counter_cbs, am, id, 1 /* after */ );
       if (is_private)
        {
          am->vlib_rp = old_vlib_rp;
index dfefdba..6d5b356 100644 (file)
@@ -872,6 +872,8 @@ vnet_register_interface (vnet_main_t * vnm,
       foreach_vlib_main ({
         nrt = vlib_node_get_runtime (this_vlib_main, hw->output_node_index);
         nrt->function = node->function;
+       vlib_node_runtime_perf_counter (this_vlib_main, nrt, 0, 0, 0,
+                                       VLIB_NODE_RUNTIME_PERF_RESET);
       });
       /* *INDENT-ON* */
 
@@ -882,6 +884,8 @@ vnet_register_interface (vnet_main_t * vnm,
       foreach_vlib_main ({
         nrt = vlib_node_get_runtime (this_vlib_main, hw->tx_node_index);
         nrt->function = node->function;
+       vlib_node_runtime_perf_counter (this_vlib_main, nrt, 0, 0, 0,
+                                       VLIB_NODE_RUNTIME_PERF_RESET);
       });
       /* *INDENT-ON* */
 
index a10f335..8648275 100644 (file)
@@ -102,6 +102,7 @@ set(VPPINFRA_HEADERS
   byte_order.h
   cache.h
   callback.h
+  callback_data.h
   clib_error.h
   clib.h
   cpu.h
index 595d69d..a938ea3 100644 (file)
@@ -70,12 +70,11 @@ do {                                                    \
    * Note: fp exists to shut up gcc-6, which            \
    * produces a warning not seen with gcc-7 or 8        \
    */                                                   \
-  void (*fp)(void *a1, ...);                            \
+  typeof (h) h_ = (h);                                  \
   int i;                                                \
-  for (i = 0; i < vec_len (h); i++)                     \
+  for (i = 0; i < vec_len (h_); i++)                    \
     {                                                   \
-      fp = (void *)(h[i]);                              \
-      (*fp) (__VA_ARGS__);                              \
+      (h_[i]) (__VA_ARGS__);                            \
     }                                                   \
  } while (0);
 
diff --git a/src/vppinfra/callback_data.h b/src/vppinfra/callback_data.h
new file mode 100644 (file)
index 0000000..9a1ad0a
--- /dev/null
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** @file
+ * @brief Callback multiplex scheme
+ */
+
+#ifndef included_callback_data_h
+#define included_callback_data_h
+#include <vppinfra/clib.h>
+
+/** @brief Declare and define a callback set type
+ * @param set_t_ The set type to define
+ * @param cb_t_ The callback type to use
+ *
+ * curr is the vector iterated by callers; next stages pending
+ * add/remove/swap operations, applied at the start of the next full
+ * iteration; spare caches a retired vector to avoid reallocation.
+ * lock, if non-NULL, serializes writers.
+ */
+#define clib_callback_data_typedef(set_t_, cb_t_)   \
+typedef struct set_t_                               \
+{                                                   \
+  cb_t_* curr;                                      \
+  cb_t_* volatile next;                             \
+  cb_t_* spare;                                     \
+  clib_spinlock_t* lock;                            \
+} set_t_
+
+/** @brief Initialize a callback set
+ * @param set_ The callback set to initialize
+ * @param lock_ The lock to use, if any (may be NULL for unlocked use)
+ *
+ * All three vectors start out empty (null vectors).
+ */
+#define clib_callback_data_init(set_,lock_)  \
+do {                                         \
+  (set_)->lock = (lock_);                    \
+  (set_)->curr = 0;                          \
+  (set_)->next = 0;                          \
+  (set_)->spare = 0;                         \
+} while (0)
+
+/** @brief Add a callback to the specified callback set
+ * @param set_ The callback set
+ * @param value_ The value_ to assign the callback
+ *
+ * Add a callback to the indicated callback set.  If the set is
+ * currently being iterated, then the change will be applied after the
+ * current full iteration, and prior to the next full iteration.
+ */
+#define clib_callback_data_add(set_,value_)          \
+do {                                                 \
+  clib_spinlock_lock_if_init ((set_)->lock);         \
+  typeof ((set_)->next) next_ = (set_)->next;        \
+  /* No staged copy yet: seed one from spare + curr */\
+  if (PREDICT_TRUE (next_ == 0))                     \
+    {                                                \
+      next_ = (set_)->spare;                         \
+      (set_)->spare = 0;                             \
+      vec_append (next_, (set_)->curr);              \
+    }                                                \
+  u32 sz_ = vec_len (next_);                         \
+  vec_validate (next_, sz_);                         \
+  next_[sz_] = (value_);                             \
+  (set_)->next = next_;                              \
+  clib_spinlock_unlock_if_init ((set_)->lock);       \
+} while (0)
+
<doc_update>
+/** @brief Remove a callback from the specified callback set
+ * @param set_ The callback set
+ * @param fp_ The current callback function
+ * @return 1 if the function was removed, 0 if not
+ *
+ * Remove a callback from the indicated callback set.  Idempotent.  If
+ * the set is currently being iterated, then the change will be applied
+ * after the current full iteration, and prior to the next full
+ * iteration.  Matching is by the callback's .fp member only.
+ */
+#define clib_callback_data_remove(set_,fp_)          \
+({                                                   \
+  int found_ = 0;                                    \
+  clib_spinlock_lock_if_init ((set_)->lock);         \
+  typeof ((set_)->next) next_ = (set_)->next;        \
+  /* No staged copy yet: seed one from spare + curr */\
+  if (PREDICT_TRUE (next_ == 0))                     \
+    {                                                \
+      next_ = (set_)->spare;                         \
+      (set_)->spare = 0;                             \
+      vec_append (next_, (set_)->curr);              \
+    }                                                \
+  u32 sz_ = vec_len (next_);                         \
+  u32 i_;                                            \
+  for (i_ = 0; i_ < sz_; i_++)                       \
+    if (next_[i_].fp == (fp_))                       \
+      {                                              \
+        vec_delete (next_, 1, i_);                   \
+        found_ = 1;                                  \
+        break;                                       \
+      }                                              \
+  (set_)->next = next_;                              \
+  clib_spinlock_unlock_if_init ((set_)->lock);       \
+  found_;                                            \
+})
+
+/** @brief Swap a callback in the specified callback set
+ * @param set_ The callback set
+ * @param fp_ The current callback function
+ * @param value_ The value_ to assign the callback
+ * @return 1 if the function was swapped, 0 if not
+ *
+ * Swap a callback in the indicated callback set.  If the callback is
+ * not found, then nothing is done.  If the set is currently being
+ * iterated, then the change will be applied after the current full
+ * iteration, and prior to the next full iteration.  Matching is by
+ * the callback's .fp member only.
+ */
+#define clib_callback_data_swap(set_,fp_,value_)     \
+({                                                   \
+  int found_ = 0;                                    \
+  clib_spinlock_lock_if_init ((set_)->lock);         \
+  typeof ((set_)->next) next_ = (set_)->next;        \
+  /* No staged copy yet: seed one from spare + curr */\
+  if (PREDICT_TRUE (next_ == 0))                     \
+    {                                                \
+      next_ = (set_)->spare;                         \
+      (set_)->spare = 0;                             \
+      vec_append (next_, (set_)->curr);              \
+    }                                                \
+  u32 sz_ = vec_len (next_);                         \
+  u32 i_;                                            \
+  for (i_ = 0; i_ < sz_; i_++)                       \
+    if (next_[i_].fp == (fp_))                       \
+      {                                              \
+        next_[i_] = (value_);                        \
+        found_ = 1;                                  \
+        break;                                       \
+      }                                              \
+  (set_)->next = next_;                              \
+  clib_spinlock_unlock_if_init ((set_)->lock);       \
+  found_;                                            \
+})
+
+/** @brief Ensure a callback is in the specified callback set
+ * @param set_ The callback set
+ * @param value_ The value_ to assign the callback
+ *
+ * Add or swap a callback in the indicated callback set.  If the
+ * callback is already in the set (matched by .fp), it is replaced.
+ * If the callback is not found, then it is added.  If the set is
+ * currently being iterated, then the change will be applied after the
+ * current full iteration, and prior to the next full iteration.
+ * Note: this is a statement, not an expression; it yields no value.
+ */
+#define clib_callback_data_ensure(set_,value_)       \
+do {                                                 \
+  int found_ = 0;                                    \
+  clib_spinlock_lock_if_init ((set_)->lock);         \
+  typeof ((set_)->next) next_ = (set_)->next;        \
+  /* No staged copy yet: seed one from spare + curr */\
+  if (PREDICT_TRUE (next_ == 0))                     \
+    {                                                \
+      next_ = (set_)->spare;                         \
+      (set_)->spare = 0;                             \
+      vec_append (next_, (set_)->curr);              \
+    }                                                \
+  u32 sz_ = vec_len (next_);                         \
+  u32 i_;                                            \
+  for (i_ = 0; i_ < sz_; i_++)                       \
+    if (next_[i_].fp == (value_).fp)                 \
+      {                                              \
+        found_ = 1;                                  \
+        break;                                       \
+      }                                              \
+  /* Not found: i_ == sz_, grow by one and append */ \
+  if (!found_)                                       \
+    vec_validate (next_, i_);                        \
+  next_[i_] = (value_);                              \
+  (set_)->next = next_;                              \
+  clib_spinlock_unlock_if_init ((set_)->lock);       \
+} while(0)
+
+/** @brief Enable/Disable the specified callback
+ * @param set_ The callback set
+ * @param fp_ The callback function
+ * @param ena_ 1 to enable, 0 to disable
+ *
+ * Enable or disable a callback function, with no per-callback data;
+ * all fields other than .fp are zero-initialized on enable.
+ */
+#define clib_callback_data_enable_disable(set_,fp_,ena_)   \
+do {                                                       \
+  if (ena_)                                                \
+    {                                                      \
+      typeof ((set_)->next[0]) data_ = { .fp = (fp_) };    \
+      clib_callback_data_add ((set_), data_);              \
+    }                                                      \
+  else                                                     \
+    clib_callback_data_remove ((set_), (fp_));             \
+} while (0)
+
+/** @brief Get the value of a callback, if set.
+ * @param set_ The callback set
+ * @param fp_ The callback function
+ * @param v_ Set to the callback's current value
+ * @return 1 if the function is in the set, 0 if not
+ */
+#define clib_callback_data_get_value(set_,fp_,v_)    \
+({                                                   \
+  int found_ = 0;                                    \
+  clib_spinlock_lock_if_init ((set_)->lock);         \
+  typeof ((set_)->next) search_ = (set_)->next;      \
+  if (PREDICT_TRUE (search_ == 0))                   \
+    search_ = (set_)->curr;                          \
+  u32 sz_ = vec_len (search_);                       \
+  u32 i_;                                            \
+  for (i_ = 0; i_ < sz_; i_++)                       \
+    if (search_[i_].fp == (fp_))                     \
+      {                                              \
+        (v_) = search_[i];                           \
+        found_ = 1;                                  \
+        break;                                       \
+      }                                              \
+  clib_spinlock_unlock_if_init ((set_)->lock);       \
+  found_;                                            \
+})
+
+/** @brief Check if callback is set
+ * @param set_ The callback set
+ * @param fp_ The callback function
+ * @return 1 if the function is in the set, 0 if not
+ *
+ * Searches the staged (next) vector if updates are pending, otherwise
+ * the current vector.  Matching is by the callback's .fp member.
+ */
+#define clib_callback_data_is_set(set_,fp_)          \
+({                                                   \
+  int found_ = 0;                                    \
+  clib_spinlock_lock_if_init ((set_)->lock);         \
+  typeof ((set_)->next) search_ = (set_)->next;      \
+  if (PREDICT_TRUE (search_ == 0))                   \
+    search_ = (set_)->curr;                          \
+  u32 sz_ = vec_len (search_);                       \
+  u32 i_;                                            \
+  for (i_ = 0; i_ < sz_; i_++)                       \
+    if (search_[i_].fp == (fp_))                     \
+      {                                              \
+        found_ = 1;                                  \
+        break;                                       \
+      }                                              \
+  clib_spinlock_unlock_if_init ((set_)->lock);       \
+  found_;                                            \
+})
+
+/** @brief Check for and get current callback vector
+ * @param set_ the callback set
+ * @return the up-to-date callback vector (may be a null vector)
+ *
+ * If updates are pending in next, atomically promotes them to curr
+ * (recycling the old curr vector as spare, or freeing the new vector
+ * when it has become empty).  Intended to be called by the iterating
+ * thread at the start of each full iteration.
+ */
+#define clib_callback_data_check_and_get(set_)       \
+({                                                   \
+  typeof ((set_)->curr) curr_ = (set_)->curr;        \
+  if (PREDICT_FALSE ((set_)->next != 0))             \
+    {                                                \
+      clib_spinlock_lock_if_init ((set_)->lock);     \
+      vec_reset_length (curr_);                      \
+      (set_)->spare = curr_;                         \
+      curr_ = (set_)->next;                          \
+      (set_)->next = 0;                              \
+      if (PREDICT_FALSE (0 == vec_len (curr_)))      \
+        vec_free (curr_);                            \
+      (set_)->curr = curr_;                          \
+      clib_spinlock_unlock_if_init ((set_)->lock);   \
+    }                                                \
+  curr_;                                             \
+})
+
+/** @brief Iterate and call a callback vector
+ * @param vec_ the callback vector
+ * @param varargs additional callback parameters
+ */
+#define clib_callback_data_call_vec(vec_, ...)                     \
+do {                                                               \
+  u32 sz_ = vec_len (vec_);                                        \
+  u32 i_;                                                          \
+  for (i_ = 0; i_ < sz_; i_++)                                     \
+    {                                                              \
+      CLIB_PREFETCH (&vec_[i_+1], CLIB_CACHE_LINE_BYTES, STORE);   \
+      (vec_[i_].fp) (&vec_[i_], __VA_ARGS__);                      \
+    }                                                              \
+} while (0)
+
+/** @brief Call the specified callback set
+ * @param set_ the callback set
+ * @param varargs additional callback parameters
+ *
+ * Applies any pending updates, then invokes every callback in the set.
+ */
+#define clib_callback_data_call(set_, ...)                           \
+do {                                                                 \
+  typeof ((set_)->curr) v_ = clib_callback_data_check_and_get(set_); \
+  clib_callback_data_iterate (v_, __VA_ARGS__);                      \
+} while (0)
+
+/** @brief Prefetch the callback set's current vector
+ * @param set_ The callback set
+ *
+ * No-op when the set has no current callbacks (the common case).
+ */
+#define clib_callback_data_prefetch(set_)                        \
+do {                                                             \
+  if (PREDICT_FALSE ((set_)->curr))                              \
+    CLIB_PREFETCH ((set_)->curr, CLIB_CACHE_LINE_BYTES, STORE);  \
+} while (0)
+
+
+#endif /* included_callback_data_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */