perfmon plugin: 2-way parallel stat collection
[vpp.git] / src / plugins / perfmon / perfmon_periodic.c
index 4e7e237..ae20ac4 100644 (file)
@@ -31,22 +31,34 @@ perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu,
   return ret;
 }
 
-static u64
-read_current_perf_counter (vlib_main_t * vm)
+static void
+read_current_perf_counters (vlib_main_t * vm, u64 * c0, u64 * c1)
 {
-  if (vm->perf_counter_id)
-    return clib_rdpmc (vm->perf_counter_id);
-  else
+  int i;
+  u64 *cc;
+  perfmon_main_t *pm = &perfmon_main;
+  uword my_thread_index = vm->thread_index;
+
+  *c0 = *c1 = 0;
+
+  for (i = 0; i < pm->n_active; i++)
     {
-      u64 sw_value;
-      if (read (vm->perf_counter_fd, &sw_value, sizeof (sw_value)) !=
-         sizeof (sw_value))
+      cc = (i == 0) ? c0 : c1;
+      if (pm->rdpmc_indices[i][my_thread_index] != ~0)
+       *cc = clib_rdpmc ((int) pm->rdpmc_indices[i][my_thread_index]);
+      else
        {
-         clib_unix_warning ("counter read failed, disable collection...");
-         vm->vlib_node_runtime_perf_counter_cb = 0;
-         return 0ULL;
+         u64 sw_value;
+         if (read (pm->pm_fds[i][my_thread_index], &sw_value,
+                   sizeof (sw_value)) != sizeof (sw_value))
+           {
+             clib_unix_warning
+               ("counter read failed, disable collection...");
+             vm->vlib_node_runtime_perf_counter_cb = 0;
+             return;
+           }
+         *cc = sw_value;
        }
-      return sw_value;
     }
 }
 
@@ -80,9 +92,11 @@ clear_counters (perfmon_main_t * pm)
       for (i = 0; i < vec_len (nm->nodes); i++)
        {
          n = nm->nodes[i];
-         n->stats_total.perf_counter_ticks = 0;
+         n->stats_total.perf_counter0_ticks = 0;
+         n->stats_total.perf_counter1_ticks = 0;
          n->stats_total.perf_counter_vectors = 0;
-         n->stats_last_clear.perf_counter_ticks = 0;
+         n->stats_last_clear.perf_counter0_ticks = 0;
+         n->stats_last_clear.perf_counter1_ticks = 0;
          n->stats_last_clear.perf_counter_vectors = 0;
        }
     }
@@ -90,7 +104,7 @@ clear_counters (perfmon_main_t * pm)
 }
 
 static void
-enable_current_event (perfmon_main_t * pm)
+enable_current_events (perfmon_main_t * pm)
 {
   struct perf_event_attr pe;
   int fd;
@@ -98,91 +112,108 @@ enable_current_event (perfmon_main_t * pm)
   perfmon_event_config_t *c;
   vlib_main_t *vm = vlib_get_main ();
   u32 my_thread_index = vm->thread_index;
+  u32 index;
+  int i, limit = 1;
 
-  c = vec_elt_at_index (pm->events_to_collect, pm->current_event);
-
-  memset (&pe, 0, sizeof (struct perf_event_attr));
-  pe.type = c->pe_type;
-  pe.size = sizeof (struct perf_event_attr);
-  pe.config = c->pe_config;
-  pe.disabled = 1;
-  pe.pinned = 1;
-  /*
-   * Note: excluding the kernel makes the
-   * (software) context-switch counter read 0...
-   */
-  if (pe.type != PERF_TYPE_SOFTWARE)
-    {
-      /* Exclude kernel and hypervisor */
-      pe.exclude_kernel = 1;
-      pe.exclude_hv = 1;
-    }
+  if ((pm->current_event + 1) < vec_len (pm->single_events_to_collect))
+    limit = 2;
 
-  fd = perf_event_open (&pe, 0, -1, -1, 0);
-  if (fd == -1)
+  for (i = 0; i < limit; i++)
     {
-      clib_unix_warning ("event open: type %d config %d", c->pe_type,
-                        c->pe_config);
-      return;
-    }
+      c = vec_elt_at_index (pm->single_events_to_collect,
+                           pm->current_event + i);
+
+      memset (&pe, 0, sizeof (struct perf_event_attr));
+      pe.type = c->pe_type;
+      pe.size = sizeof (struct perf_event_attr);
+      pe.config = c->pe_config;
+      pe.disabled = 1;
+      pe.pinned = 1;
+      /*
+       * Note: excluding the kernel makes the
+       * (software) context-switch counter read 0...
+       */
+      if (pe.type != PERF_TYPE_SOFTWARE)
+       {
+         /* Exclude kernel and hypervisor */
+         pe.exclude_kernel = 1;
+         pe.exclude_hv = 1;
+       }
 
-  if (pe.type != PERF_TYPE_SOFTWARE)
-    {
-      p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0);
-      if (p == MAP_FAILED)
+      fd = perf_event_open (&pe, 0, -1, -1, 0);
+      if (fd == -1)
        {
-         clib_unix_warning ("mmap");
-         close (fd);
+         clib_unix_warning ("event open: type %d config %d", c->pe_type,
+                            c->pe_config);
          return;
        }
-    }
 
-  if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0)
-    clib_unix_warning ("reset ioctl");
+      if (pe.type != PERF_TYPE_SOFTWARE)
+       {
+         p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0);
+         if (p == MAP_FAILED)
+           {
+             clib_unix_warning ("mmap");
+             close (fd);
+             return;
+           }
+       }
+      else
+       p = 0;
+
+      /*
+       * Software event counters - and others not capable of being
+       * read via the "rdpmc" instruction - will be read
+       * by system calls.
+       */
+      if (pe.type == PERF_TYPE_SOFTWARE || p->cap_user_rdpmc == 0)
+       index = ~0;
+      else
+       index = p->index - 1;
 
-  if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
-    clib_unix_warning ("enable ioctl");
+      if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0)
+       clib_unix_warning ("reset ioctl");
 
-  /*
-   * Software event counters - and others not capable of being
-   * read via the "rdpmc" instruction - will be read
-   * by system calls.
-   */
-  if (pe.type == PERF_TYPE_SOFTWARE || p->cap_user_rdpmc == 0)
-    pm->rdpmc_indices[my_thread_index] = 0;
-  else                         /* use rdpmc instrs */
-    pm->rdpmc_indices[my_thread_index] = p->index - 1;
-  pm->perf_event_pages[my_thread_index] = (void *) p;
+      if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
+       clib_unix_warning ("enable ioctl");
 
-  pm->pm_fds[my_thread_index] = fd;
+      pm->rdpmc_indices[i][my_thread_index] = index;
+      pm->perf_event_pages[i][my_thread_index] = (void *) p;
+      pm->pm_fds[i][my_thread_index] = fd;
+    }
 
+  pm->n_active = i;
   /* Enable the main loop counter snapshot mechanism */
-  vm->perf_counter_id = pm->rdpmc_indices[my_thread_index];
-  vm->perf_counter_fd = fd;
-  vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counter;
+  vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counters;
 }
 
 static void
-disable_event (perfmon_main_t * pm)
+disable_events (perfmon_main_t * pm)
 {
   vlib_main_t *vm = vlib_get_main ();
   u32 my_thread_index = vm->thread_index;
-
-  if (pm->pm_fds[my_thread_index] == 0)
-    return;
+  int i;
 
   /* Stop main loop collection */
   vm->vlib_node_runtime_perf_counter_cb = 0;
 
-  if (ioctl (pm->pm_fds[my_thread_index], PERF_EVENT_IOC_DISABLE, 0) < 0)
-    clib_unix_warning ("disable ioctl");
+  for (i = 0; i < pm->n_active; i++)
+    {
+      if (pm->pm_fds[i][my_thread_index] == 0)
+       continue;
+
+      if (ioctl (pm->pm_fds[i][my_thread_index], PERF_EVENT_IOC_DISABLE, 0) <
+         0)
+       clib_unix_warning ("disable ioctl");
 
-  if (pm->perf_event_pages[my_thread_index])
-    if (munmap (pm->perf_event_pages[my_thread_index], pm->page_size) < 0)
-      clib_unix_warning ("munmap");
+      if (pm->perf_event_pages[i][my_thread_index])
+       if (munmap (pm->perf_event_pages[i][my_thread_index],
+                   pm->page_size) < 0)
+         clib_unix_warning ("munmap");
 
-  (void) close (pm->pm_fds[my_thread_index]);
-  pm->pm_fds[my_thread_index] = 0;
+      (void) close (pm->pm_fds[i][my_thread_index]);
+      pm->pm_fds[i][my_thread_index] = 0;
+    }
 }
 
 static void
@@ -190,7 +221,7 @@ worker_thread_start_event (vlib_main_t * vm)
 {
   perfmon_main_t *pm = &perfmon_main;
 
-  enable_current_event (pm);
+  enable_current_events (pm);
   vm->worker_thread_main_loop_callback = 0;
 }
 
@@ -198,7 +229,7 @@ static void
 worker_thread_stop_event (vlib_main_t * vm)
 {
   perfmon_main_t *pm = &perfmon_main;
-  disable_event (pm);
+  disable_events (pm);
   vm->worker_thread_main_loop_callback = 0;
 }
 
@@ -207,7 +238,7 @@ start_event (perfmon_main_t * pm, f64 now, uword event_data)
 {
   int i;
   pm->current_event = 0;
-  if (vec_len (pm->events_to_collect) == 0)
+  if (vec_len (pm->single_events_to_collect) == 0)
     {
       pm->state = PERFMON_STATE_OFF;
       return;
@@ -216,7 +247,7 @@ start_event (perfmon_main_t * pm, f64 now, uword event_data)
   clear_counters (pm);
 
   /* Start collection on this thread */
-  enable_current_event (pm);
+  enable_current_events (pm);
 
   /* And also on worker threads */
   for (i = 1; i < vec_len (vlib_mains); i++)
@@ -231,7 +262,7 @@ start_event (perfmon_main_t * pm, f64 now, uword event_data)
 void
 scrape_and_clear_counters (perfmon_main_t * pm)
 {
-  int i, j;
+  int i, j, k;
   vlib_main_t *vm = pm->vlib_main;
   vlib_main_t *stat_vm;
   vlib_node_main_t *nm;
@@ -242,7 +273,6 @@ scrape_and_clear_counters (perfmon_main_t * pm)
   perfmon_event_config_t *current_event;
   uword *p;
   u8 *counter_name;
-  u64 counter_value;
   u64 vectors_this_counter;
 
   /* snapshoot the nodes, including pm counters */
@@ -272,17 +302,17 @@ scrape_and_clear_counters (perfmon_main_t * pm)
          n = nm->nodes[i];
          nodes[i] = clib_mem_alloc (sizeof (*n));
          clib_memcpy_fast (nodes[i], n, sizeof (*n));
-         n->stats_total.perf_counter_ticks = 0;
+         n->stats_total.perf_counter0_ticks = 0;
+         n->stats_total.perf_counter1_ticks = 0;
          n->stats_total.perf_counter_vectors = 0;
-         n->stats_last_clear.perf_counter_ticks = 0;
+         n->stats_last_clear.perf_counter0_ticks = 0;
+         n->stats_last_clear.perf_counter1_ticks = 0;
          n->stats_last_clear.perf_counter_vectors = 0;
        }
     }
 
   vlib_worker_thread_barrier_release (vm);
 
-  current_event = pm->events_to_collect + pm->current_event;
-
   for (j = 0; j < vec_len (vlib_mains); j++)
     {
       stat_vm = vlib_mains[j];
@@ -296,38 +326,69 @@ scrape_and_clear_counters (perfmon_main_t * pm)
          u8 *capture_name;
 
          n = nodes[i];
-         if (n->stats_total.perf_counter_ticks == 0)
-           {
-             clib_mem_free (n);
-             continue;
-           }
-
-         capture_name = format (0, "t%d-%v%c", j, n->name, 0);
 
-         p = hash_get_mem (pm->capture_by_thread_and_node_name,
-                           capture_name);
+         if (n->stats_total.perf_counter0_ticks == 0 &&
+             n->stats_total.perf_counter1_ticks == 0)
+           goto skip_this_node;
 
-         if (p == 0)
+         for (k = 0; k < 2; k++)
            {
-             pool_get (pm->capture_pool, c);
-             memset (c, 0, sizeof (*c));
-             c->thread_and_node_name = capture_name;
-             hash_set_mem (pm->capture_by_thread_and_node_name,
-                           capture_name, c - pm->capture_pool);
+             u64 counter_value, counter_last_clear;
+
+             /*
+              * We collect 2 counters at once, except for the
+              * last counter when the user asks for an odd number of
+              * counters
+              */
+             if ((pm->current_event + k)
+                 >= vec_len (pm->single_events_to_collect))
+               break;
+
+             if (k == 0)
+               {
+                 counter_value = n->stats_total.perf_counter0_ticks;
+                 counter_last_clear =
+                   n->stats_last_clear.perf_counter0_ticks;
+               }
+             else
+               {
+                 counter_value = n->stats_total.perf_counter1_ticks;
+                 counter_last_clear =
+                   n->stats_last_clear.perf_counter1_ticks;
+               }
+
+             capture_name = format (0, "t%d-%v%c", j, n->name, 0);
+
+             p = hash_get_mem (pm->capture_by_thread_and_node_name,
+                               capture_name);
+
+             if (p == 0)
+               {
+                 pool_get (pm->capture_pool, c);
+                 memset (c, 0, sizeof (*c));
+                 c->thread_and_node_name = capture_name;
+                 hash_set_mem (pm->capture_by_thread_and_node_name,
+                               capture_name, c - pm->capture_pool);
+               }
+             else
+               {
+                 c = pool_elt_at_index (pm->capture_pool, p[0]);
+                 vec_free (capture_name);
+               }
+
+             /* Snapshoot counters, etc. into the capture */
+             current_event = pm->single_events_to_collect
+               + pm->current_event + k;
+             counter_name = (u8 *) current_event->name;
+             vectors_this_counter = n->stats_total.perf_counter_vectors -
+               n->stats_last_clear.perf_counter_vectors;
+
+             vec_add1 (c->counter_names, counter_name);
+             vec_add1 (c->counter_values,
+                       counter_value - counter_last_clear);
+             vec_add1 (c->vectors_this_counter, vectors_this_counter);
            }
-         else
-           c = pool_elt_at_index (pm->capture_pool, p[0]);
-
-         /* Snapshoot counters, etc. into the capture */
-         counter_name = (u8 *) current_event->name;
-         counter_value = n->stats_total.perf_counter_ticks -
-           n->stats_last_clear.perf_counter_ticks;
-         vectors_this_counter = n->stats_total.perf_counter_vectors -
-           n->stats_last_clear.perf_counter_vectors;
-
-         vec_add1 (c->counter_names, counter_name);
-         vec_add1 (c->counter_values, counter_value);
-         vec_add1 (c->vectors_this_counter, vectors_this_counter);
+       skip_this_node:
          clib_mem_free (n);
        }
       vec_free (nodes);
@@ -339,7 +400,7 @@ static void
 handle_timeout (perfmon_main_t * pm, f64 now)
 {
   int i;
-  disable_event (pm);
+  disable_events (pm);
 
   /* And also on worker threads */
   for (i = 1; i < vec_len (vlib_mains); i++)
@@ -354,14 +415,14 @@ handle_timeout (perfmon_main_t * pm, f64 now)
   if (i > 1)
     vlib_process_suspend (pm->vlib_main, 1e-3);
   scrape_and_clear_counters (pm);
-  pm->current_event++;
-  if (pm->current_event >= vec_len (pm->events_to_collect))
+  pm->current_event += pm->n_active;
+  if (pm->current_event >= vec_len (pm->single_events_to_collect))
     {
       pm->current_event = 0;
       pm->state = PERFMON_STATE_OFF;
       return;
     }
-  enable_current_event (pm);
+  enable_current_events (pm);
 
   /* And also on worker threads */
   for (i = 1; i < vec_len (vlib_mains); i++)