misc: asan: do not poison memory after munmap()
[vpp.git] / src / plugins / perfmon / perfmon_periodic.c
index 4e7e237..37d669b 100644
@@ -21,6 +21,7 @@
 #include <asm/unistd.h>
 #include <sys/ioctl.h>
 
+/* perf_event_open() has no glibc wrapper */
 static long
 perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu,
                 int group_fd, unsigned long flags)
@@ -31,22 +32,42 @@ perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu,
   return ret;
 }
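
The wrapper body is elided between these two hunks. glibc provides no
perf_event_open() wrapper, so per the perf_event_open(2) man page the
function reduces to the raw syscall; a minimal sketch, not necessarily
the exact VPP body:

    #include <linux/perf_event.h>
    #include <asm/unistd.h>
    #include <unistd.h>

    /* Sketch: invoke the perf_event_open syscall directly, as the
       man page recommends when no libc wrapper exists. */
    static long
    perf_event_open_sketch (struct perf_event_attr *hw_event, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
    {
      return syscall (__NR_perf_event_open, hw_event, pid, cpu,
                      group_fd, flags);
    }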
 
-static u64
-read_current_perf_counter (vlib_main_t * vm)
+static void
+read_current_perf_counters (vlib_main_t * vm, u64 * c0, u64 * c1,
+                           vlib_node_runtime_t * node,
+                           vlib_frame_t * frame, int before_or_after)
 {
-  if (vm->perf_counter_id)
-    return clib_rdpmc (vm->perf_counter_id);
-  else
+  int i;
+  u64 *cc;
+  perfmon_main_t *pm = &perfmon_main;
+  uword my_thread_index = vm->thread_index;
+
+  *c0 = *c1 = 0;
+
+  for (i = 0; i < pm->n_active; i++)
     {
-      u64 sw_value;
-      if (read (vm->perf_counter_fd, &sw_value, sizeof (sw_value)) !=
-         sizeof (sw_value))
+      cc = (i == 0) ? c0 : c1;
+      if (pm->rdpmc_indices[i][my_thread_index] != ~0)
+       *cc = clib_rdpmc ((int) pm->rdpmc_indices[i][my_thread_index]);
+      else
        {
-         clib_unix_warning ("counter read failed, disable collection...");
-         vm->vlib_node_runtime_perf_counter_cb = 0;
-         return 0ULL;
+         u64 sw_value;
+         int read_result;
+         if ((read_result = read (pm->pm_fds[i][my_thread_index], &sw_value,
+                                  sizeof (sw_value))) != sizeof (sw_value))
+           {
+             clib_unix_warning
+               ("counter read returned %d, expected %d",
+                read_result, (int) sizeof (sw_value));
+             clib_callback_enable_disable
+               (vm->vlib_node_runtime_perf_counter_cbs,
+                vm->vlib_node_runtime_perf_counter_cb_tmp,
+                vm->worker_thread_main_loop_callback_lock,
+                read_current_perf_counters, 0 /* enable */ );
+             return;
+           }
+         *cc = sw_value;
        }
-      return sw_value;
     }
 }
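
For reference, clib_rdpmc evidently wraps the x86 rdpmc instruction,
which reads a programmable performance counter from user space without
a system call; hence the fall-back read() path above for software
counters. A minimal sketch of such a wrapper, assuming x86-64:

    /* Sketch: ECX selects the counter, the value returns in EDX:EAX.
       Only legal when the kernel sets cap_user_rdpmc for the event. */
    static inline u64
    rdpmc_sketch (int counter)
    {
      u32 lo, hi;
      __asm__ volatile ("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
      return ((u64) hi << 32) | lo;
    }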
 
@@ -80,9 +101,11 @@ clear_counters (perfmon_main_t * pm)
       for (i = 0; i < vec_len (nm->nodes); i++)
        {
          n = nm->nodes[i];
-         n->stats_total.perf_counter_ticks = 0;
+         n->stats_total.perf_counter0_ticks = 0;
+         n->stats_total.perf_counter1_ticks = 0;
          n->stats_total.perf_counter_vectors = 0;
-         n->stats_last_clear.perf_counter_ticks = 0;
+         n->stats_last_clear.perf_counter0_ticks = 0;
+         n->stats_last_clear.perf_counter1_ticks = 0;
          n->stats_last_clear.perf_counter_vectors = 0;
        }
     }
@@ -90,7 +113,7 @@ clear_counters (perfmon_main_t * pm)
 }
 
 static void
-enable_current_event (perfmon_main_t * pm)
+enable_current_events (perfmon_main_t * pm)
 {
   struct perf_event_attr pe;
   int fd;
@@ -98,91 +121,140 @@ enable_current_event (perfmon_main_t * pm)
   perfmon_event_config_t *c;
   vlib_main_t *vm = vlib_get_main ();
   u32 my_thread_index = vm->thread_index;
+  u32 index;
+  int i, limit = 1;
+  int cpu;
 
-  c = vec_elt_at_index (pm->events_to_collect, pm->current_event);
+  if ((pm->current_event + 1) < vec_len (pm->single_events_to_collect))
+    limit = 2;
 
-  memset (&pe, 0, sizeof (struct perf_event_attr));
-  pe.type = c->pe_type;
-  pe.size = sizeof (struct perf_event_attr);
-  pe.config = c->pe_config;
-  pe.disabled = 1;
-  pe.pinned = 1;
-  /*
-   * Note: excluding the kernel makes the
-   * (software) context-switch counter read 0...
-   */
-  if (pe.type != PERF_TYPE_SOFTWARE)
+  for (i = 0; i < limit; i++)
     {
-      /* Exclude kernel and hypervisor */
-      pe.exclude_kernel = 1;
-      pe.exclude_hv = 1;
-    }
+      vec_validate (pm->pm_fds[i], vec_len (vlib_mains) - 1);
+      vec_validate (pm->perf_event_pages[i], vec_len (vlib_mains) - 1);
+      vec_validate (pm->rdpmc_indices[i], vec_len (vlib_mains) - 1);
+
+      c = vec_elt_at_index (pm->single_events_to_collect,
+                           pm->current_event + i);
+
+      memset (&pe, 0, sizeof (struct perf_event_attr));
+      pe.type = c->pe_type;
+      pe.size = sizeof (struct perf_event_attr);
+      pe.config = c->pe_config;
+      pe.disabled = 1;
+      pe.pinned = 1;
+      /*
+       * Note: excluding the kernel makes the
+       * (software) context-switch counter read 0...
+       */
+      if (pe.type != PERF_TYPE_SOFTWARE)
+       {
+         /* Exclude kernel and hypervisor */
+         pe.exclude_kernel = 1;
+         pe.exclude_hv = 1;
+       }
 
-  fd = perf_event_open (&pe, 0, -1, -1, 0);
-  if (fd == -1)
-    {
-      clib_unix_warning ("event open: type %d config %d", c->pe_type,
-                        c->pe_config);
-      return;
-    }
+      cpu = vm->cpu_id;
 
-  if (pe.type != PERF_TYPE_SOFTWARE)
-    {
-      p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0);
-      if (p == MAP_FAILED)
+      fd = perf_event_open (&pe, 0, cpu, -1, 0);
+      if (fd == -1)
        {
-         clib_unix_warning ("mmap");
-         close (fd);
+         clib_unix_warning ("event open: type %d config %d", c->pe_type,
+                            c->pe_config);
          return;
        }
-    }
 
-  if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0)
-    clib_unix_warning ("reset ioctl");
+      if (pe.type != PERF_TYPE_SOFTWARE)
+       {
+         p = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, fd, 0);
+         if (p == MAP_FAILED)
+           {
+             clib_unix_warning ("mmap");
+             close (fd);
+             return;
+           }
+         CLIB_MEM_UNPOISON (p, pm->page_size);
+       }
+      else
+       p = 0;
 
-  if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
-    clib_unix_warning ("enable ioctl");
+      if (ioctl (fd, PERF_EVENT_IOC_RESET, 0) < 0)
+       clib_unix_warning ("reset ioctl");
+
+      if (ioctl (fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
+       clib_unix_warning ("enable ioctl");
+
+      pm->perf_event_pages[i][my_thread_index] = (void *) p;
+      pm->pm_fds[i][my_thread_index] = fd;
+    }
 
   /*
-   * Software event counters - and others not capable of being
-   * read via the "rdpmc" instruction - will be read
-   * by system calls.
+   * Hardware events must all be opened and enabled before acquiring
+   * PMC indices; otherwise the indices may be stale.
    */
-  if (pe.type == PERF_TYPE_SOFTWARE || p->cap_user_rdpmc == 0)
-    pm->rdpmc_indices[my_thread_index] = 0;
-  else                         /* use rdpmc instrs */
-    pm->rdpmc_indices[my_thread_index] = p->index - 1;
-  pm->perf_event_pages[my_thread_index] = (void *) p;
+  for (i = 0; i < limit; i++)
+    {
+      p =
+       (struct perf_event_mmap_page *)
+       pm->perf_event_pages[i][my_thread_index];
+
+      /*
+       * Software event counters - and others not capable of being
+       * read via the "rdpmc" instruction - will be read
+       * by system calls.
+       */
+      if (p == 0 || p->cap_user_rdpmc == 0)
+       index = ~0;
+      else
+       index = p->index - 1;
 
-  pm->pm_fds[my_thread_index] = fd;
+      pm->rdpmc_indices[i][my_thread_index] = index;
+    }
 
+  pm->n_active = i;
   /* Enable the main loop counter snapshot mechanism */
-  vm->perf_counter_id = pm->rdpmc_indices[my_thread_index];
-  vm->perf_counter_fd = fd;
-  vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counter;
+  clib_callback_enable_disable
+    (vm->vlib_node_runtime_perf_counter_cbs,
+     vm->vlib_node_runtime_perf_counter_cb_tmp,
+     vm->worker_thread_main_loop_callback_lock,
+     read_current_perf_counters, 1 /* enable */ );
 }
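
The p->index - 1 arithmetic above follows perf_event_open(2): the
kernel publishes the hardware counter number plus one in the mmap'd
control page, and zero means the event is not currently on a counter.
A hedged helper capturing that rule (the name and the explicit
p->index == 0 check are ours, not the patch's):

    /* Sketch: translate the mmap'd control page into an rdpmc index,
       returning ~0 when the counter cannot be read from user space. */
    static u32
    rdpmc_index_sketch (struct perf_event_mmap_page *p)
    {
      if (p == 0 || p->cap_user_rdpmc == 0 || p->index == 0)
        return ~0;
      return p->index - 1;
    }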
 
 static void
-disable_event (perfmon_main_t * pm)
+disable_events (perfmon_main_t * pm)
 {
   vlib_main_t *vm = vlib_get_main ();
   u32 my_thread_index = vm->thread_index;
-
-  if (pm->pm_fds[my_thread_index] == 0)
-    return;
+  int i;
 
   /* Stop main loop collection */
-  vm->vlib_node_runtime_perf_counter_cb = 0;
+  clib_callback_enable_disable
+    (vm->vlib_node_runtime_perf_counter_cbs,
+     vm->vlib_node_runtime_perf_counter_cb_tmp,
+     vm->worker_thread_main_loop_callback_lock,
+     read_current_perf_counters, 0 /* enable */ );
+
+  for (i = 0; i < pm->n_active; i++)
+    {
+      if (pm->pm_fds[i][my_thread_index] == 0)
+       continue;
 
-  if (ioctl (pm->pm_fds[my_thread_index], PERF_EVENT_IOC_DISABLE, 0) < 0)
-    clib_unix_warning ("disable ioctl");
+      if (ioctl (pm->pm_fds[i][my_thread_index], PERF_EVENT_IOC_DISABLE, 0) <
+         0)
+       clib_unix_warning ("disable ioctl");
 
-  if (pm->perf_event_pages[my_thread_index])
-    if (munmap (pm->perf_event_pages[my_thread_index], pm->page_size) < 0)
-      clib_unix_warning ("munmap");
+      if (pm->perf_event_pages[i][my_thread_index])
+       {
+         if (munmap (pm->perf_event_pages[i][my_thread_index],
+                     pm->page_size) < 0)
+           clib_unix_warning ("munmap");
+         pm->perf_event_pages[i][my_thread_index] = 0;
+       }
 
-  (void) close (pm->pm_fds[my_thread_index]);
-  pm->pm_fds[my_thread_index] = 0;
+      (void) close (pm->pm_fds[i][my_thread_index]);
+      pm->pm_fds[i][my_thread_index] = 0;
+
+    }
 }
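
The CLIB_MEM_UNPOISON call added after mmap() exists because
AddressSanitizer knows nothing about pages mapped behind its back, so
reads of the perf control page would otherwise be flagged. Per the
commit subject, the converse operation is deliberately absent: once
munmap() returns, the pages are gone, and poisoning the range would
leave stale shadow state for whatever gets mapped there next.
Assuming CLIB_MEM_UNPOISON reduces to the public sanitizer interface,
the shape is roughly:

    #include <sanitizer/asan_interface.h>

    /* Sketch, assuming CLIB_MEM_UNPOISON maps onto the public ASan
       interface: mark an externally-mmap'd region as addressable.
       No matching poison call is made on munmap(). */
    #define MEM_UNPOISON_SKETCH(a, s) \
      ASAN_UNPOISON_MEMORY_REGION ((a), (s))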
 
 static void
@@ -190,48 +262,70 @@ worker_thread_start_event (vlib_main_t * vm)
 {
   perfmon_main_t *pm = &perfmon_main;
 
-  enable_current_event (pm);
-  vm->worker_thread_main_loop_callback = 0;
+  clib_callback_enable_disable (vm->worker_thread_main_loop_callbacks,
+                               vm->worker_thread_main_loop_callback_tmp,
+                               vm->worker_thread_main_loop_callback_lock,
+                               worker_thread_start_event, 0 /* enable */ );
+  enable_current_events (pm);
 }
 
 static void
 worker_thread_stop_event (vlib_main_t * vm)
 {
   perfmon_main_t *pm = &perfmon_main;
-  disable_event (pm);
-  vm->worker_thread_main_loop_callback = 0;
+  clib_callback_enable_disable (vm->worker_thread_main_loop_callbacks,
+                               vm->worker_thread_main_loop_callback_tmp,
+                               vm->worker_thread_main_loop_callback_lock,
+                               worker_thread_stop_event, 0 /* enable */ );
+  disable_events (pm);
 }
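
clib_callback_enable_disable, used throughout this patch, replaces the
old single worker_thread_main_loop_callback pointer with a set of
callbacks toggled under a per-vm lock. A hypothetical model of its
semantics, not the actual vppinfra implementation:

    #include <pthread.h>

    typedef void (*perf_cb_t) (void *vm);

    static perf_cb_t callbacks[8];
    static int n_callbacks;
    static pthread_mutex_t cb_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Hypothetical model: add or remove one callback under a lock,
       so each worker can safely walk the set from its main loop. */
    static void
    callback_enable_disable_sketch (perf_cb_t fp, int enable)
    {
      int i;
      pthread_mutex_lock (&cb_lock);
      for (i = 0; i < n_callbacks; i++)
        if (callbacks[i] == fp)
          break;
      if (enable && i == n_callbacks && n_callbacks < 8)
        callbacks[n_callbacks++] = fp;              /* append if absent */
      else if (!enable && i < n_callbacks)
        callbacks[i] = callbacks[--n_callbacks];    /* swap-remove */
      pthread_mutex_unlock (&cb_lock);
    }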
 
 static void
 start_event (perfmon_main_t * pm, f64 now, uword event_data)
 {
   int i;
+  int last_set;
+  int all = 0;
   pm->current_event = 0;
-  if (vec_len (pm->events_to_collect) == 0)
+
+  if (vec_len (pm->single_events_to_collect) == 0)
     {
       pm->state = PERFMON_STATE_OFF;
       return;
     }
+
+  last_set = clib_bitmap_last_set (pm->thread_bitmap);
+  all = (last_set == ~0);
+
   pm->state = PERFMON_STATE_RUNNING;
   clear_counters (pm);
 
-  /* Start collection on this thread */
-  enable_current_event (pm);
+  /* Start collection on thread 0? */
+  if (all || clib_bitmap_get (pm->thread_bitmap, 0))
+    {
+      /* Start collection on this thread */
+      enable_current_events (pm);
+    }
 
   /* And also on worker threads */
   for (i = 1; i < vec_len (vlib_mains); i++)
     {
       if (vlib_mains[i] == 0)
        continue;
-      vlib_mains[i]->worker_thread_main_loop_callback = (void *)
-       worker_thread_start_event;
+
+      if (all || clib_bitmap_get (pm->thread_bitmap, i))
+       clib_callback_enable_disable
+         (vlib_mains[i]->worker_thread_main_loop_callbacks,
+          vlib_mains[i]->worker_thread_main_loop_callback_tmp,
+          vlib_mains[i]->worker_thread_main_loop_callback_lock,
+          (void *) worker_thread_start_event, 1 /* enable */ );
     }
 }
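
The selection idiom above leans on clib_bitmap_last_set() returning ~0
for an empty bitmap, which the patch reads as "collect on every
thread". A sketch of the predicate, with the helper name ours:

    /* Sketch: an empty thread bitmap selects all threads; otherwise
       only threads whose bit is set participate in collection. */
    static int
    thread_selected_sketch (uword * bitmap, u32 thread_index)
    {
      if (clib_bitmap_last_set (bitmap) == ~0)      /* empty bitmap */
        return 1;
      return clib_bitmap_get (bitmap, thread_index) != 0;
    }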
 
 void
 scrape_and_clear_counters (perfmon_main_t * pm)
 {
-  int i, j;
+  int i, j, k;
   vlib_main_t *vm = pm->vlib_main;
   vlib_main_t *stat_vm;
   vlib_node_main_t *nm;
@@ -242,7 +336,6 @@ scrape_and_clear_counters (perfmon_main_t * pm)
   perfmon_event_config_t *current_event;
   uword *p;
   u8 *counter_name;
-  u64 counter_value;
   u64 vectors_this_counter;
 
   /* snapshot the nodes, including pm counters */
@@ -272,17 +365,17 @@ scrape_and_clear_counters (perfmon_main_t * pm)
          n = nm->nodes[i];
          nodes[i] = clib_mem_alloc (sizeof (*n));
          clib_memcpy_fast (nodes[i], n, sizeof (*n));
-         n->stats_total.perf_counter_ticks = 0;
+         n->stats_total.perf_counter0_ticks = 0;
+         n->stats_total.perf_counter1_ticks = 0;
          n->stats_total.perf_counter_vectors = 0;
-         n->stats_last_clear.perf_counter_ticks = 0;
+         n->stats_last_clear.perf_counter0_ticks = 0;
+         n->stats_last_clear.perf_counter1_ticks = 0;
          n->stats_last_clear.perf_counter_vectors = 0;
        }
     }
 
   vlib_worker_thread_barrier_release (vm);
 
-  current_event = pm->events_to_collect + pm->current_event;
-
   for (j = 0; j < vec_len (vlib_mains); j++)
     {
       stat_vm = vlib_mains[j];
@@ -296,38 +389,69 @@ scrape_and_clear_counters (perfmon_main_t * pm)
          u8 *capture_name;
 
          n = nodes[i];
-         if (n->stats_total.perf_counter_ticks == 0)
-           {
-             clib_mem_free (n);
-             continue;
-           }
-
-         capture_name = format (0, "t%d-%v%c", j, n->name, 0);
 
-         p = hash_get_mem (pm->capture_by_thread_and_node_name,
-                           capture_name);
+         if (n->stats_total.perf_counter0_ticks == 0 &&
+             n->stats_total.perf_counter1_ticks == 0)
+           goto skip_this_node;
 
-         if (p == 0)
+         for (k = 0; k < 2; k++)
            {
-             pool_get (pm->capture_pool, c);
-             memset (c, 0, sizeof (*c));
-             c->thread_and_node_name = capture_name;
-             hash_set_mem (pm->capture_by_thread_and_node_name,
-                           capture_name, c - pm->capture_pool);
+             u64 counter_value, counter_last_clear;
+
+             /*
+              * We collect two counters at once, except on the last
+              * pass when the user asks for an odd number of
+              * counters.
+              */
+             if ((pm->current_event + k)
+                 >= vec_len (pm->single_events_to_collect))
+               break;
+
+             if (k == 0)
+               {
+                 counter_value = n->stats_total.perf_counter0_ticks;
+                 counter_last_clear =
+                   n->stats_last_clear.perf_counter0_ticks;
+               }
+             else
+               {
+                 counter_value = n->stats_total.perf_counter1_ticks;
+                 counter_last_clear =
+                   n->stats_last_clear.perf_counter1_ticks;
+               }
+
+             capture_name = format (0, "t%d-%v%c", j, n->name, 0);
+
+             p = hash_get_mem (pm->capture_by_thread_and_node_name,
+                               capture_name);
+
+             if (p == 0)
+               {
+                 pool_get (pm->capture_pool, c);
+                 memset (c, 0, sizeof (*c));
+                 c->thread_and_node_name = capture_name;
+                 hash_set_mem (pm->capture_by_thread_and_node_name,
+                               capture_name, c - pm->capture_pool);
+               }
+             else
+               {
+                 c = pool_elt_at_index (pm->capture_pool, p[0]);
+                 vec_free (capture_name);
+               }
+
+             /* Snapshot counters, etc. into the capture */
+             current_event = pm->single_events_to_collect
+               + pm->current_event + k;
+             counter_name = (u8 *) current_event->name;
+             vectors_this_counter = n->stats_total.perf_counter_vectors -
+               n->stats_last_clear.perf_counter_vectors;
+
+             vec_add1 (c->counter_names, counter_name);
+             vec_add1 (c->counter_values,
+                       counter_value - counter_last_clear);
+             vec_add1 (c->vectors_this_counter, vectors_this_counter);
            }
-         else
-           c = pool_elt_at_index (pm->capture_pool, p[0]);
-
-         /* Snapshoot counters, etc. into the capture */
-         counter_name = (u8 *) current_event->name;
-         counter_value = n->stats_total.perf_counter_ticks -
-           n->stats_last_clear.perf_counter_ticks;
-         vectors_this_counter = n->stats_total.perf_counter_vectors -
-           n->stats_last_clear.perf_counter_vectors;
-
-         vec_add1 (c->counter_names, counter_name);
-         vec_add1 (c->counter_values, counter_value);
-         vec_add1 (c->vectors_this_counter, vectors_this_counter);
+       skip_this_node:
          clib_mem_free (n);
        }
       vec_free (nodes);
@@ -336,40 +460,75 @@ scrape_and_clear_counters (perfmon_main_t * pm)
 }
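
For orientation, the capture record filled above accumulates three
parallel vectors per thread-and-node key, up to two entries per scrape
pass. A sketch of its shape, with field names taken from their uses in
this function (the real type lives elsewhere in the plugin):

    /* Sketch of the capture record; one per "t<thread>-<node>" key. */
    typedef struct
    {
      u8 *thread_and_node_name;    /* hash key, e.g. "t1-ip4-input" */
      u8 **counter_names;          /* parallel vectors: one entry   */
      u64 *counter_values;         /*  per collected event, up to   */
      u64 *vectors_this_counter;   /*  two per scrape pass          */
    } perfmon_capture_sketch_t;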
 
 static void
-handle_timeout (perfmon_main_t * pm, f64 now)
+handle_timeout (vlib_main_t * vm, perfmon_main_t * pm, f64 now)
 {
   int i;
-  disable_event (pm);
+  int last_set, all;
+
+  last_set = clib_bitmap_last_set (pm->thread_bitmap);
+  all = (last_set == ~0);
+
+  if (all || clib_bitmap_get (pm->thread_bitmap, 0))
+    disable_events (pm);
 
   /* And also on worker threads */
   for (i = 1; i < vec_len (vlib_mains); i++)
     {
       if (vlib_mains[i] == 0)
        continue;
-      vlib_mains[i]->worker_thread_main_loop_callback = (void *)
-       worker_thread_stop_event;
+      if (all || clib_bitmap_get (pm->thread_bitmap, i))
+       clib_callback_enable_disable
+         (vlib_mains[i]->worker_thread_main_loop_callbacks,
+          vlib_mains[i]->worker_thread_main_loop_callback_tmp,
+          vlib_mains[i]->worker_thread_main_loop_callback_lock,
+          (void *) worker_thread_stop_event, 1 /* enable */ );
     }
 
-  /* Short delay to make sure workers have stopped collection */
+  /* Make sure workers have stopped collection */
   if (i > 1)
-    vlib_process_suspend (pm->vlib_main, 1e-3);
+    {
+      f64 deadman = vlib_time_now (vm) + 1.0;
+
+      for (i = 1; i < vec_len (vlib_mains); i++)
+       {
+         /* Has the worker actually stopped collecting data? */
+         while (clib_callback_is_set
+                (vlib_mains[i]->worker_thread_main_loop_callbacks,
+                 vlib_mains[i]->worker_thread_main_loop_callback_lock,
+                 read_current_perf_counters))
+           {
+             if (vlib_time_now (vm) > deadman)
+               {
+                 clib_warning ("Thread %d deadman timeout!", i);
+                 break;
+               }
+             vlib_process_suspend (pm->vlib_main, 1e-3);
+           }
+       }
+    }
   scrape_and_clear_counters (pm);
-  pm->current_event++;
-  if (pm->current_event >= vec_len (pm->events_to_collect))
+  pm->current_event += pm->n_active;
+  if (pm->current_event >= vec_len (pm->single_events_to_collect))
     {
       pm->current_event = 0;
       pm->state = PERFMON_STATE_OFF;
       return;
     }
-  enable_current_event (pm);
+
+  if (all || clib_bitmap_get (pm->thread_bitmap, 0))
+    enable_current_events (pm);
 
   /* And also on worker threads */
   for (i = 1; i < vec_len (vlib_mains); i++)
     {
       if (vlib_mains[i] == 0)
        continue;
-      vlib_mains[i]->worker_thread_main_loop_callback = (void *)
-       worker_thread_start_event;
+      if (all || clib_bitmap_get (pm->thread_bitmap, i))
+       clib_callback_enable_disable
+         (vlib_mains[i]->worker_thread_main_loop_callbacks,
+          vlib_mains[i]->worker_thread_main_loop_callback_tmp,
+          vlib_mains[i]->worker_thread_main_loop_callback_lock,
+          worker_thread_start_event, 1 /* enable */ );
     }
 }
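
The deadman loop above replaces the old fixed 1 ms suspend with a
bounded wait on the callback actually being cleared. The generic
pattern, sketched with a hypothetical predicate:

    /* Sketch: poll a predicate with short process suspends, giving
       up after a deadline instead of blocking forever. */
    static void
    wait_until_or_deadline_sketch (vlib_main_t * vm, int (*done) (void),
                                   f64 timeout)
    {
      f64 deadman = vlib_time_now (vm) + timeout;
      while (!done ())
        {
          if (vlib_time_now (vm) > deadman)
            {
              clib_warning ("deadman timeout!");
              break;
            }
          vlib_process_suspend (vm, 1e-3);
        }
    }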
 
@@ -403,7 +562,7 @@ perfmon_periodic_process (vlib_main_t * vm,
 
          /* Handle timeout */
        case ~0:
-         handle_timeout (pm, now);
+         handle_timeout (vm, pm, now);
          break;
 
        default: