X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlib%2Fmain.c;h=964feb06c6540ba2f911d42fd7b600c325226e3b;hb=178cf493d009995b28fdf220f04c98860ff79a9b;hp=6783068b42bf59f7b17e4b16b9bc25d8bef25de5;hpb=e3248989586ade29baba635aae66b06995917221;p=vpp.git

diff --git a/src/vlib/main.c b/src/vlib/main.c
index 6783068b42b..964feb06c65 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -143,7 +143,7 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index,
 
   /* Poison frame when debugging. */
   if (CLIB_DEBUG > 0)
-    memset (f, 0xfe, n);
+    clib_memset (f, 0xfe, n);
 
   /* Insert magic number. */
   {
@@ -153,10 +153,11 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index,
     *magic = VLIB_FRAME_MAGIC;
   }
 
-  f->flags = VLIB_FRAME_IS_ALLOCATED | frame_flags;
+  f->frame_flags = VLIB_FRAME_IS_ALLOCATED | frame_flags;
   f->n_vectors = 0;
   f->scalar_size = scalar_size;
   f->vector_size = vector_size;
+  f->flags = 0;
 
   fs->n_alloc_frames += 1;
 
@@ -200,7 +201,7 @@ vlib_put_frame_to_node (vlib_main_t * vm, u32 to_node_index, vlib_frame_t * f)
 
   vec_add2 (vm->node_main.pending_frames, p, 1);
 
-  f->flags |= VLIB_FRAME_PENDING;
+  f->frame_flags |= VLIB_FRAME_PENDING;
   p->frame_index = vlib_frame_index (vm, f);
   p->node_runtime_index = to_node->runtime_index;
   p->next_frame_index = VLIB_PENDING_FRAME_NO_NEXT_FRAME;
@@ -215,14 +216,14 @@ vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f)
   vlib_frame_size_t *fs;
   u32 frame_index;
 
-  ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED);
+  ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED);
 
   node = vlib_get_node (vm, r->node_index);
   fs = get_frame_size_info (nm, node->scalar_size, node->vector_size);
 
   frame_index = vlib_frame_index (vm, f);
 
-  ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED);
+  ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED);
 
   /* No next frames may point to freed frame. */
   if (CLIB_DEBUG > 0)
@@ -232,7 +233,7 @@ vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f)
 	ASSERT (nf->frame_index != frame_index);
     }
 
-  f->flags &= ~VLIB_FRAME_IS_ALLOCATED;
+  f->frame_flags &= ~VLIB_FRAME_IS_ALLOCATED;
 
   vec_add1 (fs->free_frame_indices, frame_index);
   ASSERT (fs->n_alloc_frames > 0);
@@ -378,7 +379,8 @@ vlib_get_next_frame_internal (vlib_main_t * vm,
 
   /* Has frame been removed from pending vector (e.g. finished dispatching)?
      If so we can reuse frame. */
-  if ((nf->flags & VLIB_FRAME_PENDING) && !(f->flags & VLIB_FRAME_PENDING))
+  if ((nf->flags & VLIB_FRAME_PENDING)
+      && !(f->frame_flags & VLIB_FRAME_PENDING))
     {
       nf->flags &= ~VLIB_FRAME_PENDING;
       f->n_vectors = 0;
@@ -393,7 +395,7 @@ vlib_get_next_frame_internal (vlib_main_t * vm,
       if (!(nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH))
 	{
 	  vlib_frame_t *f_old = vlib_get_frame (vm, nf->frame_index);
-	  f_old->flags |= VLIB_FRAME_FREE_AFTER_DISPATCH;
+	  f_old->frame_flags |= VLIB_FRAME_FREE_AFTER_DISPATCH;
 	}
 
       /* Allocate new frame to replace full one. */
@@ -488,7 +490,7 @@ vlib_put_next_frame (vlib_main_t * vm,
 
       r->cached_next_index = next_index;
 
-      if (!(f->flags & VLIB_FRAME_PENDING))
+      if (!(f->frame_flags & VLIB_FRAME_PENDING))
 	{
 	  __attribute__ ((unused)) vlib_node_t *node;
 	  vlib_node_t *next_node;
@@ -504,7 +506,7 @@ vlib_put_next_frame (vlib_main_t * vm,
 	  p->node_runtime_index = nf->node_runtime_index;
 	  p->next_frame_index = nf - nm->next_frames;
 	  nf->flags |= VLIB_FRAME_PENDING;
-	  f->flags |= VLIB_FRAME_PENDING;
+	  f->frame_flags |= VLIB_FRAME_PENDING;
 
 	  /*
 	   * If we're going to dispatch this frame on another thread,
@@ -539,29 +541,38 @@ vlib_put_next_frame (vlib_main_t * vm,
 never_inline void
 vlib_node_runtime_sync_stats (vlib_main_t * vm,
 			      vlib_node_runtime_t * r,
-			      uword n_calls, uword n_vectors, uword n_clocks)
+			      uword n_calls, uword n_vectors, uword n_clocks,
+			      uword n_ticks)
 {
   vlib_node_t *n = vlib_get_node (vm, r->node_index);
 
   n->stats_total.calls += n_calls + r->calls_since_last_overflow;
   n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow;
   n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow;
+  n->stats_total.perf_counter_ticks += n_ticks +
+    r->perf_counter_ticks_since_last_overflow;
+  n->stats_total.perf_counter_vectors += n_vectors +
+    r->perf_counter_vectors_since_last_overflow;
   n->stats_total.max_clock = r->max_clock;
   n->stats_total.max_clock_n = r->max_clock_n;
 
   r->calls_since_last_overflow = 0;
   r->vectors_since_last_overflow = 0;
   r->clocks_since_last_overflow = 0;
+  r->perf_counter_ticks_since_last_overflow = 0ULL;
+  r->perf_counter_vectors_since_last_overflow = 0ULL;
 }
 
 always_inline void __attribute__ ((unused))
 vlib_process_sync_stats (vlib_main_t * vm,
 			 vlib_process_t * p,
-			 uword n_calls, uword n_vectors, uword n_clocks)
+			 uword n_calls, uword n_vectors, uword n_clocks,
+			 uword n_ticks)
 {
   vlib_node_runtime_t *rt = &p->node_runtime;
   vlib_node_t *n = vlib_get_node (vm, rt->node_index);
-  vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks);
+  vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks,
+				n_ticks);
   n->stats_total.suspends += p->n_suspends;
   p->n_suspends = 0;
 }
@@ -587,7 +598,7 @@ vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n)
       vec_elt_at_index (vm->node_main.nodes_by_type[n->type],
 			n->runtime_index);
 
-  vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0);
+  vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0, 0);
 
   /* Sync up runtime next frame vector counters with main node structure. */
   {
@@ -607,45 +618,68 @@ always_inline u32
 vlib_node_runtime_update_stats (vlib_main_t * vm,
 				vlib_node_runtime_t * node,
 				uword n_calls,
-				uword n_vectors, uword n_clocks)
+				uword n_vectors, uword n_clocks,
+				uword n_ticks)
 {
   u32 ca0, ca1, v0, v1, cl0, cl1, r;
+  u32 ptick0, ptick1, pvec0, pvec1;
 
   cl0 = cl1 = node->clocks_since_last_overflow;
   ca0 = ca1 = node->calls_since_last_overflow;
   v0 = v1 = node->vectors_since_last_overflow;
+  ptick0 = ptick1 = node->perf_counter_ticks_since_last_overflow;
+  pvec0 = pvec1 = node->perf_counter_vectors_since_last_overflow;
 
   ca1 = ca0 + n_calls;
   v1 = v0 + n_vectors;
   cl1 = cl0 + n_clocks;
+  ptick1 = ptick0 + n_ticks;
+  pvec1 = pvec0 + n_vectors;
 
   node->calls_since_last_overflow = ca1;
   node->clocks_since_last_overflow = cl1;
   node->vectors_since_last_overflow = v1;
+  node->perf_counter_ticks_since_last_overflow = ptick1;
+  node->perf_counter_vectors_since_last_overflow = pvec1;
+  /* Track the largest n_clocks sample seen and the n_vectors it came with. */
   node->max_clock_n = node->max_clock > n_clocks ?
     node->max_clock_n : n_vectors;
   node->max_clock = node->max_clock > n_clocks ? node->max_clock : n_clocks;
 
   r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors);
 
-  if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0))
+  if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0 || ptick1 < ptick0
+		     || pvec1 < pvec0))
     {
       node->calls_since_last_overflow = ca0;
       node->clocks_since_last_overflow = cl0;
       node->vectors_since_last_overflow = v0;
-      vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks);
+      node->perf_counter_ticks_since_last_overflow = ptick0;
+      node->perf_counter_vectors_since_last_overflow = pvec0;
+      /* Counts restored above; sync adds them and this sample to totals. */
+      vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks,
+				    n_ticks);
     }
 
   return r;
 }
 
+/* Value of the perf counter callback, or 0 when no callback is set. */
+static inline u64
+vlib_node_runtime_perf_counter (vlib_main_t * vm)
+{
+  return PREDICT_FALSE (vm->vlib_node_runtime_perf_counter_cb != 0) ?
+    (*vm->vlib_node_runtime_perf_counter_cb) (vm) : 0ULL;
+}
+
 always_inline void
 vlib_process_update_stats (vlib_main_t * vm,
 			   vlib_process_t * p,
-			   uword n_calls, uword n_vectors, uword n_clocks)
+			   uword n_calls, uword n_vectors, uword n_clocks,
+			   uword n_ticks)
 {
   vlib_node_runtime_update_stats (vm, &p->node_runtime,
-				  n_calls, n_vectors, n_clocks);
+				  n_calls, n_vectors, n_clocks, n_ticks);
 }
 
 static clib_error_t *
@@ -958,15 +992,19 @@ dispatch_node (vlib_main_t * vm,
 
   if (1 /* || vm->thread_index == node->thread_index */ )
     {
-      vlib_main_t *stat_vm;
-
-      stat_vm = /* vlib_mains ? vlib_mains[0] : */ vm;
+      u64 pmc_before, pmc_delta;
 
       vlib_elog_main_loop_event (vm, node->node_index,
 				 last_time_stamp,
 				 frame ? frame->n_vectors : 0,
 				 /* is_after */ 0);
 
+      /*
+       * To validate the accounting, set pmc_before = last_time_stamp here:
+       * the perf ticks recorded should then equal clocks/pkt.
+       */
+      pmc_before = vlib_node_runtime_perf_counter (vm);
+
       /*
        * Turn this on if you run into
        * "bad monkey" contexts, and you want to know exactly
@@ -989,16 +1027,23 @@ dispatch_node (vlib_main_t * vm,
 
       t = clib_cpu_time_now ();
 
+      /*
+       * To validate the accounting, set pmc_delta = t - pmc_before here:
+       * the perf ticks recorded should then equal clocks/pkt.
+       */
+      pmc_delta = vlib_node_runtime_perf_counter (vm) - pmc_before;
+
       vlib_elog_main_loop_event (vm, node->node_index, t, n,	/* is_after */
 				 1);
 
       vm->main_loop_vectors_processed += n;
       vm->main_loop_nodes_processed += n > 0;
 
-      v = vlib_node_runtime_update_stats (stat_vm, node,
+      v = vlib_node_runtime_update_stats (vm, node,
 					  /* n_calls */ 1,
 					  /* n_vectors */ n,
-					  /* n_clocks */ t - last_time_stamp);
+					  /* n_clocks */ t - last_time_stamp,
+					  pmc_delta /* PMC ticks */ );
 
       /* When in interrupt mode and vector rate crosses threshold switch to
          polling mode. */
@@ -1105,13 +1150,13 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index,
     {
       /* No next frame: so use dummy on stack. */
       nf = &nf_dummy;
-      nf->flags = f->flags & VLIB_NODE_FLAG_TRACE;
+      nf->flags = f->frame_flags & VLIB_NODE_FLAG_TRACE;
       nf->frame_index = ~p->frame_index;
     }
   else
     nf = vec_elt_at_index (nm->next_frames, p->next_frame_index);
 
-  ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED);
+  ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED);
 
   /* Force allocation of new frame while current frame is being
      dispatched. */
@@ -1125,7 +1170,7 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index,
     }
 
   /* Frame must be pending. */
-  ASSERT (f->flags & VLIB_FRAME_PENDING);
+  ASSERT (f->frame_flags & VLIB_FRAME_PENDING);
   ASSERT (f->n_vectors > 0);
 
   /* Copy trace flag from next frame to node.
@@ -1140,7 +1185,7 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index,
 				   VLIB_NODE_STATE_POLLING,
 				   f, last_time_stamp);
 
-  f->flags &= ~VLIB_FRAME_PENDING;
+  f->frame_flags &= ~VLIB_FRAME_PENDING;
 
   /* Frame is ready to be used again, so restore it. */
   if (restore_frame_index != ~0)
@@ -1155,7 +1200,7 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index,
        * longer part of the queue for that node and hence it cannot be
        * it's overspill.
        */
-      ASSERT (!(f->flags & VLIB_FRAME_FREE_AFTER_DISPATCH));
+      ASSERT (!(f->frame_flags & VLIB_FRAME_FREE_AFTER_DISPATCH));
 
       /*
        * NB: dispatching node n can result in the creation and scheduling
@@ -1187,7 +1232,7 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index,
     }
   else
     {
-      if (f->flags & VLIB_FRAME_FREE_AFTER_DISPATCH)
+      if (f->frame_flags & VLIB_FRAME_FREE_AFTER_DISPATCH)
 	{
 	  ASSERT (!(n->flags & VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH));
 	  vlib_frame_free (vm, n, f);
@@ -1337,7 +1382,8 @@ dispatch_process (vlib_main_t * vm,
   vlib_process_update_stats (vm, p,
 			     /* n_calls */ !is_suspend,
 			     /* n_vectors */ n_vectors,
-			     /* n_clocks */ t - last_time_stamp);
+			     /* n_clocks */ t - last_time_stamp,
+			     /* pmc_ticks */ 0ULL);
 
   return t;
 }
@@ -1371,9 +1417,8 @@ dispatch_suspended_process (vlib_main_t * vm,
   ASSERT (p->flags & (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
 		      | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT));
 
-  pf =
-    pool_elt_at_index (nm->suspended_process_frames,
-		       p->suspended_process_frame_index);
+  pf = pool_elt_at_index (nm->suspended_process_frames,
+			  p->suspended_process_frame_index);
 
   node_runtime = &p->node_runtime;
   node = vlib_get_node (vm, node_runtime->node_index);
@@ -1409,8 +1454,9 @@ dispatch_suspended_process (vlib_main_t * vm,
   else
     {
       p->flags &= ~VLIB_PROCESS_IS_RUNNING;
+      pool_put_index (nm->suspended_process_frames,
+		      p->suspended_process_frame_index);
       p->suspended_process_frame_index = ~0;
-      pool_put (nm->suspended_process_frames, pf);
     }
 
   t = clib_cpu_time_now ();
@@ -1420,7 +1466,8 @@ dispatch_suspended_process (vlib_main_t * vm,
   vlib_process_update_stats (vm, p,
 			     /* n_calls */ !is_suspend,
 			     /* n_vectors */ n_vectors,
-			     /* n_clocks */ t - last_time_stamp);
+			     /* n_clocks */ t - last_time_stamp,
+			     /* pmc_ticks */ 0ULL);
 
   return t;
 }
@@ -1470,6 +1517,9 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
   if (!nm->interrupt_threshold_vector_length)
     nm->interrupt_threshold_vector_length = 5;
 
+  /* Make sure the performance monitor counter is disabled */
+  vm->perf_counter_id = ~0;
+
   /* Start all processes. */
   if (is_main)
     {
@@ -1485,13 +1535,19 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
       vlib_node_runtime_t *n;
 
       if (PREDICT_FALSE (_vec_len (vm->pending_rpc_requests) > 0))
-	vl_api_send_pending_rpc_requests (vm);
+	{
+	  if (!is_main)
+	    vl_api_send_pending_rpc_requests (vm);
+	}
 
       if (!is_main)
 	{
 	  vlib_worker_thread_barrier_check ();
 	  vec_foreach (fqm, tm->frame_queue_mains)
 	    vlib_frame_queue_dequeue (vm, fqm);
+	  if (PREDICT_FALSE (vm->worker_thread_main_loop_callback != 0))
+	    ((void (*)(vlib_main_t *)) vm->worker_thread_main_loop_callback)
+	      (vm);
 	}
 
       /* Process pre-input nodes. */
@@ -1594,12 +1650,12 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
 							  te->n_data_elts,
 							  te->n_data_elt_bytes);
 		      if (te->n_data_bytes < sizeof (te->inline_event_data))
-			clib_memcpy (data, te->inline_event_data,
-				     te->n_data_bytes);
+			clib_memcpy_fast (data, te->inline_event_data,
+					  te->n_data_bytes);
 		      else
 			{
-			  clib_memcpy (data, te->event_data_as_vector,
-				       te->n_data_bytes);
+			  clib_memcpy_fast (data, te->event_data_as_vector,
+					    te->n_data_bytes);
 			  vec_free (te->event_data_as_vector);
 			}
 		      pool_put (nm->signal_timed_event_data_pool, te);
@@ -1704,7 +1760,7 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input)
   if (!vm->name)
     vm->name = "VLIB";
 
-  if ((error = unix_physmem_init (vm)))
+  if ((error = vlib_physmem_init (vm)))
     {
       clib_error_report (error);
       goto done;
@@ -1790,6 +1846,8 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input)
 
   vec_validate (vm->pending_rpc_requests, 0);
   _vec_len (vm->pending_rpc_requests) = 0;
+  vec_validate (vm->processing_rpc_requests, 0);
+  _vec_len (vm->processing_rpc_requests) = 0;
 
   switch (clib_setjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_NONE))
     {