X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlib%2Fmain.c;h=73548fbea72834851acae9fc5054e665cfaae82c;hb=22d32d916f4f3806501cf39b324be19e06b89c12;hp=a27268608f37d80a3c1dea0b5027cc190c5f1651;hpb=903fd513e32a37e55aec0cfb4cf30e000680e0c3;p=vpp.git

diff --git a/src/vlib/main.c b/src/vlib/main.c
index a27268608f3..73548fbea72 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -41,7 +41,9 @@
 #include <vppinfra/format.h>
 #include <vlib/vlib.h>
 #include <vlib/threads.h>
+#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
 
+#include <vlib/unix/unix.h>
 #include <vlib/unix/cj.h>
 
 CJ_GLOBAL_LOG_PROTOTYPE;
@@ -136,18 +138,18 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index,
   else
     {
       f = clib_mem_alloc_aligned_no_fail (n, VLIB_FRAME_ALIGN);
-      f->cpu_index = vm->cpu_index;
+      f->thread_index = vm->thread_index;
       fi = vlib_frame_index_no_check (vm, f);
     }
 
   /* Poison frame when debugging. */
   if (CLIB_DEBUG > 0)
     {
-      u32 save_cpu_index = f->cpu_index;
+      u32 save_thread_index = f->thread_index;
 
       memset (f, 0xfe, n);
 
-      f->cpu_index = save_cpu_index;
+      f->thread_index = save_thread_index;
     }
 
   /* Insert magic number. */
@@ -465,7 +467,7 @@ vlib_put_next_frame (vlib_main_t * vm,
   vlib_frame_t *f;
   u32 n_vectors_in_frame;
 
-  if (vm->buffer_main->extern_buffer_mgmt == 0 && CLIB_DEBUG > 0)
+  if (vm->buffer_main->callbacks_registered == 0 && CLIB_DEBUG > 0)
     vlib_put_next_frame_validate (vm, r, next_index, n_vectors_left);
 
   nf = vlib_node_runtime_get_next_frame (vm, r, next_index);
@@ -517,7 +519,7 @@ vlib_put_next_frame (vlib_main_t * vm,
 	   * a dangling frame reference. Each thread has its own copy of
 	   * the next_frames vector.
 	   */
-	  if (0 && r->cpu_index != next_runtime->cpu_index)
+	  if (0 && r->thread_index != next_runtime->thread_index)
 	    {
 	      nf->frame_index = ~0;
 	      nf->flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_IS_ALLOCATED);
@@ -707,6 +709,24 @@ elog_save_buffer (vlib_main_t * vm,
   return error;
 }
 
+void
+elog_post_mortem_dump (void)
+{
+  vlib_main_t *vm = &vlib_global_main;
+  elog_main_t *em = &vm->elog_main;
+  u8 *filename;
+  clib_error_t *error;
+  /* Dump the global event log to a per-pid file; no-op unless enabled. */
+  if (!vm->elog_post_mortem_dump)
+    return;
+  /* The %c/0 pair appends a NUL so the name can be used as a C string. */
+  filename = format (0, "/tmp/elog_post_mortem.%d%c", getpid (), 0);
+  error = elog_write_file (em, (char *) filename, 1 /* flush ring */ );
+  if (error)
+    clib_error_report (error);
+  vec_free (filename);
+}
+
 /* *INDENT-OFF* */
 VLIB_CLI_COMMAND (elog_save_cli, static) = {
   .path = "event-logger save",
@@ -866,7 +886,7 @@ vlib_elog_main_loop_event (vlib_main_t * vm,
 				  : evm->node_call_elog_event_types,
 				  node_index),
 		/* track */
-		(vm->cpu_index ? &vlib_worker_threads[vm->cpu_index].
+		(vm->thread_index ? &vlib_worker_threads[vm->thread_index].
 		 elog_track : &em->default_track),
 		/* data to log */ n_vectors);
 }
@@ -963,7 +983,7 @@ dispatch_node (vlib_main_t * vm,
 
   vm->cpu_time_last_node_dispatch = last_time_stamp;
 
-  if (1 /* || vm->cpu_index == node->cpu_index */ )
+  if (1 /* || vm->thread_index == node->thread_index */ )
     {
       vlib_main_t *stat_vm;
 
@@ -1029,7 +1049,7 @@ dispatch_node (vlib_main_t * vm,
 	  {
 	    u32 node_name, vector_length, is_polling;
 	  } *ed;
-	  vlib_worker_thread_t *w = vlib_worker_threads + vm->cpu_index;
+	  vlib_worker_thread_t *w = vlib_worker_threads + vm->thread_index;
 #endif
 
 	  if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT
@@ -1094,14 +1114,18 @@ dispatch_node (vlib_main_t * vm,
 }
 
 static u64
-dispatch_pending_node (vlib_main_t * vm,
-		       vlib_pending_frame_t * p, u64 last_time_stamp)
+dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index,
+		       u64 last_time_stamp)
 {
   vlib_node_main_t *nm = &vm->node_main;
   vlib_frame_t *f;
   vlib_next_frame_t *nf, nf_dummy;
   vlib_node_runtime_t *n;
   u32 restore_frame_index;
+  vlib_pending_frame_t *p;
+
+  /* See comment below about dangling references to nm->pending_frames */
+  p = nm->pending_frames + pending_frame_index;
 
   n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
 			p->node_runtime_index);
@@ -1151,18 +1175,29 @@ dispatch_pending_node (vlib_main_t * vm,
   /* Frame is ready to be used again, so restore it. */
   if (restore_frame_index != ~0)
     {
-      /* we musn't restore a frame that is flagged to be freed. This shouldn't
-         happen since frames to be freed post dispatch are those used
-         when the to-node frame becomes full i.e. they form a sort of queue of
-         frames to a single node. If we get here then the to-node frame and the
-         pending frame *were* the same, and so we removed the to-node frame.
-         Therefore this frame is no longer part of the queue for that node
-         and hence it cannot be it's overspill.
+      /*
+       * We mustn't restore a frame that is flagged to be freed. This
+       * shouldn't happen since frames to be freed post dispatch are
+       * those used when the to-node frame becomes full i.e. they form a
+       * sort of queue of frames to a single node. If we get here then
+       * the to-node frame and the pending frame *were* the same, and so
+       * we removed the to-node frame.  Therefore this frame is no
+       * longer part of the queue for that node and hence it cannot be
+       * its overspill.
        */
       ASSERT (!(f->flags & VLIB_FRAME_FREE_AFTER_DISPATCH));
 
-      /* p->next_frame_index can change during node dispatch if node
-         function decides to change graph hook up. */
+      /*
+       * NB: dispatching node n can result in the creation and scheduling
+       * of new frames, and hence in the reallocation of nm->pending_frames.
+       * Recompute p, or no supper. This was broken for more than 10 years.
+       */
+      p = nm->pending_frames + pending_frame_index;
+
+      /*
+       * p->next_frame_index can change during node dispatch if node
+       * function decides to change graph hook up.
+       */
       nf = vec_elt_at_index (nm->next_frames, p->next_frame_index);
       nf->flags |= VLIB_FRAME_IS_ALLOCATED;
 
@@ -1308,9 +1343,16 @@ dispatch_process (vlib_main_t * vm,
       p->suspended_process_frame_index = pf - nm->suspended_process_frames;
 
       if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK)
-	timing_wheel_insert (&nm->timing_wheel, p->resume_cpu_time,
-			     vlib_timing_wheel_data_set_suspended_process
-			     (node->runtime_index));
+	{
+	  TWT (tw_timer_wheel) * tw =
+	    (TWT (tw_timer_wheel) *) nm->timing_wheel;
+	  p->stop_timer_handle =
+	    TW (tw_timer_start) (tw,
+				 vlib_timing_wheel_data_set_suspended_process
+				 (node->runtime_index) /* [sic] pool index */ ,
+				 0 /* timer_id */ ,
+				 p->resume_clock_interval);
+	}
     }
   else
     p->flags &= ~VLIB_PROCESS_IS_RUNNING;
@@ -1383,9 +1425,14 @@ dispatch_suspended_process (vlib_main_t * vm,
       n_vectors = 0;
       p->n_suspends += 1;
       if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK)
-	timing_wheel_insert (&nm->timing_wheel, p->resume_cpu_time,
-			     vlib_timing_wheel_data_set_suspended_process
-			     (node->runtime_index));
+	{
+	  p->stop_timer_handle =
+	    TW (tw_timer_start) ((TWT (tw_timer_wheel) *) nm->timing_wheel,
+				 vlib_timing_wheel_data_set_suspended_process
+				 (node->runtime_index) /* [sic] pool index */ ,
+				 0 /* timer_id */ ,
+				 p->resume_clock_interval);
+	}
     }
   else
     {
@@ -1414,6 +1461,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
   uword i;
   u64 cpu_time_now;
   vlib_frame_queue_main_t *fqm;
+  u32 *last_node_runtime_indices = 0;
 
   /* Initialize pending node vector. */
   if (is_main)
@@ -1431,38 +1479,23 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
   else
     cpu_time_now = clib_cpu_time_now ();
 
-  /* Arrange for first level of timing wheel to cover times we care
-     most about. */
-  if (is_main)
-    {
-      nm->timing_wheel.min_sched_time = 10e-6;
-      nm->timing_wheel.max_sched_time = 10e-3;
-      timing_wheel_init (&nm->timing_wheel,
-			 cpu_time_now, vm->clib_time.clocks_per_second);
-      vec_alloc (nm->data_from_advancing_timing_wheel, 32);
-    }
+  /* Pre-allocate interrupt runtime indices and lock. */
+  vec_alloc (nm->pending_interrupt_node_runtime_indices, 32);
+  vec_alloc (last_node_runtime_indices, 32);
+  if (!is_main)
+    clib_spinlock_init (&nm->pending_interrupt_lock);
 
   /* Pre-allocate expired nodes. */
-  vec_alloc (nm->pending_interrupt_node_runtime_indices, 32);
   if (!nm->polling_threshold_vector_length)
     nm->polling_threshold_vector_length = 10;
   if (!nm->interrupt_threshold_vector_length)
     nm->interrupt_threshold_vector_length = 5;
 
-  if (is_main)
-    {
-      if (!nm->polling_threshold_vector_length)
-	nm->polling_threshold_vector_length = 10;
-      if (!nm->interrupt_threshold_vector_length)
-	nm->interrupt_threshold_vector_length = 5;
-
-      nm->current_process_index = ~0;
-    }
-
   /* Start all processes. */
   if (is_main)
     {
       uword i;
+      nm->current_process_index = ~0;
       for (i = 0; i < vec_len (nm->processes); i++)
 	cpu_time_now = dispatch_process (vm, nm->processes[i], /* frame */ 0,
 					 cpu_time_now);
@@ -1505,13 +1538,20 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
 	uword i;
 	if (l > 0)
 	  {
-	    _vec_len (nm->pending_interrupt_node_runtime_indices) = 0;
+	    u32 *tmp;
+	    if (!is_main)
+	      clib_spinlock_lock (&nm->pending_interrupt_lock);
+	    tmp = nm->pending_interrupt_node_runtime_indices;
+	    nm->pending_interrupt_node_runtime_indices =
+	      last_node_runtime_indices;
+	    last_node_runtime_indices = tmp;
+	    _vec_len (last_node_runtime_indices) = 0;
+	    if (!is_main)
+	      clib_spinlock_unlock (&nm->pending_interrupt_lock);
 	    for (i = 0; i < l; i++)
 	      {
 		n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
-				      nm->
-				      pending_interrupt_node_runtime_indices
-				      [i]);
+				      last_node_runtime_indices[i]);
 		cpu_time_now =
 		  dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT,
 				 VLIB_NODE_STATE_INTERRUPT,
@@ -1524,12 +1564,15 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
       if (is_main)
 	{
 	  /* Check if process nodes have expired from timing wheel. */
-	  nm->data_from_advancing_timing_wheel
-	    = timing_wheel_advance (&nm->timing_wheel, cpu_time_now,
-				    nm->data_from_advancing_timing_wheel,
-				    &nm->cpu_time_next_process_ready);
+	  ASSERT (nm->data_from_advancing_timing_wheel != 0);
+
+	  nm->data_from_advancing_timing_wheel =
+	    TW (tw_timer_expire_timers_vec)
+	    ((TWT (tw_timer_wheel) *) nm->timing_wheel, vlib_time_now (vm),
+	     nm->data_from_advancing_timing_wheel);
 
 	  ASSERT (nm->data_from_advancing_timing_wheel != 0);
+
 	  if (PREDICT_FALSE
 	      (_vec_len (nm->data_from_advancing_timing_wheel) > 0))
 	    {
@@ -1575,8 +1618,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
 			dispatch_suspended_process (vm, di, cpu_time_now);
 		    }
 		}
-
-	      /* Reset vector. */
 	      _vec_len (nm->data_from_advancing_timing_wheel) = 0;
 	    }
 	}
@@ -1585,8 +1626,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
          Process pending vector until there is nothing left.
          All pending vectors will be processed from input -> output. */
       for (i = 0; i < _vec_len (nm->pending_frames); i++)
-	cpu_time_now = dispatch_pending_node (vm, nm->pending_frames + i,
-					      cpu_time_now);
+	cpu_time_now = dispatch_pending_node (vm, i, cpu_time_now);
       /* Reset pending vector for next iteration. */
       _vec_len (nm->pending_frames) = 0;
 
@@ -1629,6 +1669,8 @@ vlib_main_configure (vlib_main_t * vm, unformat_input_t * input)
       else if (unformat (input, "elog-events %d",
 			 &vm->elog_main.event_ring_size))
 	;
+      else if (unformat (input, "elog-post-mortem-dump"))
+	vm->elog_post_mortem_dump = 1;
       else
 	return unformat_parse_error (input);
     }
@@ -1654,6 +1696,7 @@ int
 vlib_main (vlib_main_t * volatile vm, unformat_input_t * input)
 {
   clib_error_t *volatile error;
+  vlib_node_main_t *nm = &vm->node_main;
 
   vm->queue_signal_callback = dummy_queue_signal_callback;
 
@@ -1670,7 +1713,22 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input)
     vm->name = "VLIB";
 
   vec_validate (vm->buffer_main, 0);
-  vlib_buffer_cb_init (vm);
+  if (vlib_buffer_callbacks)
+    {
+      /* external plugin has registered own buffer callbacks
+         so we just copy them */
+      vlib_buffer_main_t *bm = vm->buffer_main;
+      clib_memcpy (&bm->cb, vlib_buffer_callbacks,
+		   sizeof (vlib_buffer_callbacks_t));
+      bm->callbacks_registered = 1;
+    }
+  else
+    {
+      vlib_physmem_main_t *vpm = &vm->physmem_main;
+      vlib_buffer_cb_init (vm);
+      unix_physmem_init (vm, 0 /* fail_if_physical_memory_not_present */ );
+      vlib_buffer_add_mem_range (vm, vpm->virtual.start, vpm->virtual.size);
+    }
 
   if ((error = vlib_thread_init (vm)))
     {
@@ -1708,6 +1766,18 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input)
 				       VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES,
 				       "default");
 
+  nm->timing_wheel = clib_mem_alloc_aligned (sizeof (TWT (tw_timer_wheel)),
+					     CLIB_CACHE_LINE_BYTES);
+
+  vec_validate (nm->data_from_advancing_timing_wheel, 10);
+  _vec_len (nm->data_from_advancing_timing_wheel) = 0;
+
+  /* Create the process timing wheel */
+  TW (tw_timer_wheel_init) ((TWT (tw_timer_wheel) *) nm->timing_wheel,
+			    0 /* no callback */ ,
+			    10e-6 /* timer period 10us */ ,
+			    ~0 /* max expirations per call */ );
+
   switch (clib_setjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_NONE))
     {
     case VLIB_MAIN_LOOP_EXIT_NONE: