else
{
f = clib_mem_alloc_aligned_no_fail (n, VLIB_FRAME_ALIGN);
- f->cpu_index = vm->cpu_index;
+ f->thread_index = vm->thread_index;
fi = vlib_frame_index_no_check (vm, f);
}
/* Poison frame when debugging. */
if (CLIB_DEBUG > 0)
{
- u32 save_cpu_index = f->cpu_index;
+ u32 save_thread_index = f->thread_index;
memset (f, 0xfe, n);
- f->cpu_index = save_cpu_index;
+ f->thread_index = save_thread_index;
}
/* Insert magic number. */
* a dangling frame reference. Each thread has its own copy of
* the next_frames vector.
*/
- if (0 && r->cpu_index != next_runtime->cpu_index)
+ if (0 && r->thread_index != next_runtime->thread_index)
{
nf->frame_index = ~0;
nf->flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_IS_ALLOCATED);
elog_buffer_capacity (em), chroot_file);
vlib_worker_thread_barrier_sync (vm);
- error = elog_write_file (em, chroot_file);
+ error = elog_write_file (em, chroot_file, 1 /* flush ring */ );
vlib_worker_thread_barrier_release (vm);
vec_free (chroot_file);
return error;
}
+void
+elog_post_mortem_dump (void)
+{
+ vlib_main_t *vm = &vlib_global_main;
+ elog_main_t *em = &vm->elog_main;
+ u8 *filename;
+ clib_error_t *error;
+
+ if (!vm->elog_post_mortem_dump)
+ return;
+
+ filename = format (0, "/tmp/elog_post_mortem.%d%c", getpid (), 0);
+ error = elog_write_file (em, (char *) filename, 1 /* flush ring */ );
+ if (error)
+ clib_error_report (error);
+ vec_free (filename);
+}
+
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (elog_save_cli, static) = {
.path = "event-logger save",
: evm->node_call_elog_event_types,
node_index),
/* track */
- (vm->cpu_index ? &vlib_worker_threads[vm->cpu_index].
+ (vm->thread_index ? &vlib_worker_threads[vm->thread_index].
elog_track : &em->default_track),
/* data to log */ n_vectors);
}
}
-/* static_always_inline */ u64
+static_always_inline u64
dispatch_node (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_node_type_t type,
vm->cpu_time_last_node_dispatch = last_time_stamp;
- if (1 /* || vm->cpu_index == node->cpu_index */ )
+ if (1 /* || vm->thread_index == node->thread_index */ )
{
vlib_main_t *stat_vm;
&& (node->flags
& VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)))
{
+#ifdef DISPATCH_NODE_ELOG_REQUIRED
ELOG_TYPE_DECLARE (e) =
{
.function = (char *) __FUNCTION__,.format =
{
u32 node_name, vector_length, is_polling;
} *ed;
+ vlib_worker_thread_t *w = vlib_worker_threads + vm->thread_index;
+#endif
- if (dispatch_state == VLIB_NODE_STATE_INTERRUPT
- && v >= nm->polling_threshold_vector_length)
+ if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT
+ && v >= nm->polling_threshold_vector_length) &&
+ !(node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
{
vlib_node_t *n = vlib_get_node (vm, node->node_index);
n->state = VLIB_NODE_STATE_POLLING;
node->state = VLIB_NODE_STATE_POLLING;
- ASSERT (!
- (node->flags &
- VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE));
node->flags &=
~VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE;
node->flags |=
nm->input_node_counts_by_state[VLIB_NODE_STATE_INTERRUPT] -= 1;
nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] += 1;
- ed = ELOG_DATA (&vm->elog_main, e);
+#ifdef DISPATCH_NODE_ELOG_REQUIRED
+ ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e,
+ w->elog_track);
ed->node_name = n->name_elog_string;
ed->vector_length = v;
ed->is_polling = 1;
+#endif
}
else if (dispatch_state == VLIB_NODE_STATE_POLLING
&& v <= nm->interrupt_threshold_vector_length)
{
node->flags |=
VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE;
- ed = ELOG_DATA (&vm->elog_main, e);
+#ifdef DISPATCH_NODE_ELOG_REQUIRED
+ ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e,
+ w->elog_track);
ed->node_name = n->name_elog_string;
ed->vector_length = v;
ed->is_polling = 0;
+#endif
}
}
}
return t;
}
-/* static */ u64
-dispatch_pending_node (vlib_main_t * vm,
- vlib_pending_frame_t * p, u64 last_time_stamp)
+static u64
+dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index,
+ u64 last_time_stamp)
{
vlib_node_main_t *nm = &vm->node_main;
vlib_frame_t *f;
vlib_next_frame_t *nf, nf_dummy;
vlib_node_runtime_t *n;
u32 restore_frame_index;
+ vlib_pending_frame_t *p;
+
+ /* See comment below about dangling references to nm->pending_frames */
+ p = nm->pending_frames + pending_frame_index;
n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
p->node_runtime_index);
/* Frame is ready to be used again, so restore it. */
if (restore_frame_index != ~0)
{
- /* we musn't restore a frame that is flagged to be freed. This shouldn't
- happen since frames to be freed post dispatch are those used
- when the to-node frame becomes full i.e. they form a sort of queue of
- frames to a single node. If we get here then the to-node frame and the
- pending frame *were* the same, and so we removed the to-node frame.
- Therefore this frame is no longer part of the queue for that node
- and hence it cannot be it's overspill.
+ /*
+ * We mustn't restore a frame that is flagged to be freed. This
+ * shouldn't happen since frames to be freed post dispatch are
+ * those used when the to-node frame becomes full i.e. they form a
+ * sort of queue of frames to a single node. If we get here then
+ * the to-node frame and the pending frame *were* the same, and so
+ * we removed the to-node frame. Therefore this frame is no
+ * longer part of the queue for that node and hence it cannot be
+ * its overspill.
*/
ASSERT (!(f->flags & VLIB_FRAME_FREE_AFTER_DISPATCH));
- /* p->next_frame_index can change during node dispatch if node
- function decides to change graph hook up. */
+ /*
+ * NB: dispatching node n can result in the creation and scheduling
+ * of new frames, and hence in the reallocation of nm->pending_frames.
+ * Recompute p, or no supper. This was broken for more than 10 years.
+ */
+ p = nm->pending_frames + pending_frame_index;
+
+ /*
+ * p->next_frame_index can change during node dispatch if node
+ * function decides to change graph hook up.
+ */
nf = vec_elt_at_index (nm->next_frames, p->next_frame_index);
nf->flags |= VLIB_FRAME_IS_ALLOCATED;
return t;
}
-static void
-vlib_main_loop (vlib_main_t * vm)
+static_always_inline void
+vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
{
vlib_node_main_t *nm = &vm->node_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
uword i;
u64 cpu_time_now;
+ vlib_frame_queue_main_t *fqm;
+ u32 *last_node_runtime_indices = 0;
/* Initialize pending node vector. */
- vec_resize (nm->pending_frames, 32);
- _vec_len (nm->pending_frames) = 0;
+ if (is_main)
+ {
+ vec_resize (nm->pending_frames, 32);
+ _vec_len (nm->pending_frames) = 0;
+ }
/* Mark time of main loop start. */
- cpu_time_now = vm->clib_time.last_cpu_time;
- vm->cpu_time_main_loop_start = cpu_time_now;
+ if (is_main)
+ {
+ cpu_time_now = vm->clib_time.last_cpu_time;
+ vm->cpu_time_main_loop_start = cpu_time_now;
+ }
+ else
+ cpu_time_now = clib_cpu_time_now ();
/* Arrange for first level of timing wheel to cover times we care
most about. */
- nm->timing_wheel.min_sched_time = 10e-6;
- nm->timing_wheel.max_sched_time = 10e-3;
- timing_wheel_init (&nm->timing_wheel,
- cpu_time_now, vm->clib_time.clocks_per_second);
+ if (is_main)
+ {
+ nm->timing_wheel.min_sched_time = 10e-6;
+ nm->timing_wheel.max_sched_time = 10e-3;
+ timing_wheel_init (&nm->timing_wheel,
+ cpu_time_now, vm->clib_time.clocks_per_second);
+ vec_alloc (nm->data_from_advancing_timing_wheel, 32);
+ }
- /* Pre-allocate expired nodes. */
- vec_alloc (nm->data_from_advancing_timing_wheel, 32);
+ /* Pre-allocate interrupt runtime indices and lock. */
vec_alloc (nm->pending_interrupt_node_runtime_indices, 32);
+ vec_alloc (last_node_runtime_indices, 32);
+ if (!is_main)
+ clib_spinlock_init (&nm->pending_interrupt_lock);
+ /* Pre-allocate expired nodes. */
if (!nm->polling_threshold_vector_length)
nm->polling_threshold_vector_length = 10;
if (!nm->interrupt_threshold_vector_length)
nm->interrupt_threshold_vector_length = 5;
- nm->current_process_index = ~0;
-
/* Start all processes. */
- {
- uword i;
- for (i = 0; i < vec_len (nm->processes); i++)
- cpu_time_now =
- dispatch_process (vm, nm->processes[i], /* frame */ 0, cpu_time_now);
- }
+ if (is_main)
+ {
+ uword i;
+ nm->current_process_index = ~0;
+ for (i = 0; i < vec_len (nm->processes); i++)
+ cpu_time_now = dispatch_process (vm, nm->processes[i], /* frame */ 0,
+ cpu_time_now);
+ }
while (1)
{
vlib_node_runtime_t *n;
+ if (!is_main)
+ {
+ vlib_worker_thread_barrier_check ();
+ vec_foreach (fqm, tm->frame_queue_mains)
+ vlib_frame_queue_dequeue (vm, fqm);
+ }
+
/* Process pre-input nodes. */
- vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
- cpu_time_now = dispatch_node (vm, n,
- VLIB_NODE_TYPE_PRE_INPUT,
- VLIB_NODE_STATE_POLLING,
- /* frame */ 0,
- cpu_time_now);
+ if (is_main)
+ vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
+ cpu_time_now = dispatch_node (vm, n,
+ VLIB_NODE_TYPE_PRE_INPUT,
+ VLIB_NODE_STATE_POLLING,
+ /* frame */ 0,
+ cpu_time_now);
/* Next process input nodes. */
vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_INPUT])
/* frame */ 0,
cpu_time_now);
- if (PREDICT_TRUE (vm->queue_signal_pending == 0))
+ if (PREDICT_TRUE (is_main && vm->queue_signal_pending == 0))
vm->queue_signal_callback (vm);
/* Next handle interrupts. */
uword i;
if (l > 0)
{
- _vec_len (nm->pending_interrupt_node_runtime_indices) = 0;
+ u32 *tmp;
+ if (!is_main)
+ clib_spinlock_lock (&nm->pending_interrupt_lock);
+ tmp = nm->pending_interrupt_node_runtime_indices;
+ nm->pending_interrupt_node_runtime_indices =
+ last_node_runtime_indices;
+ last_node_runtime_indices = tmp;
+ _vec_len (last_node_runtime_indices) = 0;
+ if (!is_main)
+ clib_spinlock_unlock (&nm->pending_interrupt_lock);
for (i = 0; i < l; i++)
{
n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
- nm->
- pending_interrupt_node_runtime_indices
- [i]);
+ last_node_runtime_indices[i]);
cpu_time_now =
dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT,
VLIB_NODE_STATE_INTERRUPT,
}
}
- /* Check if process nodes have expired from timing wheel. */
- nm->data_from_advancing_timing_wheel
- = timing_wheel_advance (&nm->timing_wheel, cpu_time_now,
- nm->data_from_advancing_timing_wheel,
- &nm->cpu_time_next_process_ready);
-
- ASSERT (nm->data_from_advancing_timing_wheel != 0);
- if (PREDICT_FALSE (_vec_len (nm->data_from_advancing_timing_wheel) > 0))
+ if (is_main)
{
- uword i;
-
- processes_timing_wheel_data:
- for (i = 0; i < _vec_len (nm->data_from_advancing_timing_wheel);
- i++)
+ /* Check if process nodes have expired from timing wheel. */
+ nm->data_from_advancing_timing_wheel
+ = timing_wheel_advance (&nm->timing_wheel, cpu_time_now,
+ nm->data_from_advancing_timing_wheel,
+ &nm->cpu_time_next_process_ready);
+
+ ASSERT (nm->data_from_advancing_timing_wheel != 0);
+ if (PREDICT_FALSE
+ (_vec_len (nm->data_from_advancing_timing_wheel) > 0))
{
- u32 d = nm->data_from_advancing_timing_wheel[i];
- u32 di = vlib_timing_wheel_data_get_index (d);
+ uword i;
- if (vlib_timing_wheel_data_is_timed_event (d))
+ processes_timing_wheel_data:
+ for (i = 0; i < _vec_len (nm->data_from_advancing_timing_wheel);
+ i++)
{
- vlib_signal_timed_event_data_t *te =
- pool_elt_at_index (nm->signal_timed_event_data_pool, di);
- vlib_node_t *n = vlib_get_node (vm, te->process_node_index);
- vlib_process_t *p =
- vec_elt (nm->processes, n->runtime_index);
- void *data;
- data =
- vlib_process_signal_event_helper (nm, n, p,
- te->event_type_index,
- te->n_data_elts,
- te->n_data_elt_bytes);
- if (te->n_data_bytes < sizeof (te->inline_event_data))
- clib_memcpy (data, te->inline_event_data,
- te->n_data_bytes);
+ u32 d = nm->data_from_advancing_timing_wheel[i];
+ u32 di = vlib_timing_wheel_data_get_index (d);
+
+ if (vlib_timing_wheel_data_is_timed_event (d))
+ {
+ vlib_signal_timed_event_data_t *te =
+ pool_elt_at_index (nm->signal_timed_event_data_pool,
+ di);
+ vlib_node_t *n =
+ vlib_get_node (vm, te->process_node_index);
+ vlib_process_t *p =
+ vec_elt (nm->processes, n->runtime_index);
+ void *data;
+ data =
+ vlib_process_signal_event_helper (nm, n, p,
+ te->event_type_index,
+ te->n_data_elts,
+ te->n_data_elt_bytes);
+ if (te->n_data_bytes < sizeof (te->inline_event_data))
+ clib_memcpy (data, te->inline_event_data,
+ te->n_data_bytes);
+ else
+ {
+ clib_memcpy (data, te->event_data_as_vector,
+ te->n_data_bytes);
+ vec_free (te->event_data_as_vector);
+ }
+ pool_put (nm->signal_timed_event_data_pool, te);
+ }
else
{
- clib_memcpy (data, te->event_data_as_vector,
- te->n_data_bytes);
- vec_free (te->event_data_as_vector);
+ cpu_time_now = clib_cpu_time_now ();
+ cpu_time_now =
+ dispatch_suspended_process (vm, di, cpu_time_now);
}
- pool_put (nm->signal_timed_event_data_pool, te);
- }
- else
- {
- cpu_time_now = clib_cpu_time_now ();
- cpu_time_now =
- dispatch_suspended_process (vm, di, cpu_time_now);
}
- }
- /* Reset vector. */
- _vec_len (nm->data_from_advancing_timing_wheel) = 0;
+ /* Reset vector. */
+ _vec_len (nm->data_from_advancing_timing_wheel) = 0;
+ }
}
/* Input nodes may have added work to the pending vector.
Process pending vector until there is nothing left.
All pending vectors will be processed from input -> output. */
for (i = 0; i < _vec_len (nm->pending_frames); i++)
- cpu_time_now = dispatch_pending_node (vm, nm->pending_frames + i,
- cpu_time_now);
+ cpu_time_now = dispatch_pending_node (vm, i, cpu_time_now);
/* Reset pending vector for next iteration. */
_vec_len (nm->pending_frames) = 0;
/* Pending internal nodes may resume processes. */
- if (_vec_len (nm->data_from_advancing_timing_wheel) > 0)
+ if (is_main && _vec_len (nm->data_from_advancing_timing_wheel) > 0)
goto processes_timing_wheel_data;
vlib_increment_main_loop_counter (vm);
}
}
+static void
+vlib_main_loop (vlib_main_t * vm)
+{
+ vlib_main_or_worker_loop (vm, /* is_main */ 1);
+}
+
+void
+vlib_worker_loop (vlib_main_t * vm)
+{
+ vlib_main_or_worker_loop (vm, /* is_main */ 0);
+}
+
vlib_main_t vlib_global_main;
static clib_error_t *
else if (unformat (input, "elog-events %d",
&vm->elog_main.event_ring_size))
;
+ else if (unformat (input, "elog-post-mortem-dump"))
+ vm->elog_post_mortem_dump = 1;
else
return unformat_parse_error (input);
}