#include <vppinfra/format.h>
#include <vlib/vlib.h>
#include <vlib/threads.h>
+#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
#include <vlib/unix/cj.h>
else
{
f = clib_mem_alloc_aligned_no_fail (n, VLIB_FRAME_ALIGN);
- f->cpu_index = vm->cpu_index;
+ f->thread_index = vm->thread_index;
fi = vlib_frame_index_no_check (vm, f);
}
/* Poison frame when debugging. */
if (CLIB_DEBUG > 0)
{
- u32 save_cpu_index = f->cpu_index;
+ u32 save_thread_index = f->thread_index;
memset (f, 0xfe, n);
- f->cpu_index = save_cpu_index;
+ f->thread_index = save_thread_index;
}
/* Insert magic number. */
* a dangling frame reference. Each thread has its own copy of
* the next_frames vector.
*/
- if (0 && r->cpu_index != next_runtime->cpu_index)
+ if (0 && r->thread_index != next_runtime->thread_index)
{
nf->frame_index = ~0;
nf->flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_IS_ALLOCATED);
elog_buffer_capacity (em), chroot_file);
vlib_worker_thread_barrier_sync (vm);
- error = elog_write_file (em, chroot_file);
+ error = elog_write_file (em, chroot_file, 1 /* flush ring */ );
vlib_worker_thread_barrier_release (vm);
vec_free (chroot_file);
return error;
}
+/*
+ * Post-mortem event-log dump.
+ *
+ * Does nothing unless the elog_post_mortem_dump flag is set on the
+ * global vlib main. Otherwise writes the event log to
+ * /tmp/elog_post_mortem.<pid>, asking the writer to flush the ring.
+ * A write error is reported; nothing is returned to the caller.
+ */
+void
+elog_post_mortem_dump (void)
+{
+  vlib_main_t *vm = &vlib_global_main;
+  clib_error_t *err;
+  u8 *dump_path;
+
+  if (vm->elog_post_mortem_dump)
+    {
+      /* The trailing "%c" with 0 appends a zero byte, so the vector can be used as a C string. */
+      dump_path = format (0, "/tmp/elog_post_mortem.%d%c", getpid (), 0);
+
+      err = elog_write_file (&vm->elog_main, (char *) dump_path,
+			     1 /* flush ring */ );
+      if (err)
+	clib_error_report (err);
+
+      vec_free (dump_path);
+    }
+}
+
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (elog_save_cli, static) = {
.path = "event-logger save",
: evm->node_call_elog_event_types,
node_index),
/* track */
- (vm->cpu_index ? &vlib_worker_threads[vm->cpu_index].
+ (vm->thread_index ? &vlib_worker_threads[vm->thread_index].
elog_track : &em->default_track),
/* data to log */ n_vectors);
}
}
-/* static_always_inline */ u64
+static_always_inline u64
dispatch_node (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_node_type_t type,
vm->cpu_time_last_node_dispatch = last_time_stamp;
- if (1 /* || vm->cpu_index == node->cpu_index */ )
+ if (1 /* || vm->thread_index == node->thread_index */ )
{
vlib_main_t *stat_vm;
&& (node->flags
& VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)))
{
+#ifdef DISPATCH_NODE_ELOG_REQUIRED
ELOG_TYPE_DECLARE (e) =
{
.function = (char *) __FUNCTION__,.format =
{
u32 node_name, vector_length, is_polling;
} *ed;
+ vlib_worker_thread_t *w = vlib_worker_threads + vm->thread_index;
+#endif
- if (dispatch_state == VLIB_NODE_STATE_INTERRUPT
- && v >= nm->polling_threshold_vector_length)
+ if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT
+ && v >= nm->polling_threshold_vector_length) &&
+ !(node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
{
vlib_node_t *n = vlib_get_node (vm, node->node_index);
n->state = VLIB_NODE_STATE_POLLING;
node->state = VLIB_NODE_STATE_POLLING;
- ASSERT (!
- (node->flags &
- VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE));
node->flags &=
~VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE;
node->flags |=
nm->input_node_counts_by_state[VLIB_NODE_STATE_INTERRUPT] -= 1;
nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] += 1;
- ed = ELOG_DATA (&vm->elog_main, e);
+#ifdef DISPATCH_NODE_ELOG_REQUIRED
+ ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e,
+ w->elog_track);
ed->node_name = n->name_elog_string;
ed->vector_length = v;
ed->is_polling = 1;
+#endif
}
else if (dispatch_state == VLIB_NODE_STATE_POLLING
&& v <= nm->interrupt_threshold_vector_length)
{
node->flags |=
VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE;
- ed = ELOG_DATA (&vm->elog_main, e);
+#ifdef DISPATCH_NODE_ELOG_REQUIRED
+ ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e,
+ w->elog_track);
ed->node_name = n->name_elog_string;
ed->vector_length = v;
ed->is_polling = 0;
+#endif
}
}
}
return t;
}
-/* static */ u64
-dispatch_pending_node (vlib_main_t * vm,
- vlib_pending_frame_t * p, u64 last_time_stamp)
+static u64
+dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index,
+ u64 last_time_stamp)
{
vlib_node_main_t *nm = &vm->node_main;
vlib_frame_t *f;
vlib_next_frame_t *nf, nf_dummy;
vlib_node_runtime_t *n;
u32 restore_frame_index;
+ vlib_pending_frame_t *p;
+
+ /* See comment below about dangling references to nm->pending_frames */
+ p = nm->pending_frames + pending_frame_index;
n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
p->node_runtime_index);
/* Frame is ready to be used again, so restore it. */
if (restore_frame_index != ~0)
{
- /* we musn't restore a frame that is flagged to be freed. This shouldn't
- happen since frames to be freed post dispatch are those used
- when the to-node frame becomes full i.e. they form a sort of queue of
- frames to a single node. If we get here then the to-node frame and the
- pending frame *were* the same, and so we removed the to-node frame.
- Therefore this frame is no longer part of the queue for that node
- and hence it cannot be it's overspill.
+ /*
+ * We mustn't restore a frame that is flagged to be freed. This
+ * shouldn't happen since frames to be freed post dispatch are
+ * those used when the to-node frame becomes full i.e. they form a
+ * sort of queue of frames to a single node. If we get here then
+ * the to-node frame and the pending frame *were* the same, and so
+ * we removed the to-node frame. Therefore this frame is no
+ * longer part of the queue for that node and hence it cannot be
+ * its overspill.
*/
ASSERT (!(f->flags & VLIB_FRAME_FREE_AFTER_DISPATCH));
- /* p->next_frame_index can change during node dispatch if node
- function decides to change graph hook up. */
+ /*
+ * NB: dispatching node n can result in the creation and scheduling
+ * of new frames, and hence in the reallocation of nm->pending_frames.
+ * Recompute p, or no supper. This was broken for more than 10 years.
+ */
+ p = nm->pending_frames + pending_frame_index;
+
+ /*
+ * p->next_frame_index can change during node dispatch if node
+ * function decides to change graph hook up.
+ */
nf = vec_elt_at_index (nm->next_frames, p->next_frame_index);
nf->flags |= VLIB_FRAME_IS_ALLOCATED;
p->suspended_process_frame_index = pf - nm->suspended_process_frames;
if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK)
- timing_wheel_insert (&nm->timing_wheel, p->resume_cpu_time,
- vlib_timing_wheel_data_set_suspended_process
- (node->runtime_index));
+ {
+ TWT (tw_timer_wheel) * tw =
+ (TWT (tw_timer_wheel) *) nm->timing_wheel;
+ p->stop_timer_handle =
+ TW (tw_timer_start) (tw,
+ vlib_timing_wheel_data_set_suspended_process
+ (node->runtime_index) /* [sic] pool idex */ ,
+ 0 /* timer_id */ ,
+ p->resume_clock_interval);
+ }
}
else
p->flags &= ~VLIB_PROCESS_IS_RUNNING;
n_vectors = 0;
p->n_suspends += 1;
if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK)
- timing_wheel_insert (&nm->timing_wheel, p->resume_cpu_time,
- vlib_timing_wheel_data_set_suspended_process
- (node->runtime_index));
+ {
+ p->stop_timer_handle =
+ TW (tw_timer_start) ((TWT (tw_timer_wheel) *) nm->timing_wheel,
+ vlib_timing_wheel_data_set_suspended_process
+ (node->runtime_index) /* [sic] pool idex */ ,
+ 0 /* timer_id */ ,
+ p->resume_clock_interval);
+ }
}
else
{
uword i;
u64 cpu_time_now;
vlib_frame_queue_main_t *fqm;
+ u32 *last_node_runtime_indices = 0;
/* Initialize pending node vector. */
if (is_main)
else
cpu_time_now = clib_cpu_time_now ();
- /* Arrange for first level of timing wheel to cover times we care
- most about. */
- if (is_main)
- {
- nm->timing_wheel.min_sched_time = 10e-6;
- nm->timing_wheel.max_sched_time = 10e-3;
- timing_wheel_init (&nm->timing_wheel,
- cpu_time_now, vm->clib_time.clocks_per_second);
- vec_alloc (nm->data_from_advancing_timing_wheel, 32);
- }
-
- /* Pre-allocate expired nodes. */
+ /* Pre-allocate interrupt runtime indices and lock. */
vec_alloc (nm->pending_interrupt_node_runtime_indices, 32);
+ vec_alloc (last_node_runtime_indices, 32);
+ if (!is_main)
+ clib_spinlock_init (&nm->pending_interrupt_lock);
- if (is_main)
- {
- if (!nm->polling_threshold_vector_length)
- nm->polling_threshold_vector_length = 10;
- if (!nm->interrupt_threshold_vector_length)
- nm->interrupt_threshold_vector_length = 5;
-
- nm->current_process_index = ~0;
- }
+ /* Apply default polling/interrupt threshold vector lengths if unset. */
+ if (!nm->polling_threshold_vector_length)
+ nm->polling_threshold_vector_length = 10;
+ if (!nm->interrupt_threshold_vector_length)
+ nm->interrupt_threshold_vector_length = 5;
/* Start all processes. */
if (is_main)
{
uword i;
+ nm->current_process_index = ~0;
for (i = 0; i < vec_len (nm->processes); i++)
cpu_time_now = dispatch_process (vm, nm->processes[i], /* frame */ 0,
cpu_time_now);
uword i;
if (l > 0)
{
- _vec_len (nm->pending_interrupt_node_runtime_indices) = 0;
+ u32 *tmp;
+ if (!is_main)
+ clib_spinlock_lock (&nm->pending_interrupt_lock);
+ tmp = nm->pending_interrupt_node_runtime_indices;
+ nm->pending_interrupt_node_runtime_indices =
+ last_node_runtime_indices;
+ last_node_runtime_indices = tmp;
+ _vec_len (last_node_runtime_indices) = 0;
+ if (!is_main)
+ clib_spinlock_unlock (&nm->pending_interrupt_lock);
for (i = 0; i < l; i++)
{
n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
- nm->
- pending_interrupt_node_runtime_indices
- [i]);
+ last_node_runtime_indices[i]);
cpu_time_now =
dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT,
VLIB_NODE_STATE_INTERRUPT,
if (is_main)
{
/* Check if process nodes have expired from timing wheel. */
- nm->data_from_advancing_timing_wheel
- = timing_wheel_advance (&nm->timing_wheel, cpu_time_now,
- nm->data_from_advancing_timing_wheel,
- &nm->cpu_time_next_process_ready);
+ ASSERT (nm->data_from_advancing_timing_wheel != 0);
+
+ nm->data_from_advancing_timing_wheel =
+ TW (tw_timer_expire_timers_vec)
+ ((TWT (tw_timer_wheel) *) nm->timing_wheel, vlib_time_now (vm),
+ nm->data_from_advancing_timing_wheel);
ASSERT (nm->data_from_advancing_timing_wheel != 0);
+
if (PREDICT_FALSE
(_vec_len (nm->data_from_advancing_timing_wheel) > 0))
{
dispatch_suspended_process (vm, di, cpu_time_now);
}
}
-
- /* Reset vector. */
_vec_len (nm->data_from_advancing_timing_wheel) = 0;
}
}
Process pending vector until there is nothing left.
All pending vectors will be processed from input -> output. */
for (i = 0; i < _vec_len (nm->pending_frames); i++)
- cpu_time_now = dispatch_pending_node (vm, nm->pending_frames + i,
- cpu_time_now);
+ cpu_time_now = dispatch_pending_node (vm, i, cpu_time_now);
/* Reset pending vector for next iteration. */
_vec_len (nm->pending_frames) = 0;
else if (unformat (input, "elog-events %d",
&vm->elog_main.event_ring_size))
;
+ else if (unformat (input, "elog-post-mortem-dump"))
+ vm->elog_post_mortem_dump = 1;
else
return unformat_parse_error (input);
}
vlib_main (vlib_main_t * volatile vm, unformat_input_t * input)
{
clib_error_t *volatile error;
+ vlib_node_main_t *nm = &vm->node_main;
vm->queue_signal_callback = dummy_queue_signal_callback;
VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES,
"default");
+ nm->timing_wheel = clib_mem_alloc_aligned (sizeof (TWT (tw_timer_wheel)),
+ CLIB_CACHE_LINE_BYTES);
+
+ vec_validate (nm->data_from_advancing_timing_wheel, 10);
+ _vec_len (nm->data_from_advancing_timing_wheel) = 0;
+
+ /* Create the process timing wheel */
+ TW (tw_timer_wheel_init) ((TWT (tw_timer_wheel) *) nm->timing_wheel,
+ 0 /* no callback */ ,
+ 10e-6 /* timer period 10us */ ,
+ ~0 /* max expirations per call */ );
+
switch (clib_setjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_NONE))
{
case VLIB_MAIN_LOOP_EXIT_NONE: