X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlib%2Fmain.c;h=73548fbea72834851acae9fc5054e665cfaae82c;hb=22d32d916f4f3806501cf39b324be19e06b89c12;hp=a27268608f37d80a3c1dea0b5027cc190c5f1651;hpb=903fd513e32a37e55aec0cfb4cf30e000680e0c3;p=vpp.git diff --git a/src/vlib/main.c b/src/vlib/main.c index a27268608f3..73548fbea72 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -41,7 +41,9 @@ #include #include #include +#include +#include #include CJ_GLOBAL_LOG_PROTOTYPE; @@ -136,18 +138,18 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index, else { f = clib_mem_alloc_aligned_no_fail (n, VLIB_FRAME_ALIGN); - f->cpu_index = vm->cpu_index; + f->thread_index = vm->thread_index; fi = vlib_frame_index_no_check (vm, f); } /* Poison frame when debugging. */ if (CLIB_DEBUG > 0) { - u32 save_cpu_index = f->cpu_index; + u32 save_thread_index = f->thread_index; memset (f, 0xfe, n); - f->cpu_index = save_cpu_index; + f->thread_index = save_thread_index; } /* Insert magic number. */ @@ -465,7 +467,7 @@ vlib_put_next_frame (vlib_main_t * vm, vlib_frame_t *f; u32 n_vectors_in_frame; - if (vm->buffer_main->extern_buffer_mgmt == 0 && CLIB_DEBUG > 0) + if (vm->buffer_main->callbacks_registered == 0 && CLIB_DEBUG > 0) vlib_put_next_frame_validate (vm, r, next_index, n_vectors_left); nf = vlib_node_runtime_get_next_frame (vm, r, next_index); @@ -517,7 +519,7 @@ vlib_put_next_frame (vlib_main_t * vm, * a dangling frame reference. Each thread has its own copy of * the next_frames vector. */ - if (0 && r->cpu_index != next_runtime->cpu_index) + if (0 && r->thread_index != next_runtime->thread_index) { nf->frame_index = ~0; nf->flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_IS_ALLOCATED); @@ -707,6 +709,24 @@ elog_save_buffer (vlib_main_t * vm, return error; } +void +elog_post_mortem_dump (void) +{ + vlib_main_t *vm = &vlib_global_main; + elog_main_t *em = &vm->elog_main; + u8 *filename; + clib_error_t *error; + + if (!vm->elog_post_mortem_dump) + return; + + filename = format (0, "/tmp/elog_post_mortem.%d%c", getpid (), 0); + error = elog_write_file (em, (char *) filename, 1 /* flush ring */ ); + if (error) + clib_error_report (error); + vec_free (filename); +} + /* *INDENT-OFF* */ VLIB_CLI_COMMAND (elog_save_cli, static) = { .path = "event-logger save", @@ -866,7 +886,7 @@ vlib_elog_main_loop_event (vlib_main_t * vm, : evm->node_call_elog_event_types, node_index), /* track */ - (vm->cpu_index ? &vlib_worker_threads[vm->cpu_index]. + (vm->thread_index ? &vlib_worker_threads[vm->thread_index]. elog_track : &em->default_track), /* data to log */ n_vectors); } @@ -963,7 +983,7 @@ dispatch_node (vlib_main_t * vm, vm->cpu_time_last_node_dispatch = last_time_stamp; - if (1 /* || vm->cpu_index == node->cpu_index */ ) + if (1 /* || vm->thread_index == node->thread_index */ ) { vlib_main_t *stat_vm; @@ -1029,7 +1049,7 @@ dispatch_node (vlib_main_t * vm, { u32 node_name, vector_length, is_polling; } *ed; - vlib_worker_thread_t *w = vlib_worker_threads + vm->cpu_index; + vlib_worker_thread_t *w = vlib_worker_threads + vm->thread_index; #endif if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT @@ -1094,14 +1114,18 @@ dispatch_node (vlib_main_t * vm, } static u64 -dispatch_pending_node (vlib_main_t * vm, - vlib_pending_frame_t * p, u64 last_time_stamp) +dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index, + u64 last_time_stamp) { vlib_node_main_t *nm = &vm->node_main; vlib_frame_t *f; vlib_next_frame_t *nf, nf_dummy; vlib_node_runtime_t *n; u32 restore_frame_index; + vlib_pending_frame_t *p; + + /* See comment below about dangling references to nm->pending_frames */ + p = nm->pending_frames + pending_frame_index; n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL], p->node_runtime_index); @@ -1151,18 +1175,29 @@ dispatch_pending_node (vlib_main_t * vm, /* Frame is ready to be used again, so restore it. */ if (restore_frame_index != ~0) { - /* we musn't restore a frame that is flagged to be freed. This shouldn't - happen since frames to be freed post dispatch are those used - when the to-node frame becomes full i.e. they form a sort of queue of - frames to a single node. If we get here then the to-node frame and the - pending frame *were* the same, and so we removed the to-node frame. - Therefore this frame is no longer part of the queue for that node - and hence it cannot be it's overspill. + /* + * We musn't restore a frame that is flagged to be freed. This + * shouldn't happen since frames to be freed post dispatch are + * those used when the to-node frame becomes full i.e. they form a + * sort of queue of frames to a single node. If we get here then + * the to-node frame and the pending frame *were* the same, and so + * we removed the to-node frame. Therefore this frame is no + * longer part of the queue for that node and hence it cannot be + * it's overspill. */ ASSERT (!(f->flags & VLIB_FRAME_FREE_AFTER_DISPATCH)); - /* p->next_frame_index can change during node dispatch if node - function decides to change graph hook up. */ + /* + * NB: dispatching node n can result in the creation and scheduling + * of new frames, and hence in the reallocation of nm->pending_frames. + * Recompute p, or no supper. This was broken for more than 10 years. + */ + p = nm->pending_frames + pending_frame_index; + + /* + * p->next_frame_index can change during node dispatch if node + * function decides to change graph hook up. + */ nf = vec_elt_at_index (nm->next_frames, p->next_frame_index); nf->flags |= VLIB_FRAME_IS_ALLOCATED; @@ -1308,9 +1343,16 @@ dispatch_process (vlib_main_t * vm, p->suspended_process_frame_index = pf - nm->suspended_process_frames; if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK) - timing_wheel_insert (&nm->timing_wheel, p->resume_cpu_time, - vlib_timing_wheel_data_set_suspended_process - (node->runtime_index)); + { + TWT (tw_timer_wheel) * tw = + (TWT (tw_timer_wheel) *) nm->timing_wheel; + p->stop_timer_handle = + TW (tw_timer_start) (tw, + vlib_timing_wheel_data_set_suspended_process + (node->runtime_index) /* [sic] pool idex */ , + 0 /* timer_id */ , + p->resume_clock_interval); + } } else p->flags &= ~VLIB_PROCESS_IS_RUNNING; @@ -1383,9 +1425,14 @@ dispatch_suspended_process (vlib_main_t * vm, n_vectors = 0; p->n_suspends += 1; if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK) - timing_wheel_insert (&nm->timing_wheel, p->resume_cpu_time, - vlib_timing_wheel_data_set_suspended_process - (node->runtime_index)); + { + p->stop_timer_handle = + TW (tw_timer_start) ((TWT (tw_timer_wheel) *) nm->timing_wheel, + vlib_timing_wheel_data_set_suspended_process + (node->runtime_index) /* [sic] pool idex */ , + 0 /* timer_id */ , + p->resume_clock_interval); + } } else { @@ -1414,6 +1461,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) uword i; u64 cpu_time_now; vlib_frame_queue_main_t *fqm; + u32 *last_node_runtime_indices = 0; /* Initialize pending node vector. */ if (is_main) @@ -1431,38 +1479,23 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) else cpu_time_now = clib_cpu_time_now (); - /* Arrange for first level of timing wheel to cover times we care - most about. */ - if (is_main) - { - nm->timing_wheel.min_sched_time = 10e-6; - nm->timing_wheel.max_sched_time = 10e-3; - timing_wheel_init (&nm->timing_wheel, - cpu_time_now, vm->clib_time.clocks_per_second); - vec_alloc (nm->data_from_advancing_timing_wheel, 32); - } + /* Pre-allocate interupt runtime indices and lock. */ + vec_alloc (nm->pending_interrupt_node_runtime_indices, 32); + vec_alloc (last_node_runtime_indices, 32); + if (!is_main) + clib_spinlock_init (&nm->pending_interrupt_lock); /* Pre-allocate expired nodes. */ - vec_alloc (nm->pending_interrupt_node_runtime_indices, 32); if (!nm->polling_threshold_vector_length) nm->polling_threshold_vector_length = 10; if (!nm->interrupt_threshold_vector_length) nm->interrupt_threshold_vector_length = 5; - if (is_main) - { - if (!nm->polling_threshold_vector_length) - nm->polling_threshold_vector_length = 10; - if (!nm->interrupt_threshold_vector_length) - nm->interrupt_threshold_vector_length = 5; - - nm->current_process_index = ~0; - } - /* Start all processes. */ if (is_main) { uword i; + nm->current_process_index = ~0; for (i = 0; i < vec_len (nm->processes); i++) cpu_time_now = dispatch_process (vm, nm->processes[i], /* frame */ 0, cpu_time_now); @@ -1505,13 +1538,20 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) uword i; if (l > 0) { - _vec_len (nm->pending_interrupt_node_runtime_indices) = 0; + u32 *tmp; + if (!is_main) + clib_spinlock_lock (&nm->pending_interrupt_lock); + tmp = nm->pending_interrupt_node_runtime_indices; + nm->pending_interrupt_node_runtime_indices = + last_node_runtime_indices; + last_node_runtime_indices = tmp; + _vec_len (last_node_runtime_indices) = 0; + if (!is_main) + clib_spinlock_unlock (&nm->pending_interrupt_lock); for (i = 0; i < l; i++) { n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], - nm-> - pending_interrupt_node_runtime_indices - [i]); + last_node_runtime_indices[i]); cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT, VLIB_NODE_STATE_INTERRUPT, @@ -1524,12 +1564,15 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (is_main) { /* Check if process nodes have expired from timing wheel. */ - nm->data_from_advancing_timing_wheel - = timing_wheel_advance (&nm->timing_wheel, cpu_time_now, - nm->data_from_advancing_timing_wheel, - &nm->cpu_time_next_process_ready); + ASSERT (nm->data_from_advancing_timing_wheel != 0); + + nm->data_from_advancing_timing_wheel = + TW (tw_timer_expire_timers_vec) + ((TWT (tw_timer_wheel) *) nm->timing_wheel, vlib_time_now (vm), + nm->data_from_advancing_timing_wheel); ASSERT (nm->data_from_advancing_timing_wheel != 0); + if (PREDICT_FALSE (_vec_len (nm->data_from_advancing_timing_wheel) > 0)) { @@ -1575,8 +1618,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) dispatch_suspended_process (vm, di, cpu_time_now); } } - - /* Reset vector. */ _vec_len (nm->data_from_advancing_timing_wheel) = 0; } } @@ -1585,8 +1626,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) Process pending vector until there is nothing left. All pending vectors will be processed from input -> output. */ for (i = 0; i < _vec_len (nm->pending_frames); i++) - cpu_time_now = dispatch_pending_node (vm, nm->pending_frames + i, - cpu_time_now); + cpu_time_now = dispatch_pending_node (vm, i, cpu_time_now); /* Reset pending vector for next iteration. */ _vec_len (nm->pending_frames) = 0; @@ -1629,6 +1669,8 @@ vlib_main_configure (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "elog-events %d", &vm->elog_main.event_ring_size)) ; + else if (unformat (input, "elog-post-mortem-dump")) + vm->elog_post_mortem_dump = 1; else return unformat_parse_error (input); } @@ -1654,6 +1696,7 @@ int vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) { clib_error_t *volatile error; + vlib_node_main_t *nm = &vm->node_main; vm->queue_signal_callback = dummy_queue_signal_callback; @@ -1670,7 +1713,22 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) vm->name = "VLIB"; vec_validate (vm->buffer_main, 0); - vlib_buffer_cb_init (vm); + if (vlib_buffer_callbacks) + { + /* external plugin has registered own buffer callbacks + so we just copy them */ + vlib_buffer_main_t *bm = vm->buffer_main; + clib_memcpy (&bm->cb, vlib_buffer_callbacks, + sizeof (vlib_buffer_callbacks_t)); + bm->callbacks_registered = 1; + } + else + { + vlib_physmem_main_t *vpm = &vm->physmem_main; + vlib_buffer_cb_init (vm); + unix_physmem_init (vm, 0 /* fail_if_physical_memory_not_present */ ); + vlib_buffer_add_mem_range (vm, vpm->virtual.start, vpm->virtual.size); + } if ((error = vlib_thread_init (vm))) { @@ -1708,6 +1766,18 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, "default"); + nm->timing_wheel = clib_mem_alloc_aligned (sizeof (TWT (tw_timer_wheel)), + CLIB_CACHE_LINE_BYTES); + + vec_validate (nm->data_from_advancing_timing_wheel, 10); + _vec_len (nm->data_from_advancing_timing_wheel) = 0; + + /* Create the process timing wheel */ + TW (tw_timer_wheel_init) ((TWT (tw_timer_wheel) *) nm->timing_wheel, + 0 /* no callback */ , + 10e-6 /* timer period 10us */ , + ~0 /* max expirations per call */ ); + switch (clib_setjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_NONE)) { case VLIB_MAIN_LOOP_EXIT_NONE: