X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlib%2Fmain.c;h=e4c4438b5aa54e17bdcbd66160c35f180a220b71;hb=048a4e5a000017d0d632ebf02dcc23d9bf9ccf72;hp=55a731f16c6df41dc3936738d01c457e215043c2;hpb=9a332e1639fbfb4eb4ddf47b1681e05493ae6da3;p=vpp.git diff --git a/src/vlib/main.c b/src/vlib/main.c index 55a731f16c6..e4c4438b5aa 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -41,7 +41,9 @@ #include #include #include +#include +#include #include CJ_GLOBAL_LOG_PROTOTYPE; @@ -136,19 +138,12 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index, else { f = clib_mem_alloc_aligned_no_fail (n, VLIB_FRAME_ALIGN); - f->cpu_index = vm->cpu_index; fi = vlib_frame_index_no_check (vm, f); } /* Poison frame when debugging. */ if (CLIB_DEBUG > 0) - { - u32 save_cpu_index = f->cpu_index; - - memset (f, 0xfe, n); - - f->cpu_index = save_cpu_index; - } + memset (f, 0xfe, n); /* Insert magic number. */ { @@ -465,7 +460,7 @@ vlib_put_next_frame (vlib_main_t * vm, vlib_frame_t *f; u32 n_vectors_in_frame; - if (vm->buffer_main->extern_buffer_mgmt == 0 && CLIB_DEBUG > 0) + if (buffer_main.callbacks_registered == 0 && CLIB_DEBUG > 0) vlib_put_next_frame_validate (vm, r, next_index, n_vectors_left); nf = vlib_node_runtime_get_next_frame (vm, r, next_index); @@ -517,7 +512,7 @@ vlib_put_next_frame (vlib_main_t * vm, * a dangling frame reference. Each thread has its own copy of * the next_frames vector. */ - if (0 && r->cpu_index != next_runtime->cpu_index) + if (0 && r->thread_index != next_runtime->thread_index) { nf->frame_index = ~0; nf->flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_IS_ALLOCATED); @@ -701,12 +696,30 @@ elog_save_buffer (vlib_main_t * vm, elog_buffer_capacity (em), chroot_file); vlib_worker_thread_barrier_sync (vm); - error = elog_write_file (em, chroot_file); + error = elog_write_file (em, chroot_file, 1 /* flush ring */ ); vlib_worker_thread_barrier_release (vm); vec_free (chroot_file); return error; } +void +elog_post_mortem_dump (void) +{ + vlib_main_t *vm = &vlib_global_main; + elog_main_t *em = &vm->elog_main; + u8 *filename; + clib_error_t *error; + + if (!vm->elog_post_mortem_dump) + return; + + filename = format (0, "/tmp/elog_post_mortem.%d%c", getpid (), 0); + error = elog_write_file (em, (char *) filename, 1 /* flush ring */ ); + if (error) + clib_error_report (error); + vec_free (filename); +} + /* *INDENT-OFF* */ VLIB_CLI_COMMAND (elog_save_cli, static) = { .path = "event-logger save", @@ -866,57 +879,37 @@ vlib_elog_main_loop_event (vlib_main_t * vm, : evm->node_call_elog_event_types, node_index), /* track */ - (vm->cpu_index ? &vlib_worker_threads[vm->cpu_index]. + (vm->thread_index ? &vlib_worker_threads[vm->thread_index]. 
elog_track : &em->default_track), /* data to log */ n_vectors); } +#if VLIB_BUFFER_TRACE_TRAJECTORY > 0 +void (*vlib_buffer_trace_trajectory_cb) (vlib_buffer_t * b, u32 node_index); +void (*vlib_buffer_trace_trajectory_init_cb) (vlib_buffer_t * b); + void -vlib_dump_context_trace (vlib_main_t * vm, u32 bi) +vlib_buffer_trace_trajectory_init (vlib_buffer_t * b) { - vlib_node_main_t *vnm = &vm->node_main; - vlib_buffer_t *b; - u8 i, n; - - if (VLIB_BUFFER_TRACE_TRAJECTORY) + if (PREDICT_TRUE (vlib_buffer_trace_trajectory_init_cb != 0)) { - b = vlib_get_buffer (vm, bi); - n = b->pre_data[0]; - - fformat (stderr, "Context trace for bi %d b 0x%llx, visited %d\n", - bi, b, n); - - if (n == 0 || n > 20) - { - fformat (stderr, "n is unreasonable\n"); - return; - } - - - for (i = 0; i < n; i++) - { - u32 node_index; - - node_index = b->pre_data[i + 1]; + (*vlib_buffer_trace_trajectory_init_cb) (b); + } +} - if (node_index > vec_len (vnm->nodes)) - { - fformat (stderr, "Skip bogus node index %d\n", node_index); - continue; - } +#endif - fformat (stderr, "%v (%d)\n", vnm->nodes[node_index]->name, - node_index); - } - } - else +static inline void +add_trajectory_trace (vlib_buffer_t * b, u32 node_index) +{ +#if VLIB_BUFFER_TRACE_TRAJECTORY > 0 + if (PREDICT_TRUE (vlib_buffer_trace_trajectory_cb != 0)) { - fformat (stderr, - "in vlib/buffers.h, #define VLIB_BUFFER_TRACE_TRAJECTORY 1\n"); + (*vlib_buffer_trace_trajectory_cb) (b, node_index); } +#endif } - static_always_inline u64 dispatch_node (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -963,7 +956,7 @@ dispatch_node (vlib_main_t * vm, vm->cpu_time_last_node_dispatch = last_time_stamp; - if (1 /* || vm->cpu_index == node->cpu_index */ ) + if (1 /* || vm->thread_index == node->thread_index */ ) { vlib_main_t *stat_vm; @@ -982,15 +975,12 @@ dispatch_node (vlib_main_t * vm, if (VLIB_BUFFER_TRACE_TRAJECTORY && frame) { int i; - int log_index; u32 *from; from = vlib_frame_vector_args (frame); for (i = 0; i < frame->n_vectors; i++) { vlib_buffer_t *b = vlib_get_buffer (vm, from[i]); - ASSERT (b->pre_data[0] < 32); - log_index = b->pre_data[0]++ + 1; - b->pre_data[log_index] = node->node_index; + add_trajectory_trace (b, node->node_index); } n = node->function (vm, node, frame); } @@ -1029,7 +1019,7 @@ dispatch_node (vlib_main_t * vm, { u32 node_name, vector_length, is_polling; } *ed; - vlib_worker_thread_t *w = vlib_worker_threads + vm->cpu_index; + vlib_worker_thread_t *w = vlib_worker_threads + vm->thread_index; #endif if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT @@ -1094,14 +1084,18 @@ dispatch_node (vlib_main_t * vm, } static u64 -dispatch_pending_node (vlib_main_t * vm, - vlib_pending_frame_t * p, u64 last_time_stamp) +dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index, + u64 last_time_stamp) { vlib_node_main_t *nm = &vm->node_main; vlib_frame_t *f; vlib_next_frame_t *nf, nf_dummy; vlib_node_runtime_t *n; u32 restore_frame_index; + vlib_pending_frame_t *p; + + /* See comment below about dangling references to nm->pending_frames */ + p = nm->pending_frames + pending_frame_index; n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL], p->node_runtime_index); @@ -1151,18 +1145,29 @@ dispatch_pending_node (vlib_main_t * vm, /* Frame is ready to be used again, so restore it. */ if (restore_frame_index != ~0) { - /* we musn't restore a frame that is flagged to be freed. This shouldn't - happen since frames to be freed post dispatch are those used - when the to-node frame becomes full i.e. 
they form a sort of queue of - frames to a single node. If we get here then the to-node frame and the - pending frame *were* the same, and so we removed the to-node frame. - Therefore this frame is no longer part of the queue for that node - and hence it cannot be it's overspill. + /* + * We musn't restore a frame that is flagged to be freed. This + * shouldn't happen since frames to be freed post dispatch are + * those used when the to-node frame becomes full i.e. they form a + * sort of queue of frames to a single node. If we get here then + * the to-node frame and the pending frame *were* the same, and so + * we removed the to-node frame. Therefore this frame is no + * longer part of the queue for that node and hence it cannot be + * it's overspill. */ ASSERT (!(f->flags & VLIB_FRAME_FREE_AFTER_DISPATCH)); - /* p->next_frame_index can change during node dispatch if node - function decides to change graph hook up. */ + /* + * NB: dispatching node n can result in the creation and scheduling + * of new frames, and hence in the reallocation of nm->pending_frames. + * Recompute p, or no supper. This was broken for more than 10 years. + */ + p = nm->pending_frames + pending_frame_index; + + /* + * p->next_frame_index can change during node dispatch if node + * function decides to change graph hook up. + */ nf = vec_elt_at_index (nm->next_frames, p->next_frame_index); nf->flags |= VLIB_FRAME_IS_ALLOCATED; @@ -1271,6 +1276,7 @@ dispatch_process (vlib_main_t * vm, vlib_node_main_t *nm = &vm->node_main; vlib_node_runtime_t *node_runtime = &p->node_runtime; vlib_node_t *node = vlib_get_node (vm, node_runtime->node_index); + u32 old_process_index; u64 t; uword n_vectors, is_suspend; @@ -1286,11 +1292,12 @@ dispatch_process (vlib_main_t * vm, f ? f->n_vectors : 0, /* is_after */ 0); /* Save away current process for suspend. 
*/ + old_process_index = nm->current_process_index; nm->current_process_index = node->runtime_index; n_vectors = vlib_process_startup (vm, p, f); - nm->current_process_index = ~0; + nm->current_process_index = old_process_index; ASSERT (n_vectors != VLIB_PROCESS_RETURN_LONGJMP_RETURN); is_suspend = n_vectors == VLIB_PROCESS_RETURN_LONGJMP_SUSPEND; @@ -1308,9 +1315,16 @@ dispatch_process (vlib_main_t * vm, p->suspended_process_frame_index = pf - nm->suspended_process_frames; if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK) - timing_wheel_insert (&nm->timing_wheel, p->resume_cpu_time, - vlib_timing_wheel_data_set_suspended_process - (node->runtime_index)); + { + TWT (tw_timer_wheel) * tw = + (TWT (tw_timer_wheel) *) nm->timing_wheel; + p->stop_timer_handle = + TW (tw_timer_start) (tw, + vlib_timing_wheel_data_set_suspended_process + (node->runtime_index) /* [sic] pool idex */ , + 0 /* timer_id */ , + p->resume_clock_interval); + } } else p->flags &= ~VLIB_PROCESS_IS_RUNNING; @@ -1383,9 +1397,14 @@ dispatch_suspended_process (vlib_main_t * vm, n_vectors = 0; p->n_suspends += 1; if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK) - timing_wheel_insert (&nm->timing_wheel, p->resume_cpu_time, - vlib_timing_wheel_data_set_suspended_process - (node->runtime_index)); + { + p->stop_timer_handle = + TW (tw_timer_start) ((TWT (tw_timer_wheel) *) nm->timing_wheel, + vlib_timing_wheel_data_set_suspended_process + (node->runtime_index) /* [sic] pool idex */ , + 0 /* timer_id */ , + p->resume_clock_interval); + } } else { @@ -1406,6 +1425,13 @@ dispatch_suspended_process (vlib_main_t * vm, return t; } +void vl_api_send_pending_rpc_requests (vlib_main_t *) __attribute__ ((weak)); +void +vl_api_send_pending_rpc_requests (vlib_main_t * vm) +{ +} + + static_always_inline void vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) { @@ -1414,6 +1440,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) uword i; u64 cpu_time_now; vlib_frame_queue_main_t *fqm; + u32 *last_node_runtime_indices = 0; /* Initialize pending node vector. */ if (is_main) @@ -1431,38 +1458,23 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) else cpu_time_now = clib_cpu_time_now (); - /* Arrange for first level of timing wheel to cover times we care - most about. */ - if (is_main) - { - nm->timing_wheel.min_sched_time = 10e-6; - nm->timing_wheel.max_sched_time = 10e-3; - timing_wheel_init (&nm->timing_wheel, - cpu_time_now, vm->clib_time.clocks_per_second); - vec_alloc (nm->data_from_advancing_timing_wheel, 32); - } + /* Pre-allocate interupt runtime indices and lock. */ + vec_alloc (nm->pending_interrupt_node_runtime_indices, 32); + vec_alloc (last_node_runtime_indices, 32); + if (!is_main) + clib_spinlock_init (&nm->pending_interrupt_lock); /* Pre-allocate expired nodes. */ - vec_alloc (nm->pending_interrupt_node_runtime_indices, 32); if (!nm->polling_threshold_vector_length) nm->polling_threshold_vector_length = 10; if (!nm->interrupt_threshold_vector_length) nm->interrupt_threshold_vector_length = 5; - if (is_main) - { - if (!nm->polling_threshold_vector_length) - nm->polling_threshold_vector_length = 10; - if (!nm->interrupt_threshold_vector_length) - nm->interrupt_threshold_vector_length = 5; - - nm->current_process_index = ~0; - } - /* Start all processes. 
*/ if (is_main) { uword i; + nm->current_process_index = ~0; for (i = 0; i < vec_len (nm->processes); i++) cpu_time_now = dispatch_process (vm, nm->processes[i], /* frame */ 0, cpu_time_now); @@ -1472,6 +1484,9 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) { vlib_node_runtime_t *n; + if (PREDICT_FALSE (_vec_len (vm->pending_rpc_requests) > 0)) + vl_api_send_pending_rpc_requests (vm); + if (!is_main) { vlib_worker_thread_barrier_check (); @@ -1480,13 +1495,12 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) } /* Process pre-input nodes. */ - if (is_main) - vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) - cpu_time_now = dispatch_node (vm, n, - VLIB_NODE_TYPE_PRE_INPUT, - VLIB_NODE_STATE_POLLING, - /* frame */ 0, - cpu_time_now); + vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) + cpu_time_now = dispatch_node (vm, n, + VLIB_NODE_TYPE_PRE_INPUT, + VLIB_NODE_STATE_POLLING, + /* frame */ 0, + cpu_time_now); /* Next process input nodes. */ vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]) @@ -1501,17 +1515,30 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) /* Next handle interrupts. */ { + /* unlocked read, for performance */ uword l = _vec_len (nm->pending_interrupt_node_runtime_indices); uword i; - if (l > 0) + if (PREDICT_FALSE (l > 0)) { - _vec_len (nm->pending_interrupt_node_runtime_indices) = 0; + u32 *tmp; + if (!is_main) + { + clib_spinlock_lock (&nm->pending_interrupt_lock); + /* Re-read w/ lock held, in case another thread added an item */ + l = _vec_len (nm->pending_interrupt_node_runtime_indices); + } + + tmp = nm->pending_interrupt_node_runtime_indices; + nm->pending_interrupt_node_runtime_indices = + last_node_runtime_indices; + last_node_runtime_indices = tmp; + _vec_len (last_node_runtime_indices) = 0; + if (!is_main) + clib_spinlock_unlock (&nm->pending_interrupt_lock); for (i = 0; i < l; i++) { n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], - nm-> - pending_interrupt_node_runtime_indices - [i]); + last_node_runtime_indices[i]); cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT, VLIB_NODE_STATE_INTERRUPT, @@ -1524,12 +1551,15 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (is_main) { /* Check if process nodes have expired from timing wheel. */ - nm->data_from_advancing_timing_wheel - = timing_wheel_advance (&nm->timing_wheel, cpu_time_now, - nm->data_from_advancing_timing_wheel, - &nm->cpu_time_next_process_ready); + ASSERT (nm->data_from_advancing_timing_wheel != 0); + + nm->data_from_advancing_timing_wheel = + TW (tw_timer_expire_timers_vec) + ((TWT (tw_timer_wheel) *) nm->timing_wheel, vlib_time_now (vm), + nm->data_from_advancing_timing_wheel); ASSERT (nm->data_from_advancing_timing_wheel != 0); + if (PREDICT_FALSE (_vec_len (nm->data_from_advancing_timing_wheel) > 0)) { @@ -1575,8 +1605,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) dispatch_suspended_process (vm, di, cpu_time_now); } } - - /* Reset vector. */ _vec_len (nm->data_from_advancing_timing_wheel) = 0; } } @@ -1585,8 +1613,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) Process pending vector until there is nothing left. All pending vectors will be processed from input -> output. */ for (i = 0; i < _vec_len (nm->pending_frames); i++) - cpu_time_now = dispatch_pending_node (vm, nm->pending_frames + i, - cpu_time_now); + cpu_time_now = dispatch_pending_node (vm, i, cpu_time_now); /* Reset pending vector for next iteration. 
*/ _vec_len (nm->pending_frames) = 0; @@ -1629,6 +1656,8 @@ vlib_main_configure (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "elog-events %d", &vm->elog_main.event_ring_size)) ; + else if (unformat (input, "elog-post-mortem-dump")) + vm->elog_post_mortem_dump = 1; else return unformat_parse_error (input); } @@ -1654,6 +1683,7 @@ int vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) { clib_error_t *volatile error; + vlib_node_main_t *nm = &vm->node_main; vm->queue_signal_callback = dummy_queue_signal_callback; @@ -1669,8 +1699,17 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) if (!vm->name) vm->name = "VLIB"; - vec_validate (vm->buffer_main, 0); - vlib_buffer_cb_init (vm); + if ((error = unix_physmem_init (vm))) + { + clib_error_report (error); + goto done; + } + + if ((error = vlib_buffer_main_init (vm))) + { + clib_error_report (error); + goto done; + } if ((error = vlib_thread_init (vm))) { @@ -1678,6 +1717,12 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) goto done; } + if ((error = vlib_call_init_function (vm, map_stat_segment_init))) + { + clib_error_report (error); + goto done; + } + /* Register static nodes so that init functions may use them. */ vlib_register_all_static_nodes (vm); @@ -1697,6 +1742,24 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) goto done; } + if ((error = vlib_call_init_function (vm, vpe_api_init))) + { + clib_error_report (error); + goto done; + } + + if ((error = vlib_call_init_function (vm, vlibmemory_init))) + { + clib_error_report (error); + goto done; + } + + if ((error = vlib_call_init_function (vm, map_api_segment_init))) + { + clib_error_report (error); + goto done; + } + /* See unix/main.c; most likely already set up */ if (vm->init_functions_called == 0) vm->init_functions_called = hash_create (0, /* value bytes */ 0); @@ -1704,9 +1767,23 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) goto done; /* Create default buffer free list. */ - vlib_buffer_get_or_create_free_list (vm, - VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, - "default"); + vlib_buffer_create_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, + "default"); + + nm->timing_wheel = clib_mem_alloc_aligned (sizeof (TWT (tw_timer_wheel)), + CLIB_CACHE_LINE_BYTES); + + vec_validate (nm->data_from_advancing_timing_wheel, 10); + _vec_len (nm->data_from_advancing_timing_wheel) = 0; + + /* Create the process timing wheel */ + TW (tw_timer_wheel_init) ((TWT (tw_timer_wheel) *) nm->timing_wheel, + 0 /* no callback */ , + 10e-6 /* timer period 10us */ , + ~0 /* max expirations per call */ ); + + vec_validate (vm->pending_rpc_requests, 0); + _vec_len (vm->pending_rpc_requests) = 0; switch (clib_setjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_NONE)) {
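
The new elog_post_mortem_dump() above writes the in-memory event log to /tmp/elog_post_mortem.<pid> when the "elog-post-mortem-dump" knob is configured; the call is meant to come from a fatal-error or signal path elsewhere in the tree, which this diff does not show. The standalone sketch below illustrates that general idea with a toy ring buffer and a POSIX signal handler; everything in it (post_mortem_dump, event_ring, dump_path) is illustrative rather than VPP code, and the signal-handler wiring is an assumption.

/* Sketch of a post-mortem dump hook: on a fatal signal, write whatever
 * in-memory log exists to /tmp before dying.  Plain C/POSIX; the real
 * elog_post_mortem_dump() serializes the vlib event log instead of this
 * toy ring.  Only async-signal-safe calls (open/write/close/_exit) are
 * used inside the handler, so the file name is formatted up front. */
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static char dump_path[64];
static char event_ring[4096];	/* stand-in for the elog event ring */
static size_t event_ring_len;

static void
post_mortem_dump (int signo)
{
  int fd = open (dump_path, O_CREAT | O_WRONLY | O_TRUNC, 0644);
  if (fd >= 0)
    {
      (void) write (fd, event_ring, event_ring_len);
      close (fd);
    }
  _exit (128 + signo);
}

int
main (void)
{
  struct sigaction sa;

  snprintf (dump_path, sizeof (dump_path), "/tmp/elog_post_mortem.%d",
            (int) getpid ());

  memset (&sa, 0, sizeof (sa));
  sa.sa_handler = post_mortem_dump;
  sigemptyset (&sa.sa_mask);
  sigaction (SIGSEGV, &sa, 0);
  sigaction (SIGABRT, &sa, 0);

  event_ring_len = (size_t) snprintf (event_ring, sizeof (event_ring),
                                      "example event log contents\n");

  raise (SIGABRT);		/* simulate a crash; the dump file remains */
  return 0;
}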
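
The buffer-trajectory change above replaces the old hard-wired pre_data logging with two registerable hooks, vlib_buffer_trace_trajectory_cb and vlib_buffer_trace_trajectory_init_cb, compiled in only when VLIB_BUFFER_TRACE_TRAJECTORY > 0. The sketch below shows the same hook pattern with a stand-in buffer type; the recorder (record_hop, trace_init) and the hops[] storage are made-up placeholders for whatever a debug component would actually install.

/* Registerable trajectory-trace hooks, in miniature.  Plain C; the real
 * hooks take vlib_buffer_t * and default to NULL, so tracing costs a
 * single well-predicted branch per buffer when nothing is registered. */
#include <stdio.h>

typedef struct
{
  unsigned n_hops;
  unsigned hops[16];		/* per-buffer trajectory storage */
} buffer_t;

/* Hook points: NULL until a debug component installs something. */
void (*trace_trajectory_cb) (buffer_t * b, unsigned node_index);
void (*trace_trajectory_init_cb) (buffer_t * b);

static inline void
add_trajectory_trace (buffer_t * b, unsigned node_index)
{
  if (trace_trajectory_cb)
    (*trace_trajectory_cb) (b, node_index);
}

/* Example recorder a debug component might register. */
static void
record_hop (buffer_t * b, unsigned node_index)
{
  if (b->n_hops < 16)
    b->hops[b->n_hops++] = node_index;
}

static void
trace_init (buffer_t * b)
{
  b->n_hops = 0;
}

int
main (void)
{
  buffer_t b;

  trace_trajectory_cb = record_hop;
  trace_trajectory_init_cb = trace_init;

  (*trace_trajectory_init_cb) (&b);
  add_trajectory_trace (&b, 4);	/* node indices are arbitrary here */
  add_trajectory_trace (&b, 9);

  printf ("buffer visited %u nodes, last was node %u\n",
          b.n_hops, b.hops[b.n_hops - 1]);
  return 0;
}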
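
dispatch_pending_node() now receives a pending_frame_index instead of a vlib_pending_frame_t pointer because, as the new comment says, dispatching a node can create and schedule new frames, and growing nm->pending_frames can reallocate (and move) the vector, leaving a saved element pointer dangling. The sketch below reproduces the pitfall and the index-based fix with a plain realloc()ed array in place of the vppinfra vec machinery; frame_list_t, push_frame and dispatch are illustrative names only.

/* "Recompute the pointer after anything that may grow the vector."
 * Standard C, standard library only. */
#include <stdio.h>
#include <stdlib.h>

typedef struct
{
  int node_runtime_index;
  int frame_index;
} pending_frame_t;

typedef struct
{
  pending_frame_t *elts;
  size_t len, cap;
} frame_list_t;

/* Growing the list may call realloc(), which is allowed to move it. */
static void
push_frame (frame_list_t * fl, pending_frame_t pf)
{
  if (fl->len == fl->cap)
    {
      fl->cap = fl->cap ? 2 * fl->cap : 4;
      fl->elts = realloc (fl->elts, fl->cap * sizeof (pending_frame_t));
    }
  fl->elts[fl->len++] = pf;
}

/* Dispatching a frame may schedule new frames, i.e. grow the list. */
static void
dispatch (frame_list_t * fl, size_t pending_index)
{
  pending_frame_t *p = fl->elts + pending_index;

  printf ("dispatching node %d\n", p->node_runtime_index);

  /* This push may reallocate fl->elts, so 'p' may now dangle... */
  push_frame (fl, (pending_frame_t) { .node_runtime_index = 99 });

  /* ...so recompute it from the stable index before touching it again. */
  p = fl->elts + pending_index;
  p->frame_index = -1;		/* safe: 'p' points into the current array */
}

int
main (void)
{
  frame_list_t fl = { 0 };

  push_frame (&fl, (pending_frame_t) { .node_runtime_index = 7 });
  dispatch (&fl, 0);
  free (fl.elts);
  return 0;
}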
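
The process suspend/resume paths above switch from timing_wheel_insert(), which takes an absolute CPU timestamp, to TW(tw_timer_start)(), which takes an interval relative to now and returns a handle; the handle is kept in p->stop_timer_handle so the timer can be stopped if the process resumes for another reason, and expirations come back as a vector of user data from TW(tw_timer_expire_timers_vec)(). The toy wheel below mirrors only that calling convention; it is not the vppinfra tw_timer template, and its single ring assumes intervals shorter than one revolution.

/* Toy relative-interval timer wheel: start returns a handle, advance
 * collects the user data of expired timers.  Illustrative only. */
#include <stdio.h>

#define WHEEL_SLOTS 8
#define MAX_TIMERS  16

typedef struct
{
  int in_use;
  unsigned slot;		/* ring slot in which the timer sits */
  unsigned user_data;		/* e.g. a suspended-process pool index */
} toy_timer_t;

typedef struct
{
  unsigned current_slot;
  toy_timer_t timers[MAX_TIMERS];
} toy_wheel_t;

/* Start a timer 'interval' ticks from now (interval < WHEEL_SLOTS here);
 * the returned handle lets the caller stop it early. */
static int
toy_timer_start (toy_wheel_t * tw, unsigned user_data, unsigned interval)
{
  int h;
  for (h = 0; h < MAX_TIMERS; h++)
    if (!tw->timers[h].in_use)
      {
        tw->timers[h].in_use = 1;
        tw->timers[h].slot = (tw->current_slot + interval) % WHEEL_SLOTS;
        tw->timers[h].user_data = user_data;
        return h;
      }
  return -1;
}

static void
toy_timer_stop (toy_wheel_t * tw, int handle)
{
  tw->timers[handle].in_use = 0;
}

/* Advance one tick; write expired user data into 'expired' and return
 * how many expired. */
static int
toy_wheel_advance (toy_wheel_t * tw, unsigned *expired, int max)
{
  int h, n = 0;
  tw->current_slot = (tw->current_slot + 1) % WHEEL_SLOTS;
  for (h = 0; h < MAX_TIMERS; h++)
    if (tw->timers[h].in_use && tw->timers[h].slot == tw->current_slot)
      {
        tw->timers[h].in_use = 0;
        if (n < max)
          expired[n++] = tw->timers[h].user_data;
      }
  return n;
}

int
main (void)
{
  toy_wheel_t tw = { 0 };
  unsigned expired[MAX_TIMERS];
  int tick, i;
  int h1 = toy_timer_start (&tw, 42 /* process index */ , 3 /* ticks */ );
  int h2 = toy_timer_start (&tw, 7, 2);

  toy_timer_stop (&tw, h2);	/* cancelled before it can fire */
  (void) h1;

  for (tick = 1; tick <= 4; tick++)
    {
      int n = toy_wheel_advance (&tw, expired, MAX_TIMERS);
      for (i = 0; i < n; i++)
        printf ("tick %d: resume process %u\n", tick, expired[i]);
    }
  return 0;
}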
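
vl_api_send_pending_rpc_requests() is given an empty __attribute__((weak)) definition so the main loop can call it unconditionally whenever vm->pending_rpc_requests is non-empty; if the image links an object that provides a real definition, the strong symbol silently replaces the stub. A minimal sketch of that linker behavior, with a made-up symbol name:

/* weak_default.c -- the weak-stub pattern (GCC/clang attribute). */
#include <stdio.h>

/* Weak default: does nothing, so callers need neither an #ifdef nor a
 * NULL-pointer check. */
void flush_pending_work (void) __attribute__ ((weak));
void
flush_pending_work (void)
{
}

int
main (void)
{
  /* Safe to call even when no other object file defines the symbol. */
  flush_pending_work ();
  printf ("done\n");
  return 0;
}

/* A subsystem can provide the real implementation in another file:
 *
 *   void flush_pending_work (void)
 *   {
 *     printf ("flushing queued work\n");
 *   }
 *
 * Linking that object in replaces the weak stub above. */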
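
The interrupt-handling block above now swaps nm->pending_interrupt_node_runtime_indices with the local last_node_runtime_indices vector while holding nm->pending_interrupt_lock (the lock is only taken on worker threads), then dispatches from the snapshot with the lock released, so other threads can keep posting interrupts in the meantime. The sketch below reproduces just that swap-under-lock pattern with POSIX spinlocks and fixed-size buffers; post_interrupt and handle_interrupts are illustrative names, and it always takes the lock for simplicity.

/* Buffer-swap under a spinlock: producers post into one buffer while the
 * consumer drains the other.  Standard C + POSIX threads; build with
 * something like: cc -pthread swap_sketch.c */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_PENDING 32

typedef struct
{
  pthread_spinlock_t lock;
  int *pending;			/* producers append here, under the lock */
  int n_pending;
} interrupt_list_t;

/* Producer side: post an interrupt for a node (any thread). */
static void
post_interrupt (interrupt_list_t * il, int node_index)
{
  pthread_spin_lock (&il->lock);
  if (il->n_pending < MAX_PENDING)
    il->pending[il->n_pending++] = node_index;
  pthread_spin_unlock (&il->lock);
}

/* Consumer side: swap in an empty scratch buffer under the lock, then
 * dispatch from the snapshot with the lock released. */
static void
handle_interrupts (interrupt_list_t * il, int **scratch)
{
  int *snapshot;
  int i, n;

  pthread_spin_lock (&il->lock);
  n = il->n_pending;
  snapshot = il->pending;	/* take the filled buffer...           */
  il->pending = *scratch;	/* ...and hand producers the empty one */
  il->n_pending = 0;
  pthread_spin_unlock (&il->lock);
  *scratch = snapshot;		/* reuse this buffer on the next call  */

  for (i = 0; i < n; i++)
    printf ("dispatch interrupt for node %d\n", snapshot[i]);
}

int
main (void)
{
  interrupt_list_t il;
  int *scratch = calloc (MAX_PENDING, sizeof (int));

  il.pending = calloc (MAX_PENDING, sizeof (int));
  il.n_pending = 0;
  pthread_spin_init (&il.lock, PTHREAD_PROCESS_PRIVATE);

  post_interrupt (&il, 3);
  post_interrupt (&il, 11);
  handle_interrupts (&il, &scratch);

  pthread_spin_destroy (&il.lock);
  free (scratch);
  free (il.pending);
  return 0;
}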