X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlib%2Fmain.c;h=964feb06c6540ba2f911d42fd7b600c325226e3b;hb=178cf493d009995b28fdf220f04c98860ff79a9b;hp=6783068b42bf59f7b17e4b16b9bc25d8bef25de5;hpb=e3248989586ade29baba635aae66b06995917221;p=vpp.git diff --git a/src/vlib/main.c b/src/vlib/main.c index 6783068b42b..964feb06c65 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -143,7 +143,7 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index, /* Poison frame when debugging. */ if (CLIB_DEBUG > 0) - memset (f, 0xfe, n); + clib_memset (f, 0xfe, n); /* Insert magic number. */ { @@ -153,10 +153,11 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index, *magic = VLIB_FRAME_MAGIC; } - f->flags = VLIB_FRAME_IS_ALLOCATED | frame_flags; + f->frame_flags = VLIB_FRAME_IS_ALLOCATED | frame_flags; f->n_vectors = 0; f->scalar_size = scalar_size; f->vector_size = vector_size; + f->flags = 0; fs->n_alloc_frames += 1; @@ -200,7 +201,7 @@ vlib_put_frame_to_node (vlib_main_t * vm, u32 to_node_index, vlib_frame_t * f) vec_add2 (vm->node_main.pending_frames, p, 1); - f->flags |= VLIB_FRAME_PENDING; + f->frame_flags |= VLIB_FRAME_PENDING; p->frame_index = vlib_frame_index (vm, f); p->node_runtime_index = to_node->runtime_index; p->next_frame_index = VLIB_PENDING_FRAME_NO_NEXT_FRAME; @@ -215,14 +216,14 @@ vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f) vlib_frame_size_t *fs; u32 frame_index; - ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED); + ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED); node = vlib_get_node (vm, r->node_index); fs = get_frame_size_info (nm, node->scalar_size, node->vector_size); frame_index = vlib_frame_index (vm, f); - ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED); + ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED); /* No next frames may point to freed frame. */ if (CLIB_DEBUG > 0) @@ -232,7 +233,7 @@ vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f) ASSERT (nf->frame_index != frame_index); } - f->flags &= ~VLIB_FRAME_IS_ALLOCATED; + f->frame_flags &= ~VLIB_FRAME_IS_ALLOCATED; vec_add1 (fs->free_frame_indices, frame_index); ASSERT (fs->n_alloc_frames > 0); @@ -378,7 +379,8 @@ vlib_get_next_frame_internal (vlib_main_t * vm, /* Has frame been removed from pending vector (e.g. finished dispatching)? If so we can reuse frame. */ - if ((nf->flags & VLIB_FRAME_PENDING) && !(f->flags & VLIB_FRAME_PENDING)) + if ((nf->flags & VLIB_FRAME_PENDING) + && !(f->frame_flags & VLIB_FRAME_PENDING)) { nf->flags &= ~VLIB_FRAME_PENDING; f->n_vectors = 0; @@ -393,7 +395,7 @@ vlib_get_next_frame_internal (vlib_main_t * vm, if (!(nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH)) { vlib_frame_t *f_old = vlib_get_frame (vm, nf->frame_index); - f_old->flags |= VLIB_FRAME_FREE_AFTER_DISPATCH; + f_old->frame_flags |= VLIB_FRAME_FREE_AFTER_DISPATCH; } /* Allocate new frame to replace full one. */ @@ -488,7 +490,7 @@ vlib_put_next_frame (vlib_main_t * vm, r->cached_next_index = next_index; - if (!(f->flags & VLIB_FRAME_PENDING)) + if (!(f->frame_flags & VLIB_FRAME_PENDING)) { __attribute__ ((unused)) vlib_node_t *node; vlib_node_t *next_node; @@ -504,7 +506,7 @@ vlib_put_next_frame (vlib_main_t * vm, p->node_runtime_index = nf->node_runtime_index; p->next_frame_index = nf - nm->next_frames; nf->flags |= VLIB_FRAME_PENDING; - f->flags |= VLIB_FRAME_PENDING; + f->frame_flags |= VLIB_FRAME_PENDING; /* * If we're going to dispatch this frame on another thread, @@ -539,29 +541,38 @@ vlib_put_next_frame (vlib_main_t * vm, never_inline void vlib_node_runtime_sync_stats (vlib_main_t * vm, vlib_node_runtime_t * r, - uword n_calls, uword n_vectors, uword n_clocks) + uword n_calls, uword n_vectors, uword n_clocks, + uword n_ticks) { vlib_node_t *n = vlib_get_node (vm, r->node_index); n->stats_total.calls += n_calls + r->calls_since_last_overflow; n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow; n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow; + n->stats_total.perf_counter_ticks += n_ticks + + r->perf_counter_ticks_since_last_overflow; + n->stats_total.perf_counter_vectors += n_vectors + + r->perf_counter_vectors_since_last_overflow; n->stats_total.max_clock = r->max_clock; n->stats_total.max_clock_n = r->max_clock_n; r->calls_since_last_overflow = 0; r->vectors_since_last_overflow = 0; r->clocks_since_last_overflow = 0; + r->perf_counter_ticks_since_last_overflow = 0ULL; + r->perf_counter_vectors_since_last_overflow = 0ULL; } always_inline void __attribute__ ((unused)) vlib_process_sync_stats (vlib_main_t * vm, vlib_process_t * p, - uword n_calls, uword n_vectors, uword n_clocks) + uword n_calls, uword n_vectors, uword n_clocks, + uword n_ticks) { vlib_node_runtime_t *rt = &p->node_runtime; vlib_node_t *n = vlib_get_node (vm, rt->node_index); - vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks); + vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks, + n_ticks); n->stats_total.suspends += p->n_suspends; p->n_suspends = 0; } @@ -587,7 +598,7 @@ vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n) vec_elt_at_index (vm->node_main.nodes_by_type[n->type], n->runtime_index); - vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0); + vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0, 0); /* Sync up runtime next frame vector counters with main node structure. */ { @@ -607,45 +618,68 @@ always_inline u32 vlib_node_runtime_update_stats (vlib_main_t * vm, vlib_node_runtime_t * node, uword n_calls, - uword n_vectors, uword n_clocks) + uword n_vectors, uword n_clocks, + uword n_ticks) { u32 ca0, ca1, v0, v1, cl0, cl1, r; + u32 ptick0, ptick1, pvec0, pvec1; cl0 = cl1 = node->clocks_since_last_overflow; ca0 = ca1 = node->calls_since_last_overflow; v0 = v1 = node->vectors_since_last_overflow; + ptick0 = ptick1 = node->perf_counter_ticks_since_last_overflow; + pvec0 = pvec1 = node->perf_counter_vectors_since_last_overflow; ca1 = ca0 + n_calls; v1 = v0 + n_vectors; cl1 = cl0 + n_clocks; + ptick1 = ptick0 + n_ticks; + pvec1 = pvec0 + n_vectors; node->calls_since_last_overflow = ca1; node->clocks_since_last_overflow = cl1; node->vectors_since_last_overflow = v1; + node->perf_counter_ticks_since_last_overflow = ptick1; + node->perf_counter_vectors_since_last_overflow = pvec1; + node->max_clock_n = node->max_clock > n_clocks ? node->max_clock_n : n_vectors; node->max_clock = node->max_clock > n_clocks ? node->max_clock : n_clocks; r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors); - if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0)) + if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0) || (ptick1 < ptick0) + || (pvec1 < pvec0)) { node->calls_since_last_overflow = ca0; node->clocks_since_last_overflow = cl0; node->vectors_since_last_overflow = v0; - vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks); + node->perf_counter_ticks_since_last_overflow = ptick0; + node->perf_counter_vectors_since_last_overflow = pvec0; + + vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks, + n_ticks); } return r; } +static inline u64 +vlib_node_runtime_perf_counter (vlib_main_t * vm) +{ + if (PREDICT_FALSE (vm->vlib_node_runtime_perf_counter_cb != 0)) + return ((*vm->vlib_node_runtime_perf_counter_cb) (vm)); + return 0ULL; +} + always_inline void vlib_process_update_stats (vlib_main_t * vm, vlib_process_t * p, - uword n_calls, uword n_vectors, uword n_clocks) + uword n_calls, uword n_vectors, uword n_clocks, + uword n_ticks) { vlib_node_runtime_update_stats (vm, &p->node_runtime, - n_calls, n_vectors, n_clocks); + n_calls, n_vectors, n_clocks, n_ticks); } static clib_error_t * @@ -958,15 +992,19 @@ dispatch_node (vlib_main_t * vm, if (1 /* || vm->thread_index == node->thread_index */ ) { - vlib_main_t *stat_vm; - - stat_vm = /* vlib_mains ? vlib_mains[0] : */ vm; + u64 pmc_before, pmc_delta; vlib_elog_main_loop_event (vm, node->node_index, last_time_stamp, frame ? frame->n_vectors : 0, /* is_after */ 0); + /* + * To validate accounting: pmc_before = last_time_stamp + * perf ticks should equal clocks/pkt... + */ + pmc_before = vlib_node_runtime_perf_counter (vm); + /* * Turn this on if you run into * "bad monkey" contexts, and you want to know exactly @@ -989,16 +1027,23 @@ dispatch_node (vlib_main_t * vm, t = clib_cpu_time_now (); + /* + * To validate accounting: pmc_delta = t - pmc_before; + * perf ticks should equal clocks/pkt... + */ + pmc_delta = vlib_node_runtime_perf_counter (vm) - pmc_before; + vlib_elog_main_loop_event (vm, node->node_index, t, n, /* is_after */ 1); vm->main_loop_vectors_processed += n; vm->main_loop_nodes_processed += n > 0; - v = vlib_node_runtime_update_stats (stat_vm, node, + v = vlib_node_runtime_update_stats (vm, node, /* n_calls */ 1, /* n_vectors */ n, - /* n_clocks */ t - last_time_stamp); + /* n_clocks */ t - last_time_stamp, + pmc_delta /* PMC ticks */ ); /* When in interrupt mode and vector rate crosses threshold switch to polling mode. */ @@ -1105,13 +1150,13 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index, { /* No next frame: so use dummy on stack. */ nf = &nf_dummy; - nf->flags = f->flags & VLIB_NODE_FLAG_TRACE; + nf->flags = f->frame_flags & VLIB_NODE_FLAG_TRACE; nf->frame_index = ~p->frame_index; } else nf = vec_elt_at_index (nm->next_frames, p->next_frame_index); - ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED); + ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED); /* Force allocation of new frame while current frame is being dispatched. */ @@ -1125,7 +1170,7 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index, } /* Frame must be pending. */ - ASSERT (f->flags & VLIB_FRAME_PENDING); + ASSERT (f->frame_flags & VLIB_FRAME_PENDING); ASSERT (f->n_vectors > 0); /* Copy trace flag from next frame to node. @@ -1140,7 +1185,7 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index, VLIB_NODE_STATE_POLLING, f, last_time_stamp); - f->flags &= ~VLIB_FRAME_PENDING; + f->frame_flags &= ~VLIB_FRAME_PENDING; /* Frame is ready to be used again, so restore it. */ if (restore_frame_index != ~0) @@ -1155,7 +1200,7 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index, * longer part of the queue for that node and hence it cannot be * it's overspill. */ - ASSERT (!(f->flags & VLIB_FRAME_FREE_AFTER_DISPATCH)); + ASSERT (!(f->frame_flags & VLIB_FRAME_FREE_AFTER_DISPATCH)); /* * NB: dispatching node n can result in the creation and scheduling @@ -1187,7 +1232,7 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index, } else { - if (f->flags & VLIB_FRAME_FREE_AFTER_DISPATCH) + if (f->frame_flags & VLIB_FRAME_FREE_AFTER_DISPATCH) { ASSERT (!(n->flags & VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH)); vlib_frame_free (vm, n, f); @@ -1337,7 +1382,8 @@ dispatch_process (vlib_main_t * vm, vlib_process_update_stats (vm, p, /* n_calls */ !is_suspend, /* n_vectors */ n_vectors, - /* n_clocks */ t - last_time_stamp); + /* n_clocks */ t - last_time_stamp, + /* pmc_ticks */ 0ULL); return t; } @@ -1371,9 +1417,8 @@ dispatch_suspended_process (vlib_main_t * vm, ASSERT (p->flags & (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT)); - pf = - pool_elt_at_index (nm->suspended_process_frames, - p->suspended_process_frame_index); + pf = pool_elt_at_index (nm->suspended_process_frames, + p->suspended_process_frame_index); node_runtime = &p->node_runtime; node = vlib_get_node (vm, node_runtime->node_index); @@ -1409,8 +1454,9 @@ dispatch_suspended_process (vlib_main_t * vm, else { p->flags &= ~VLIB_PROCESS_IS_RUNNING; + pool_put_index (nm->suspended_process_frames, + p->suspended_process_frame_index); p->suspended_process_frame_index = ~0; - pool_put (nm->suspended_process_frames, pf); } t = clib_cpu_time_now (); @@ -1420,7 +1466,8 @@ dispatch_suspended_process (vlib_main_t * vm, vlib_process_update_stats (vm, p, /* n_calls */ !is_suspend, /* n_vectors */ n_vectors, - /* n_clocks */ t - last_time_stamp); + /* n_clocks */ t - last_time_stamp, + /* pmc_ticks */ 0ULL); return t; } @@ -1470,6 +1517,9 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (!nm->interrupt_threshold_vector_length) nm->interrupt_threshold_vector_length = 5; + /* Make sure the performance monitor counter is disabled */ + vm->perf_counter_id = ~0; + /* Start all processes. */ if (is_main) { @@ -1485,13 +1535,19 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) vlib_node_runtime_t *n; if (PREDICT_FALSE (_vec_len (vm->pending_rpc_requests) > 0)) - vl_api_send_pending_rpc_requests (vm); + { + if (!is_main) + vl_api_send_pending_rpc_requests (vm); + } if (!is_main) { vlib_worker_thread_barrier_check (); vec_foreach (fqm, tm->frame_queue_mains) vlib_frame_queue_dequeue (vm, fqm); + if (PREDICT_FALSE (vm->worker_thread_main_loop_callback != 0)) + ((void (*)(vlib_main_t *)) vm->worker_thread_main_loop_callback) + (vm); } /* Process pre-input nodes. */ @@ -1594,12 +1650,12 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) te->n_data_elts, te->n_data_elt_bytes); if (te->n_data_bytes < sizeof (te->inline_event_data)) - clib_memcpy (data, te->inline_event_data, - te->n_data_bytes); + clib_memcpy_fast (data, te->inline_event_data, + te->n_data_bytes); else { - clib_memcpy (data, te->event_data_as_vector, - te->n_data_bytes); + clib_memcpy_fast (data, te->event_data_as_vector, + te->n_data_bytes); vec_free (te->event_data_as_vector); } pool_put (nm->signal_timed_event_data_pool, te); @@ -1704,7 +1760,7 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) if (!vm->name) vm->name = "VLIB"; - if ((error = unix_physmem_init (vm))) + if ((error = vlib_physmem_init (vm))) { clib_error_report (error); goto done; @@ -1790,6 +1846,8 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) vec_validate (vm->pending_rpc_requests, 0); _vec_len (vm->pending_rpc_requests) = 0; + vec_validate (vm->processing_rpc_requests, 0); + _vec_len (vm->processing_rpc_requests) = 0; switch (clib_setjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_NONE)) {