X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlib%2Fthreads.c;h=48e1bab8a66eb67bd68f189ed67e5c7733098b96;hb=143610014ab91f7998014f4322c36c8b07f50d5f;hp=82263797d931e4dfcb02e29b82d5ededab9f6537;hpb=368104d06ad6d667a8cce152426916fc654b6627;p=vpp.git diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 82263797d93..48e1bab8a66 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -18,17 +18,14 @@ #include #include #include +#include #include #include #include -#include #include -DECLARE_CJ_GLOBAL_LOG; - - u32 vl (void *p) { @@ -51,13 +48,11 @@ barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed) if (!vlib_worker_threads->barrier_elog_enabled) return; - /* *INDENT-OFF* */ - ELOG_TYPE_DECLARE (e) = - { - .format = "bar-trace-%s-#%d", - .format_args = "T4i4", - }; - /* *INDENT-ON* */ + ELOG_TYPE_DECLARE (e) = { + .format = "bar-trace-%s-#%d", + .format_args = "T4i4", + }; + struct { u32 caller, count, t_entry, t_open, t_closed; @@ -78,13 +73,11 @@ barrier_trace_sync_rec (f64 t_entry) if (!vlib_worker_threads->barrier_elog_enabled) return; - /* *INDENT-OFF* */ - ELOG_TYPE_DECLARE (e) = - { - .format = "bar-syncrec-%s-#%d", - .format_args = "T4i4", - }; - /* *INDENT-ON* */ + ELOG_TYPE_DECLARE (e) = { + .format = "bar-syncrec-%s-#%d", + .format_args = "T4i4", + }; + struct { u32 caller, depth; @@ -102,13 +95,11 @@ barrier_trace_release_rec (f64 t_entry) if (!vlib_worker_threads->barrier_elog_enabled) return; - /* *INDENT-OFF* */ - ELOG_TYPE_DECLARE (e) = - { - .format = "bar-relrrec-#%d", - .format_args = "i4", - }; - /* *INDENT-ON* */ + ELOG_TYPE_DECLARE (e) = { + .format = "bar-relrrec-#%d", + .format_args = "i4", + }; + struct { u32 depth; @@ -124,13 +115,11 @@ barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main) if (!vlib_worker_threads->barrier_elog_enabled) return; - /* *INDENT-OFF* */ - ELOG_TYPE_DECLARE (e) = - { - .format = "bar-rel-#%d-e%d-u%d-t%d", - .format_args = "i4i4i4i4", - }; - /* *INDENT-ON* */ + 
ELOG_TYPE_DECLARE (e) = { + .format = "bar-rel-#%d-e%d-u%d-t%d", + .format_args = "i4i4i4i4", + }; + struct { u32 count, t_entry, t_update_main, t_closed_total; @@ -324,13 +313,13 @@ vlib_thread_init (vlib_main_t * vm) { uword c; /* *INDENT-OFF* */ - clib_bitmap_foreach (c, tr->coremask, ({ + clib_bitmap_foreach (c, tr->coremask) { if (clib_bitmap_get(avail_cpu, c) == 0) return clib_error_return (0, "cpu %u is not available to be used" " for the '%s' thread",c, tr->name); avail_cpu = clib_bitmap_set(avail_cpu, c, 0); - })); + } /* *INDENT-ON* */ } else @@ -585,6 +574,7 @@ vlib_worker_thread_bootstrap_fn (void *arg) __os_thread_index = w - vlib_worker_threads; + vlib_process_start_switch_stack (vlib_mains[__os_thread_index], 0); rv = (void *) clib_calljmp ((uword (*)(uword)) w->thread_function, (uword) arg, w->thread_stack + VLIB_THREAD_STACK_SIZE); @@ -609,14 +599,14 @@ vlib_get_thread_core_numa (vlib_worker_thread_t * w, unsigned cpu_id) /* *INDENT-OFF* */ clib_sysfs_read ("/sys/devices/system/node/online", "%U", unformat_bitmap_list, &nbmp); - clib_bitmap_foreach (node, nbmp, ({ + clib_bitmap_foreach (node, nbmp) { p = format (p, "%s%u/cpulist%c", sys_node_path, node, 0); clib_sysfs_read ((char *) p, "%U", unformat_bitmap_list, &cbmp); if (clib_bitmap_get (cbmp, cpu_id)) numa_id = node; vec_reset_length (cbmp); vec_reset_length (p); - })); + } /* *INDENT-ON* */ vec_free (nbmp); vec_free (cbmp); @@ -629,6 +619,7 @@ vlib_get_thread_core_numa (vlib_worker_thread_t * w, unsigned cpu_id) static clib_error_t * vlib_launch_thread_int (void *fp, vlib_worker_thread_t * w, unsigned cpu_id) { + clib_mem_main_t *mm = &clib_mem_main; vlib_thread_main_t *tm = &vlib_thread_main; void *(*fp_arg) (void *) = fp; void *numa_heap; @@ -637,19 +628,22 @@ vlib_launch_thread_int (void *fp, vlib_worker_thread_t * w, unsigned cpu_id) vlib_get_thread_core_numa (w, cpu_id); /* Set up NUMA-bound heap if indicated */ - if (clib_per_numa_mheaps[w->numa_id] == 0) + if 
(mm->per_numa_mheaps[w->numa_id] == 0) { /* If the user requested a NUMA heap, create it... */ if (tm->numa_heap_size) { - numa_heap = clib_mem_init_thread_safe_numa - (0 /* DIY */ , tm->numa_heap_size, w->numa_id); - clib_per_numa_mheaps[w->numa_id] = numa_heap; + clib_mem_set_numa_affinity (w->numa_id, 1 /* force */ ); + numa_heap = clib_mem_create_heap (0 /* DIY */ , tm->numa_heap_size, + 1 /* is_locked */ , + "numa %u heap", w->numa_id); + clib_mem_set_default_numa_affinity (); + mm->per_numa_mheaps[w->numa_id] = numa_heap; } else { /* Or, use the main heap */ - clib_per_numa_mheaps[w->numa_id] = w->thread_mheap; + mm->per_numa_mheaps[w->numa_id] = w->thread_mheap; } } @@ -684,7 +678,7 @@ start_workers (vlib_main_t * vm) vlib_node_runtime_t *rt; u32 n_vlib_mains = tm->n_vlib_mains; u32 worker_thread_index; - u8 *main_heap = clib_mem_get_per_cpu_heap (); + clib_mem_heap_t *main_heap = clib_mem_get_per_cpu_heap (); vec_reset_length (vlib_worker_threads); @@ -703,16 +697,18 @@ start_workers (vlib_main_t * vm) clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); vm->elog_main.lock[0] = 0; - if (n_vlib_mains > 1) - { - /* Replace hand-crafted length-1 vector with a real vector */ - vlib_mains = 0; + clib_callback_data_init (&vm->vlib_node_runtime_perf_callbacks, + &vm->worker_thread_main_loop_callback_lock); - vec_validate_aligned (vlib_mains, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - _vec_len (vlib_mains) = 0; - vec_add1_aligned (vlib_mains, vm, CLIB_CACHE_LINE_BYTES); + /* Replace hand-crafted length-1 vector with a real vector */ + vlib_mains = 0; + vec_validate_aligned (vlib_mains, n_vlib_mains - 1, CLIB_CACHE_LINE_BYTES); + _vec_len (vlib_mains) = 0; + vec_add1_aligned (vlib_mains, vm, CLIB_CACHE_LINE_BYTES); + + if (n_vlib_mains > 1) + { vlib_worker_threads->wait_at_barrier = clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES); vlib_worker_threads->workers_at_barrier = @@ -737,6 +733,7 @@ start_workers (vlib_main_t * vm) 
vm->barrier_no_close_before = 0; worker_thread_index = 1; + clib_spinlock_init (&vm->worker_thread_main_loop_callback_lock); for (i = 0; i < vec_len (tm->registrations); i++) { @@ -755,8 +752,10 @@ start_workers (vlib_main_t * vm) vec_add2 (vlib_worker_threads, w, 1); /* Currently unused, may not really work */ if (tr->mheap_size) - w->thread_mheap = create_mspace (tr->mheap_size, - 0 /* unlocked */ ); + w->thread_mheap = clib_mem_create_heap (0, tr->mheap_size, + /* unlocked */ 0, + "%s%d heap", + tr->name, k); else w->thread_mheap = main_heap; @@ -793,6 +792,11 @@ start_workers (vlib_main_t * vm) _vec_len (vm_clone->pending_rpc_requests) = 0; clib_memset (&vm_clone->random_buffer, 0, sizeof (vm_clone->random_buffer)); + clib_spinlock_init + (&vm_clone->worker_thread_main_loop_callback_lock); + clib_callback_data_init + (&vm_clone->vlib_node_runtime_perf_callbacks, + &vm_clone->worker_thread_main_loop_callback_lock); nm = &vlib_mains[0]->node_main; nm_clone = &vm_clone->node_main; @@ -852,6 +856,9 @@ start_workers (vlib_main_t * vm) nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] = vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], CLIB_CACHE_LINE_BYTES); + clib_interrupt_init ( + &nm_clone->interrupts, + vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])); vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]) { vlib_node_t *n = vlib_get_node (vm, rt->node_index); @@ -920,8 +927,10 @@ start_workers (vlib_main_t * vm) vec_add2 (vlib_worker_threads, w, 1); if (tr->mheap_size) { - w->thread_mheap = - create_mspace (tr->mheap_size, 0 /* locked */ ); + w->thread_mheap = clib_mem_create_heap (0, tr->mheap_size, + /* locked */ 0, + "%s%d heap", + tr->name, j); } else w->thread_mheap = main_heap; @@ -963,13 +972,13 @@ start_workers (vlib_main_t * vm) { uword c; /* *INDENT-OFF* */ - clib_bitmap_foreach (c, tr->coremask, ({ + clib_bitmap_foreach (c, tr->coremask) { w = vlib_worker_threads + worker_thread_index++; err = vlib_launch_thread_int 
(vlib_worker_thread_bootstrap_fn, w, c); if (err) clib_error_report (err); - })); + } /* *INDENT-ON* */ } } @@ -1165,6 +1174,9 @@ vlib_worker_thread_node_refork (void) nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] = vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], CLIB_CACHE_LINE_BYTES); + clib_interrupt_resize ( + &nm_clone->interrupts, + vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])); vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]) { @@ -1438,6 +1450,18 @@ vlib_worker_thread_initial_barrier_sync_and_release (vlib_main_t * vm) *vlib_worker_threads->wait_at_barrier = 0; } +/** + * Return true if the worker thread barrier is held + */ +u8 +vlib_worker_thread_barrier_held (void) +{ + if (vec_len (vlib_mains) < 2) + return (1); + + return (*vlib_worker_threads->wait_at_barrier == 1); +} + void vlib_worker_thread_barrier_sync_int (vlib_main_t * vm, const char *func_name) { @@ -1469,6 +1493,10 @@ vlib_worker_thread_barrier_sync_int (vlib_main_t * vm, const char *func_name) return; } + if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0)) + clib_call_callbacks (vm->barrier_perf_callbacks, vm, + vm->clib_time.last_cpu_time, 0 /* enter */ ); + /* * Need data to decide if we're working hard enough to honor * the barrier hold-down timer. 
@@ -1632,6 +1660,44 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) barrier_trace_release (t_entry, t_closed_total, t_update_main); + if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0)) + clib_call_callbacks (vm->barrier_perf_callbacks, vm, + vm->clib_time.last_cpu_time, 1 /* leave */ ); +} + +/** + * Wait until each of the workers has been once around the track + */ +void +vlib_worker_wait_one_loop (void) +{ + ASSERT (vlib_get_thread_index () == 0); + + if (vec_len (vlib_mains) < 2) + return; + + if (vlib_worker_thread_barrier_held ()) + return; + + u32 *counts = 0; + u32 ii; + + vec_validate (counts, vec_len (vlib_mains) - 1); + + /* record the current loop counts */ + vec_foreach_index (ii, vlib_mains) + counts[ii] = vlib_mains[ii]->main_loop_count; + + /* spin until each changes, apart from the main thread, or we'd be + * a while */ + for (ii = 1; ii < vec_len (counts); ii++) + { + while (counts[ii] == vlib_mains[ii]->main_loop_count) + CLIB_PAUSE (); + } + + vec_free (counts); + return; } /* @@ -1781,6 +1847,8 @@ vlib_worker_thread_fn (void *arg) vlib_main_t *vm = vlib_get_main (); clib_error_t *e; + vlib_process_finish_switch_stack (vm); + ASSERT (vm->thread_index == vlib_get_thread_index ()); vlib_worker_thread_init (w); @@ -1941,6 +2009,12 @@ VLIB_CLI_COMMAND (f_command, static) = }; /* *INDENT-ON* */ +vlib_thread_main_t * +vlib_get_thread_main_not_inline (void) +{ + return vlib_get_thread_main (); +} + /* * fd.io coding-style-patch-verification: ON *