X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlib%2Fthreads.c;h=52886df37e0c6e47b7e80da008e228bdffee356a;hb=72d8758fdc2266b9f4cd53063da2d23f0855c1df;hp=def8927e29ff24935d54faed1c01c81f7edbf37a;hpb=88c6e0086b15963b4d1a268e1fe8bbc2bcd9779c;p=vpp.git diff --git a/src/vlib/threads.c b/src/vlib/threads.c index def8927e29f..52886df37e0 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -42,8 +43,8 @@ vlib_thread_main_t vlib_thread_main; * imapacts observed timings. */ -static u32 -elog_id_for_msg_name (const char *msg_name) +u32 +elog_global_id_for_msg_name (const char *msg_name) { uword *p, r; static uword *h; @@ -84,7 +85,8 @@ barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed) ed = ELOG_DATA (&vlib_global_main.elog_main, e); ed->count = (int) vlib_worker_threads[0].barrier_sync_count; - ed->caller = elog_id_for_msg_name (vlib_worker_threads[0].barrier_caller); + ed->caller = elog_global_id_for_msg_name + (vlib_worker_threads[0].barrier_caller); ed->t_entry = (int) (1000000.0 * t_entry); ed->t_open = (int) (1000000.0 * t_open); ed->t_closed = (int) (1000000.0 * t_closed); @@ -110,7 +112,8 @@ barrier_trace_sync_rec (f64 t_entry) ed = ELOG_DATA (&vlib_global_main.elog_main, e); ed->depth = (int) vlib_worker_threads[0].recursion_level - 1; - ed->caller = elog_id_for_msg_name (vlib_worker_threads[0].barrier_caller); + ed->caller = elog_global_id_for_msg_name + (vlib_worker_threads[0].barrier_caller); } static inline void @@ -305,7 +308,7 @@ vlib_thread_init (vlib_main_t * vm) w = vlib_worker_threads; w->thread_mheap = clib_mem_get_heap (); w->thread_stack = vlib_thread_stacks[0]; - w->lcore_id = tm->main_lcore; + w->cpu_id = tm->main_lcore; w->lwp = syscall (SYS_gettid); w->thread_id = pthread_self (); tm->n_vlib_mains = 1; @@ -390,7 +393,7 @@ vlib_frame_queue_alloc (int nelts) vlib_frame_queue_t *fq; fq = clib_mem_alloc_aligned (sizeof (*fq), CLIB_CACHE_LINE_BYTES); - memset (fq, 0, sizeof (*fq)); + clib_memset (fq, 0, sizeof (*fq)); fq->nelts = nelts; fq->vector_threshold = 128; // packets vec_validate_aligned (fq->elts, nelts - 1, CLIB_CACHE_LINE_BYTES); @@ -513,7 +516,7 @@ vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index, ASSERT (fq); - new_tail = __sync_add_and_fetch (&fq->tail, 1); + new_tail = clib_atomic_add_fetch (&fq->tail, 1); /* Wait until a ring slot is available */ while (new_tail >= fq->head + fq->nelts) @@ -573,12 +576,12 @@ vlib_worker_thread_init (vlib_worker_thread_t * w) { /* Initial barrier sync, for both worker and i/o threads */ - clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, 1); + clib_atomic_fetch_add (vlib_worker_threads->workers_at_barrier, 1); while (*vlib_worker_threads->wait_at_barrier) ; - clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, -1); + clib_atomic_fetch_add (vlib_worker_threads->workers_at_barrier, -1); } } @@ -600,21 +603,42 @@ vlib_worker_thread_bootstrap_fn (void *arg) return rv; } +static void +vlib_get_thread_core_socket (vlib_worker_thread_t * w, unsigned cpu_id) +{ + const char *sys_cpu_path = "/sys/devices/system/cpu/cpu"; + u8 *p = 0; + int core_id = -1, socket_id = -1; + + p = format (p, "%s%u/topology/core_id%c", sys_cpu_path, cpu_id, 0); + clib_sysfs_read ((char *) p, "%d", &core_id); + vec_reset_length (p); + p = + format (p, "%s%u/topology/physical_package_id%c", sys_cpu_path, cpu_id, + 0); + clib_sysfs_read ((char *) p, "%d", &socket_id); + vec_free (p); + + w->core_id = core_id; + w->socket_id = socket_id; +} + static clib_error_t * -vlib_launch_thread_int (void *fp, vlib_worker_thread_t * w, unsigned lcore_id) +vlib_launch_thread_int (void *fp, vlib_worker_thread_t * w, unsigned cpu_id) { vlib_thread_main_t *tm = &vlib_thread_main; void *(*fp_arg) (void *) = fp; - w->lcore_id = lcore_id; + w->cpu_id = cpu_id; + vlib_get_thread_core_socket (w, cpu_id); if (tm->cb.vlib_launch_thread_cb && !w->registration->use_pthreads) - return tm->cb.vlib_launch_thread_cb (fp, (void *) w, lcore_id); + return tm->cb.vlib_launch_thread_cb (fp, (void *) w, cpu_id); else { pthread_t worker; cpu_set_t cpuset; CPU_ZERO (&cpuset); - CPU_SET (lcore_id, &cpuset); + CPU_SET (cpu_id, &cpuset); if (pthread_create (&worker, NULL /* attr */ , fp_arg, (void *) w)) return clib_error_return_unix (0, "pthread_create"); @@ -675,6 +699,9 @@ start_workers (vlib_main_t * vm) vlib_worker_threads->node_reforks_required = clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES); + /* We'll need the rpc vector lock... */ + clib_spinlock_init (&vm->pending_rpc_lock); + /* Ask for an initial barrier sync */ *vlib_worker_threads->workers_at_barrier = 0; *vlib_worker_threads->wait_at_barrier = 1; @@ -692,8 +719,6 @@ start_workers (vlib_main_t * vm) for (i = 0; i < vec_len (tm->registrations); i++) { vlib_node_main_t *nm, *nm_clone; - vlib_buffer_free_list_t *fl_clone, *fl_orig; - vlib_buffer_free_list_t *orig_freelist_pool; int k; tr = tm->registrations[i]; @@ -751,8 +776,8 @@ start_workers (vlib_main_t * vm) vm_clone->pending_rpc_requests = 0; vec_validate (vm_clone->pending_rpc_requests, 0); _vec_len (vm_clone->pending_rpc_requests) = 0; - memset (&vm_clone->random_buffer, 0, - sizeof (vm_clone->random_buffer)); + clib_memset (&vm_clone->random_buffer, 0, + sizeof (vm_clone->random_buffer)); nm = &vlib_mains[0]->node_main; nm_clone = &vm_clone->node_main; @@ -774,7 +799,7 @@ start_workers (vlib_main_t * vm) /* fork the frame dispatch queue */ nm_clone->pending_frames = 0; - vec_validate (nm_clone->pending_frames, 10); /* $$$$$?????? */ + vec_validate (nm_clone->pending_frames, 10); _vec_len (nm_clone->pending_frames) = 0; /* fork nodes */ @@ -788,9 +813,9 @@ start_workers (vlib_main_t * vm) clib_memcpy (n, nm->nodes[j], sizeof (*n)); /* none of the copied nodes have enqueue rights given out */ n->owner_node_index = VLIB_INVALID_NODE_INDEX; - memset (&n->stats_total, 0, sizeof (n->stats_total)); - memset (&n->stats_last_clear, 0, - sizeof (n->stats_last_clear)); + clib_memset (&n->stats_total, 0, sizeof (n->stats_total)); + clib_memset (&n->stats_last_clear, 0, + sizeof (n->stats_last_clear)); vec_add1 (nm_clone->nodes, n); n++; } @@ -823,12 +848,29 @@ start_workers (vlib_main_t * vm) n->runtime_data_bytes)); } + nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT] = + vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT], + CLIB_CACHE_LINE_BYTES); + vec_foreach (rt, + nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) + { + vlib_node_t *n = vlib_get_node (vm, rt->node_index); + rt->thread_index = vm_clone->thread_index; + /* copy initial runtime_data from node */ + if (n->runtime_data && n->runtime_data_bytes > 0) + clib_memcpy (rt->runtime_data, n->runtime_data, + clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, + n->runtime_data_bytes)); + } + nm_clone->processes = vec_dup_aligned (nm->processes, CLIB_CACHE_LINE_BYTES); - /* zap the (per worker) frame freelists, etc */ - nm_clone->frame_sizes = 0; + /* Create per-thread frame freelist */ + nm_clone->frame_sizes = vec_new (vlib_frame_size_t, 1); +#ifdef VLIB_SUPPORTS_ARBITRARY_SCALAR_SIZES nm_clone->frame_size_hash = hash_create (0, sizeof (uword)); +#endif /* Packet trace buffers are guaranteed to be empty, nothing to do here */ @@ -841,24 +883,6 @@ start_workers (vlib_main_t * vm) (vlib_mains[0]->error_main.counters_last_clear, CLIB_CACHE_LINE_BYTES); - /* Fork the vlib_buffer_main_t free lists, etc. */ - orig_freelist_pool = vm_clone->buffer_free_list_pool; - vm_clone->buffer_free_list_pool = 0; - - /* *INDENT-OFF* */ - pool_foreach (fl_orig, orig_freelist_pool, - ({ - pool_get_aligned (vm_clone->buffer_free_list_pool, - fl_clone, CLIB_CACHE_LINE_BYTES); - ASSERT (fl_orig - orig_freelist_pool - == fl_clone - vm_clone->buffer_free_list_pool); - - fl_clone[0] = fl_orig[0]; - fl_clone->buffers = 0; - fl_clone->n_alloc = 0; - })); -/* *INDENT-ON* */ - worker_thread_index++; } } @@ -1014,8 +1038,8 @@ vlib_worker_thread_node_refork (void) u64 *old_counters = vm_clone->error_main.counters; u64 *old_counters_all_clear = vm_clone->error_main.counters_last_clear; - clib_memcpy (&vm_clone->error_main, &vm->error_main, - sizeof (vm->error_main)); + clib_memcpy_fast (&vm_clone->error_main, &vm->error_main, + sizeof (vm->error_main)); j = vec_len (vm->error_main.counters) - 1; vec_validate_aligned (old_counters, j, CLIB_CACHE_LINE_BYTES); vec_validate_aligned (old_counters_all_clear, j, CLIB_CACHE_LINE_BYTES); @@ -1056,27 +1080,27 @@ vlib_worker_thread_node_refork (void) new_n = nm->nodes[j]; old_n_clone = old_nodes_clone[j]; - clib_memcpy (new_n_clone, new_n, sizeof (*new_n)); + clib_memcpy_fast (new_n_clone, new_n, sizeof (*new_n)); /* none of the copied nodes have enqueue rights given out */ new_n_clone->owner_node_index = VLIB_INVALID_NODE_INDEX; if (j >= vec_len (old_nodes_clone)) { /* new node, set to zero */ - memset (&new_n_clone->stats_total, 0, - sizeof (new_n_clone->stats_total)); - memset (&new_n_clone->stats_last_clear, 0, - sizeof (new_n_clone->stats_last_clear)); + clib_memset (&new_n_clone->stats_total, 0, + sizeof (new_n_clone->stats_total)); + clib_memset (&new_n_clone->stats_last_clear, 0, + sizeof (new_n_clone->stats_last_clear)); } else { /* Copy stats if the old data is valid */ - clib_memcpy (&new_n_clone->stats_total, - &old_n_clone->stats_total, - sizeof (new_n_clone->stats_total)); - clib_memcpy (&new_n_clone->stats_last_clear, - &old_n_clone->stats_last_clear, - sizeof (new_n_clone->stats_last_clear)); + clib_memcpy_fast (&new_n_clone->stats_total, + &old_n_clone->stats_total, + sizeof (new_n_clone->stats_total)); + clib_memcpy_fast (&new_n_clone->stats_last_clear, + &old_n_clone->stats_last_clear, + sizeof (new_n_clone->stats_last_clear)); /* keep previous node state */ new_n_clone->state = old_n_clone->state; @@ -1102,17 +1126,17 @@ vlib_worker_thread_node_refork (void) rt->thread_index = vm_clone->thread_index; /* copy runtime_data, will be overwritten later for existing rt */ if (n->runtime_data && n->runtime_data_bytes > 0) - clib_memcpy (rt->runtime_data, n->runtime_data, - clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, - n->runtime_data_bytes)); + clib_memcpy_fast (rt->runtime_data, n->runtime_data, + clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, + n->runtime_data_bytes)); } for (j = 0; j < vec_len (old_rt); j++) { rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index); rt->state = old_rt[j].state; - clib_memcpy (rt->runtime_data, old_rt[j].runtime_data, - VLIB_NODE_RUNTIME_DATA_SIZE); + clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data, + VLIB_NODE_RUNTIME_DATA_SIZE); } vec_free (old_rt); @@ -1129,17 +1153,44 @@ vlib_worker_thread_node_refork (void) rt->thread_index = vm_clone->thread_index; /* copy runtime_data, will be overwritten later for existing rt */ if (n->runtime_data && n->runtime_data_bytes > 0) - clib_memcpy (rt->runtime_data, n->runtime_data, - clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, - n->runtime_data_bytes)); + clib_memcpy_fast (rt->runtime_data, n->runtime_data, + clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, + n->runtime_data_bytes)); + } + + for (j = 0; j < vec_len (old_rt); j++) + { + rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index); + rt->state = old_rt[j].state; + clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data, + VLIB_NODE_RUNTIME_DATA_SIZE); + } + + vec_free (old_rt); + + /* re-clone pre-input nodes */ + old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]; + nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT] = + vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT], + CLIB_CACHE_LINE_BYTES); + + vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) + { + vlib_node_t *n = vlib_get_node (vm, rt->node_index); + rt->thread_index = vm_clone->thread_index; + /* copy runtime_data, will be overwritten later for existing rt */ + if (n->runtime_data && n->runtime_data_bytes > 0) + clib_memcpy_fast (rt->runtime_data, n->runtime_data, + clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, + n->runtime_data_bytes)); } for (j = 0; j < vec_len (old_rt); j++) { rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index); rt->state = old_rt[j].state; - clib_memcpy (rt->runtime_data, old_rt[j].runtime_data, - VLIB_NODE_RUNTIME_DATA_SIZE); + clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data, + VLIB_NODE_RUNTIME_DATA_SIZE); } vec_free (old_rt); @@ -1286,22 +1337,6 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) VLIB_EARLY_CONFIG_FUNCTION (cpu_config, "cpu"); -#if !defined (__x86_64__) && !defined (__i386__) && !defined (__aarch64__) && !defined (__powerpc64__) && !defined(__arm__) -void -__sync_fetch_and_add_8 (void) -{ - fformat (stderr, "%s called\n", __FUNCTION__); - abort (); -} - -void -__sync_add_and_fetch_8 (void) -{ - fformat (stderr, "%s called\n", __FUNCTION__); - abort (); -} -#endif - void vnet_main_fixup (vlib_fork_fixup_t which) __attribute__ ((weak)); void vnet_main_fixup (vlib_fork_fixup_t which) @@ -1347,20 +1382,23 @@ vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which) #endif void -vlib_worker_thread_barrier_sync_int (vlib_main_t * vm) +vlib_worker_thread_barrier_sync_int (vlib_main_t * vm, const char *func_name) { f64 deadline; f64 now; f64 t_entry; f64 t_open; f64 t_closed; + f64 max_vector_rate; u32 count; + int i; if (vec_len (vlib_mains) < 2) return; ASSERT (vlib_get_thread_index () == 0); + vlib_worker_threads[0].barrier_caller = func_name; count = vec_len (vlib_mains) - 1; /* Record entry relative to last close */ @@ -1374,23 +1412,41 @@ vlib_worker_thread_barrier_sync_int (vlib_main_t * vm) return; } + /* + * Need data to decide if we're working hard enough to honor + * the barrier hold-down timer. + */ + max_vector_rate = 0.0; + for (i = 1; i < vec_len (vlib_mains); i++) + max_vector_rate = + clib_max (max_vector_rate, + vlib_last_vectors_per_main_loop_as_f64 (vlib_mains[i])); + vlib_worker_threads[0].barrier_sync_count++; /* Enforce minimum barrier open time to minimize packet loss */ ASSERT (vm->barrier_no_close_before <= (now + BARRIER_MINIMUM_OPEN_LIMIT)); - while (1) + /* + * If any worker thread seems busy, which we define + * as a vector rate above 10, we enforce the barrier hold-down timer + */ + if (max_vector_rate > 10.0) { - now = vlib_time_now (vm); - /* Barrier hold-down timer expired? */ - if (now >= vm->barrier_no_close_before) - break; - if ((vm->barrier_no_close_before - now) - > (2.0 * BARRIER_MINIMUM_OPEN_LIMIT)) + while (1) { - clib_warning ("clock change: would have waited for %.4f seconds", - (vm->barrier_no_close_before - now)); - break; + now = vlib_time_now (vm); + /* Barrier hold-down timer expired? */ + if (now >= vm->barrier_no_close_before) + break; + if ((vm->barrier_no_close_before - now) + > (2.0 * BARRIER_MINIMUM_OPEN_LIMIT)) + { + clib_warning + ("clock change: would have waited for %.4f seconds", + (vm->barrier_no_close_before - now)); + break; + } } } /* Record time of closure */ @@ -1469,14 +1525,22 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) /* Do per thread rebuilds in parallel */ refork_needed = 1; - clib_smp_atomic_add (vlib_worker_threads->node_reforks_required, - (vec_len (vlib_mains) - 1)); + clib_atomic_fetch_add (vlib_worker_threads->node_reforks_required, + (vec_len (vlib_mains) - 1)); now = vlib_time_now (vm); t_update_main = now - vm->barrier_epoch; } deadline = now + BARRIER_SYNC_TIMEOUT; + /* + * Note when we let go of the barrier. + * Workers can use this to derive a reasonably accurate + * time offset. See vlib_time_now(...) + */ + vm->time_last_barrier_release = vlib_time_now (vm); + CLIB_MEMORY_STORE_BARRIER (); + *vlib_worker_threads->wait_at_barrier = 0; while (*vlib_worker_threads->workers_at_barrier > 0) @@ -1775,6 +1839,45 @@ threads_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (threads_init); + +static clib_error_t * +show_clock_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + int i; + f64 now; + + now = vlib_time_now (vm); + + vlib_cli_output (vm, "Time now %.9f", now); + + if (vec_len (vlib_mains) == 1) + return 0; + + vlib_cli_output (vm, "Time last barrier release %.9f", + vm->time_last_barrier_release); + + for (i = 1; i < vec_len (vlib_mains); i++) + { + if (vlib_mains[i] == 0) + continue; + vlib_cli_output (vm, "Thread %d offset %.9f error %.9f", i, + vlib_mains[i]->time_offset, + vm->time_last_barrier_release - + vlib_mains[i]->time_last_barrier_release); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (f_command, static) = +{ + .path = "show clock", + .short_help = "show clock", + .function = show_clock_command_fn, +}; +/* *INDENT-ON* */ + /* * fd.io coding-style-patch-verification: ON *