vlib_thread_main_t vlib_thread_main;
uword
-os_get_cpu_number (void)
-{
- void *sp;
- uword n;
- u32 len;
-
- len = vec_len (vlib_thread_stacks);
- if (len == 0)
- return 0;
-
- /* Get any old stack address. */
- sp = &sp;
-
- n = ((uword) sp - (uword) vlib_thread_stacks[0])
- >> VLIB_LOG2_THREAD_STACK_SIZE;
-
- /* "processes" have their own stacks, and they always run in thread 0 */
- n = n >= len ? 0 : n;
-
- return n;
-}
-
-uword
-os_get_ncpus (void)
+os_get_nthreads (void)
{
u32 len;
return 0;
}
-vlib_worker_thread_t *
-vlib_alloc_thread (vlib_main_t * vm)
-{
- vlib_worker_thread_t *w;
-
- if (vec_len (vlib_worker_threads) >= vec_len (vlib_thread_stacks))
- {
- clib_warning ("out of worker threads... Quitting...");
- exit (1);
- }
- vec_add2 (vlib_worker_threads, w, 1);
- w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
- return w;
-}
-
vlib_frame_queue_t *
vlib_frame_queue_alloc (int nelts)
{
f64 b4 = vlib_time_now_ticks (vm, before);
vlib_worker_thread_barrier_check (vm, b4);
/* Bad idea. Dequeue -> enqueue -> dequeue -> trouble */
- // vlib_frame_queue_dequeue (vm->cpu_index, vm, nm);
+ // vlib_frame_queue_dequeue (vm->thread_index, vm, nm);
}
elt = fq->elts + (new_tail & (fq->nelts - 1));
w->lwp = syscall (SYS_gettid);
w->thread_id = pthread_self ();
+ __os_thread_index = w - vlib_worker_threads;
+
rv = (void *) clib_calljmp
((uword (*)(uword)) w->thread_function,
(uword) arg, w->thread_stack + VLIB_THREAD_STACK_SIZE);
vlib_worker_threads->workers_at_barrier =
clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES);
+ vlib_worker_threads->node_reforks_required =
+ clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES);
+
/* Ask for an initial barrier sync */
*vlib_worker_threads->workers_at_barrier = 0;
*vlib_worker_threads->wait_at_barrier = 1;
+ /* Without update or refork */
+ *vlib_worker_threads->node_reforks_required = 0;
+ vm->need_vlib_worker_thread_node_runtime_update = 0;
+
worker_thread_index = 1;
for (i = 0; i < vec_len (tm->registrations); i++)
for (k = 0; k < tr->count; k++)
{
+ vlib_node_t *n;
+
vec_add2 (vlib_worker_threads, w, 1);
if (tr->mheap_size)
w->thread_mheap =
mheap_alloc (0 /* use VM */ , tr->mheap_size);
else
w->thread_mheap = main_heap;
- w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
+
+ w->thread_stack =
+ vlib_thread_stack_init (w - vlib_worker_threads);
w->thread_function = tr->function;
w->thread_function_arg = w;
w->instance_id = k;
vm_clone = clib_mem_alloc (sizeof (*vm_clone));
clib_memcpy (vm_clone, vlib_mains[0], sizeof (*vm_clone));
- vm_clone->cpu_index = worker_thread_index;
+ vm_clone->thread_index = worker_thread_index;
vm_clone->heap_base = w->thread_mheap;
vm_clone->mbuf_alloc_list = 0;
vm_clone->init_functions_called =
/* fork nodes */
nm_clone->nodes = 0;
+
+ /* Allocate all nodes in single block for speed */
+ n = clib_mem_alloc_no_fail (vec_len (nm->nodes) * sizeof (*n));
+
for (j = 0; j < vec_len (nm->nodes); j++)
{
- vlib_node_t *n;
- n = clib_mem_alloc_no_fail (sizeof (*n));
clib_memcpy (n, nm->nodes[j], sizeof (*n));
/* none of the copied nodes have enqueue rights given out */
n->owner_node_index = VLIB_INVALID_NODE_INDEX;
memset (&n->stats_last_clear, 0,
sizeof (n->stats_last_clear));
vec_add1 (nm_clone->nodes, n);
+ n++;
}
nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
- vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
+ vec_foreach (rt,
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
{
vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- rt->cpu_index = vm_clone->cpu_index;
+ rt->thread_index = vm_clone->thread_index;
/* copy initial runtime_data from node */
if (n->runtime_data && n->runtime_data_bytes > 0)
clib_memcpy (rt->runtime_data, n->runtime_data,
clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
n->runtime_data_bytes));
- else if (CLIB_DEBUG > 0)
- memset (rt->runtime_data, 0xfe,
- VLIB_NODE_RUNTIME_DATA_SIZE);
}
nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
{
vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- rt->cpu_index = vm_clone->cpu_index;
+ rt->thread_index = vm_clone->thread_index;
/* copy initial runtime_data from node */
if (n->runtime_data && n->runtime_data_bytes > 0)
clib_memcpy (rt->runtime_data, n->runtime_data,
clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
n->runtime_data_bytes));
- else if (CLIB_DEBUG > 0)
- memset (rt->runtime_data, 0xfe,
- VLIB_NODE_RUNTIME_DATA_SIZE);
}
nm_clone->processes = vec_dup (nm->processes);
/* zap the (per worker) frame freelists, etc */
nm_clone->frame_sizes = 0;
- nm_clone->frame_size_hash = 0;
+ nm_clone->frame_size_hash = hash_create (0, sizeof (uword));
/* Packet trace buffers are guaranteed to be empty, nothing to do here */
mheap_alloc (0 /* use VM */ , tr->mheap_size);
else
w->thread_mheap = main_heap;
- w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
+ w->thread_stack =
+ vlib_thread_stack_init (w - vlib_worker_threads);
w->thread_function = tr->function;
w->thread_function_arg = w;
w->instance_id = j;
VLIB_MAIN_LOOP_ENTER_FUNCTION (start_workers);
-void
-vlib_worker_thread_node_runtime_update (void)
+static inline void
+worker_thread_node_runtime_update_internal (void)
{
int i, j;
- vlib_worker_thread_t *w;
vlib_main_t *vm;
vlib_node_main_t *nm, *nm_clone;
- vlib_node_t **old_nodes_clone;
vlib_main_t *vm_clone;
- vlib_node_runtime_t *rt, *old_rt;
- void *oldheap;
+ vlib_node_runtime_t *rt;
never_inline void
vlib_node_runtime_sync_stats (vlib_main_t * vm,
vlib_node_runtime_t * r,
uword n_calls,
uword n_vectors, uword n_clocks);
- ASSERT (os_get_cpu_number () == 0);
-
- if (vec_len (vlib_mains) == 1)
- return;
+ ASSERT (vlib_get_thread_index () == 0);
vm = vlib_mains[0];
nm = &vm->node_main;
- ASSERT (os_get_cpu_number () == 0);
ASSERT (*vlib_worker_threads->wait_at_barrier == 1);
/*
}
}
- for (i = 1; i < vec_len (vlib_mains); i++)
- {
- vlib_node_runtime_t *rt;
- w = vlib_worker_threads + i;
- oldheap = clib_mem_set_heap (w->thread_mheap);
+ /* Per-worker clone rebuilds are now done on each thread */
+}
- vm_clone = vlib_mains[i];
- /* Re-clone error heap */
- u64 *old_counters = vm_clone->error_main.counters;
- u64 *old_counters_all_clear = vm_clone->error_main.counters_last_clear;
- clib_memcpy (&vm_clone->error_main, &vm->error_main,
- sizeof (vm->error_main));
- j = vec_len (vm->error_main.counters) - 1;
- vec_validate_aligned (old_counters, j, CLIB_CACHE_LINE_BYTES);
- vec_validate_aligned (old_counters_all_clear, j, CLIB_CACHE_LINE_BYTES);
- vm_clone->error_main.counters = old_counters;
- vm_clone->error_main.counters_last_clear = old_counters_all_clear;
+/*
+ * Rebuild the calling thread's private copy of the node graph from the
+ * main thread's copy.
+ *
+ * The source is vlib_mains[0]; the destination is whatever
+ * vlib_get_main () returns, so this is presumably meant to be run by
+ * each worker on its own clone (confirm against the callers).
+ *
+ * Steps, in order:
+ * - copy error_main from the source while keeping the clone's own
+ * counter vectors, grown to the source's length;
+ * - replace next_frames with a fresh duplicate, re-initialising each
+ * entry but keeping its node_runtime_index and its
+ * VLIB_FRAME_NO_FREE_AFTER_DISPATCH flag;
+ * - re-fork every node into one freshly allocated block, carrying the
+ * stats and state over from the previous clone where one exists;
+ * - re-clone the INTERNAL and INPUT node runtimes, restoring state and
+ * runtime_data for runtimes that existed before;
+ * - duplicate the process vector.
+ */
+void
+vlib_worker_thread_node_refork (void)
+{
+ vlib_main_t *vm, *vm_clone;
+ vlib_node_main_t *nm, *nm_clone;
+ vlib_node_t **old_nodes_clone;
+ vlib_node_runtime_t *rt, *old_rt;
- nm_clone = &vm_clone->node_main;
- vec_free (nm_clone->next_frames);
- nm_clone->next_frames = vec_dup (nm->next_frames);
+ vlib_node_t *new_n_clone;
- for (j = 0; j < vec_len (nm_clone->next_frames); j++)
- {
- vlib_next_frame_t *nf = &nm_clone->next_frames[j];
- u32 save_node_runtime_index;
- u32 save_flags;
-
- save_node_runtime_index = nf->node_runtime_index;
- save_flags = nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH;
- vlib_next_frame_init (nf);
- nf->node_runtime_index = save_node_runtime_index;
- nf->flags = save_flags;
- }
+ int j;
+
+ vm = vlib_mains[0];
+ nm = &vm->node_main;
+ vm_clone = vlib_get_main ();
+ nm_clone = &vm_clone->node_main;
+
+ /* Re-clone error heap */
+ u64 *old_counters = vm_clone->error_main.counters;
+ u64 *old_counters_all_clear = vm_clone->error_main.counters_last_clear;
+
+ /* The struct copy below overwrites the clone's counter pointers, so
+ * they are saved above and put back once they are long enough. */
+ clib_memcpy (&vm_clone->error_main, &vm->error_main,
+ sizeof (vm->error_main));
+ j = vec_len (vm->error_main.counters) - 1;
+ vec_validate_aligned (old_counters, j, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (old_counters_all_clear, j, CLIB_CACHE_LINE_BYTES);
+ vm_clone->error_main.counters = old_counters;
+ vm_clone->error_main.counters_last_clear = old_counters_all_clear;
+
+ vec_free (nm_clone->next_frames);
+ nm_clone->next_frames = vec_dup (nm->next_frames);
+
+ for (j = 0; j < vec_len (nm_clone->next_frames); j++)
+ {
+ vlib_next_frame_t *nf = &nm_clone->next_frames[j];
+ u32 save_node_runtime_index;
+ u32 save_flags;
+
+ save_node_runtime_index = nf->node_runtime_index;
+ save_flags = nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH;
+ vlib_next_frame_init (nf);
+ nf->node_runtime_index = save_node_runtime_index;
+ nf->flags = save_flags;
+ }
- old_nodes_clone = nm_clone->nodes;
- nm_clone->nodes = 0;
+ old_nodes_clone = nm_clone->nodes;
+ nm_clone->nodes = 0;
- /* re-fork nodes */
- for (j = 0; j < vec_len (nm->nodes); j++)
- {
- vlib_node_t *old_n_clone;
- vlib_node_t *new_n, *new_n_clone;
+ /* re-fork nodes */
- new_n = nm->nodes[j];
- old_n_clone = old_nodes_clone[j];
+ /* Allocate all nodes in single block for speed */
+ new_n_clone =
+ clib_mem_alloc_no_fail (vec_len (nm->nodes) * sizeof (*new_n_clone));
+ for (j = 0; j < vec_len (nm->nodes); j++)
+ {
+ vlib_node_t *new_n;
- new_n_clone = clib_mem_alloc_no_fail (sizeof (*new_n_clone));
- clib_memcpy (new_n_clone, new_n, sizeof (*new_n));
- /* none of the copied nodes have enqueue rights given out */
- new_n_clone->owner_node_index = VLIB_INVALID_NODE_INDEX;
+ new_n = nm->nodes[j];
- if (j >= vec_len (old_nodes_clone))
- {
- /* new node, set to zero */
- memset (&new_n_clone->stats_total, 0,
- sizeof (new_n_clone->stats_total));
- memset (&new_n_clone->stats_last_clear, 0,
- sizeof (new_n_clone->stats_last_clear));
- }
- else
- {
- /* Copy stats if the old data is valid */
- clib_memcpy (&new_n_clone->stats_total,
- &old_n_clone->stats_total,
- sizeof (new_n_clone->stats_total));
- clib_memcpy (&new_n_clone->stats_last_clear,
- &old_n_clone->stats_last_clear,
- sizeof (new_n_clone->stats_last_clear));
-
- /* keep previous node state */
- new_n_clone->state = old_n_clone->state;
- }
- vec_add1 (nm_clone->nodes, new_n_clone);
- }
- /* Free the old node clone */
- for (j = 0; j < vec_len (old_nodes_clone); j++)
- clib_mem_free (old_nodes_clone[j]);
- vec_free (old_nodes_clone);
-
-
- /* re-clone internal nodes */
- old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL];
- nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
- vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
-
- vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
- {
- vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- rt->cpu_index = vm_clone->cpu_index;
- /* copy runtime_data, will be overwritten later for existing rt */
- if (n->runtime_data && n->runtime_data_bytes > 0)
- clib_memcpy (rt->runtime_data, n->runtime_data,
- clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
- n->runtime_data_bytes));
- }
-
- for (j = 0; j < vec_len (old_rt); j++)
+ clib_memcpy (new_n_clone, new_n, sizeof (*new_n));
+ /* none of the copied nodes have enqueue rights given out */
+ new_n_clone->owner_node_index = VLIB_INVALID_NODE_INDEX;
+
+ if (j >= vec_len (old_nodes_clone))
{
- rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
- rt->state = old_rt[j].state;
- clib_memcpy (rt->runtime_data, old_rt[j].runtime_data,
- VLIB_NODE_RUNTIME_DATA_SIZE);
+ /* new node, set to zero */
+ memset (&new_n_clone->stats_total, 0,
+ sizeof (new_n_clone->stats_total));
+ memset (&new_n_clone->stats_last_clear, 0,
+ sizeof (new_n_clone->stats_last_clear));
}
-
- vec_free (old_rt);
-
- /* re-clone input nodes */
- old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT];
- nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
- vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]);
-
- vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
- {
- vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- rt->cpu_index = vm_clone->cpu_index;
- /* copy runtime_data, will be overwritten later for existing rt */
- if (n->runtime_data && n->runtime_data_bytes > 0)
- clib_memcpy (rt->runtime_data, n->runtime_data,
- clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
- n->runtime_data_bytes));
- }
-
- for (j = 0; j < vec_len (old_rt); j++)
+ else
{
- rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
- rt->state = old_rt[j].state;
- clib_memcpy (rt->runtime_data, old_rt[j].runtime_data,
- VLIB_NODE_RUNTIME_DATA_SIZE);
+ /* Only index the old vector once j is known to be in range;
+ * reading it before the length test runs off the end of the
+ * vector for every node added since the previous fork. */
+ vlib_node_t *old_n_clone = old_nodes_clone[j];
+
+ /* Copy stats if the old data is valid */
+ clib_memcpy (&new_n_clone->stats_total,
+ &old_n_clone->stats_total,
+ sizeof (new_n_clone->stats_total));
+ clib_memcpy (&new_n_clone->stats_last_clear,
+ &old_n_clone->stats_last_clear,
+ sizeof (new_n_clone->stats_last_clear));
+
+ /* keep previous node state */
+ new_n_clone->state = old_n_clone->state;
}
+ vec_add1 (nm_clone->nodes, new_n_clone);
+ new_n_clone++;
+ }
+ /* Free the old node clones. They were allocated as one block, so
+ * releasing the first element releases all of them. Skip the free
+ * when there was no previous clone to avoid indexing an empty vector. */
+ if (vec_len (old_nodes_clone) > 0)
+ clib_mem_free (old_nodes_clone[0]);
- vec_free (old_rt);
+ vec_free (old_nodes_clone);
- nm_clone->processes = vec_dup (nm->processes);
- clib_mem_set_heap (oldheap);
+ /* re-clone internal nodes */
+ old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL];
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
+ vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
- // vnet_main_fork_fixup (i);
+ vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
+ {
+ vlib_node_t *n = vlib_get_node (vm, rt->node_index);
+ rt->thread_index = vm_clone->thread_index;
+ /* copy runtime_data, will be overwritten later for existing rt */
+ if (n->runtime_data && n->runtime_data_bytes > 0)
+ clib_memcpy (rt->runtime_data, n->runtime_data,
+ clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
+ n->runtime_data_bytes));
+ }
+
+ /* Runtimes that existed before keep their previous state and data */
+ for (j = 0; j < vec_len (old_rt); j++)
+ {
+ rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
+ rt->state = old_rt[j].state;
+ clib_memcpy (rt->runtime_data, old_rt[j].runtime_data,
+ VLIB_NODE_RUNTIME_DATA_SIZE);
+ }
+
+ vec_free (old_rt);
+
+ /* re-clone input nodes */
+ old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT];
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
+ vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]);
+
+ vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
+ {
+ vlib_node_t *n = vlib_get_node (vm, rt->node_index);
+ rt->thread_index = vm_clone->thread_index;
+ /* copy runtime_data, will be overwritten later for existing rt */
+ if (n->runtime_data && n->runtime_data_bytes > 0)
+ clib_memcpy (rt->runtime_data, n->runtime_data,
+ clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
+ n->runtime_data_bytes));
+ }
+
+ /* Runtimes that existed before keep their previous state and data */
+ for (j = 0; j < vec_len (old_rt); j++)
+ {
+ rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
+ rt->state = old_rt[j].state;
+ clib_memcpy (rt->runtime_data, old_rt[j].runtime_data,
+ VLIB_NODE_RUNTIME_DATA_SIZE);
}
+
+ vec_free (old_rt);
+
+ /* Drop the previous duplicate first; overwriting the pointer would
+ * leak one vector per refork. */
+ vec_free (nm_clone->processes);
+ nm_clone->processes = vec_dup (nm->processes);
+}
+
+
+void
+vlib_worker_thread_node_runtime_update (void)
+{
+ /*
+ * Make a note that we need to do a node runtime update
+ * prior to releasing the barrier.
+ *
+ * Nothing is rebuilt here: this only sets the flag on
+ * vlib_global_main. vlib_worker_thread_barrier_release () tests the
+ * same-named flag on its vm argument, runs the update, and clears
+ * the flag again before the barrier is dropped.
+ */
+ vlib_global_main.need_vlib_worker_thread_node_runtime_update = 1;
}
u32
VLIB_EARLY_CONFIG_FUNCTION (cpu_config, "cpu");
-#if !defined (__x86_64__) && !defined (__aarch64__) && !defined (__powerpc64__) && !defined(__arm__)
+#if !defined (__x86_64__) && !defined (__i386__) && !defined (__aarch64__) && !defined (__powerpc64__) && !defined(__arm__)
void
__sync_fetch_and_add_8 (void)
{
if (vlib_mains == 0)
return;
- ASSERT (os_get_cpu_number () == 0);
+ ASSERT (vlib_get_thread_index () == 0);
vlib_worker_thread_barrier_sync (vm);
switch (which)
if (vec_len (vlib_mains) < 2)
return;
+ ASSERT (vlib_get_thread_index () == 0);
+
count = vec_len (vlib_mains) - 1;
/* Tolerate recursive calls */
vlib_worker_threads[0].barrier_sync_count++;
- ASSERT (os_get_cpu_number () == 0);
-
deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
*vlib_worker_threads->wait_at_barrier = 1;
vlib_worker_thread_barrier_release (vlib_main_t * vm)
{
f64 deadline;
+ int refork_needed = 0;
if (vec_len (vlib_mains) < 2)
return;
+ ASSERT (vlib_get_thread_index () == 0);
+
if (--vlib_worker_threads[0].recursion_level > 0)
return;
+ /* Update (all) node runtimes before releasing the barrier, if needed */
+ if (vm->need_vlib_worker_thread_node_runtime_update)
+ {
+ /* Do stats elements on main thread */
+ worker_thread_node_runtime_update_internal ();
+ vm->need_vlib_worker_thread_node_runtime_update = 0;
+
+ /* Do per thread rebuilds in parallel */
+ refork_needed = 1;
+ clib_smp_atomic_add (vlib_worker_threads->node_reforks_required,
+ (vec_len (vlib_mains) - 1));
+ }
+
deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
*vlib_worker_threads->wait_at_barrier = 0;
os_panic ();
}
}
+
+ /* Wait for reforks before continuing */
+ if (refork_needed)
+ {
+ deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
+
+ while (*vlib_worker_threads->node_reforks_required > 0)
+ {
+ if (vlib_time_now (vm) > deadline)
+ {
+ fformat (stderr, "%s: worker thread refork deadlock\n",
+ __FUNCTION__);
+ os_panic ();
+ }
+ }
+ }
}
/*
int
vlib_frame_queue_dequeue (vlib_main_t * vm, vlib_frame_queue_main_t * fqm)
{
- u32 thread_id = vm->cpu_index;
+ u32 thread_id = vm->thread_index;
vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
vlib_frame_queue_elt_t *elt;
u32 *from, *to;
vlib_main_t *vm = vlib_get_main ();
clib_error_t *e;
- ASSERT (vm->cpu_index == os_get_cpu_number ());
+ ASSERT (vm->thread_index == vlib_get_thread_index ());
vlib_worker_thread_init (w);
clib_time_init (&vm->clib_time);
clib_mem_set_heap (w->thread_mheap);
+ /* Wait until the dpdk init sequence is complete */
+ while (tm->extern_thread_mgmt && tm->worker_thread_release == 0)
+ vlib_worker_thread_barrier_check ();
+
e = vlib_call_init_exit_functions
(vm, vm->worker_init_function_registrations, 1 /* call_once */ );
if (e)
clib_error_report (e);
- /* Wait until the dpdk init sequence is complete */
- while (tm->extern_thread_mgmt && tm->worker_thread_release == 0)
- vlib_worker_thread_barrier_check ();
-
vlib_worker_loop (vm);
}