2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
20 #include <vppinfra/format.h>
21 #include <vlib/vlib.h>
23 #include <vlib/threads.h>
24 #include <vlib/unix/cj.h>
28 #include <rte_config.h>
29 #include <rte_common.h>
31 #include <rte_launch.h>
32 #include <rte_lcore.h>
/* File-scope state: circular-journal log handle, default frame-queue depth,
 * and weak stand-ins for DPDK symbols so binaries that do not link DPDK
 * still resolve (the weak function pointers double as "is DPDK linked?"
 * probes elsewhere in this file). */
34 DECLARE_CJ_GLOBAL_LOG;
/* Default number of elements in a worker frame queue (power of 2). */
36 #define FRAME_QUEUE_NELTS 32
41 * Weak definitions of DPDK symbols used in this file.
42 * Needed for linking test programs without DPDK libs.
44 unsigned __thread __attribute__ ((weak)) RTE_PER_LCORE (_lcore_id);
45 struct lcore_config __attribute__ ((weak)) lcore_config[];
46 unsigned __attribute__ ((weak)) rte_socket_id ();
47 int __attribute__ ((weak)) rte_eal_remote_launch ();
/* Singleton thread-configuration state for this process. */
55 vlib_thread_main_t vlib_thread_main;
/*
 * os_get_cpu_number: derive the calling thread's index from its stack
 * pointer: n = (sp - base of stack 0) >> VLIB_LOG2_THREAD_STACK_SIZE.
 * Presumably the per-thread stacks are carved from one contiguous region
 * of equal-size slots -- TODO confirm against the allocator.
 * NOTE(review): the extraction dropped several source lines here (numbering
 * gaps) -- declarations, bounds checks, and the return are not visible.
 */
58 os_get_cpu_number (void)
64 len = vec_len (vlib_thread_stacks);
68 /* Get any old stack address. */
71 n = ((uword) sp - (uword) vlib_thread_stacks[0])
72 >> VLIB_LOG2_THREAD_STACK_SIZE;
74 /* "processes" have their own stacks, and they always run in thread 0 */
85 len = vec_len (vlib_thread_stacks);
/*
 * vlib_set_thread_name: set the current pthread's OS-visible name via
 * pthread_setname_np (declared locally to avoid requiring _GNU_SOURCE
 * prototypes). Warns, but does not fail, if the call returns non-zero.
 * NOTE(review): extraction gaps -- return type, braces and the rv
 * declaration are not visible in this chunk.
 */
93 vlib_set_thread_name (char *name)
95 int pthread_setname_np (pthread_t __target_thread, const char *__name);
97 pthread_t thread = pthread_self ();
101 rv = pthread_setname_np (thread, name);
103 clib_warning ("pthread_setname_np returned %d", rv);
/*
 * Comparator for vec_sort_with_function: orders thread registrations so
 * that those which DO clone the data structures (no_data_structure_clone
 * == 0) sort before those which do not. Arguments are pointers to
 * vlib_thread_registration_t * elements.
 */
110 vlib_thread_registration_t **tr0 = a0;
111 vlib_thread_registration_t **tr1 = a1;
113 return ((i32) ((*tr0)->no_data_structure_clone)
114 - ((i32) ((*tr1)->no_data_structure_clone)));
/*
 * vlib_sysfs_list_to_bitmap: read a sysfs cpu-list style file (e.g.
 * "0-3,8") and parse it into a clib bitmap via unformat_bitmap_list.
 * Returns the bitmap (presumably 0 on open/parse failure -- TODO confirm,
 * the fopen-failure path and return are not visible in this chunk).
 */
118 vlib_sysfs_list_to_bitmap (char *filename)
123 fp = fopen (filename, "r");
/* 256-byte line buffer is assumed sufficient for sysfs cpu lists. */
128 vec_validate (buffer, 256 - 1);
129 if (fgets ((char *) buffer, 256, fp))
132 unformat_init_string (&in, (char *) buffer,
133 strlen ((char *) buffer));
134 if (unformat (&in, "%U", unformat_bitmap_list, &r) != 1)
135 clib_warning ("unformat_bitmap_list failed");
145 /* Called early in the init sequence */
/*
 * vlib_thread_init: discover available cpus/sockets from sysfs, reserve a
 * core for the main thread (pinning it there), then walk the registered
 * thread types assigning cores, building coremasks, and computing
 * tm->n_vlib_mains. Returns a clib_error_t* on any cpu-exhaustion or
 * conflict; 0 on success (final return not visible in this chunk).
 * NOTE(review): extraction dropped lines throughout (numbering gaps);
 * several locals (avail_cpu, i, j, c, first_index, cpuset) and closing
 * braces are not visible here.
 */
148 vlib_thread_init (vlib_main_t * vm)
150 vlib_thread_main_t *tm = &vlib_thread_main;
151 vlib_worker_thread_t *w;
152 vlib_thread_registration_t *tr;
153 u32 n_vlib_mains = 1;
158 /* get bitmaps of active cpu cores and sockets */
159 tm->cpu_core_bitmap =
160 vlib_sysfs_list_to_bitmap ("/sys/devices/system/cpu/online");
161 tm->cpu_socket_bitmap =
162 vlib_sysfs_list_to_bitmap ("/sys/devices/system/node/online");
164 avail_cpu = clib_bitmap_dup (tm->cpu_core_bitmap);
/* Optionally burn the first skip_cores cpus so nothing is placed there. */
167 for (i = 0; i < tm->skip_cores; i++)
169 uword c = clib_bitmap_first_set (avail_cpu);
171 return clib_error_return (0, "no available cpus to skip");
173 avail_cpu = clib_bitmap_set (avail_cpu, c, 0);
176 /* grab cpu for main thread */
179 tm->main_lcore = clib_bitmap_first_set (avail_cpu);
180 if (tm->main_lcore == (u8) ~ 0)
181 return clib_error_return (0, "no available cpus to be used for the"
186 if (clib_bitmap_get (avail_cpu, tm->main_lcore) == 0)
187 return clib_error_return (0, "cpu %u is not available to be used"
188 " for the main thread", tm->main_lcore);
/* main_lcore is now spoken for; remove it from the available set. */
190 avail_cpu = clib_bitmap_set (avail_cpu, tm->main_lcore, 0);
192 /* assume that there is socket 0 only if there is no data from sysfs */
193 if (!tm->cpu_socket_bitmap)
194 tm->cpu_socket_bitmap = clib_bitmap_set (0, 0, 1);
196 /* pin main thread to main_lcore */
201 CPU_SET (tm->main_lcore, &cpuset);
202 pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset);
206 /* as many threads as stacks... */
207 vec_validate_aligned (vlib_worker_threads, vec_len (vlib_thread_stacks) - 1,
208 CLIB_CACHE_LINE_BYTES);
210 /* Preallocate thread 0 */
211 _vec_len (vlib_worker_threads) = 1;
212 w = vlib_worker_threads;
213 w->thread_mheap = clib_mem_get_heap ();
214 w->thread_stack = vlib_thread_stacks[0];
215 w->dpdk_lcore_id = -1;
216 w->lwp = syscall (SYS_gettid);
217 tm->n_vlib_mains = 1;
219 /* assign threads to cores and set n_vlib_mains */
224 vec_add1 (tm->registrations, tr);
/* Clone-capable registrations first, so their indices are contiguous. */
228 vec_sort_with_function (tm->registrations, sort_registrations_by_no_clone);
230 for (i = 0; i < vec_len (tm->registrations); i++)
233 tr = tm->registrations[i];
234 tr->first_index = first_index;
235 first_index += tr->count;
/* Only data-structure-cloning threads count as extra vlib mains. */
236 n_vlib_mains += (tr->no_data_structure_clone == 0) ? tr->count : 0;
238 /* construct coremask */
239 if (tr->use_pthreads || !tr->count)
/* Explicit coremask: verify every requested cpu is still available. */
246 clib_bitmap_foreach (c, tr->coremask, ({
247 if (clib_bitmap_get(avail_cpu, c) == 0)
248 return clib_error_return (0, "cpu %u is not available to be used"
249 " for the '%s' thread",c, tr->name);
251 avail_cpu = clib_bitmap_set(avail_cpu, c, 0);
/* No explicit coremask: take the next tr->count free cpus. */
258 for (j = 0; j < tr->count; j++)
260 uword c = clib_bitmap_first_set (avail_cpu);
262 return clib_error_return (0,
263 "no available cpus to be used for"
264 " the '%s' thread", tr->name);
266 avail_cpu = clib_bitmap_set (avail_cpu, c, 0);
267 tr->coremask = clib_bitmap_set (tr->coremask, c, 1);
272 clib_bitmap_free (avail_cpu);
274 tm->n_vlib_mains = n_vlib_mains;
/* Grow the worker vector to cover every assigned thread index. */
276 vec_validate_aligned (vlib_worker_threads, first_index - 1,
277 CLIB_CACHE_LINE_BYTES);
/* Early-frame-discard defaults: disabled, threshold as a % of queue depth. */
280 tm->efd.enabled = VLIB_EFD_DISABLED;
281 tm->efd.queue_hi_thresh = ((VLIB_EFD_DEF_WORKER_HI_THRESH_PCT *
282 FRAME_QUEUE_NELTS) / 100);
/*
 * vlib_alloc_thread: append a new worker-thread slot and bind it to the
 * pre-allocated stack with the matching index. Warns and (presumably)
 * returns 0 when all stacks are in use -- the failure return is not
 * visible in this chunk; TODO confirm.
 */
286 vlib_worker_thread_t *
287 vlib_alloc_thread (vlib_main_t * vm)
289 vlib_worker_thread_t *w;
291 if (vec_len (vlib_worker_threads) >= vec_len (vlib_thread_stacks))
293 clib_warning ("out of worker threads... Quitting...");
296 vec_add2 (vlib_worker_threads, w, 1);
/* Stack index mirrors the thread's position in the worker vector. */
297 w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
/*
 * vlib_frame_queue_alloc: allocate and zero a cache-line-aligned
 * vlib_frame_queue_t with nelts ring slots. nelts MUST be a power of 2
 * (ring indexing elsewhere uses "& (nelts - 1)"). Emits diagnostics to
 * stderr when head/tail/elts land misaligned relative to cache lines.
 * NOTE(review): extraction gaps -- fq->nelts assignment and the return
 * statement are not visible here.
 */
302 vlib_frame_queue_alloc (int nelts)
304 vlib_frame_queue_t *fq;
306 fq = clib_mem_alloc_aligned (sizeof (*fq), CLIB_CACHE_LINE_BYTES);
307 memset (fq, 0, sizeof (*fq));
309 fq->vector_threshold = 128; // packets
310 vec_validate_aligned (fq->elts, nelts - 1, CLIB_CACHE_LINE_BYTES);
/* Sanity diagnostics: producer/consumer indices should each own a line. */
314 if (((uword) & fq->tail) & (CLIB_CACHE_LINE_BYTES - 1))
315 fformat (stderr, "WARNING: fq->tail unaligned\n");
316 if (((uword) & fq->head) & (CLIB_CACHE_LINE_BYTES - 1))
317 fformat (stderr, "WARNING: fq->head unaligned\n");
318 if (((uword) fq->elts) & (CLIB_CACHE_LINE_BYTES - 1))
319 fformat (stderr, "WARNING: fq->elts unaligned\n");
321 if (sizeof (fq->elts[0]) % CLIB_CACHE_LINE_BYTES)
322 fformat (stderr, "WARNING: fq->elts[0] size %d\n",
323 sizeof (fq->elts[0]));
324 if (nelts & (nelts - 1))
326 fformat (stderr, "FATAL: nelts MUST be a power of 2\n");
/* Weak no-op stub: overridden by the real API-message handler when the
 * api layer is linked in; body (if any) is not visible in this chunk. */
334 void vl_msg_api_handler_no_free (void *) __attribute__ ((weak));
336 vl_msg_api_handler_no_free (void *v)
340 /* Turned off, save as reference material... */
/*
 * Legacy dequeue path (disabled, kept as reference): drains thread_id's
 * frame queue, dispatching each element by msg_type -- free buffers,
 * free frame, queue a pending dispatch frame, or hand off an API message.
 * NOTE(review): extraction gaps -- loop structure, head advance, element
 * invalidation and return value are not visible here.
 */
343 vlib_frame_queue_dequeue_internal (int thread_id,
344 vlib_main_t * vm, vlib_node_main_t * nm)
346 vlib_frame_queue_t *fq = vlib_frame_queues[thread_id];
347 vlib_frame_queue_elt_t *elt;
349 vlib_pending_frame_t *p;
350 vlib_node_runtime_t *r;
351 u32 node_runtime_index;
356 ASSERT (vm == vlib_mains[thread_id]);
/* Empty ring: head has caught up with tail. */
360 if (fq->head == fq->tail)
363 elt = fq->elts + ((fq->head + 1) & (fq->nelts - 1));
368 before = clib_cpu_time_now ();
371 node_runtime_index = elt->node_runtime_index;
372 msg_type = elt->msg_type;
376 case VLIB_FRAME_QUEUE_ELT_FREE_BUFFERS:
377 vlib_buffer_free (vm, vlib_frame_vector_args (f), f->n_vectors);
378 /* note fallthrough... */
379 case VLIB_FRAME_QUEUE_ELT_FREE_FRAME:
380 r = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
382 vlib_frame_free (vm, r, f);
384 case VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME:
/* Queue the frame for dispatch on this thread's pending-frame list. */
385 vec_add2 (vm->node_main.pending_frames, p, 1);
386 f->flags |= (VLIB_FRAME_PENDING | VLIB_FRAME_FREE_AFTER_DISPATCH);
387 p->node_runtime_index = elt->node_runtime_index;
388 p->frame_index = vlib_frame_index (vm, f);
389 p->next_frame_index = VLIB_PENDING_FRAME_NO_NEXT_FRAME;
390 fq->dequeue_vectors += (u64) f->n_vectors;
392 case VLIB_FRAME_QUEUE_ELT_API_MSG:
393 vl_msg_api_handler_no_free (f);
396 clib_warning ("bogus frame queue message, type %d", msg_type);
401 fq->dequeue_ticks += clib_cpu_time_now () - before;
402 CLIB_MEMORY_BARRIER ();
/* Thin public wrapper around the internal dequeue routine. */
411 vlib_frame_queue_dequeue (int thread_id,
412 vlib_main_t * vm, vlib_node_main_t * nm)
414 return vlib_frame_queue_dequeue_internal (thread_id, vm, nm);
/*
 * vlib_frame_queue_enqueue: producer side of the handoff ring. Claims a
 * tail slot atomically, spins (with barrier checks) until the consumer
 * frees ring space, then fills the element and publishes it with a
 * memory barrier. Multiple producers are supported via the atomic
 * tail increment.
 * NOTE(review): extraction gaps -- local declarations (new_tail,
 * save_count), the elt->frame assignment and the return are not visible.
 */
418 vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index,
419 u32 frame_queue_index, vlib_frame_t * frame,
420 vlib_frame_queue_msg_type_t type)
422 vlib_frame_queue_t *fq = vlib_frame_queues[frame_queue_index];
423 vlib_frame_queue_elt_t *elt;
426 u64 before = clib_cpu_time_now ();
/* Atomically claim the next slot; other producers may race us. */
430 new_tail = __sync_add_and_fetch (&fq->tail, 1);
432 /* Wait until a ring slot is available */
433 while (new_tail >= fq->head + fq->nelts)
435 f64 b4 = vlib_time_now_ticks (vm, before);
/* Keep servicing barrier requests while we spin, or we can deadlock. */
436 vlib_worker_thread_barrier_check (vm, b4);
437 /* Bad idea. Dequeue -> enqueue -> dequeue -> trouble */
438 // vlib_frame_queue_dequeue (vm->cpu_index, vm, nm);
441 elt = fq->elts + (new_tail & (fq->nelts - 1));
443 /* this would be very bad... */
448 /* Once we enqueue the frame, frame->n_vectors is owned elsewhere... */
449 save_count = frame->n_vectors;
452 elt->node_runtime_index = node_runtime_index;
453 elt->msg_type = type;
/* Publish: consumer must not observe msg_type before the fields above. */
454 CLIB_MEMORY_BARRIER ();
461 /* To be called by vlib worker threads upon startup */
/*
 * Per-worker startup: block all signals, switch to the worker's private
 * heap, build and install the thread name, then rendezvous at the
 * initial barrier (increment workers_at_barrier, spin until released,
 * decrement). NOTE(review): extraction gaps -- sigset_t setup and a few
 * statements are not visible here.
 */
463 vlib_worker_thread_init (vlib_worker_thread_t * w)
465 vlib_thread_main_t *tm = vlib_get_thread_main ();
467 /* worker threads wants no signals. */
471 pthread_sigmask (SIG_SETMASK, &s, 0);
474 clib_mem_set_heap (w->thread_mheap);
476 if (vec_len (tm->thread_prefix) && w->registration->short_name)
478 w->name = format (0, "%v_%s_%d%c", tm->thread_prefix,
479 w->registration->short_name, w->instance_id, '\0');
480 vlib_set_thread_name ((char *) w->name);
483 if (!w->registration->use_pthreads)
486 /* Initial barrier sync, for both worker and i/o threads */
487 clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, 1);
489 while (*vlib_worker_threads->wait_at_barrier)
492 clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, -1);
/*
 * Bootstrap trampoline run as the thread entry point: records the kernel
 * tid, discovers the DPDK lcore id if DPDK is linked (weak rte_socket_id
 * symbol non-NULL), then jumps onto the thread's private vlib stack via
 * clib_calljmp to run w->thread_function. Not expected to return.
 */
497 vlib_worker_thread_bootstrap_fn (void *arg)
500 vlib_worker_thread_t *w = arg;
502 w->lwp = syscall (SYS_gettid);
503 w->dpdk_lcore_id = -1;
505 if (w->registration && !w->registration->use_pthreads && rte_socket_id)	/* do we really have dpdk linked */
507 unsigned lcore = rte_lcore_id ();
/* Out-of-range lcore ids are normalized to -1 ("no lcore"). */
508 lcore = lcore < RTE_MAX_LCORE ? lcore : -1;
509 w->dpdk_lcore_id = lcore;
513 rv = (void *) clib_calljmp
514 ((uword (*)(uword)) w->thread_function,
515 (uword) arg, w->thread_stack + VLIB_THREAD_STACK_SIZE);
516 /* NOTREACHED, we hope */
/*
 * vlib_launch_thread: start worker w on lcore_id. EAL-style threads go
 * through rte_eal_remote_launch when DPDK is linked (weak symbol check);
 * otherwise a plain pthread is created and pinned to lcore_id with
 * pthread_setaffinity_np. Returns 0 on success, non-zero otherwise.
 * NOTE(review): extraction gaps -- cpuset init and the pthread_create
 * error check are not visible here.
 */
521 vlib_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id)
523 void *(*fp_arg) (void *) = fp;
526 if (!w->registration->use_pthreads)
527 if (rte_eal_remote_launch)	/* do we have dpdk linked */
528 return rte_eal_remote_launch (fp, (void *) w, lcore_id);
538 CPU_SET (lcore_id, &cpuset);
540 ret = pthread_create (&worker, NULL /* attr */ , fp_arg, (void *) w);
542 return pthread_setaffinity_np (worker, sizeof (cpu_set_t), &cpuset);
/*
 * start_workers: main-loop-enter hook that creates every configured
 * worker thread. For data-structure-cloning registrations it forks
 * vlib_main_t, node_main, error counters and buffer free lists into each
 * worker's private heap; for non-cloning ones it only allocates the
 * thread record. Finally it launches all threads on their assigned
 * cores and performs an initial barrier sync/release.
 * NOTE(review): extraction dropped many lines (numbering gaps); loop
 * braces, some locals (i, j, k, oldheap, first vec_resets) and several
 * statements are not visible -- do not infer control flow from
 * indentation here.
 */
548 static clib_error_t *
549 start_workers (vlib_main_t * vm)
552 vlib_worker_thread_t *w;
553 vlib_main_t *vm_clone;
555 vlib_frame_queue_t *fq;
556 vlib_thread_main_t *tm = &vlib_thread_main;
557 vlib_thread_registration_t *tr;
558 vlib_node_runtime_t *rt;
559 u32 n_vlib_mains = tm->n_vlib_mains;
560 u32 worker_thread_index;
561 u8 *main_heap = clib_mem_get_per_cpu_heap ();
562 mheap_t *main_heap_header = mheap_header (main_heap);
564 vec_reset_length (vlib_worker_threads);
566 /* Set up the main thread */
567 vec_add2_aligned (vlib_worker_threads, w, 1, CLIB_CACHE_LINE_BYTES);
568 w->elog_track.name = "main thread";
569 elog_track_register (&vm->elog_main, &w->elog_track);
571 if (vec_len (tm->thread_prefix))
573 w->name = format (0, "%v_main%c", tm->thread_prefix, '\0');
574 vlib_set_thread_name ((char *) w->name);
578 w->dpdk_lcore_id = -1;
579 if (rte_socket_id)		/* do we really have dpdk linked */
581 unsigned lcore = rte_lcore_id ();
582 w->dpdk_lcore_id = lcore < RTE_MAX_LCORE ? lcore : -1;;
587 * Truth of the matter: we always use at least two
588 * threads. So, make the main heap thread-safe
589 * and make the event log thread-safe.
591 main_heap_header->flags |= MHEAP_FLAG_THREAD_SAFE;
593 clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
594 vm->elog_main.lock[0] = 0;
596 if (n_vlib_mains > 1)
/* Slot 0 of vlib_mains / vlib_frame_queues belongs to the main thread. */
598 vec_validate (vlib_mains, tm->n_vlib_mains - 1);
599 _vec_len (vlib_mains) = 0;
600 vec_add1 (vlib_mains, vm);
602 vec_validate (vlib_frame_queues, tm->n_vlib_mains - 1);
603 _vec_len (vlib_frame_queues) = 0;
604 fq = vlib_frame_queue_alloc (FRAME_QUEUE_NELTS);
605 vec_add1 (vlib_frame_queues, fq);
607 vlib_worker_threads->wait_at_barrier =
608 clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES);
609 vlib_worker_threads->workers_at_barrier =
610 clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES);
612 /* Ask for an initial barrier sync */
613 *vlib_worker_threads->workers_at_barrier = 0;
614 *vlib_worker_threads->wait_at_barrier = 1;
616 worker_thread_index = 1;
618 for (i = 0; i < vec_len (tm->registrations); i++)
620 vlib_node_main_t *nm, *nm_clone;
621 vlib_buffer_main_t *bm_clone;
622 vlib_buffer_free_list_t *fl_clone, *fl_orig;
623 vlib_buffer_free_list_t *orig_freelist_pool;
626 tr = tm->registrations[i];
631 for (k = 0; k < tr->count; k++)
633 vec_add2 (vlib_worker_threads, w, 1);
/* Private per-worker heap when a size is configured; else share main's. */
636 mheap_alloc (0 /* use VM */ , tr->mheap_size);
638 w->thread_mheap = main_heap;
639 w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
640 w->thread_function = tr->function;
641 w->thread_function_arg = w;
643 w->registration = tr;
646 (char *) format (0, "%s %d", tr->name, k + 1);
647 vec_add1 (w->elog_track.name, 0);
648 elog_track_register (&vm->elog_main, &w->elog_track);
650 if (tr->no_data_structure_clone)
653 /* Allocate "to-worker-N" frame queue */
654 if (tr->frame_queue_nelts)
656 fq = vlib_frame_queue_alloc (tr->frame_queue_nelts);
660 fq = vlib_frame_queue_alloc (FRAME_QUEUE_NELTS);
663 vec_validate (vlib_frame_queues, worker_thread_index);
664 vlib_frame_queues[worker_thread_index] = fq;
666 /* Fork vlib_global_main et al. Look for bugs here */
667 oldheap = clib_mem_set_heap (w->thread_mheap);
669 vm_clone = clib_mem_alloc (sizeof (*vm_clone));
670 clib_memcpy (vm_clone, vlib_mains[0], sizeof (*vm_clone));
672 vm_clone->cpu_index = worker_thread_index;
673 vm_clone->heap_base = w->thread_mheap;
674 vm_clone->mbuf_alloc_list = 0;
675 memset (&vm_clone->random_buffer, 0,
676 sizeof (vm_clone->random_buffer));
678 nm = &vlib_mains[0]->node_main;
679 nm_clone = &vm_clone->node_main;
680 /* fork next frames array, preserving node runtime indices */
681 nm_clone->next_frames = vec_dup (nm->next_frames);
682 for (j = 0; j < vec_len (nm_clone->next_frames); j++)
684 vlib_next_frame_t *nf = &nm_clone->next_frames[j];
685 u32 save_node_runtime_index;
688 save_node_runtime_index = nf->node_runtime_index;
689 save_flags = nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH;
690 vlib_next_frame_init (nf);
691 nf->node_runtime_index = save_node_runtime_index;
692 nf->flags = save_flags;
695 /* fork the frame dispatch queue */
696 nm_clone->pending_frames = 0;
697 vec_validate (nm_clone->pending_frames, 10);	/* $$$$$?????? */
698 _vec_len (nm_clone->pending_frames) = 0;
702 for (j = 0; j < vec_len (nm->nodes); j++)
705 n = clib_mem_alloc_no_fail (sizeof (*n));
706 clib_memcpy (n, nm->nodes[j], sizeof (*n));
707 /* none of the copied nodes have enqueue rights given out */
708 n->owner_node_index = VLIB_INVALID_NODE_INDEX;
709 memset (&n->stats_total, 0, sizeof (n->stats_total));
710 memset (&n->stats_last_clear, 0,
711 sizeof (n->stats_last_clear));
712 vec_add1 (nm_clone->nodes, n);
714 nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
715 vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
717 nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
718 vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]);
719 vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
720 rt->cpu_index = vm_clone->cpu_index;
722 nm_clone->processes = vec_dup (nm->processes);
724 /* zap the (per worker) frame freelists, etc */
725 nm_clone->frame_sizes = 0;
726 nm_clone->frame_size_hash = 0;
728 /* Packet trace buffers are guaranteed to be empty, nothing to do here */
730 clib_mem_set_heap (oldheap);
731 vec_add1 (vlib_mains, vm_clone);
733 vm_clone->error_main.counters =
734 vec_dup (vlib_mains[0]->error_main.counters);
735 vm_clone->error_main.counters_last_clear =
736 vec_dup (vlib_mains[0]->error_main.counters_last_clear);
738 /* Fork the vlib_buffer_main_t free lists, etc. */
739 bm_clone = vec_dup (vm_clone->buffer_main);
740 vm_clone->buffer_main = bm_clone;
742 orig_freelist_pool = bm_clone->buffer_free_list_pool;
743 bm_clone->buffer_free_list_pool = 0;
/* Clone each free list; indices must match the original pool exactly. */
746 pool_foreach (fl_orig, orig_freelist_pool,
748 pool_get_aligned (bm_clone->buffer_free_list_pool,
749 fl_clone, CLIB_CACHE_LINE_BYTES);
750 ASSERT (fl_orig - orig_freelist_pool
751 == fl_clone - bm_clone->buffer_free_list_pool);
753 fl_clone[0] = fl_orig[0];
754 fl_clone->aligned_buffers = 0;
755 fl_clone->unaligned_buffers = 0;
756 fl_clone->n_alloc = 0;
760 worker_thread_index++;
766 /* only have non-data-structure copy threads to create... */
767 for (i = 0; i < vec_len (tm->registrations); i++)
769 tr = tm->registrations[i];
771 for (j = 0; j < tr->count; j++)
773 vec_add2 (vlib_worker_threads, w, 1);
776 mheap_alloc (0 /* use VM */ , tr->mheap_size);
778 w->thread_mheap = main_heap;
779 w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
780 w->thread_function = tr->function;
781 w->thread_function_arg = w;
784 (char *) format (0, "%s %d", tr->name, j + 1);
785 w->registration = tr;
786 vec_add1 (w->elog_track.name, 0);
787 elog_track_register (&vm->elog_main, &w->elog_track);
792 worker_thread_index = 1;
794 for (i = 0; i < vec_len (tm->registrations); i++)
798 tr = tm->registrations[i];
800 if (tr->use_pthreads || tm->use_pthreads)
802 for (j = 0; j < tr->count; j++)
804 w = vlib_worker_threads + worker_thread_index++;
805 if (vlib_launch_thread (vlib_worker_thread_bootstrap_fn, w, 0) <
807 clib_warning ("Couldn't start '%s' pthread ", tr->name);
/* EAL threads: launch one per bit in the registration's coremask. */
814 clib_bitmap_foreach (c, tr->coremask, ({
815 w = vlib_worker_threads + worker_thread_index++;
816 if (vlib_launch_thread (vlib_worker_thread_bootstrap_fn, w, c) < 0)
817 clib_warning ("Couldn't start DPDK lcore %d", c);
823 vlib_worker_thread_barrier_sync (vm);
824 vlib_worker_thread_barrier_release (vm);
/*
 * vlib_worker_thread_node_runtime_update: re-synchronize every worker's
 * cloned node_main after the main thread added/changed nodes. Must run
 * on thread 0 with workers held at the barrier. Scrapes runtime stats
 * from all threads first (so nothing is lost), then for each worker:
 * re-clones the error counters, next-frames, node vector (preserving
 * per-node stats and state where the old clone had them), internal and
 * input runtimes, and processes -- all inside the worker's private heap.
 * NOTE(review): extraction dropped lines (numbering gaps); some locals
 * (i, j, oldheap, nm/vm for the stats scrape) and braces are missing
 * from view.
 */
831 vlib_worker_thread_node_runtime_update (void)
834 vlib_worker_thread_t *w;
836 vlib_node_main_t *nm, *nm_clone;
837 vlib_node_t **old_nodes_clone;
838 vlib_main_t *vm_clone;
839 vlib_node_runtime_t *rt, *old_rt;
842 vlib_node_runtime_sync_stats (vlib_main_t * vm,
843 vlib_node_runtime_t * r,
845 uword n_vectors, uword n_clocks);
847 ASSERT (os_get_cpu_number () == 0);
849 if (vec_len (vlib_mains) == 0)
855 ASSERT (os_get_cpu_number () == 0);
856 ASSERT (*vlib_worker_threads->wait_at_barrier == 1);
859 * Scrape all runtime stats, so we don't lose node runtime(s) with
860 * pending counts, or throw away worker / io thread counts.
862 for (j = 0; j < vec_len (nm->nodes); j++)
866 vlib_node_sync_stats (vm, n);
869 for (i = 1; i < vec_len (vlib_mains); i++)
873 vm_clone = vlib_mains[i];
874 nm_clone = &vm_clone->node_main;
876 for (j = 0; j < vec_len (nm_clone->nodes); j++)
878 n = nm_clone->nodes[j];
880 rt = vlib_node_get_runtime (vm_clone, n->index);
881 vlib_node_runtime_sync_stats (vm_clone, rt, 0, 0, 0);
/* Second pass: rebuild each worker's clones inside its own heap. */
885 for (i = 1; i < vec_len (vlib_mains); i++)
887 vlib_node_runtime_t *rt;
888 w = vlib_worker_threads + i;
889 oldheap = clib_mem_set_heap (w->thread_mheap);
891 vm_clone = vlib_mains[i];
893 /* Re-clone error heap */
894 u64 *old_counters = vm_clone->error_main.counters;
895 u64 *old_counters_all_clear = vm_clone->error_main.counters_last_clear;
896 clib_memcpy (&vm_clone->error_main, &vm->error_main,
897 sizeof (vm->error_main));
898 j = vec_len (vm->error_main.counters) - 1;
899 vec_validate_aligned (old_counters, j, CLIB_CACHE_LINE_BYTES);
900 vec_validate_aligned (old_counters_all_clear, j, CLIB_CACHE_LINE_BYTES);
901 vm_clone->error_main.counters = old_counters;
902 vm_clone->error_main.counters_last_clear = old_counters_all_clear;
904 nm_clone = &vm_clone->node_main;
905 vec_free (nm_clone->next_frames);
906 nm_clone->next_frames = vec_dup (nm->next_frames);
908 for (j = 0; j < vec_len (nm_clone->next_frames); j++)
910 vlib_next_frame_t *nf = &nm_clone->next_frames[j];
911 u32 save_node_runtime_index;
914 save_node_runtime_index = nf->node_runtime_index;
915 save_flags = nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH;
916 vlib_next_frame_init (nf);
917 nf->node_runtime_index = save_node_runtime_index;
918 nf->flags = save_flags;
921 old_nodes_clone = nm_clone->nodes;
925 for (j = 0; j < vec_len (nm->nodes); j++)
927 vlib_node_t *old_n_clone;
928 vlib_node_t *new_n, *new_n_clone;
930 new_n = nm->nodes[j];
931 old_n_clone = old_nodes_clone[j];
933 new_n_clone = clib_mem_alloc_no_fail (sizeof (*new_n_clone));
934 clib_memcpy (new_n_clone, new_n, sizeof (*new_n));
935 /* none of the copied nodes have enqueue rights given out */
936 new_n_clone->owner_node_index = VLIB_INVALID_NODE_INDEX;
938 if (j >= vec_len (old_nodes_clone))
940 /* new node, set to zero */
941 memset (&new_n_clone->stats_total, 0,
942 sizeof (new_n_clone->stats_total));
943 memset (&new_n_clone->stats_last_clear, 0,
944 sizeof (new_n_clone->stats_last_clear));
948 /* Copy stats if the old data is valid */
949 clib_memcpy (&new_n_clone->stats_total,
950 &old_n_clone->stats_total,
951 sizeof (new_n_clone->stats_total));
952 clib_memcpy (&new_n_clone->stats_last_clear,
953 &old_n_clone->stats_last_clear,
954 sizeof (new_n_clone->stats_last_clear));
956 /* keep previous node state */
957 new_n_clone->state = old_n_clone->state;
959 vec_add1 (nm_clone->nodes, new_n_clone);
961 /* Free the old node clone */
962 for (j = 0; j < vec_len (old_nodes_clone); j++)
963 clib_mem_free (old_nodes_clone[j]);
964 vec_free (old_nodes_clone);
966 vec_free (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
968 nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
969 vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
971 /* clone input node runtime */
972 old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT];
974 nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
975 vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]);
977 vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
979 rt->cpu_index = vm_clone->cpu_index;
/* Carry the old per-node runtime state (polling/interrupt) forward. */
982 for (j = 0; j < vec_len (old_rt); j++)
984 rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
985 rt->state = old_rt[j].state;
990 nm_clone->processes = vec_dup (nm->processes);
992 clib_mem_set_heap (oldheap);
994 // vnet_main_fork_fixup (i);
/*
 * cpu_config: early config handler for the "cpu { ... }" stanza.
 * Recognized keys: use-pthreads, thread-prefix, main-core, skip-cores,
 * coremask-<type> (hex mask), corelist-<type> (list syntax), and
 * "<type> <count>". Populates tm->registrations (indexed by name hash)
 * and derives n_thread_stacks / n_pthreads / n_eal_threads.
 * NOTE(review): extraction gaps -- registration-walk loops, several
 * unformat failure branches and the final return are not visible here.
 */
998 static clib_error_t *
999 cpu_config (vlib_main_t * vm, unformat_input_t * input)
1001 vlib_thread_registration_t *tr;
1003 vlib_thread_main_t *tm = &vlib_thread_main;
1009 tm->thread_registrations_by_name = hash_create_string (0, sizeof (uword));
1010 tm->n_thread_stacks = 1;	/* account for main thread */
1016 hash_set_mem (tm->thread_registrations_by_name, tr->name, (uword) tr);
1020 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1022 if (unformat (input, "use-pthreads"))
1023 tm->use_pthreads = 1;
1024 else if (unformat (input, "thread-prefix %v", &tm->thread_prefix))
1026 else if (unformat (input, "main-core %u", &tm->main_lcore))
1028 else if (unformat (input, "skip-cores %u", &tm->skip_cores))
1030 else if (unformat (input, "coremask-%s %llx", &name, &coremask))
1032 p = hash_get_mem (tm->thread_registrations_by_name, name);
1034 return clib_error_return (0, "no such thread type '%s'", name);
1036 tr = (vlib_thread_registration_t *) p[0];
1038 if (tr->use_pthreads)
1039 return clib_error_return (0,
1040 "coremask cannot be set for '%s' threads",
1043 tr->coremask = clib_bitmap_set_multiple
1044 (tr->coremask, 0, coremask, BITS (coremask));
1045 tr->count = clib_bitmap_count_set_bits (tr->coremask);
1047 else if (unformat (input, "corelist-%s %U", &name, unformat_bitmap_list,
1050 p = hash_get_mem (tm->thread_registrations_by_name, name);
1052 return clib_error_return (0, "no such thread type '%s'", name);
1054 tr = (vlib_thread_registration_t *) p[0];
1056 if (tr->use_pthreads)
1057 return clib_error_return (0,
1058 "corelist cannot be set for '%s' threads",
1061 tr->coremask = bitmap;
1062 tr->count = clib_bitmap_count_set_bits (tr->coremask);
1064 else if (unformat (input, "%s %u", &name, &count))
1066 p = hash_get_mem (tm->thread_registrations_by_name, name);
1068 return clib_error_return (0, "no such thread type '%s'", name);
1070 tr = (vlib_thread_registration_t *) p[0];
1071 if (tr->fixed_count)
1072 return clib_error_return
1073 (0, "number of %s threads not configurable", tr->name);
1082 if (!tm->thread_prefix)
1083 tm->thread_prefix = format (0, "vpp");
/* Per-registration totals: stacks always; pthread vs EAL split by flag. */
1087 tm->n_thread_stacks += tr->count;
1088 tm->n_pthreads += tr->count * tr->use_pthreads;
1089 tm->n_eal_threads += tr->count * (tr->use_pthreads == 0);
/* Fallback stubs for architectures lacking native 8-byte atomics: these
 * merely log that they were (unexpectedly) called. Bodies/returns not
 * fully visible in this chunk. */
1098 #if !defined (__x86_64__) && !defined (__aarch64__) && !defined (__powerpc64__) && !defined(__arm__)
1100 __sync_fetch_and_add_8 (void)
1102 fformat (stderr, "%s called\n", __FUNCTION__);
1107 __sync_add_and_fetch_8 (void)
1109 fformat (stderr, "%s called\n", __FUNCTION__);
/* Weak no-op default; vnet provides the real fork-fixup when linked. */
1114 void vnet_main_fixup (vlib_fork_fixup_t which) __attribute__ ((weak));
1116 vnet_main_fixup (vlib_fork_fixup_t which)
/*
 * vlib_worker_thread_fork_fixup: run a fork-fixup (currently only
 * NEW_SW_IF_INDEX) under a full worker barrier so the per-worker clones
 * are quiescent while vnet_main_fixup patches them. No-op before
 * workers exist (vlib_mains == 0). Main-thread only.
 */
1121 vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which)
1123 vlib_main_t *vm = vlib_get_main ();
1125 if (vlib_mains == 0)
1128 ASSERT (os_get_cpu_number () == 0);
1129 vlib_worker_thread_barrier_sync (vm);
1133 case VLIB_WORKER_THREAD_FORK_FIXUP_NEW_SW_IF_INDEX:
1134 vnet_main_fixup (VLIB_WORKER_THREAD_FORK_FIXUP_NEW_SW_IF_INDEX);
1140 vlib_worker_thread_barrier_release (vm);
/*
 * vlib_worker_thread_barrier_sync: main-thread side of the barrier.
 * Raises wait_at_barrier, then spins until every worker has checked in
 * (workers_at_barrier == worker count). Recursion-safe via a depth
 * counter; reports (to stderr) an apparent deadlock after
 * BARRIER_SYNC_TIMEOUT. NOTE(review): what happens after the timeout
 * message is not visible in this chunk -- confirm before relying on it.
 */
1144 vlib_worker_thread_barrier_sync (vlib_main_t * vm)
1152 count = vec_len (vlib_mains) - 1;
1154 /* Tolerate recursive calls */
1155 if (++vlib_worker_threads[0].recursion_level > 1)
1158 vlib_worker_threads[0].barrier_sync_count++;
1160 ASSERT (os_get_cpu_number () == 0);
1162 deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
1164 *vlib_worker_threads->wait_at_barrier = 1;
1165 while (*vlib_worker_threads->workers_at_barrier != count)
1167 if (vlib_time_now (vm) > deadline)
1169 fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__);
/*
 * vlib_worker_thread_barrier_release: drop wait_at_barrier and spin
 * until all workers have left (workers_at_barrier back to 0), with the
 * same recursion counter and deadlock timeout as the sync side.
 */
1176 vlib_worker_thread_barrier_release (vlib_main_t * vm)
1183 if (--vlib_worker_threads[0].recursion_level > 0)
1186 deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
1188 *vlib_worker_threads->wait_at_barrier = 0;
1190 while (*vlib_worker_threads->workers_at_barrier > 0)
1192 if (vlib_time_now (vm) > deadline)
1194 fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__);
1201 * Check the frame queue to see if any frames are available.
1202 * If so, pull the packets off the frames and put them to
/*
 * Active dequeue path: drains this thread's handoff queue, copying
 * buffer indices out of each DISPATCH_FRAME element into fresh frames
 * for the handoff-dispatch node, with optional per-queue tracing and a
 * histogram of queue occupancy. Stops early once vector_threshold
 * packets have been pushed into the graph. head_hint is published for
 * producers at each stopping point.
 * NOTE(review): extraction gaps -- several locals (f, from, to,
 * n_left_to_node, vectors, elix, msg_type), loop braces, the 4-at-a-time
 * copy bodies and head advancement are not visible here.
 */
1206 vlib_frame_queue_dequeue_internal (vlib_main_t * vm)
1208 u32 thread_id = vm->cpu_index;
1209 vlib_frame_queue_t *fq = vlib_frame_queues[thread_id];
1210 vlib_frame_queue_elt_t *elt;
1217 vlib_thread_main_t *tm = vlib_get_thread_main ();
1220 ASSERT (vm == vlib_mains[thread_id]);
1222 if (PREDICT_FALSE (tm->handoff_dispatch_node_index == ~0))
1225 * Gather trace data for frame queues
1227 if (PREDICT_FALSE (fq->trace))
1229 frame_queue_trace_t *fqt;
1230 frame_queue_nelt_counter_t *fqh;
1233 fqt = &tm->frame_queue_traces[thread_id];
1235 fqt->nelts = fq->nelts;
1236 fqt->head = fq->head;
1237 fqt->head_hint = fq->head_hint;
1238 fqt->tail = fq->tail;
1239 fqt->threshold = fq->vector_threshold;
1240 fqt->n_in_use = fqt->tail - fqt->head;
1241 if (fqt->n_in_use >= fqt->nelts)
1243 // if beyond max then use max
1244 fqt->n_in_use = fqt->nelts - 1;
1247 /* Record the number of elements in use in the histogram */
1248 fqh = &tm->frame_queue_histogram[thread_id];
1249 fqh->count[fqt->n_in_use]++;
1251 /* Record a snapshot of the elements in use */
1252 for (elix = 0; elix < fqt->nelts; elix++)
1254 elt = fq->elts + ((fq->head + 1 + elix) & (fq->nelts - 1));
1255 if (1 || elt->valid)
1257 fqt->n_vectors[elix] = elt->n_vectors;
/* Empty queue: publish head position for producers and bail. */
1265 if (fq->head == fq->tail)
1267 fq->head_hint = fq->head;
1271 elt = fq->elts + ((fq->head + 1) & (fq->nelts - 1));
1275 fq->head_hint = fq->head;
1279 from = elt->buffer_index;
1280 msg_type = elt->msg_type;
1282 ASSERT (msg_type == VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME);
1283 ASSERT (elt->n_vectors <= VLIB_FRAME_SIZE);
1285 f = vlib_get_frame_to_node (vm, tm->handoff_dispatch_node_index);
1287 to = vlib_frame_vector_args (f);
1289 n_left_to_node = elt->n_vectors;
/* Copy buffer indices 4 at a time, then mop up the remainder. */
1291 while (n_left_to_node >= 4)
1299 n_left_to_node -= 4;
1302 while (n_left_to_node > 0)
1310 vectors += elt->n_vectors;
1311 f->n_vectors = elt->n_vectors;
1312 vlib_put_frame_to_node (vm, tm->handoff_dispatch_node_index, f);
/* Poison the consumed element before releasing the slot. */
1316 elt->msg_type = 0xfefefefe;
1317 CLIB_MEMORY_BARRIER ();
1322 * Limit the number of packets pushed into the graph
1324 if (vectors >= fq->vector_threshold)
1326 fq->head_hint = fq->head;
/*
 * vlib_worker_thread_internal: the worker main loop body. Each
 * iteration: honor any barrier request, drain the handoff queue,
 * dispatch every input node in polling mode, then run any pending
 * frames those dispatches queued, and bump the main loop counter.
 * NOTE(review): the enclosing while(1) and some braces are not visible
 * in this chunk (numbering gaps).
 */
1334 static_always_inline void
1335 vlib_worker_thread_internal (vlib_main_t * vm)
1337 vlib_node_main_t *nm = &vm->node_main;
1338 u64 cpu_time_now = clib_cpu_time_now ();
1342 vlib_worker_thread_barrier_check ();
1344 vlib_frame_queue_dequeue_internal (vm);
1346 vlib_node_runtime_t *n;
1347 vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_INPUT])
1349 cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT,
1350 VLIB_NODE_STATE_POLLING, /* frame */ 0,
1354 if (_vec_len (nm->pending_frames))
1357 cpu_time_now = clib_cpu_time_now ();
1358 for (i = 0; i < _vec_len (nm->pending_frames); i++)
1360 vlib_pending_frame_t *p;
1362 p = nm->pending_frames + i;
1364 cpu_time_now = dispatch_pending_node (vm, p, cpu_time_now);
1366 _vec_len (nm->pending_frames) = 0;
1368 vlib_increment_main_loop_counter (vm);
1370 /* Record time stamp in case there are no enabled nodes and above
1371 calls do not update time stamp. */
1372 cpu_time_now = clib_cpu_time_now ();
/*
 * vlib_worker_thread_fn: registered entry point for "workers" threads.
 * Performs per-thread init, resets the clock, switches to the private
 * heap, waits (servicing barrier checks) for worker_thread_release,
 * then enters the worker loop -- which does not return.
 */
1377 vlib_worker_thread_fn (void *arg)
1379 vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg;
1380 vlib_thread_main_t *tm = vlib_get_thread_main ();
1381 vlib_main_t *vm = vlib_get_main ();
1383 ASSERT (vm->cpu_index == os_get_cpu_number ());
1385 vlib_worker_thread_init (w);
1386 clib_time_init (&vm->clib_time);
1387 clib_mem_set_heap (w->thread_mheap);
1389 /* Wait until the dpdk init sequence is complete */
1390 while (tm->worker_thread_release == 0)
1391 vlib_worker_thread_barrier_check ();
1393 vlib_worker_thread_internal (vm);
/* Registers the standard "workers" thread type with its entry point;
 * remaining initializer fields are not visible in this chunk. */
1397 VLIB_REGISTER_THREAD (worker_thread_reg, static) = {
1400 .function = vlib_worker_thread_fn,
/* Init function: mark the handoff-dispatch node as not-yet-assigned
 * (~0 is the sentinel checked by the dequeue path). */
1405 threads_init (vlib_main_t * vm)
1407 vlib_thread_main_t *tm = vlib_get_thread_main ();
1409 tm->handoff_dispatch_node_index = ~0;
1414 VLIB_INIT_FUNCTION (threads_init);
1417 * fd.io coding-style-patch-verification: ON
1420 * eval: (c-set-style "gnu")