X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlib%2Fthreads.h;h=c1188cea9338a67422c9a56d0b2c0f38338cc744;hb=a690fdbfe179e0ea65818c03b52535bf9210efd0;hp=34ab5be8650dc8ed1830e10be940f0616279b721;hpb=7cd468a3d7dee7d6c92f69a0bb7061ae208ec727;p=vpp.git diff --git a/src/vlib/threads.h b/src/vlib/threads.h index 34ab5be8650..c1188cea933 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -62,7 +62,7 @@ typedef struct vlib_thread_registration_ #define VLIB_CPU_MASK (VLIB_MAX_CPUS - 1) /* 0x3f, max */ #define VLIB_OFFSET_MASK (~VLIB_CPU_MASK) -#define VLIB_LOG2_THREAD_STACK_SIZE (20) +#define VLIB_LOG2_THREAD_STACK_SIZE (21) #define VLIB_THREAD_STACK_SIZE (1< 0) { - if (os_get_cpu_number ()) + if (vlib_get_thread_index ()) fformat (stderr, "%s: SMP unsafe warning...\n", __FUNCTION__); } } @@ -210,42 +233,27 @@ typedef enum void vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which); -static inline void -vlib_worker_thread_barrier_check (void) -{ - if (PREDICT_FALSE (*vlib_worker_threads->wait_at_barrier)) - { - clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, 1); - while (*vlib_worker_threads->wait_at_barrier) - ; - clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, -1); - } -} - -#define foreach_vlib_main(body) \ -do { \ - vlib_main_t ** __vlib_mains = 0, *this_vlib_main; \ - int ii; \ - \ - if (vec_len (vlib_mains) == 0) \ - vec_add1 (__vlib_mains, &vlib_global_main); \ - else \ - { \ - for (ii = 0; ii < vec_len (vlib_mains); ii++) \ - { \ - this_vlib_main = vlib_mains[ii]; \ - if (this_vlib_main) \ - vec_add1 (__vlib_mains, this_vlib_main); \ - } \ - } \ - \ - for (ii = 0; ii < vec_len (__vlib_mains); ii++) \ - { \ - this_vlib_main = __vlib_mains[ii]; \ - /* body uses this_vlib_main... */ \ - (body); \ - } \ - vec_free (__vlib_mains); \ +#define foreach_vlib_main(body) \ +do { \ + vlib_main_t ** __vlib_mains = 0, *this_vlib_main; \ + int ii; \ + \ + for (ii = 0; ii < vec_len (vlib_mains); ii++) \ + { \ + this_vlib_main = vlib_mains[ii]; \ + ASSERT (ii == 0 || \ + this_vlib_main->parked_at_barrier == 1); \ + if (this_vlib_main) \ + vec_add1 (__vlib_mains, this_vlib_main); \ + } \ + \ + for (ii = 0; ii < vec_len (__vlib_mains); ii++) \ + { \ + this_vlib_main = __vlib_mains[ii]; \ + /* body uses this_vlib_main... */ \ + (body); \ + } \ + vec_free (__vlib_mains); \ } while (0); #define foreach_sched_policy \ @@ -263,6 +271,13 @@ typedef enum SCHED_POLICY_N, } sched_policy_t; +typedef struct +{ + clib_error_t *(*vlib_launch_thread_cb) (void *fp, vlib_worker_thread_t * w, + unsigned cpu_id); + clib_error_t *(*vlib_thread_set_lcore_cb) (u32 thread, u16 cpu); +} vlib_thread_callbacks_t; + typedef struct { /* Link list of registrations, built by constructors */ @@ -290,8 +305,8 @@ typedef struct /* Number of pthreads */ u32 n_pthreads; - /* Number of DPDK eal threads */ - u32 n_eal_threads; + /* Number of threads */ + u32 n_threads; /* Number of cores to skip, must match the core mask */ u32 skip_cores; @@ -300,7 +315,7 @@ typedef struct u8 *thread_prefix; /* main thread lcore */ - u8 main_lcore; + u32 main_lcore; /* Bitmap of available CPU cores */ uword *cpu_core_bitmap; @@ -320,10 +335,19 @@ typedef struct /* scheduling policy priority */ u32 sched_priority; + /* callbacks */ + vlib_thread_callbacks_t cb; + int extern_thread_mgmt; + + /* NUMA-bound heap size */ + uword numa_heap_size; + } vlib_thread_main_t; extern vlib_thread_main_t vlib_thread_main; +#include + #define VLIB_REGISTER_THREAD(x,...) \ __VA_ARGS__ vlib_thread_registration_t x; \ static void __vlib_add_thread_registration_##x (void) \ @@ -334,6 +358,13 @@ static void __vlib_add_thread_registration_##x (void) \ x.next = tm->next; \ tm->next = &x; \ } \ +static void __vlib_rm_thread_registration_##x (void) \ + __attribute__((__destructor__)) ; \ +static void __vlib_rm_thread_registration_##x (void) \ +{ \ + vlib_thread_main_t * tm = &vlib_thread_main; \ + VLIB_REMOVE_FROM_LINKED_LIST (tm->next, &x, next); \ +} \ __VA_ARGS__ vlib_thread_registration_t x always_inline u32 @@ -343,21 +374,131 @@ vlib_num_workers () } always_inline u32 -vlib_get_worker_cpu_index (u32 worker_index) +vlib_get_worker_thread_index (u32 worker_index) { return worker_index + 1; } always_inline u32 -vlib_get_worker_index (u32 cpu_index) +vlib_get_worker_index (u32 thread_index) { - return cpu_index - 1; + return thread_index - 1; } always_inline u32 vlib_get_current_worker_index () { - return os_get_cpu_number () - 1; + return vlib_get_thread_index () - 1; +} + +static inline void +vlib_worker_thread_barrier_check (void) +{ + if (PREDICT_FALSE (*vlib_worker_threads->wait_at_barrier)) + { + vlib_main_t *vm = vlib_get_main (); + u32 thread_index = vm->thread_index; + f64 t = vlib_time_now (vm); + + if (PREDICT_FALSE (vlib_worker_threads->barrier_elog_enabled)) + { + vlib_worker_thread_t *w = vlib_worker_threads + thread_index; + /* *INDENT-OFF* */ + ELOG_TYPE_DECLARE (e) = { + .format = "barrier-wait-thread-%d", + .format_args = "i4", + }; + /* *INDENT-ON* */ + + struct + { + u32 thread_index; + } __clib_packed *ed; + + ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, + w->elog_track); + ed->thread_index = thread_index; + } + + if (CLIB_DEBUG > 0) + { + vm = vlib_get_main (); + vm->parked_at_barrier = 1; + } + clib_atomic_fetch_add (vlib_worker_threads->workers_at_barrier, 1); + while (*vlib_worker_threads->wait_at_barrier) + ; + + /* + * Recompute the offset from thread-0 time. + * Note that vlib_time_now adds vm->time_offset, so + * clear it first. Save the resulting idea of "now", to + * see how well we're doing. See show_clock_command_fn(...) + */ + { + f64 now; + vm->time_offset = 0.0; + now = vlib_time_now (vm); + vm->time_offset = vlib_global_main.time_last_barrier_release - now; + vm->time_last_barrier_release = vlib_time_now (vm); + } + + if (CLIB_DEBUG > 0) + vm->parked_at_barrier = 0; + clib_atomic_fetch_add (vlib_worker_threads->workers_at_barrier, -1); + + if (PREDICT_FALSE (*vlib_worker_threads->node_reforks_required)) + { + if (PREDICT_FALSE (vlib_worker_threads->barrier_elog_enabled)) + { + t = vlib_time_now (vm) - t; + vlib_worker_thread_t *w = vlib_worker_threads + thread_index; + /* *INDENT-OFF* */ + ELOG_TYPE_DECLARE (e) = { + .format = "barrier-refork-thread-%d", + .format_args = "i4", + }; + /* *INDENT-ON* */ + + struct + { + u32 thread_index; + } __clib_packed *ed; + + ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, + w->elog_track); + ed->thread_index = thread_index; + } + + vlib_worker_thread_node_refork (); + clib_atomic_fetch_add (vlib_worker_threads->node_reforks_required, + -1); + while (*vlib_worker_threads->node_reforks_required) + ; + } + if (PREDICT_FALSE (vlib_worker_threads->barrier_elog_enabled)) + { + t = vlib_time_now (vm) - t; + vlib_worker_thread_t *w = vlib_worker_threads + thread_index; + /* *INDENT-OFF* */ + ELOG_TYPE_DECLARE (e) = { + .format = "barrier-released-thread-%d: %dus", + .format_args = "i4i4", + }; + /* *INDENT-ON* */ + + struct + { + u32 thread_index; + u32 duration; + } __clib_packed *ed; + + ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, + w->elog_track); + ed->thread_index = thread_index; + ed->duration = (int) (1000000.0 * t); + } + } } always_inline vlib_main_t * @@ -371,6 +512,14 @@ vlib_get_worker_vlib_main (u32 worker_index) return vm; } +static inline u8 +vlib_thread_is_main_w_barrier (void) +{ + return (!vlib_num_workers () + || ((vlib_get_thread_index () == 0 + && vlib_worker_threads->wait_at_barrier[0]))); +} + static inline void vlib_put_frame_queue_elt (vlib_frame_queue_elt_t * hf) { @@ -391,7 +540,7 @@ vlib_get_frame_queue_elt (u32 frame_queue_index, u32 index) fq = fqm->vlib_frame_queues[index]; ASSERT (fq); - new_tail = __sync_add_and_fetch (&fq->tail, 1); + new_tail = clib_atomic_add_fetch (&fq->tail, 1); /* Wait until a ring slot is available */ while (new_tail >= fq->head_hint + fq->nelts) @@ -459,6 +608,18 @@ vlib_get_worker_handoff_queue_elt (u32 frame_queue_index, return elt; } +u8 *vlib_thread_stack_init (uword thread_index); +int vlib_thread_cb_register (struct vlib_main_t *vm, + vlib_thread_callbacks_t * cb); +extern void *rpc_call_main_thread_cb_fn; + +void +vlib_process_signal_event_mt_helper (vlib_process_signal_event_mt_args_t * + args); +void vlib_rpc_call_main_thread (void *function, u8 * args, u32 size); +void vlib_get_thread_core_numa (vlib_worker_thread_t * w, unsigned cpu_id); + + #endif /* included_vlib_threads_h */ /*