src/vlib/threads.h

   1 /*
   2  * Copyright (c) 2015 Cisco and/or its affiliates.
   3  * Licensed under the Apache License, Version 2.0 (the "License");
   4  * you may not use this file except in compliance with the License.
   5  * You may obtain a copy of the License at:
   6  *
   7  *     http://www.apache.org/licenses/LICENSE-2.0
   8  *
   9  * Unless required by applicable law or agreed to in writing, software
  10  * distributed under the License is distributed on an "AS IS" BASIS,
  11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12  * See the License for the specific language governing permissions and
  13  * limitations under the License.
  14  */
  15 #ifndef included_vlib_threads_h
  16 #define included_vlib_threads_h
  17
  18 #include <vlib/main.h>
  19 #include <vppinfra/callback.h>
  20 #ifdef __linux__
  21 #include <linux/sched.h>
  22 #elif __FreeBSD__
  23 #include <sys/sched.h>
  24 #endif /* __linux__ */
  25
/**
 * Set a thread's name to @a name.
 * NOTE(review): presumably this names the calling thread; the implementation
 * is not part of this header, so confirm there.
 */
void vlib_set_thread_name (char *name);
  27
/*
 * Entry-point signature for a registered thread type.
 * arg is presumably a vlib_worker_thread_t * (this comment used to say
 * "vlib__thread_t", which is not a type declared in this header) --
 * TODO confirm against the thread bootstrap code.
 */
typedef void (vlib_thread_function_t) (void *arg);
  30
/*
 * Description of one thread type. Instances are declared with
 * VLIB_REGISTER_THREAD, whose constructor pushes them onto the singly linked
 * list headed by vlib_thread_main.next.
 */
typedef struct vlib_thread_registration_
{
  /* constructor generated list of thread registrations */
  struct vlib_thread_registration_ *next;

  /* config parameters */
  /* Name of this thread type */
  char *name;
  /* Short form of the name */
  char *short_name;
  /* Entry point for threads of this type */
  vlib_thread_function_t *function;
  /* Heap size for this thread type (NOTE(review): units and the meaning of 0
     are not visible in this header) */
  uword mheap_size;
  /* NOTE(review): presumably non-zero when count must not be overridden by
     configuration -- confirm */
  int fixed_count;
  /* Number of threads of this type */
  u32 count;
  /* NOTE(review): presumably non-zero when threads of this type do not get
     cloned vlib data structures -- confirm */
  int no_data_structure_clone;
  /* Frame queue size for this thread type (presumably in elements) */
  u32 frame_queue_nelts;

  /* All threads of this type run on pthreads */
  int use_pthreads;
  /* NOTE(review): presumably the index of the first thread of this type in
     the worker-thread array -- confirm */
  u32 first_index;
  /* NOTE(review): presumably a bitmap of the cores assigned to this thread
     type -- confirm */
  uword *coremask;
} vlib_thread_registration_t;
  51
/* log2 of the per-thread stack size */
#define VLIB_LOG2_THREAD_STACK_SIZE (21)
/* Per-thread stack size: 1 << 21 = 2097152 (presumably bytes, i.e. 2 MiB) */
#define VLIB_THREAD_STACK_SIZE (1<<VLIB_LOG2_THREAD_STACK_SIZE)
  54
/*
 * One element (slot) of a vlib_frame_queue_t.
 * The small header fields are placed behind the cacheline0 alignment mark;
 * the two VLIB_FRAME_SIZE-entry arrays start at the cacheline1 mark.
 */
typedef struct
{
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  /* NOTE(review): presumably non-zero once the slot holds data that may be
     dequeued -- confirm against the enqueue/dequeue code */
  volatile u32 valid;
  /* Single-bit flag; NOTE(review): presumably "some buffers in this slot may
     be traced" -- confirm */
  u32 maybe_trace : 1;
  /* NOTE(review): presumably the number of valid entries in the arrays
     below -- confirm */
  u32 n_vectors;
  u32 offset;
  /* Marks the end of the leading fields (the name suggests these are the
     ones cleared on reset -- confirm) */
  STRUCT_MARK (end_of_reset);

  CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
  /* Per-slot arrays, VLIB_FRAME_SIZE entries each */
  u32 buffer_index[VLIB_FRAME_SIZE];
  u32 aux_data[VLIB_FRAME_SIZE];
}
vlib_frame_queue_elt_t;
  69
/*
 * Per-thread state record. vlib_worker_threads points at an array of these;
 * the barrier code in this header indexes it by thread index for the elog
 * track and reads the shared barrier fields through element 0.
 */
typedef struct
{
  /* First cache line */
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  /* Polled by vlib_worker_thread_barrier_check; while the pointed-to value is
     non-zero the worker spins inside the barrier */
  volatile u32 *wait_at_barrier;
  /* Atomically incremented by a worker when it enters the barrier and
     decremented when it leaves */
  volatile u32 *workers_at_barrier;

  /* Second Cache Line */
    CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
  void *thread_mheap;
  u8 *thread_stack;
  void (*thread_function) (void *);
  void *thread_function_arg;
  i64 recursion_level;
  /* Event-log track used for this thread's barrier events */
  elog_track_t elog_track;
  u32 instance_id;
  /* Registration describing this thread's type */
  vlib_thread_registration_t *registration;
  u8 *name;
  u64 barrier_sync_count;
  /* Non-zero enables the elog events emitted by
     vlib_worker_thread_barrier_check */
  u8 barrier_elog_enabled;
  const char *barrier_caller;
  const char *barrier_context;
  /* Atomically decremented by each worker after it reforks; workers then spin
     until the pointed-to value reaches zero */
  volatile u32 *node_reforks_required;
  volatile u32 wait_before_barrier;
  volatile u32 workers_before_barrier;
  volatile u32 done_work_before_barrier;

  /* NOTE(review): presumably the OS-level identity and placement of the
     thread (LWP id, CPU, core, NUMA node, pthread handle) -- confirm */
  long lwp;
  int cpu_id;
  int core_id;
  int numa_id;
  pthread_t thread_id;
} vlib_worker_thread_t;

/* Array of per-thread records, defined outside this header */
extern vlib_worker_thread_t *vlib_worker_threads;
 105
/*
 * Frame queue made of vlib_frame_queue_elt_t slots.
 * The read-mostly fields, the enqueue-side index and the dequeue-side index
 * are each placed behind their own cache-line alignment mark.
 */
typedef struct
{
  /* static data */
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  /* Slot storage */
  vlib_frame_queue_elt_t *elts;
  u64 vector_threshold;
  u64 trace;
  /* Number of slots (NOTE(review): presumably the length of elts --
     confirm) */
  u32 nelts;

  /* modified by enqueue side  */
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
  volatile u64 tail;

  /* modified by dequeue side  */
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
  volatile u64 head;
}
vlib_frame_queue_t;
 124
/* Forward declaration so the dequeue callback type can refer to the struct */
struct vlib_frame_queue_main_t_;
/*
 * Dequeue callback type: takes the vlib_main_t and the owning frame queue
 * main, returns a u32 (NOTE(review): presumably the number of items
 * processed -- confirm).
 */
typedef u32 (vlib_frame_queue_dequeue_fn_t) (
  vlib_main_t *vm, struct vlib_frame_queue_main_t_ *fqm);
/*
 * One set of frame queues together with its tracing state and dequeue
 * function.
 */
typedef struct vlib_frame_queue_main_t_
{
  /* NOTE(review): presumably the graph node these queues feed -- confirm */
  u32 node_index;
  /* Queue size (presumably in elements) */
  u32 frame_queue_nelts;

  /* Vector of queue pointers (NOTE(review): presumably one per thread --
     confirm) */
  vlib_frame_queue_t **vlib_frame_queues;

  /* for frame queue tracing */
  frame_queue_trace_t *frame_queue_traces;
  frame_queue_nelt_counter_t *frame_queue_histogram;
  /* Function used to dequeue from these queues */
  vlib_frame_queue_dequeue_fn_t *frame_queue_dequeue_fn;
} vlib_frame_queue_main_t;
 140
/*
 * Argument block taken by vlib_process_signal_event_mt_helper.
 * NOTE(review): the fields presumably mirror the arguments of
 * vlib_process_signal_event (target node, event type, event data) -- confirm.
 */
typedef struct
{
  uword node_index;
  uword type_opaque;
  uword data;
} vlib_process_signal_event_mt_args_t;
 147
/* Called early, in thread 0's context */
clib_error_t *vlib_thread_init (vlib_main_t * vm);

/* NOTE(review): presumably brings worker node runtimes up to date after a
   node graph change -- confirm against the implementation */
void vlib_worker_thread_node_runtime_update (void);

/* Create worker threads; going by the parameter names, n is the thread count
   and thread_function the entry point (implementation not visible here) */
void vlib_create_worker_threads (vlib_main_t * vm, int n,
                                 void (*thread_function) (void *));

/* Per-thread initialisation for the worker described by w */
void vlib_worker_thread_init (vlib_worker_thread_t * w);
/* Set up a frame queue main for node_index with frame_queue_nelts elements.
   NOTE(review): the u32 result is presumably an index identifying the new
   frame queue main -- confirm */
u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts);
 158
/* Check for a barrier sync request every 30ms (value is in seconds) */
#define BARRIER_SYNC_DELAY (0.030000)

/* Barrier timeout; presumably in seconds, like BARRIER_SYNC_DELAY */
#if CLIB_DEBUG > 0
/* long barrier timeout, for gdb... */
#define BARRIER_SYNC_TIMEOUT (600.1)
#else
#define BARRIER_SYNC_TIMEOUT (1.0)
#endif
 168
 169 #define vlib_worker_thread_barrier_sync(X) {vlib_worker_thread_barrier_sync_int(X, __FUNCTION__);}
 170
/* Implementation behind the vlib_worker_thread_barrier_sync macro, which
   passes the calling function's name as func_name */
void vlib_worker_thread_barrier_sync_int (vlib_main_t * vm,
                                          const char *func_name);
/* Counterpart of the barrier sync call (NOTE(review): presumably lets parked
   workers resume -- confirm) */
void vlib_worker_thread_barrier_release (vlib_main_t * vm);
/* NOTE(review): presumably returns non-zero while the barrier is held --
   confirm */
u8 vlib_worker_thread_barrier_held (void);
void vlib_worker_thread_initial_barrier_sync_and_release (vlib_main_t * vm);
/* Called by each worker from vlib_worker_thread_barrier_check when a node
   refork is required */
void vlib_worker_thread_node_refork (void);
/**
 * Wait until each of the workers has been once around the track
 */
void vlib_worker_wait_one_loop (void);
/**
 * Flush worker's pending rpc requests to main thread's rpc queue
 */
void vlib_worker_flush_pending_rpc_requests (vlib_main_t *vm);
 185
 186 static_always_inline uword
 187 vlib_get_thread_index (void)
 188 {
 189   return __os_thread_index;
 190 }
 191
 192 always_inline void
 193 vlib_smp_unsafe_warning (void)
 194 {
 195   if (CLIB_DEBUG > 0)
 196     {
 197       if (vlib_get_thread_index ())
 198         fformat (stderr, "%s: SMP unsafe warning...\n", __FUNCTION__);
 199     }
 200 }
 201
 202 always_inline int
 203 __foreach_vlib_main_helper (vlib_main_t *ii, vlib_main_t **p)
 204 {
 205   vlib_main_t *vm;
 206   u32 index = ii - (vlib_main_t *) 0;
 207
 208   if (index >= vec_len (vlib_global_main.vlib_mains))
 209     return 0;
 210
 211   *p = vm = vlib_global_main.vlib_mains[index];
 212   ASSERT (index == 0 || vm->parked_at_barrier == 1);
 213   return 1;
 214 }
 215
/*
 * Iterate over the entries of vlib_global_main.vlib_mains, exposing the
 * current one as this_vlib_main; the statement following the macro is the
 * loop body.
 *
 * ii is a vlib_main_t pointer that starts at 0 and is incremented once per
 * iteration, so it acts as a counter that __foreach_vlib_main_helper converts
 * back into a vector index. The trailing "if" skips NULL entries.
 */
#define foreach_vlib_main()                                                   \
  for (vlib_main_t *ii = 0, *this_vlib_main;                                  \
       __foreach_vlib_main_helper (ii, &this_vlib_main); ii++)                \
    if (this_vlib_main)
 220
/*
 * X-macro lists of scheduling policies. Each entry is
 * _ (system constant, enum suffix, lower-case name string); the user defines
 * _ before expanding a list and undefines it afterwards.
 */
#define foreach_sched_policy_posix                                            \
  _ (SCHED_OTHER, OTHER, "other")                                             \
  _ (SCHED_FIFO, FIFO, "fifo")                                                \
  _ (SCHED_RR, RR, "rr")
/* Entries that are only added to the list on Linux builds */
#define foreach_sched_policy_linux                                            \
  _ (SCHED_BATCH, BATCH, "batch")                                             \
  _ (SCHED_IDLE, IDLE, "idle")

/* foreach_sched_policy: the posix list, plus the linux list on Linux */
#ifdef __linux__
#define foreach_sched_policy                                                  \
  foreach_sched_policy_posix foreach_sched_policy_linux
#else
#define foreach_sched_policy foreach_sched_policy_posix
#endif /* __linux__ */
 235
/*
 * Scheduling policies, generated from foreach_sched_policy: each enumerator
 * SCHED_POLICY_<f> takes the value of the corresponding system constant.
 * SCHED_POLICY_N has no explicit value, so it is one more than the last
 * enumerator in the list.
 */
typedef enum
{
#define _(v,f,s) SCHED_POLICY_##f = v,
  foreach_sched_policy
#undef _
    SCHED_POLICY_N,
} sched_policy_t;
 243
/*
 * Process-wide thread bookkeeping: the list of registered thread types,
 * the worker-thread records, core/socket bitmaps and scheduling settings.
 * A single instance, vlib_thread_main, is declared below.
 */
typedef struct
{
  /* Linked list of registrations, built by constructors
     (see VLIB_REGISTER_THREAD) */
  vlib_thread_registration_t *next;

  /* Vector of registrations, w/ non-data-structure clones at the top */
  vlib_thread_registration_t **registrations;

  /* NOTE(review): presumably a hash from registration name to registration
     -- confirm */
  uword *thread_registrations_by_name;

  /* Per-thread records */
  vlib_worker_thread_t *worker_threads;

  int use_pthreads;

  /* Number of vlib_main / vnet_main clones */
  u32 n_vlib_mains;

  /* Number of thread stacks to create */
  u32 n_thread_stacks;

  /* Number of pthreads */
  u32 n_pthreads;

  /* Number of threads */
  u32 n_threads;

  /* Number of cores to skip, must match the core mask */
  u32 skip_cores;

  /* Thread prefix name */
  u8 *thread_prefix;

  /* main thread lcore */
  u32 main_lcore;

  /* Bitmap of available CPU cores */
  uword *cpu_core_bitmap;

  /* Bitmap of available CPU sockets (NUMA nodes) */
  uword *cpu_socket_bitmap;

  /* Worker handoff queues */
  vlib_frame_queue_main_t *frame_queue_mains;

  /* worker thread initialization barrier */
  volatile u32 worker_thread_release;

  /* scheduling policy */
  u32 sched_policy;

  /* scheduling policy priority */
  u32 sched_priority;

  /* NUMA-bound heap size */
  uword numa_heap_size;

} vlib_thread_main_t;

/* The single thread-main instance, defined outside this header */
extern vlib_thread_main_t vlib_thread_main;
 303
 304 #include <vlib/global_funcs.h>
 305
/*
 * Declare and register a thread type.
 *
 * Expands to:
 *  - a declaration of the vlib_thread_registration_t object x (any extra
 *    macro arguments are placed in front of it as specifiers/attributes);
 *  - a constructor function that pushes x onto the front of the list headed
 *    by vlib_thread_main.next;
 *  - a destructor function that removes x from that list again;
 *  - a final, unterminated declaration of x, so the caller can append
 *    "= { ... };" to supply the initializer.
 */
#define VLIB_REGISTER_THREAD(x,...)                     \
  __VA_ARGS__ vlib_thread_registration_t x;             \
static void __vlib_add_thread_registration_##x (void)   \
  __attribute__((__constructor__)) ;                    \
static void __vlib_add_thread_registration_##x (void)   \
{                                                       \
  vlib_thread_main_t * tm = &vlib_thread_main;          \
  x.next = tm->next;                                    \
  tm->next = &x;                                        \
}                                                       \
static void __vlib_rm_thread_registration_##x (void)    \
  __attribute__((__destructor__)) ;                     \
static void __vlib_rm_thread_registration_##x (void)    \
{                                                       \
  vlib_thread_main_t * tm = &vlib_thread_main;          \
  VLIB_REMOVE_FROM_LINKED_LIST (tm->next, &x, next);    \
}                                                       \
__VA_ARGS__ vlib_thread_registration_t x
 324
 325 always_inline u32
 326 vlib_num_workers ()
 327 {
 328   return vlib_thread_main.n_vlib_mains - 1;
 329 }
 330
 331 always_inline u32
 332 vlib_get_worker_thread_index (u32 worker_index)
 333 {
 334   return worker_index + 1;
 335 }
 336
 337 always_inline u32
 338 vlib_get_worker_index (u32 thread_index)
 339 {
 340   return thread_index - 1;
 341 }
 342
 343 always_inline u32
 344 vlib_get_current_worker_index ()
 345 {
 346   return vlib_get_thread_index () - 1;
 347 }
 348
 349 static inline void
 350 vlib_worker_thread_barrier_check (void)
 351 {
 352   if (PREDICT_FALSE (*vlib_worker_threads->wait_at_barrier))
 353     {
 354       vlib_global_main_t *vgm = vlib_get_global_main ();
 355       vlib_main_t *vm = vlib_get_main ();
 356       u32 thread_index = vm->thread_index;
 357       f64 t = vlib_time_now (vm);
 358
 359       if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0))
 360         clib_call_callbacks (vm->barrier_perf_callbacks, vm,
 361                              vm->clib_time.last_cpu_time, 0 /* enter */ );
 362
 363       if (PREDICT_FALSE (vlib_worker_threads->barrier_elog_enabled))
 364         {
 365           vlib_worker_thread_t *w = vlib_worker_threads + thread_index;
 366           ELOG_TYPE_DECLARE (e) = {
 367             .format = "barrier-wait-thread-%d",
 368             .format_args = "i4",
 369           };
 370
 371           struct
 372           {
 373             u32 thread_index;
 374           } __clib_packed *ed;
 375
 376           ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track);
 377           ed->thread_index = thread_index;
 378         }
 379
 380       if (CLIB_DEBUG > 0)
 381         {
 382           vm = vlib_get_main ();
 383           vm->parked_at_barrier = 1;
 384         }
 385       clib_atomic_fetch_add (vlib_worker_threads->workers_at_barrier, 1);
 386       while (*vlib_worker_threads->wait_at_barrier)
 387         ;
 388
 389       /*
 390        * Recompute the offset from thread-0 time.
 391        * Note that vlib_time_now adds vm->time_offset, so
 392        * clear it first. Save the resulting idea of "now", to
 393        * see how well we're doing. See show_clock_command_fn(...)
 394        */
 395       {
 396         f64 now;
 397         vm->time_offset = 0.0;
 398         now = vlib_time_now (vm);
 399         vm->time_offset = vgm->vlib_mains[0]->time_last_barrier_release - now;
 400         vm->time_last_barrier_release = vlib_time_now (vm);
 401       }
 402
 403       if (CLIB_DEBUG > 0)
 404         vm->parked_at_barrier = 0;
 405       clib_atomic_fetch_add (vlib_worker_threads->workers_at_barrier, -1);
 406
 407       if (PREDICT_FALSE (*vlib_worker_threads->node_reforks_required))
 408         {
 409           if (PREDICT_FALSE (vlib_worker_threads->barrier_elog_enabled))
 410             {
 411               t = vlib_time_now (vm) - t;
 412               vlib_worker_thread_t *w = vlib_worker_threads + thread_index;
 413               ELOG_TYPE_DECLARE (e) = {
 414                 .format = "barrier-refork-thread-%d",
 415                 .format_args = "i4",
 416               };
 417
 418               struct
 419               {
 420                 u32 thread_index;
 421               } __clib_packed *ed;
 422
 423               ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e,
 424                                     w->elog_track);
 425               ed->thread_index = thread_index;
 426             }
 427
 428           vlib_worker_thread_node_refork ();
 429           clib_atomic_fetch_add (vlib_worker_threads->node_reforks_required,
 430                                  -1);
 431           while (*vlib_worker_threads->node_reforks_required)
 432             ;
 433         }
 434       if (PREDICT_FALSE (vlib_worker_threads->barrier_elog_enabled))
 435         {
 436           t = vlib_time_now (vm) - t;
 437           vlib_worker_thread_t *w = vlib_worker_threads + thread_index;
 438           ELOG_TYPE_DECLARE (e) = {
 439             .format = "barrier-released-thread-%d: %dus",
 440             .format_args = "i4i4",
 441           };
 442
 443           struct
 444           {
 445             u32 thread_index;
 446             u32 duration;
 447           } __clib_packed *ed;
 448
 449           ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track);
 450           ed->thread_index = thread_index;
 451           ed->duration = (int) (1000000.0 * t);
 452         }
 453
 454       if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0))
 455         clib_call_callbacks (vm->barrier_perf_callbacks, vm,
 456                              vm->clib_time.last_cpu_time, 1 /* leave */ );
 457     }
 458 }
 459
 460 always_inline vlib_main_t *
 461 vlib_get_worker_vlib_main (u32 worker_index)
 462 {
 463   vlib_main_t *vm;
 464   vlib_thread_main_t *tm = &vlib_thread_main;
 465   ASSERT (worker_index < tm->n_vlib_mains - 1);
 466   vm = vlib_get_main_by_index (worker_index + 1);
 467   ASSERT (vm);
 468   return vm;
 469 }
 470
 471 static inline u8
 472 vlib_thread_is_main_w_barrier (void)
 473 {
 474   return (!vlib_num_workers ()
 475           || ((vlib_get_thread_index () == 0
 476                && vlib_worker_threads->wait_at_barrier[0])));
 477 }
 478
/* Set up the stack for thread_index. NOTE(review): the returned pointer is
   presumably the stack base -- confirm */
u8 *vlib_thread_stack_init (uword thread_index);
/* NOTE(review): presumably a hook used by vlib_rpc_call_main_thread --
   confirm */
extern void *rpc_call_main_thread_cb_fn;

/* Helper taking a packed vlib_process_signal_event_mt_args_t argument block */
void
vlib_process_signal_event_mt_helper (vlib_process_signal_event_mt_args_t *
                                     args);
/* Request that function be run on the main thread with size bytes of args
   (per the names; the implementation is not visible here) */
void vlib_rpc_call_main_thread (void *function, u8 * args, u32 size);
/* NOTE(review): presumably fills in w's core and NUMA ids for cpu_id --
   confirm */
void vlib_get_thread_core_numa (vlib_worker_thread_t * w, unsigned cpu_id);
/* Out-of-line accessor returning a vlib_thread_main_t pointer */
vlib_thread_main_t *vlib_get_thread_main_not_inline (void);

/**
 * Force workers sync from within worker
 *
 * Must be paired with @ref vlib_workers_continue
 */
void vlib_workers_sync (void);
/**
 * Release barrier after workers sync
 */
void vlib_workers_continue (void);
 499
 500 #endif /* included_vlib_threads_h */
 501
 502 /*
 503  * fd.io coding-style-patch-verification: ON
 504  *
 505  * Local Variables:
 506  * eval: (c-set-style "gnu")
 507  * End:
 508  */