src/vlib/threads.h

   1 /*
   2  * Copyright (c) 2015 Cisco and/or its affiliates.
   3  * Licensed under the Apache License, Version 2.0 (the "License");
   4  * you may not use this file except in compliance with the License.
   5  * You may obtain a copy of the License at:
   6  *
   7  *     http://www.apache.org/licenses/LICENSE-2.0
   8  *
   9  * Unless required by applicable law or agreed to in writing, software
  10  * distributed under the License is distributed on an "AS IS" BASIS,
  11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12  * See the License for the specific language governing permissions and
  13  * limitations under the License.
  14  */
  15 #ifndef included_vlib_threads_h
  16 #define included_vlib_threads_h
  17
  18 #include <vlib/main.h>
  19 #include <linux/sched.h>
  20
  21 extern vlib_main_t **vlib_mains;
  22
  23 void vlib_set_thread_name (char *name);
  24
  25 /* arg is actually a vlib__thread_t * */
  26 typedef void (vlib_thread_function_t) (void *arg);
  27
  28 typedef struct vlib_thread_registration_
  29 {
  30   /* constructor generated list of thread registrations */
  31   struct vlib_thread_registration_ *next;
  32
  33   /* config parameters */
  34   char *name;
  35   char *short_name;
  36   vlib_thread_function_t *function;
  37   uword mheap_size;
  38   int fixed_count;
  39   u32 count;
  40   int no_data_structure_clone;
  41   u32 frame_queue_nelts;
  42
  43   /* All threads of this type run on pthreads */
  44   int use_pthreads;
  45   u32 first_index;
  46   uword *coremask;
  47 } vlib_thread_registration_t;
  48
  49 /*
  50  * Frames have their cpu / vlib_main_t index in the low-order N bits
  51  * Make VLIB_MAX_CPUS a power-of-two, please...
  52  */
  53
  54 #ifndef VLIB_MAX_CPUS
  55 #define VLIB_MAX_CPUS 256
  56 #endif
  57
  58 #if VLIB_MAX_CPUS > CLIB_MAX_MHEAPS
  59 #error Please increase number of per-cpu mheaps
  60 #endif
  61
  62 #define VLIB_CPU_MASK (VLIB_MAX_CPUS - 1)       /* 0x3f, max */
  63 #define VLIB_OFFSET_MASK (~VLIB_CPU_MASK)
  64
  65 #define VLIB_LOG2_THREAD_STACK_SIZE (21)
  66 #define VLIB_THREAD_STACK_SIZE (1<<VLIB_LOG2_THREAD_STACK_SIZE)
  67
  68 typedef enum
  69 {
  70   VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME,
  71 } vlib_frame_queue_msg_type_t;
  72
  73 typedef struct
  74 {
  75   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  76   volatile u32 valid;
  77   u32 msg_type;
  78   u32 n_vectors;
  79   u32 last_n_vectors;
  80
  81   /* 256 * 4 = 1024 bytes, even mult of cache line size */
  82   u32 buffer_index[VLIB_FRAME_SIZE];
  83 }
  84 vlib_frame_queue_elt_t;
  85
  86 typedef struct
  87 {
  88   /* First cache line */
  89   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  90   volatile u32 *wait_at_barrier;
  91   volatile u32 *workers_at_barrier;
  92
  93   /* Second Cache Line */
  94     CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
  95   void *thread_mheap;
  96   u8 *thread_stack;
  97   void (*thread_function) (void *);
  98   void *thread_function_arg;
  99   i64 recursion_level;
 100   elog_track_t elog_track;
 101   u32 instance_id;
 102   vlib_thread_registration_t *registration;
 103   u8 *name;
 104   u64 barrier_sync_count;
 105   u8 barrier_elog_enabled;
 106   const char *barrier_caller;
 107   const char *barrier_context;
 108   volatile u32 *node_reforks_required;
 109
 110   long lwp;
 111   int cpu_id;
 112   int core_id;
 113   int numa_id;
 114   pthread_t thread_id;
 115 } vlib_worker_thread_t;
 116
 117 extern vlib_worker_thread_t *vlib_worker_threads;
 118
 119 typedef struct
 120 {
 121   /* enqueue side */
 122   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
 123   volatile u64 tail;
 124   u64 enqueues;
 125   u64 enqueue_ticks;
 126   u64 enqueue_vectors;
 127   u32 enqueue_full_events;
 128
 129   /* dequeue side */
 130     CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
 131   volatile u64 head;
 132   u64 dequeues;
 133   u64 dequeue_ticks;
 134   u64 dequeue_vectors;
 135   u64 trace;
 136   u64 vector_threshold;
 137
 138   /* dequeue hint to enqueue side */
 139     CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
 140   volatile u64 head_hint;
 141
 142   /* read-only, constant, shared */
 143     CLIB_CACHE_LINE_ALIGN_MARK (cacheline3);
 144   vlib_frame_queue_elt_t *elts;
 145   u32 nelts;
 146 }
 147 vlib_frame_queue_t;
 148
 149 typedef struct
 150 {
 151   vlib_frame_queue_elt_t **handoff_queue_elt_by_thread_index;
 152   vlib_frame_queue_t **congested_handoff_queue_by_thread_index;
 153 } vlib_frame_queue_per_thread_data_t;
 154
 155 typedef struct
 156 {
 157   u32 node_index;
 158   u32 frame_queue_nelts;
 159   u32 queue_hi_thresh;
 160
 161   vlib_frame_queue_t **vlib_frame_queues;
 162   vlib_frame_queue_per_thread_data_t *per_thread_data;
 163
 164   /* for frame queue tracing */
 165   frame_queue_trace_t *frame_queue_traces;
 166   frame_queue_nelt_counter_t *frame_queue_histogram;
 167 } vlib_frame_queue_main_t;
 168
 169 typedef struct
 170 {
 171   uword node_index;
 172   uword type_opaque;
 173   uword data;
 174 } vlib_process_signal_event_mt_args_t;
 175
 176 /* Called early, in thread 0's context */
 177 clib_error_t *vlib_thread_init (vlib_main_t * vm);
 178
 179 int vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index,
 180                               u32 frame_queue_index, vlib_frame_t * frame,
 181                               vlib_frame_queue_msg_type_t type);
 182
 183 int
 184 vlib_frame_queue_dequeue (vlib_main_t * vm, vlib_frame_queue_main_t * fqm);
 185
 186 void vlib_worker_thread_node_runtime_update (void);
 187
 188 void vlib_create_worker_threads (vlib_main_t * vm, int n,
 189                                  void (*thread_function) (void *));
 190
 191 void vlib_worker_thread_init (vlib_worker_thread_t * w);
 192 u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts);
 193
 194 /* Check for a barrier sync request every 30ms */
 195 #define BARRIER_SYNC_DELAY (0.030000)
 196
 197 #if CLIB_DEBUG > 0
 198 /* long barrier timeout, for gdb... */
 199 #define BARRIER_SYNC_TIMEOUT (600.1)
 200 #else
 201 #define BARRIER_SYNC_TIMEOUT (1.0)
 202 #endif
 203
 204 #define vlib_worker_thread_barrier_sync(X) {vlib_worker_thread_barrier_sync_int(X, __FUNCTION__);}
 205
 206 void vlib_worker_thread_barrier_sync_int (vlib_main_t * vm,
 207                                           const char *func_name);
 208 void vlib_worker_thread_barrier_release (vlib_main_t * vm);
 209 void vlib_worker_thread_initial_barrier_sync_and_release (vlib_main_t * vm);
 210 void vlib_worker_thread_node_refork (void);
 211
 212 static_always_inline uword
 213 vlib_get_thread_index (void)
 214 {
 215   return __os_thread_index;
 216 }
 217
 218 always_inline void
 219 vlib_smp_unsafe_warning (void)
 220 {
 221   if (CLIB_DEBUG > 0)
 222     {
 223       if (vlib_get_thread_index ())
 224         fformat (stderr, "%s: SMP unsafe warning...\n", __FUNCTION__);
 225     }
 226 }
 227
 228 typedef enum
 229 {
 230   VLIB_WORKER_THREAD_FORK_FIXUP_ILLEGAL = 0,
 231   VLIB_WORKER_THREAD_FORK_FIXUP_NEW_SW_IF_INDEX,
 232 } vlib_fork_fixup_t;
 233
 234 void vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which);
 235
 236 #define foreach_vlib_main(body)                         \
 237 do {                                                    \
 238   vlib_main_t ** __vlib_mains = 0, *this_vlib_main;     \
 239   int ii;                                               \
 240                                                         \
 241   for (ii = 0; ii < vec_len (vlib_mains); ii++)         \
 242     {                                                   \
 243       this_vlib_main = vlib_mains[ii];                  \
 244       ASSERT (ii == 0 ||                                \
 245               this_vlib_main->parked_at_barrier == 1);  \
 246       if (this_vlib_main)                               \
 247         vec_add1 (__vlib_mains, this_vlib_main);        \
 248     }                                                   \
 249                                                         \
 250   for (ii = 0; ii < vec_len (__vlib_mains); ii++)       \
 251     {                                                   \
 252       this_vlib_main = __vlib_mains[ii];                \
 253       /* body uses this_vlib_main... */                 \
 254       (body);                                           \
 255     }                                                   \
 256   vec_free (__vlib_mains);                              \
 257 } while (0);
 258
 259 #define foreach_sched_policy \
 260   _(SCHED_OTHER, OTHER, "other") \
 261   _(SCHED_BATCH, BATCH, "batch") \
 262   _(SCHED_IDLE, IDLE, "idle")   \
 263   _(SCHED_FIFO, FIFO, "fifo")   \
 264   _(SCHED_RR, RR, "rr")
 265
 266 typedef enum
 267 {
 268 #define _(v,f,s) SCHED_POLICY_##f = v,
 269   foreach_sched_policy
 270 #undef _
 271     SCHED_POLICY_N,
 272 } sched_policy_t;
 273
 274 typedef struct
 275 {
 276   clib_error_t *(*vlib_launch_thread_cb) (void *fp, vlib_worker_thread_t * w,
 277                                           unsigned cpu_id);
 278   clib_error_t *(*vlib_thread_set_lcore_cb) (u32 thread, u16 cpu);
 279 } vlib_thread_callbacks_t;
 280
 281 typedef struct
 282 {
 283   /* Link list of registrations, built by constructors */
 284   vlib_thread_registration_t *next;
 285
 286   /* Vector of registrations, w/ non-data-structure clones at the top */
 287   vlib_thread_registration_t **registrations;
 288
 289   uword *thread_registrations_by_name;
 290
 291   vlib_worker_thread_t *worker_threads;
 292
 293   /*
 294    * Launch all threads as pthreads,
 295    * not eal_rte_launch (strict affinity) threads
 296    */
 297   int use_pthreads;
 298
 299   /* Number of vlib_main / vnet_main clones */
 300   u32 n_vlib_mains;
 301
 302   /* Number of thread stacks to create */
 303   u32 n_thread_stacks;
 304
 305   /* Number of pthreads */
 306   u32 n_pthreads;
 307
 308   /* Number of threads */
 309   u32 n_threads;
 310
 311   /* Number of cores to skip, must match the core mask */
 312   u32 skip_cores;
 313
 314   /* Thread prefix name */
 315   u8 *thread_prefix;
 316
 317   /* main thread lcore */
 318   u32 main_lcore;
 319
 320   /* Bitmap of available CPU cores */
 321   uword *cpu_core_bitmap;
 322
 323   /* Bitmap of available CPU sockets (NUMA nodes) */
 324   uword *cpu_socket_bitmap;
 325
 326   /* Worker handoff queues */
 327   vlib_frame_queue_main_t *frame_queue_mains;
 328
 329   /* worker thread initialization barrier */
 330   volatile u32 worker_thread_release;
 331
 332   /* scheduling policy */
 333   u32 sched_policy;
 334
 335   /* scheduling policy priority */
 336   u32 sched_priority;
 337
 338   /* callbacks */
 339   vlib_thread_callbacks_t cb;
 340   int extern_thread_mgmt;
 341
 342   /* NUMA-bound heap size */
 343   uword numa_heap_size;
 344
 345 } vlib_thread_main_t;
 346
 347 extern vlib_thread_main_t vlib_thread_main;
 348
 349 #include <vlib/global_funcs.h>
 350
 351 #define VLIB_REGISTER_THREAD(x,...)                     \
 352   __VA_ARGS__ vlib_thread_registration_t x;             \
 353 static void __vlib_add_thread_registration_##x (void)   \
 354   __attribute__((__constructor__)) ;                    \
 355 static void __vlib_add_thread_registration_##x (void)   \
 356 {                                                       \
 357   vlib_thread_main_t * tm = &vlib_thread_main;          \
 358   x.next = tm->next;                                    \
 359   tm->next = &x;                                        \
 360 }                                                       \
 361 static void __vlib_rm_thread_registration_##x (void)    \
 362   __attribute__((__destructor__)) ;                     \
 363 static void __vlib_rm_thread_registration_##x (void)    \
 364 {                                                       \
 365   vlib_thread_main_t * tm = &vlib_thread_main;          \
 366   VLIB_REMOVE_FROM_LINKED_LIST (tm->next, &x, next);    \
 367 }                                                       \
 368 __VA_ARGS__ vlib_thread_registration_t x
 369
 370 always_inline u32
 371 vlib_num_workers ()
 372 {
 373   return vlib_thread_main.n_vlib_mains - 1;
 374 }
 375
 376 always_inline u32
 377 vlib_get_worker_thread_index (u32 worker_index)
 378 {
 379   return worker_index + 1;
 380 }
 381
 382 always_inline u32
 383 vlib_get_worker_index (u32 thread_index)
 384 {
 385   return thread_index - 1;
 386 }
 387
 388 always_inline u32
 389 vlib_get_current_worker_index ()
 390 {
 391   return vlib_get_thread_index () - 1;
 392 }
 393
 394 static inline void
 395 vlib_worker_thread_barrier_check (void)
 396 {
 397   if (PREDICT_FALSE (*vlib_worker_threads->wait_at_barrier))
 398     {
 399       vlib_main_t *vm = vlib_get_main ();
 400       u32 thread_index = vm->thread_index;
 401       f64 t = vlib_time_now (vm);
 402
 403       if (PREDICT_FALSE (vlib_worker_threads->barrier_elog_enabled))
 404         {
 405           vlib_worker_thread_t *w = vlib_worker_threads + thread_index;
 406           /* *INDENT-OFF* */
 407           ELOG_TYPE_DECLARE (e) = {
 408             .format = "barrier-wait-thread-%d",
 409             .format_args = "i4",
 410           };
 411           /* *INDENT-ON* */
 412
 413           struct
 414           {
 415             u32 thread_index;
 416           } __clib_packed *ed;
 417
 418           ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e,
 419                                 w->elog_track);
 420           ed->thread_index = thread_index;
 421         }
 422
 423       if (CLIB_DEBUG > 0)
 424         {
 425           vm = vlib_get_main ();
 426           vm->parked_at_barrier = 1;
 427         }
 428       clib_atomic_fetch_add (vlib_worker_threads->workers_at_barrier, 1);
 429       while (*vlib_worker_threads->wait_at_barrier)
 430         ;
 431
 432       /*
 433        * Recompute the offset from thread-0 time.
 434        * Note that vlib_time_now adds vm->time_offset, so
 435        * clear it first. Save the resulting idea of "now", to
 436        * see how well we're doing. See show_clock_command_fn(...)
 437        */
 438       {
 439         f64 now;
 440         vm->time_offset = 0.0;
 441         now = vlib_time_now (vm);
 442         vm->time_offset = vlib_global_main.time_last_barrier_release - now;
 443         vm->time_last_barrier_release = vlib_time_now (vm);
 444       }
 445
 446       if (CLIB_DEBUG > 0)
 447         vm->parked_at_barrier = 0;
 448       clib_atomic_fetch_add (vlib_worker_threads->workers_at_barrier, -1);
 449
 450       if (PREDICT_FALSE (*vlib_worker_threads->node_reforks_required))
 451         {
 452           if (PREDICT_FALSE (vlib_worker_threads->barrier_elog_enabled))
 453             {
 454               t = vlib_time_now (vm) - t;
 455               vlib_worker_thread_t *w = vlib_worker_threads + thread_index;
 456               /* *INDENT-OFF* */
 457               ELOG_TYPE_DECLARE (e) = {
 458                 .format = "barrier-refork-thread-%d",
 459                 .format_args = "i4",
 460               };
 461               /* *INDENT-ON* */
 462
 463               struct
 464               {
 465                 u32 thread_index;
 466               } __clib_packed *ed;
 467
 468               ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e,
 469                                     w->elog_track);
 470               ed->thread_index = thread_index;
 471             }
 472
 473           vlib_worker_thread_node_refork ();
 474           clib_atomic_fetch_add (vlib_worker_threads->node_reforks_required,
 475                                  -1);
 476           while (*vlib_worker_threads->node_reforks_required)
 477             ;
 478         }
 479       if (PREDICT_FALSE (vlib_worker_threads->barrier_elog_enabled))
 480         {
 481           t = vlib_time_now (vm) - t;
 482           vlib_worker_thread_t *w = vlib_worker_threads + thread_index;
 483           /* *INDENT-OFF* */
 484           ELOG_TYPE_DECLARE (e) = {
 485             .format = "barrier-released-thread-%d: %dus",
 486             .format_args = "i4i4",
 487           };
 488           /* *INDENT-ON* */
 489
 490           struct
 491           {
 492             u32 thread_index;
 493             u32 duration;
 494           } __clib_packed *ed;
 495
 496           ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e,
 497                                 w->elog_track);
 498           ed->thread_index = thread_index;
 499           ed->duration = (int) (1000000.0 * t);
 500         }
 501     }
 502 }
 503
 504 always_inline vlib_main_t *
 505 vlib_get_worker_vlib_main (u32 worker_index)
 506 {
 507   vlib_main_t *vm;
 508   vlib_thread_main_t *tm = &vlib_thread_main;
 509   ASSERT (worker_index < tm->n_vlib_mains - 1);
 510   vm = vlib_mains[worker_index + 1];
 511   ASSERT (vm);
 512   return vm;
 513 }
 514
 515 static inline u8
 516 vlib_thread_is_main_w_barrier (void)
 517 {
 518   return (!vlib_num_workers ()
 519           || ((vlib_get_thread_index () == 0
 520                && vlib_worker_threads->wait_at_barrier[0])));
 521 }
 522
 523 static inline void
 524 vlib_put_frame_queue_elt (vlib_frame_queue_elt_t * hf)
 525 {
 526   CLIB_MEMORY_BARRIER ();
 527   hf->valid = 1;
 528 }
 529
 530 static inline vlib_frame_queue_elt_t *
 531 vlib_get_frame_queue_elt (u32 frame_queue_index, u32 index)
 532 {
 533   vlib_frame_queue_t *fq;
 534   vlib_frame_queue_elt_t *elt;
 535   vlib_thread_main_t *tm = &vlib_thread_main;
 536   vlib_frame_queue_main_t *fqm =
 537     vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
 538   u64 new_tail;
 539
 540   fq = fqm->vlib_frame_queues[index];
 541   ASSERT (fq);
 542
 543   new_tail = clib_atomic_add_fetch (&fq->tail, 1);
 544
 545   /* Wait until a ring slot is available */
 546   while (new_tail >= fq->head_hint + fq->nelts)
 547     vlib_worker_thread_barrier_check ();
 548
 549   elt = fq->elts + (new_tail & (fq->nelts - 1));
 550
 551   /* this would be very bad... */
 552   while (elt->valid)
 553     ;
 554
 555   elt->msg_type = VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME;
 556   elt->last_n_vectors = elt->n_vectors = 0;
 557
 558   return elt;
 559 }
 560
 561 static inline vlib_frame_queue_t *
 562 is_vlib_frame_queue_congested (u32 frame_queue_index,
 563                                u32 index,
 564                                u32 queue_hi_thresh,
 565                                vlib_frame_queue_t **
 566                                handoff_queue_by_worker_index)
 567 {
 568   vlib_frame_queue_t *fq;
 569   vlib_thread_main_t *tm = &vlib_thread_main;
 570   vlib_frame_queue_main_t *fqm =
 571     vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
 572
 573   fq = handoff_queue_by_worker_index[index];
 574   if (fq != (vlib_frame_queue_t *) (~0))
 575     return fq;
 576
 577   fq = fqm->vlib_frame_queues[index];
 578   ASSERT (fq);
 579
 580   if (PREDICT_FALSE (fq->tail >= (fq->head_hint + queue_hi_thresh)))
 581     {
 582       /* a valid entry in the array will indicate the queue has reached
 583        * the specified threshold and is congested
 584        */
 585       handoff_queue_by_worker_index[index] = fq;
 586       fq->enqueue_full_events++;
 587       return fq;
 588     }
 589
 590   return NULL;
 591 }
 592
 593 static inline vlib_frame_queue_elt_t *
 594 vlib_get_worker_handoff_queue_elt (u32 frame_queue_index,
 595                                    u32 vlib_worker_index,
 596                                    vlib_frame_queue_elt_t **
 597                                    handoff_queue_elt_by_worker_index)
 598 {
 599   vlib_frame_queue_elt_t *elt;
 600
 601   if (handoff_queue_elt_by_worker_index[vlib_worker_index])
 602     return handoff_queue_elt_by_worker_index[vlib_worker_index];
 603
 604   elt = vlib_get_frame_queue_elt (frame_queue_index, vlib_worker_index);
 605
 606   handoff_queue_elt_by_worker_index[vlib_worker_index] = elt;
 607
 608   return elt;
 609 }
 610
 611 u8 *vlib_thread_stack_init (uword thread_index);
 612 int vlib_thread_cb_register (struct vlib_main_t *vm,
 613                              vlib_thread_callbacks_t * cb);
 614 extern void *rpc_call_main_thread_cb_fn;
 615
 616 void
 617 vlib_process_signal_event_mt_helper (vlib_process_signal_event_mt_args_t *
 618                                      args);
 619 void vlib_rpc_call_main_thread (void *function, u8 * args, u32 size);
 620 void vlib_get_thread_core_numa (vlib_worker_thread_t * w, unsigned cpu_id);
 621
 622
 623 #endif /* included_vlib_threads_h */
 624
 625 /*
 626  * fd.io coding-style-patch-verification: ON
 627  *
 628  * Local Variables:
 629  * eval: (c-set-style "gnu")
 630  * End:
 631  */