New upstream version 18.08
[deb_dpdk.git] / examples / performance-thread / common / lthread_sched.c
1 /*
2  * SPDX-License-Identifier: BSD-3-Clause
3  * Copyright 2015 Intel Corporation.
4  * Copyright 2012 Hasan Alayli <halayli@gmail.com>
5  */
6
7 #define RTE_MEM 1
8
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdint.h>
13 #include <stddef.h>
14 #include <limits.h>
15 #include <inttypes.h>
16 #include <unistd.h>
17 #include <pthread.h>
18 #include <fcntl.h>
19 #include <sys/time.h>
20 #include <sys/mman.h>
21 #include <sched.h>
22
23 #include <rte_prefetch.h>
24 #include <rte_per_lcore.h>
25 #include <rte_atomic.h>
26 #include <rte_atomic_64.h>
27 #include <rte_log.h>
28 #include <rte_common.h>
29 #include <rte_branch_prediction.h>
30
31 #include "lthread_api.h"
32 #include "lthread_int.h"
33 #include "lthread_sched.h"
34 #include "lthread_objcache.h"
35 #include "lthread_timer.h"
36 #include "lthread_mutex.h"
37 #include "lthread_cond.h"
38 #include "lthread_tls.h"
39 #include "lthread_diag.h"
40
41 /*
42  * This file implements the lthread scheduler
43  * The scheduler is the function lthread_run()
44  * This must be run as the main loop of an EAL thread.
45  *
46  * Currently once a scheduler is created it cannot be destroyed
47  * When a scheduler shuts down it is assumed that the application is terminating
48  */
49
/* number of schedulers the application expects to run (defaults to 1) */
static rte_atomic16_t num_schedulers;
/* number of schedulers that have actually entered lthread_run() */
static rte_atomic16_t active_schedulers;

/* one scheduler per lcore */
RTE_DEFINE_PER_LCORE(struct lthread_sched *, this_sched) = NULL;

/* lcore id -> scheduler lookup table, populated by _lthread_sched_create() */
struct lthread_sched *schedcore[LTHREAD_MAX_LCORES];

/* user-registered diagnostic callback (NULL when diagnostics disabled) */
diag_callback diag_cb;

/* presumably a bitmask selecting which diag events fire — confirm in lthread_diag.h */
uint64_t diag_mask;
61
62
63 /* constructor */
64 RTE_INIT(lthread_sched_ctor)
65 {
66         memset(schedcore, 0, sizeof(schedcore));
67         rte_atomic16_init(&num_schedulers);
68         rte_atomic16_set(&num_schedulers, 1);
69         rte_atomic16_init(&active_schedulers);
70         rte_atomic16_set(&active_schedulers, 0);
71         diag_cb = NULL;
72 }
73
74
/*
 * Progress marker for _lthread_sched_alloc_resources().
 * Records which allocation phase was in progress; on failure the value
 * tells the rollback switch exactly which earlier resources must be
 * destroyed (each phase unwinds everything allocated before it).
 */
enum sched_alloc_phase {
	SCHED_ALLOC_OK,
	SCHED_ALLOC_QNODE_POOL,
	SCHED_ALLOC_READY_QUEUE,
	SCHED_ALLOC_PREADY_QUEUE,
	SCHED_ALLOC_LTHREAD_CACHE,
	SCHED_ALLOC_STACK_CACHE,
	SCHED_ALLOC_PERLT_CACHE,
	SCHED_ALLOC_TLS_CACHE,
	SCHED_ALLOC_COND_CACHE,
	SCHED_ALLOC_MUTEX_CACHE,
};
87
/*
 * Allocate all per-scheduler resources (queue-node pool, ready queues,
 * and the object caches for lthreads, stacks, per-lthread data, TLS,
 * condition variables and mutexes) into new_sched.
 *
 * Returns SCHED_ALLOC_OK on success.  On failure, returns the enum
 * sched_alloc_phase value of the phase that failed, after destroying
 * every resource that the earlier phases had already created.
 */
static int
_lthread_sched_alloc_resources(struct lthread_sched *new_sched)
{
	int alloc_status;

	/*
	 * do { ... } while (0) lets any failing step 'break' out with
	 * alloc_status recording how far the allocation sequence got.
	 */
	do {
		/* Initialize per scheduler queue node pool */
		alloc_status = SCHED_ALLOC_QNODE_POOL;
		new_sched->qnode_pool =
			_qnode_pool_create("qnode pool", LTHREAD_PREALLOC);
		if (new_sched->qnode_pool == NULL)
			break;

		/* Initialize per scheduler local ready queue */
		alloc_status = SCHED_ALLOC_READY_QUEUE;
		new_sched->ready = _lthread_queue_create("ready queue");
		if (new_sched->ready == NULL)
			break;

		/* Initialize per scheduler local peer ready queue */
		alloc_status = SCHED_ALLOC_PREADY_QUEUE;
		new_sched->pready = _lthread_queue_create("pready queue");
		if (new_sched->pready == NULL)
			break;

		/* Initialize per scheduler local free lthread cache */
		alloc_status = SCHED_ALLOC_LTHREAD_CACHE;
		new_sched->lthread_cache =
			_lthread_objcache_create("lthread cache",
						sizeof(struct lthread),
						LTHREAD_PREALLOC);
		if (new_sched->lthread_cache == NULL)
			break;

		/* Initialize per scheduler local free stack cache */
		alloc_status = SCHED_ALLOC_STACK_CACHE;
		new_sched->stack_cache =
			_lthread_objcache_create("stack_cache",
						sizeof(struct lthread_stack),
						LTHREAD_PREALLOC);
		if (new_sched->stack_cache == NULL)
			break;

		/* Initialize per scheduler local free per lthread data cache */
		alloc_status = SCHED_ALLOC_PERLT_CACHE;
		new_sched->per_lthread_cache =
			_lthread_objcache_create("per_lt cache",
						RTE_PER_LTHREAD_SECTION_SIZE,
						LTHREAD_PREALLOC);
		if (new_sched->per_lthread_cache == NULL)
			break;

		/* Initialize per scheduler local free tls cache */
		alloc_status = SCHED_ALLOC_TLS_CACHE;
		new_sched->tls_cache =
			_lthread_objcache_create("TLS cache",
						sizeof(struct lthread_tls),
						LTHREAD_PREALLOC);
		if (new_sched->tls_cache == NULL)
			break;

		/* Initialize per scheduler local free cond var cache */
		alloc_status = SCHED_ALLOC_COND_CACHE;
		new_sched->cond_cache =
			_lthread_objcache_create("cond cache",
						sizeof(struct lthread_cond),
						LTHREAD_PREALLOC);
		if (new_sched->cond_cache == NULL)
			break;

		/* Initialize per scheduler local free mutex cache */
		alloc_status = SCHED_ALLOC_MUTEX_CACHE;
		new_sched->mutex_cache =
			_lthread_objcache_create("mutex cache",
						sizeof(struct lthread_mutex),
						LTHREAD_PREALLOC);
		if (new_sched->mutex_cache == NULL)
			break;

		alloc_status = SCHED_ALLOC_OK;
	} while (0);

	/*
	 * Roll back on any failure.  Note the deliberate one-phase offset:
	 * the case label names the phase that FAILED (so it has nothing of
	 * its own to free), and its body destroys the resource created by
	 * the previous phase; the fall-through then unwinds every earlier
	 * phase in reverse allocation order.
	 */
	switch (alloc_status) {
	case SCHED_ALLOC_MUTEX_CACHE:
		_lthread_objcache_destroy(new_sched->cond_cache);
		/* fall through */
	case SCHED_ALLOC_COND_CACHE:
		_lthread_objcache_destroy(new_sched->tls_cache);
		/* fall through */
	case SCHED_ALLOC_TLS_CACHE:
		_lthread_objcache_destroy(new_sched->per_lthread_cache);
		/* fall through */
	case SCHED_ALLOC_PERLT_CACHE:
		_lthread_objcache_destroy(new_sched->stack_cache);
		/* fall through */
	case SCHED_ALLOC_STACK_CACHE:
		_lthread_objcache_destroy(new_sched->lthread_cache);
		/* fall through */
	case SCHED_ALLOC_LTHREAD_CACHE:
		_lthread_queue_destroy(new_sched->pready);
		/* fall through */
	case SCHED_ALLOC_PREADY_QUEUE:
		_lthread_queue_destroy(new_sched->ready);
		/* fall through */
	case SCHED_ALLOC_READY_QUEUE:
		_qnode_pool_destroy(new_sched->qnode_pool);
		/* fall through */
	case SCHED_ALLOC_QNODE_POOL:
		/* first phase failed: nothing was allocated yet */
		/* fall through */
	case SCHED_ALLOC_OK:
		break;
	}
	return alloc_status;
}
203
204
205 /*
206  * Create a scheduler on the current lcore
207  */
208 struct lthread_sched *_lthread_sched_create(size_t stack_size)
209 {
210         int status;
211         struct lthread_sched *new_sched;
212         unsigned lcoreid = rte_lcore_id();
213
214         RTE_ASSERT(stack_size <= LTHREAD_MAX_STACK_SIZE);
215
216         if (stack_size == 0)
217                 stack_size = LTHREAD_MAX_STACK_SIZE;
218
219         new_sched =
220              rte_calloc_socket(NULL, 1, sizeof(struct lthread_sched),
221                                 RTE_CACHE_LINE_SIZE,
222                                 rte_socket_id());
223         if (new_sched == NULL) {
224                 RTE_LOG(CRIT, LTHREAD,
225                         "Failed to allocate memory for scheduler\n");
226                 return NULL;
227         }
228
229         _lthread_key_pool_init();
230
231         new_sched->stack_size = stack_size;
232         new_sched->birth = rte_rdtsc();
233         THIS_SCHED = new_sched;
234
235         status = _lthread_sched_alloc_resources(new_sched);
236         if (status != SCHED_ALLOC_OK) {
237                 RTE_LOG(CRIT, LTHREAD,
238                         "Failed to allocate resources for scheduler code = %d\n",
239                         status);
240                 rte_free(new_sched);
241                 return NULL;
242         }
243
244         bzero(&new_sched->ctx, sizeof(struct ctx));
245
246         new_sched->lcore_id = lcoreid;
247
248         schedcore[lcoreid] = new_sched;
249
250         new_sched->run_flag = 1;
251
252         DIAG_EVENT(new_sched, LT_DIAG_SCHED_CREATE, rte_lcore_id(), 0);
253
254         rte_wmb();
255         return new_sched;
256 }
257
258 /*
259  * Set the number of schedulers in the system
260  */
261 int lthread_num_schedulers_set(int num)
262 {
263         rte_atomic16_set(&num_schedulers, num);
264         return (int)rte_atomic16_read(&num_schedulers);
265 }
266
267 /*
268  * Return the number of schedulers active
269  */
270 int lthread_active_schedulers(void)
271 {
272         return (int)rte_atomic16_read(&active_schedulers);
273 }
274
275
276 /**
277  * shutdown the scheduler running on the specified lcore
278  */
279 void lthread_scheduler_shutdown(unsigned lcoreid)
280 {
281         uint64_t coreid = (uint64_t) lcoreid;
282
283         if (coreid < LTHREAD_MAX_LCORES) {
284                 if (schedcore[coreid] != NULL)
285                         schedcore[coreid]->run_flag = 0;
286         }
287 }
288
289 /**
290  * shutdown all schedulers
291  */
292 void lthread_scheduler_shutdown_all(void)
293 {
294         uint64_t i;
295
296         /*
297          * give time for all schedulers to have started
298          * Note we use sched_yield() rather than pthread_yield() to allow
299          * for the possibility of a pthread wrapper on lthread_yield(),
300          * something that is not possible unless the scheduler is running.
301          */
302         while (rte_atomic16_read(&active_schedulers) <
303                rte_atomic16_read(&num_schedulers))
304                 sched_yield();
305
306         for (i = 0; i < LTHREAD_MAX_LCORES; i++) {
307                 if (schedcore[i] != NULL)
308                         schedcore[i]->run_flag = 0;
309         }
310 }
311
/*
 * Resume a suspended lthread on the current scheduler.
 *
 * If the thread is already cancelled/exited AND detached it is freed
 * immediately and never run.  A thread in INIT state gets first-run
 * setup (scheduler binding, stack, TLS) before the context switch.
 * After the thread yields back, any deferred cross-core queue write is
 * completed (see comment below).
 */
static __rte_always_inline void
_lthread_resume(struct lthread *lt);
static inline void _lthread_resume(struct lthread *lt)
{
	struct lthread_sched *sched = THIS_SCHED;
	struct lthread_stack *s;
	uint64_t state = lt->state;
#if LTHREAD_DIAG
	/* records whether this resume performed first-run init */
	int init = 0;
#endif

	sched->current_lthread = lt;

	if (state & (BIT(ST_LT_CANCELLED) | BIT(ST_LT_EXITED))) {
		/* if detached we can free the thread now */
		if (state & BIT(ST_LT_DETACH)) {
			_lthread_free(lt);
			sched->current_lthread = NULL;
			return;
		}
	}

	if (state & BIT(ST_LT_INIT)) {
		/* first time this thread has been run */
		/* assign thread to this scheduler */
		lt->sched = THIS_SCHED;

		/* allocate stack */
		/* NOTE(review): s is not NULL-checked; presumably
		 * _stack_alloc() cannot fail or aborts — confirm */
		s = _stack_alloc();

		lt->stack_container = s;
		_lthread_set_stack(lt, s->stack, s->stack_size);

		/* allocate memory for TLS used by this thread */
		_lthread_tls_alloc(lt);

		lt->state = BIT(ST_LT_READY);
#if LTHREAD_DIAG
		init = 1;
#endif
	}

	DIAG_EVENT(lt, LT_DIAG_LTHREAD_RESUMED, init, lt);

	/* switch to the new thread */
	ctx_switch(&lt->ctx, &sched->ctx);

	/* If posting to a queue that could be read by another lcore
	 * we defer the queue write till now to ensure the context has been
	 * saved before the other core tries to resume it
	 * This applies to blocking on mutex, cond, and to set_affinity
	 */
	if (lt->pending_wr_queue != NULL) {
		struct lthread_queue *dest = lt->pending_wr_queue;

		lt->pending_wr_queue = NULL;

		/* queue the current thread to the specified queue */
		_lthread_queue_insert_mp(dest, lt);
	}

	sched->current_lthread = NULL;
}
378
/*
 * Handle sleep timer expiry.
 *
 * rte_timer callback; arg is the lthread whose timer fired.  The
 * ST_LT_EXPIRED bit is set for the duration of the resume so the woken
 * thread can distinguish a timeout from an explicit wakeup, then the
 * saved state (without EXPIRED) is restored once the thread yields back.
 */
void
_sched_timer_cb(struct rte_timer *tim, void *arg)
{
	struct lthread *lt = (struct lthread *) arg;
	/* snapshot taken BEFORE the resume; restored afterwards */
	uint64_t state = lt->state;

	DIAG_EVENT(lt, LT_DIAG_LTHREAD_TMR_EXPIRED, &lt->tim, 0);

	rte_timer_stop(tim);

	/* a cancelled thread no longer counts as blocked on this scheduler */
	if (lt->state & BIT(ST_LT_CANCELLED))
		(THIS_SCHED)->nb_blocked_threads--;

	lt->state = state | BIT(ST_LT_EXPIRED);
	_lthread_resume(lt);
	lt->state = state & CLEARBIT(ST_LT_EXPIRED);
}
399
400
401
402 /*
403  * Returns 0 if there is a pending job in scheduler or 1 if done and can exit.
404  */
405 static inline int _lthread_sched_isdone(struct lthread_sched *sched)
406 {
407         return (sched->run_flag == 0) &&
408                         (_lthread_queue_empty(sched->ready)) &&
409                         (_lthread_queue_empty(sched->pready)) &&
410                         (sched->nb_blocked_threads == 0);
411 }
412
413 /*
414  * Wait for all schedulers to start
415  */
416 static inline void _lthread_schedulers_sync_start(void)
417 {
418         rte_atomic16_inc(&active_schedulers);
419
420         /* wait for lthread schedulers
421          * Note we use sched_yield() rather than pthread_yield() to allow
422          * for the possibility of a pthread wrapper on lthread_yield(),
423          * something that is not possible unless the scheduler is running.
424          */
425         while (rte_atomic16_read(&active_schedulers) <
426                rte_atomic16_read(&num_schedulers))
427                 sched_yield();
428
429 }
430
431 /*
432  * Wait for all schedulers to stop
433  */
434 static inline void _lthread_schedulers_sync_stop(void)
435 {
436         rte_atomic16_dec(&active_schedulers);
437         rte_atomic16_dec(&num_schedulers);
438
439         /* wait for schedulers
440          * Note we use sched_yield() rather than pthread_yield() to allow
441          * for the possibility of a pthread wrapper on lthread_yield(),
442          * something that is not possible unless the scheduler is running.
443          */
444         while (rte_atomic16_read(&active_schedulers) > 0)
445                 sched_yield();
446
447 }
448
449
450 /*
451  * Run the lthread scheduler
452  * This loop is the heart of the system
453  */
454 void lthread_run(void)
455 {
456
457         struct lthread_sched *sched = THIS_SCHED;
458         struct lthread *lt = NULL;
459
460         RTE_LOG(INFO, LTHREAD,
461                 "starting scheduler %p on lcore %u phys core %u\n",
462                 sched, rte_lcore_id(),
463                 rte_lcore_index(rte_lcore_id()));
464
465         /* if more than one, wait for all schedulers to start */
466         _lthread_schedulers_sync_start();
467
468
469         /*
470          * This is the main scheduling loop
471          * So long as there are tasks in existence we run this loop.
472          * We check for:-
473          *   expired timers,
474          *   the local ready queue,
475          *   and the peer ready queue,
476          *
477          * and resume lthreads ad infinitum.
478          */
479         while (!_lthread_sched_isdone(sched)) {
480
481                 rte_timer_manage();
482
483                 lt = _lthread_queue_poll(sched->ready);
484                 if (lt != NULL)
485                         _lthread_resume(lt);
486                 lt = _lthread_queue_poll(sched->pready);
487                 if (lt != NULL)
488                         _lthread_resume(lt);
489         }
490
491
492         /* if more than one wait for all schedulers to stop */
493         _lthread_schedulers_sync_stop();
494
495         (THIS_SCHED) = NULL;
496
497         RTE_LOG(INFO, LTHREAD,
498                 "stopping scheduler %p on lcore %u phys core %u\n",
499                 sched, rte_lcore_id(),
500                 rte_lcore_index(rte_lcore_id()));
501         fflush(stdout);
502 }
503
504 /*
505  * Return the scheduler for this lcore
506  *
507  */
508 struct lthread_sched *_lthread_sched_get(unsigned int lcore_id)
509 {
510         struct lthread_sched *res = NULL;
511
512         if (lcore_id < LTHREAD_MAX_LCORES)
513                 res = schedcore[lcore_id];
514
515         return res;
516 }
517
518 /*
519  * migrate the current thread to another scheduler running
520  * on the specified lcore.
521  */
522 int lthread_set_affinity(unsigned lcoreid)
523 {
524         struct lthread *lt = THIS_LTHREAD;
525         struct lthread_sched *dest_sched;
526
527         if (unlikely(lcoreid >= LTHREAD_MAX_LCORES))
528                 return POSIX_ERRNO(EINVAL);
529
530         DIAG_EVENT(lt, LT_DIAG_LTHREAD_AFFINITY, lcoreid, 0);
531
532         dest_sched = schedcore[lcoreid];
533
534         if (unlikely(dest_sched == NULL))
535                 return POSIX_ERRNO(EINVAL);
536
537         if (likely(dest_sched != THIS_SCHED)) {
538                 lt->sched = dest_sched;
539                 lt->pending_wr_queue = dest_sched->pready;
540                 _affinitize();
541                 return 0;
542         }
543         return 0;
544 }