Add config option to use dlmalloc instead of mheap
[vpp.git] / src / vlib / threads.c
index 5005646..c53c5d6 100644 (file)
@@ -348,9 +348,13 @@ vlib_thread_init (vlib_main_t * vm)
     }
 
   /* grab cpu for main thread */
-  if (!tm->main_lcore)
+  if (tm->main_lcore == ~0)
     {
-      tm->main_lcore = clib_bitmap_first_set (avail_cpu);
+      /* if main-lcore is not set, we try to use lcore 1 */
+      if (clib_bitmap_get (avail_cpu, 1))
+       tm->main_lcore = 1;
+      else
+       tm->main_lcore = clib_bitmap_first_set (avail_cpu);
       if (tm->main_lcore == (u8) ~ 0)
        return clib_error_return (0, "no available cpus to be used for the"
                                  " main thread");
@@ -723,7 +727,6 @@ start_workers (vlib_main_t * vm)
   u32 n_vlib_mains = tm->n_vlib_mains;
   u32 worker_thread_index;
   u8 *main_heap = clib_mem_get_per_cpu_heap ();
-  mheap_t *main_heap_header = mheap_header (main_heap);
 
   vec_reset_length (vlib_worker_threads);
 
@@ -738,12 +741,6 @@ start_workers (vlib_main_t * vm)
       vlib_set_thread_name ((char *) w->name);
     }
 
-  /*
-   * Truth of the matter: we always use at least two
-   * threads. So, make the main heap thread-safe
-   * and make the event log thread-safe.
-   */
-  main_heap_header->flags |= MHEAP_FLAG_THREAD_SAFE;
   vm->elog_main.lock =
     clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
   vm->elog_main.lock[0] = 0;
@@ -797,9 +794,17 @@ start_workers (vlib_main_t * vm)
              vlib_node_t *n;
 
              vec_add2 (vlib_worker_threads, w, 1);
+             /* Currently unused, may not really work */
              if (tr->mheap_size)
-               w->thread_mheap =
-                 mheap_alloc (0 /* use VM */ , tr->mheap_size);
+               {
+#if USE_DLMALLOC == 0
+                 w->thread_mheap =
+                   mheap_alloc (0 /* use VM */ , tr->mheap_size);
+#else
+                 w->thread_mheap = create_mspace (tr->mheap_size,
+                                                  0 /* unlocked */ );
+#endif
+               }
              else
                w->thread_mheap = main_heap;
 
@@ -821,12 +826,14 @@ start_workers (vlib_main_t * vm)
              /* Fork vlib_global_main et al. Look for bugs here */
              oldheap = clib_mem_set_heap (w->thread_mheap);
 
-             vm_clone = clib_mem_alloc (sizeof (*vm_clone));
+             vm_clone = clib_mem_alloc_aligned (sizeof (*vm_clone),
+                                                CLIB_CACHE_LINE_BYTES);
              clib_memcpy (vm_clone, vlib_mains[0], sizeof (*vm_clone));
 
              vm_clone->thread_index = worker_thread_index;
              vm_clone->heap_base = w->thread_mheap;
-             vm_clone->mbuf_alloc_list = 0;
+             vm_clone->heap_aligned_base = (void *)
+               (((uword) w->thread_mheap) & ~(VLIB_FRAME_ALIGN - 1));
              vm_clone->init_functions_called =
                hash_create (0, /* value bytes */ 0);
              vm_clone->pending_rpc_requests = 0;
@@ -838,7 +845,8 @@ start_workers (vlib_main_t * vm)
              nm = &vlib_mains[0]->node_main;
              nm_clone = &vm_clone->node_main;
              /* fork next frames array, preserving node runtime indices */
-             nm_clone->next_frames = vec_dup (nm->next_frames);
+             nm_clone->next_frames = vec_dup_aligned (nm->next_frames,
+                                                      CLIB_CACHE_LINE_BYTES);
              for (j = 0; j < vec_len (nm_clone->next_frames); j++)
                {
                  vlib_next_frame_t *nf = &nm_clone->next_frames[j];
@@ -875,7 +883,8 @@ start_workers (vlib_main_t * vm)
                  n++;
                }
              nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
-               vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
+               vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
+                                CLIB_CACHE_LINE_BYTES);
              vec_foreach (rt,
                           nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
              {
@@ -889,7 +898,8 @@ start_workers (vlib_main_t * vm)
              }
 
              nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
-               vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]);
+               vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
+                                CLIB_CACHE_LINE_BYTES);
              vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
              {
                vlib_node_t *n = vlib_get_node (vm, rt->node_index);
@@ -901,7 +911,8 @@ start_workers (vlib_main_t * vm)
                                         n->runtime_data_bytes));
              }
 
-             nm_clone->processes = vec_dup (nm->processes);
+             nm_clone->processes = vec_dup_aligned (nm->processes,
+                                                    CLIB_CACHE_LINE_BYTES);
 
              /* zap the (per worker) frame freelists, etc */
              nm_clone->frame_sizes = 0;
@@ -912,10 +923,11 @@ start_workers (vlib_main_t * vm)
              clib_mem_set_heap (oldheap);
              vec_add1_aligned (vlib_mains, vm_clone, CLIB_CACHE_LINE_BYTES);
 
-             vm_clone->error_main.counters =
-               vec_dup (vlib_mains[0]->error_main.counters);
-             vm_clone->error_main.counters_last_clear =
-               vec_dup (vlib_mains[0]->error_main.counters_last_clear);
+             vm_clone->error_main.counters = vec_dup_aligned
+               (vlib_mains[0]->error_main.counters, CLIB_CACHE_LINE_BYTES);
+             vm_clone->error_main.counters_last_clear = vec_dup_aligned
+               (vlib_mains[0]->error_main.counters_last_clear,
+                CLIB_CACHE_LINE_BYTES);
 
              /* Fork the vlib_buffer_main_t free lists, etc. */
              orig_freelist_pool = vm_clone->buffer_free_list_pool;
@@ -950,8 +962,15 @@ start_workers (vlib_main_t * vm)
            {
              vec_add2 (vlib_worker_threads, w, 1);
              if (tr->mheap_size)
-               w->thread_mheap =
-                 mheap_alloc (0 /* use VM */ , tr->mheap_size);
+               {
+#if USE_DLMALLOC == 0
+                 w->thread_mheap =
+                   mheap_alloc (0 /* use VM */ , tr->mheap_size);
+#else
+                 w->thread_mheap =
+                   create_mspace (tr->mheap_size, 0 /* locked */ );
+#endif
+               }
              else
                w->thread_mheap = main_heap;
              w->thread_stack =
@@ -1093,7 +1112,8 @@ vlib_worker_thread_node_refork (void)
 
   nm_clone = &vm_clone->node_main;
   vec_free (nm_clone->next_frames);
-  nm_clone->next_frames = vec_dup (nm->next_frames);
+  nm_clone->next_frames = vec_dup_aligned (nm->next_frames,
+                                          CLIB_CACHE_LINE_BYTES);
 
   for (j = 0; j < vec_len (nm_clone->next_frames); j++)
     {
@@ -1161,7 +1181,8 @@ vlib_worker_thread_node_refork (void)
   /* re-clone internal nodes */
   old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL];
   nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
-    vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
+    vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
+                    CLIB_CACHE_LINE_BYTES);
 
   vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
   {
@@ -1187,7 +1208,8 @@ vlib_worker_thread_node_refork (void)
   /* re-clone input nodes */
   old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT];
   nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
-    vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]);
+    vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
+                    CLIB_CACHE_LINE_BYTES);
 
   vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
   {
@@ -1210,7 +1232,8 @@ vlib_worker_thread_node_refork (void)
 
   vec_free (old_rt);
 
-  nm_clone->processes = vec_dup (nm->processes);
+  nm_clone->processes = vec_dup_aligned (nm->processes,
+                                        CLIB_CACHE_LINE_BYTES);
 }
 
 void
@@ -1253,6 +1276,7 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input)
   tm->n_thread_stacks = 1;     /* account for main thread */
   tm->sched_policy = ~0;
   tm->sched_priority = ~0;
+  tm->main_lcore = ~0;
 
   tr = tm->next;
 
@@ -1483,6 +1507,18 @@ vlib_worker_thread_barrier_sync_int (vlib_main_t * vm)
 
 }
 
+void vlib_stat_segment_lock (void) __attribute__ ((weak));
+void
+vlib_stat_segment_lock (void)
+{
+}
+
+void vlib_stat_segment_unlock (void) __attribute__ ((weak));
+void
+vlib_stat_segment_unlock (void)
+{
+}
+
 void
 vlib_worker_thread_barrier_release (vlib_main_t * vm)
 {
@@ -1512,6 +1548,13 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
   /* Update (all) node runtimes before releasing the barrier, if needed */
   if (vm->need_vlib_worker_thread_node_runtime_update)
     {
+      /*
+       * Lock stat segment here, so we's safe when
+       * rebuilding the stat segment node clones from the
+       * stat thread...
+       */
+      vlib_stat_segment_lock ();
+
       /* Do stats elements on main thread */
       worker_thread_node_runtime_update_internal ();
       vm->need_vlib_worker_thread_node_runtime_update = 0;
@@ -1553,6 +1596,7 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
              os_panic ();
            }
        }
+      vlib_stat_segment_unlock ();
     }
 
   t_closed_total = now - vm->barrier_epoch;