DECLARE_CJ_GLOBAL_LOG;
-#define FRAME_QUEUE_NELTS 32
+#define FRAME_QUEUE_NELTS 64
u32
vl (void *p)
avail_cpu = clib_bitmap_dup (tm->cpu_core_bitmap);
- /* by default we skip core 0, unless it is the only one available */
- if (tm->skip_cores == ~0)
- tm->skip_cores = (clib_bitmap_count_set_bits (avail_cpu) < 2) ? 0 : 1;
-
/* skip cores */
for (i = 0; i < tm->skip_cores; i++)
{
}
/* grab cpu for main thread */
- if (!tm->main_lcore)
+ if (tm->main_lcore == ~0)
{
- tm->main_lcore = clib_bitmap_first_set (avail_cpu);
+ /* if main-lcore is not set, we try to use lcore 1 */
+ if (clib_bitmap_get (avail_cpu, 1))
+ tm->main_lcore = 1;
+ else
+ tm->main_lcore = clib_bitmap_first_set (avail_cpu);
if (tm->main_lcore == (u8) ~ 0)
return clib_error_return (0, "no available cpus to be used for the"
" main thread");
{
tm->cb.vlib_thread_set_lcore_cb (0, tm->main_lcore);
}
+ else
+ {
+ cpu_set_t cpuset;
+ CPU_ZERO (&cpuset);
+ CPU_SET (tm->main_lcore, &cpuset);
+ pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset);
+ }
/* as many threads as stacks... */
vec_validate_aligned (vlib_worker_threads, vec_len (vlib_thread_stacks) - 1,
u32 n_vlib_mains = tm->n_vlib_mains;
u32 worker_thread_index;
u8 *main_heap = clib_mem_get_per_cpu_heap ();
- mheap_t *main_heap_header = mheap_header (main_heap);
vec_reset_length (vlib_worker_threads);
vlib_set_thread_name ((char *) w->name);
}
- /*
- * Truth of the matter: we always use at least two
- * threads. So, make the main heap thread-safe
- * and make the event log thread-safe.
- */
- main_heap_header->flags |= MHEAP_FLAG_THREAD_SAFE;
vm->elog_main.lock =
clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
vm->elog_main.lock[0] = 0;
vlib_node_t *n;
vec_add2 (vlib_worker_threads, w, 1);
+ /* Currently unused, may not really work */
if (tr->mheap_size)
- w->thread_mheap =
- mheap_alloc (0 /* use VM */ , tr->mheap_size);
+ {
+#if USE_DLMALLOC == 0
+ w->thread_mheap =
+ mheap_alloc (0 /* use VM */ , tr->mheap_size);
+#else
+ w->thread_mheap = create_mspace (tr->mheap_size,
+ 0 /* unlocked */ );
+#endif
+ }
else
w->thread_mheap = main_heap;
vm_clone->thread_index = worker_thread_index;
vm_clone->heap_base = w->thread_mheap;
+ vm_clone->heap_aligned_base = (void *)
+ (((uword) w->thread_mheap) & ~(VLIB_FRAME_ALIGN - 1));
vm_clone->init_functions_called =
hash_create (0, /* value bytes */ 0);
vm_clone->pending_rpc_requests = 0;
{
vec_add2 (vlib_worker_threads, w, 1);
if (tr->mheap_size)
- w->thread_mheap =
- mheap_alloc (0 /* use VM */ , tr->mheap_size);
+ {
+#if USE_DLMALLOC == 0
+ w->thread_mheap =
+ mheap_alloc (0 /* use VM */ , tr->mheap_size);
+#else
+	  w->thread_mheap =
+	    create_mspace (tr->mheap_size, 0 /* unlocked */ );
+#endif
+ }
else
w->thread_mheap = main_heap;
w->thread_stack =
tm->n_thread_stacks = 1; /* account for main thread */
tm->sched_policy = ~0;
tm->sched_priority = ~0;
- tm->skip_cores = ~0;
+ tm->main_lcore = ~0;
tr = tm->next;
if (frame_queue_nelts == 0)
frame_queue_nelts = FRAME_QUEUE_NELTS;
+ ASSERT (frame_queue_nelts >= 8);
+
vec_add2 (tm->frame_queue_mains, fqm, 1);
fqm->node_index = node_index;
+ fqm->frame_queue_nelts = frame_queue_nelts;
+ fqm->queue_hi_thresh = frame_queue_nelts - 2;
vec_validate (fqm->vlib_frame_queues, tm->n_vlib_mains - 1);
+ vec_validate (fqm->per_thread_data, tm->n_vlib_mains - 1);
_vec_len (fqm->vlib_frame_queues) = 0;
for (i = 0; i < tm->n_vlib_mains; i++)
{
+ vlib_frame_queue_per_thread_data_t *ptd;
fq = vlib_frame_queue_alloc (frame_queue_nelts);
vec_add1 (fqm->vlib_frame_queues, fq);
+
+ ptd = vec_elt_at_index (fqm->per_thread_data, i);
+ vec_validate (ptd->handoff_queue_elt_by_thread_index,
+ tm->n_vlib_mains - 1);
+ vec_validate_init_empty (ptd->congested_handoff_queue_by_thread_index,
+ tm->n_vlib_mains - 1,
+ (vlib_frame_queue_t *) (~0));
}
return (fqm - tm->frame_queue_mains);