vpp_lite: add cpu pinning support (VPP-467)

author Pavel Kotucek <pkotucek@cisco.com>

Fri, 7 Oct 2016 06:37:28 +0000 (08:37 +0200)

committer Damjan Marion <dmarion.lists@gmail.com>

Thu, 13 Oct 2016 08:13:37 +0000 (08:13 +0000)
author Pavel Kotucek <pkotucek@cisco.com>
Fri, 7 Oct 2016 06:37:28 +0000 (08:37 +0200)
committer Damjan Marion <dmarion.lists@gmail.com>
Thu, 13 Oct 2016 08:13:37 +0000 (08:13 +0000)
diff --git a/vlib/vlib/node_cli.c b/vlib/vlib/node_cli.c

index af9b47d..05d0f0b 100644 (file)
--- a/vlib/vlib/node_cli.c
+++ b/vlib/vlib/node_cli.c
@@ -337,9 +337,9 @@ show_node_runtime (vlib_main_t * vm,
               if (j > 0)
                 vlib_cli_output (vm, "---------------");
  
-             if (w->dpdk_lcore_id > -1)
+             if (w->lcore_id > -1)
                 vlib_cli_output (vm, "Thread %d %s (lcore %u)", j, w->name,
-                                w->dpdk_lcore_id);
+                                w->lcore_id);
               else
                 vlib_cli_output (vm, "Thread %d %s", j, w->name);
             }
diff --git a/vlib/vlib/threads.c b/vlib/vlib/threads.c

index e371699..70505b0 100644 (file)
--- a/vlib/vlib/threads.c
+++ b/vlib/vlib/threads.c
@@ -211,8 +211,9 @@ vlib_thread_init (vlib_main_t * vm)
    w = vlib_worker_threads;
    w->thread_mheap = clib_mem_get_heap ();
    w->thread_stack = vlib_thread_stacks[0];
-  w->dpdk_lcore_id = -1;
+  w->lcore_id = tm->main_lcore;
    w->lwp = syscall (SYS_gettid);
+  w->thread_id = pthread_self ();
    tm->n_vlib_mains = 1;
  
    if (tm->sched_policy != ~0)
@@ -510,15 +511,7 @@ vlib_worker_thread_bootstrap_fn (void *arg)
    vlib_worker_thread_t *w = arg;
  
    w->lwp = syscall (SYS_gettid);
-  w->dpdk_lcore_id = -1;
-#if DPDK==1
-  if (w->registration && !w->registration->use_pthreads && rte_socket_id)      /* do we really have dpdk linked */
-    {
-      unsigned lcore = rte_lcore_id ();
-      lcore = lcore < RTE_MAX_LCORE ? lcore : -1;
-      w->dpdk_lcore_id = lcore;
-    }
-#endif
+  w->thread_id = pthread_self ();
  
    rv = (void *) clib_calljmp
      ((uword (*)(uword)) w->thread_function,
@@ -532,6 +525,7 @@ vlib_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id)
  {
    void *(*fp_arg) (void *) = fp;
  
+  w->lcore_id = lcore_id;
  #if DPDK==1
    if (!w->registration->use_pthreads)
      if (rte_eal_remote_launch) /* do we have dpdk linked */
@@ -584,15 +578,6 @@ start_workers (vlib_main_t * vm)
        vlib_set_thread_name ((char *) w->name);
      }
  
-#if DPDK==1
-  w->dpdk_lcore_id = -1;
-  if (rte_socket_id)           /* do we really have dpdk linked */
-    {
-      unsigned lcore = rte_lcore_id ();
-      w->dpdk_lcore_id = lcore < RTE_MAX_LCORE ? lcore : -1;;
-    }
-#endif
-
    /*
     * Truth of the matter: we always use at least two
     * threads. So, make the main heap thread-safe
diff --git a/vlib/vlib/threads.h b/vlib/vlib/threads.h

index 589d1f3..e65794c 100644 (file)
--- a/vlib/vlib/threads.h
+++ b/vlib/vlib/threads.h
@@ -105,7 +105,8 @@ typedef struct
    u64 barrier_sync_count;
  
    long lwp;
-  int dpdk_lcore_id;
+  int lcore_id;
+  pthread_t thread_id;
  } vlib_worker_thread_t;
  
  vlib_worker_thread_t *vlib_worker_threads;
diff --git a/vlib/vlib/threads_cli.c b/vlib/vlib/threads_cli.c

index e788b04..631fe0c 100644 (file)
--- a/vlib/vlib/threads_cli.c
+++ b/vlib/vlib/threads_cli.c
@@ -12,12 +12,13 @@
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
+#define _GNU_SOURCE
  
  #include <vppinfra/format.h>
  #include <vlib/vlib.h>
  
  #include <vlib/threads.h>
-#include <linux/sched.h>
+#include <vlib/unix/unix.h>
  
  static u8 *
  format_sched_policy_and_priority (u8 * s, va_list * args)
@@ -62,15 +63,52 @@ show_threads_fn (vlib_main_t * vm,
  
        line = format (line, "%-25U", format_sched_policy_and_priority, w->lwp);
  
-#if DPDK==1
-      int lcore = w->dpdk_lcore_id;
-      if (lcore > -1)
+      int lcore = -1;
+      cpu_set_t cpuset;
+      CPU_ZERO (&cpuset);
+      int ret = -1;
+
+      ret =
+       pthread_getaffinity_np (w->thread_id, sizeof (cpu_set_t), &cpuset);
+      if (!ret)
+       {
+         int c;
+         for (c = 0; c < CPU_SETSIZE; c++)
+           if (CPU_ISSET (c, &cpuset))
+             {
+               if (lcore > -1)
+                 {
+                   lcore = -2;
+                   break;
+                 }
+               lcore = c;
+             }
+       }
+      else
         {
-         line = format (line, "%-7u%-7u%-7u",
-                        lcore,
-                        lcore_config[lcore].core_id,
-                        lcore_config[lcore].socket_id);
+         lcore = w->lcore_id;
+       }
  
+      if (lcore > -1)
+       {
+         const char *sys_cpu_path = "/sys/devices/system/cpu/cpu";
+         int socket_id = -1;
+         int core_id = -1;
+         u8 *p = 0;
+
+         p = format (p, "%s%u/topology/core_id%c", sys_cpu_path, lcore, 0);
+         vlib_sysfs_read ((char *) p, "%d", &core_id);
+
+         vec_reset_length (p);
+         p =
+           format (p,
+                   "%s%u/topology/physical_package_id%c",
+                   sys_cpu_path, lcore, 0);
+         vlib_sysfs_read ((char *) p, "%d", &socket_id);
+         vec_free (p);
+
+         line = format (line, "%-7u%-7u%-7u%", lcore, core_id, socket_id);
+#if DPDK==1
           switch (lcore_config[lcore].state)
             {
             case WAIT:
@@ -85,8 +123,15 @@ show_threads_fn (vlib_main_t * vm,
             default:
               line = format (line, "unknown");
             }
-       }
  #endif
+       }
+      else
+       {
+         line =
+           format (line, "%-7s%-7s%-7s%", (lcore == -2) ? "M" : "n/a", "n/a",
+                   "n/a");
+       }
+
        vlib_cli_output (vm, "%v", line);
        vec_free (line);
      }
diff --git a/vnet/vnet/devices/dpdk/cli.c b/vnet/vnet/devices/dpdk/cli.c

index 7941f9e..2683030 100644 (file)
--- a/vnet/vnet/devices/dpdk/cli.c
+++ b/vnet/vnet/devices/dpdk/cli.c
@@ -757,7 +757,7 @@ show_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input,
        if (vec_len (dm->devices_by_cpu[cpu]))
         vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu,
                          vlib_worker_threads[cpu].name,
-                        vlib_worker_threads[cpu].dpdk_lcore_id);
+                        vlib_worker_threads[cpu].lcore_id);
  
        /* *INDENT-OFF* */
        vec_foreach(dq, dm->devices_by_cpu[cpu])
@@ -857,7 +857,7 @@ set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input,
                dq->queue_id = queue;
                dq->device = xd->device_index;
                xd->cpu_socket_id_by_queue[queue] =
-                rte_lcore_to_socket_id(vlib_worker_threads[cpu].dpdk_lcore_id);
+                rte_lcore_to_socket_id(vlib_worker_threads[cpu].lcore_id);
  
                vec_sort_with_function(dm->devices_by_cpu[i],
                                       dpdk_device_queue_sort);
@@ -907,7 +907,7 @@ show_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input,
        if (vec_len (dm->devices_by_hqos_cpu[cpu]))
         vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu,
                          vlib_worker_threads[cpu].name,
-                        vlib_worker_threads[cpu].dpdk_lcore_id);
+                        vlib_worker_threads[cpu].lcore_id);
  
        vec_foreach (dq, dm->devices_by_hqos_cpu[cpu])
        {
diff --git a/vnet/vnet/devices/dpdk/init.c b/vnet/vnet/devices/dpdk/init.c

index a5c056c..73edc4a 100644 (file)
--- a/vnet/vnet/devices/dpdk/init.c
+++ b/vnet/vnet/devices/dpdk/init.c
@@ -652,7 +652,7 @@ dpdk_lib_init (dpdk_main_t * dm)
           /* *INDENT-OFF* */
           clib_bitmap_foreach (i, devconf->workers, ({
             int cpu = dm->input_cpu_first_index + i;
-           unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id;
+           unsigned lcore = vlib_worker_threads[cpu].lcore_id;
             vec_validate(xd->cpu_socket_id_by_queue, q);
             xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore);
             vec_add2(dm->devices_by_cpu[cpu], dq, 1);
@@ -665,7 +665,7 @@ dpdk_lib_init (dpdk_main_t * dm)
         for (q = 0; q < xd->rx_q_used; q++)
           {
             int cpu = dm->input_cpu_first_index + next_cpu;
-           unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id;
+           unsigned lcore = vlib_worker_threads[cpu].lcore_id;
  
             /*
              * numa node for worker thread handling this queue
diff --git a/vnet/vnet/devices/dpdk/vhost_user.c b/vnet/vnet/devices/dpdk/vhost_user.c

index 9e53c96..46fae60 100644 (file)
--- a/vnet/vnet/devices/dpdk/vhost_user.c
+++ b/vnet/vnet/devices/dpdk/vhost_user.c
@@ -393,7 +393,7 @@ dpdk_create_vhost_user_if_internal (u32 * hw_if_index, u32 if_id, u8 * hwaddr)
      {
        int cpu = dm->input_cpu_first_index + (next_cpu % dm->input_cpu_count);
  
-      unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id;
+      unsigned lcore = vlib_worker_threads[cpu].lcore_id;
        vec_validate (xd->cpu_socket_id_by_queue, q);
        xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id (lcore);
  
diff --git a/vpp/conf/startup.conf b/vpp/conf/startup.conf

index 84a0264..bce0020 100644 (file)
--- a/vpp/conf/startup.conf
+++ b/vpp/conf/startup.conf
@@ -14,13 +14,30 @@ api-segment {
  }
  
  cpu {
+       ## VPP has one main thread; optionally, the user can create worker thread(s).
+       ## The main thread and worker thread(s) can be pinned to CPU core(s) manually or automatically.
+
+       ## Manual pinning of thread(s) to CPU core(s)
+
         ## Set logical CPU core where main thread runs
         # main-core 1
  
         ## Set logical CPU core(s) where worker threads are running
-       ## by default there is no worker threads started
         # corelist-workers 2-3,18-19
  
+       ## Automatic pinning of thread(s) to CPU core(s)
+
+       ## Set the number of CPU core(s) to be skipped (1 ... N-1).
+       ## Skipped CPU core(s) are not used for pinning the main thread or worker thread(s).
+       ## The main thread is automatically pinned to the first available CPU core, and worker(s)
+       ## are pinned to the next free CPU core(s) after the core assigned to the main thread.
+       # skip-cores 4
+
+       ## Specify a number of workers to be created
+       ## Workers are pinned to N consecutive CPU cores, skipping the "skip-cores" CPU core(s)
+       ## and the main thread's CPU core
+       # workers 2
+
         ## Set scheduling policy and priority of main and worker threads
  
         ## Scheduling policy options are: other (SCHED_OTHER), batch (SCHED_BATCH)
author	Pavel Kotucek <pkotucek@cisco.com>
	Fri, 7 Oct 2016 06:37:28 +0000 (08:37 +0200)
committer	Damjan Marion <dmarion.lists@gmail.com>
	Thu, 13 Oct 2016 08:13:37 +0000 (08:13 +0000)
vlib/vlib/node_cli.c		patch \| blob \| history
vlib/vlib/threads.c		patch \| blob \| history
vlib/vlib/threads.h		patch \| blob \| history
vlib/vlib/threads_cli.c		patch \| blob \| history
vnet/vnet/devices/dpdk/cli.c		patch \| blob \| history
vnet/vnet/devices/dpdk/init.c		patch \| blob \| history
vnet/vnet/devices/dpdk/vhost_user.c		patch \| blob \| history
vpp/conf/startup.conf		patch \| blob \| history