From 9876520f9ba746ed4d9923f392911c4f1888a105 Mon Sep 17 00:00:00 2001
From: Pavel Kotucek
Date: Fri, 7 Oct 2016 08:37:28 +0200
Subject: [PATCH] vpp_lite: add cpu pinning support (VPP-467)
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Proper cpu pinning in the vpp_lite platform, as in the normal vpp image.
Extended the "show threads" command to show proper information.
Changed handling of coreID and socketID for threads in the "show threads"
CLI: pthread_getaffinity is used instead of the info stored in DPDK.

Change-Id: Ic8299ec5e284472bb10a37a95fadeed57b6edae8
Signed-off-by: Pavel Kotucek
---
 vlib/vlib/node_cli.c                |  4 +--
 vlib/vlib/threads.c                 | 23 +++-----------
 vlib/vlib/threads.h                 |  3 +-
 vlib/vlib/threads_cli.c             | 63 +++++++++++++++++++++++++++++++------
 vnet/vnet/devices/dpdk/cli.c        |  6 ++--
 vnet/vnet/devices/dpdk/init.c       |  4 +--
 vnet/vnet/devices/dpdk/vhost_user.c |  2 +-
 vpp/conf/startup.conf               | 19 ++++++++++-
 8 files changed, 86 insertions(+), 38 deletions(-)

diff --git a/vlib/vlib/node_cli.c b/vlib/vlib/node_cli.c
index af9b47dd05d..05d0f0b5a95 100644
--- a/vlib/vlib/node_cli.c
+++ b/vlib/vlib/node_cli.c
@@ -337,9 +337,9 @@ show_node_runtime (vlib_main_t * vm,
 	  if (j > 0)
 	    vlib_cli_output (vm, "---------------");
 
-	  if (w->dpdk_lcore_id > -1)
+	  if (w->lcore_id > -1)
 	    vlib_cli_output (vm, "Thread %d %s (lcore %u)", j, w->name,
-			     w->dpdk_lcore_id);
+			     w->lcore_id);
 	  else
 	    vlib_cli_output (vm, "Thread %d %s", j, w->name);
 	}
diff --git a/vlib/vlib/threads.c b/vlib/vlib/threads.c
index e371699f1d4..70505b072ff 100644
--- a/vlib/vlib/threads.c
+++ b/vlib/vlib/threads.c
@@ -211,8 +211,9 @@ vlib_thread_init (vlib_main_t * vm)
   w = vlib_worker_threads;
   w->thread_mheap = clib_mem_get_heap ();
   w->thread_stack = vlib_thread_stacks[0];
-  w->dpdk_lcore_id = -1;
+  w->lcore_id = tm->main_lcore;
   w->lwp = syscall (SYS_gettid);
+  w->thread_id = pthread_self ();
   tm->n_vlib_mains = 1;
 
   if (tm->sched_policy != ~0)
@@ -510,15 +511,7 @@ vlib_worker_thread_bootstrap_fn (void *arg)
   vlib_worker_thread_t *w = arg;
 
   w->lwp = syscall (SYS_gettid);
-  w->dpdk_lcore_id = -1;
-#if DPDK==1
-  if (w->registration && !w->registration->use_pthreads && rte_socket_id)	/* do we really have dpdk linked */
-    {
-      unsigned lcore = rte_lcore_id ();
-      lcore = lcore < RTE_MAX_LCORE ? lcore : -1;
-      w->dpdk_lcore_id = lcore;
-    }
-#endif
+  w->thread_id = pthread_self ();
 
   rv = (void *) clib_calljmp
     ((uword (*)(uword)) w->thread_function,
@@ -532,6 +525,7 @@ vlib_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id)
 {
   void *(*fp_arg) (void *) = fp;
 
+  w->lcore_id = lcore_id;
 #if DPDK==1
   if (!w->registration->use_pthreads)
     if (rte_eal_remote_launch)	/* do we have dpdk linked */
@@ -584,15 +578,6 @@ start_workers (vlib_main_t * vm)
 	      vlib_set_thread_name ((char *) w->name);
 	    }
 
-#if DPDK==1
-	  w->dpdk_lcore_id = -1;
-	  if (rte_socket_id)	/* do we really have dpdk linked */
-	    {
-	      unsigned lcore = rte_lcore_id ();
-	      w->dpdk_lcore_id = lcore < RTE_MAX_LCORE ? lcore : -1;;
-	    }
-#endif
-
 	  /*
 	   * Truth of the matter: we always use at least two
 	   * threads. So, make the main heap thread-safe
diff --git a/vlib/vlib/threads.h b/vlib/vlib/threads.h
index 589d1f3a1ec..e65794cfb6b 100644
--- a/vlib/vlib/threads.h
+++ b/vlib/vlib/threads.h
@@ -105,7 +105,8 @@ typedef struct
   u64 barrier_sync_count;
 
   long lwp;
-  int dpdk_lcore_id;
+  int lcore_id;
+  pthread_t thread_id;
 } vlib_worker_thread_t;
 
 vlib_worker_thread_t *vlib_worker_threads;
diff --git a/vlib/vlib/threads_cli.c b/vlib/vlib/threads_cli.c
index e788b04b795..631fe0c6244 100644
--- a/vlib/vlib/threads_cli.c
+++ b/vlib/vlib/threads_cli.c
@@ -12,12 +12,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#define _GNU_SOURCE
 #include 
 
 #include 
 #include 
 
-#include 
+#include 
 
 static u8 *
 format_sched_policy_and_priority (u8 * s, va_list * args)
@@ -62,15 +63,52 @@ show_threads_fn (vlib_main_t * vm,
       line = format (line, "%-25U", format_sched_policy_and_priority,
 		     w->lwp);
 
-#if DPDK==1
-      int lcore = w->dpdk_lcore_id;
-      if (lcore > -1)
+      int lcore = -1;
+      cpu_set_t cpuset;
+      CPU_ZERO (&cpuset);
+      int ret = -1;
+
+      ret =
+	pthread_getaffinity_np (w->thread_id, sizeof (cpu_set_t), &cpuset);
+      if (!ret)
+	{
+	  int c;
+	  for (c = 0; c < CPU_SETSIZE; c++)
+	    if (CPU_ISSET (c, &cpuset))
+	      {
+		if (lcore > -1)
+		  {
+		    lcore = -2;
+		    break;
+		  }
+		lcore = c;
+	      }
+	}
+      else
 	{
-	  line = format (line, "%-7u%-7u%-7u",
-			 lcore,
-			 lcore_config[lcore].core_id,
-			 lcore_config[lcore].socket_id);
+	  lcore = w->lcore_id;
+	}
+      if (lcore > -1)
+	{
+	  const char *sys_cpu_path = "/sys/devices/system/cpu/cpu";
+	  int socket_id = -1;
+	  int core_id = -1;
+	  u8 *p = 0;
+
+	  p = format (p, "%s%u/topology/core_id%c", sys_cpu_path, lcore, 0);
+	  vlib_sysfs_read ((char *) p, "%d", &core_id);
+
+	  vec_reset_length (p);
+	  p =
+	    format (p,
+		    "%s%u/topology/physical_package_id%c",
+		    sys_cpu_path, lcore, 0);
+	  vlib_sysfs_read ((char *) p, "%d", &socket_id);
+	  vec_free (p);
+
+	  line = format (line, "%-7u%-7u%-7u", lcore, core_id, socket_id);
+
+#if DPDK==1
 	  switch (lcore_config[lcore].state)
 	    {
 	    case WAIT:
@@ -85,8 +123,15 @@ show_threads_fn (vlib_main_t * vm,
 	    default:
 	      line = format (line, "unknown");
 	    }
-	}
 #endif
+	}
+      else
+	{
+	  line =
+	    format (line, "%-7s%-7s%-7s", (lcore == -2) ? "M" : "n/a", "n/a",
+		    "n/a");
+	}
+
       vlib_cli_output (vm, "%v", line);
       vec_free (line);
     }
diff --git a/vnet/vnet/devices/dpdk/cli.c b/vnet/vnet/devices/dpdk/cli.c
index 7941f9e0e16..2683030658a 100644
--- a/vnet/vnet/devices/dpdk/cli.c
+++ b/vnet/vnet/devices/dpdk/cli.c
@@ -757,7 +757,7 @@ show_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input,
     if (vec_len (dm->devices_by_cpu[cpu]))
       vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu,
 		       vlib_worker_threads[cpu].name,
-		       vlib_worker_threads[cpu].dpdk_lcore_id);
+		       vlib_worker_threads[cpu].lcore_id);
 
     /* *INDENT-OFF* */
     vec_foreach(dq, dm->devices_by_cpu[cpu])
@@ -857,7 +857,7 @@ set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input,
 	  dq->queue_id = queue;
 	  dq->device = xd->device_index;
 	  xd->cpu_socket_id_by_queue[queue] =
-	    rte_lcore_to_socket_id(vlib_worker_threads[cpu].dpdk_lcore_id);
+	    rte_lcore_to_socket_id(vlib_worker_threads[cpu].lcore_id);
 
 	  vec_sort_with_function(dm->devices_by_cpu[i],
 				 dpdk_device_queue_sort);
@@ -907,7 +907,7 @@ show_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input,
     if (vec_len (dm->devices_by_hqos_cpu[cpu]))
       vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu,
 		       vlib_worker_threads[cpu].name,
-		       vlib_worker_threads[cpu].dpdk_lcore_id);
+		       vlib_worker_threads[cpu].lcore_id);
 
     vec_foreach (dq, dm->devices_by_hqos_cpu[cpu])
     {
diff --git a/vnet/vnet/devices/dpdk/init.c b/vnet/vnet/devices/dpdk/init.c
index a5c056c6a68..73edc4a97a5 100644
--- a/vnet/vnet/devices/dpdk/init.c
+++ b/vnet/vnet/devices/dpdk/init.c
@@ -652,7 +652,7 @@ dpdk_lib_init (dpdk_main_t * dm)
 	  /* *INDENT-OFF* */
 	  clib_bitmap_foreach (i, devconf->workers, ({
 	    int cpu = dm->input_cpu_first_index + i;
-	    unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id;
+	    unsigned lcore = vlib_worker_threads[cpu].lcore_id;
 	    vec_validate(xd->cpu_socket_id_by_queue, q);
 	    xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore);
 	    vec_add2(dm->devices_by_cpu[cpu], dq, 1);
@@ -665,7 +665,7 @@ dpdk_lib_init (dpdk_main_t * dm)
 	for (q = 0; q < xd->rx_q_used; q++)
 	  {
 	    int cpu = dm->input_cpu_first_index + next_cpu;
-	    unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id;
+	    unsigned lcore = vlib_worker_threads[cpu].lcore_id;
 
 	    /*
 	     * numa node for worker thread handling this queue
diff --git a/vnet/vnet/devices/dpdk/vhost_user.c b/vnet/vnet/devices/dpdk/vhost_user.c
index 9e53c96f599..46fae60dac7 100644
--- a/vnet/vnet/devices/dpdk/vhost_user.c
+++ b/vnet/vnet/devices/dpdk/vhost_user.c
@@ -393,7 +393,7 @@ dpdk_create_vhost_user_if_internal (u32 * hw_if_index, u32 if_id, u8 * hwaddr)
 	{
 	  int cpu = dm->input_cpu_first_index +
 	    (next_cpu % dm->input_cpu_count);
-	  unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id;
+	  unsigned lcore = vlib_worker_threads[cpu].lcore_id;
 
 	  vec_validate (xd->cpu_socket_id_by_queue, q);
 	  xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id (lcore);
diff --git a/vpp/conf/startup.conf b/vpp/conf/startup.conf
index 84a026474c9..bce002027bf 100644
--- a/vpp/conf/startup.conf
+++ b/vpp/conf/startup.conf
@@ -14,13 +14,30 @@ api-segment {
 }
 
 cpu {
+	## In VPP there is one main thread and optionally the user can create worker(s)
+	## The main thread and worker thread(s) can be pinned to CPU core(s) manually or automatically
+
+	## Manual pinning of thread(s) to CPU core(s)
+
 	## Set logical CPU core where main thread runs
 	# main-core 1
 
 	## Set logical CPU core(s) where worker threads are running
-	## by default there is no worker threads started
 	# corelist-workers 2-3,18-19
 
+	## Automatic pinning of thread(s) to CPU core(s)
+
+	## Set the number of CPU core(s) to be skipped (1 ... N-1)
+	## Skipped CPU core(s) are not used for pinning main thread and worker thread(s).
+	## The main thread is automatically pinned to the first available CPU core and worker(s)
+	## are pinned to next free CPU core(s) after core assigned to main thread
+	# skip-cores 4
+
+	## Specify a number of workers to be created
+	## Workers are pinned to N consecutive CPU cores while skipping "skip-cores" CPU core(s)
+	## and main thread's CPU core
+	# workers 2
+
 	## Set scheduling policy and priority of main and worker threads
 	## Scheduling policy options are: other (SCHED_OTHER), batch (SCHED_BATCH)
-- 
2.16.6
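For reference, the cpu { } options documented above combine roughly as sketched
below. The core numbers are illustrative only and depend on the host topology;
the manual options (main-core, corelist-workers) and the automatic options
(skip-cores, workers) are alternatives, so only one group would normally be
uncommented at a time.

cpu {
    ## manual pinning: main thread on core 1, workers on cores 2 and 3
    main-core 1
    corelist-workers 2-3

    ## automatic pinning (alternative to the two lines above): skip the
    ## first cores, pin the main thread to the next free core and create
    ## two workers on the cores after it
    # skip-cores 4
    # workers 2
}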
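The commit title, "add cpu pinning support", is about giving pthread-based
vpp_lite workers a real lcore assignment (w->lcore_id) instead of relying on
DPDK's rte_eal_remote_launch(). The snippet below is a generic, self-contained
illustration of that idea, not VPP's actual launch path: launch_pinned_worker()
and worker_fn() are made-up names, and the pinning is done with
pthread_setaffinity_np() on a freshly created pthread (build with -pthread).

/*
 * Generic illustration (not VPP's actual launch path): create a worker
 * pthread and pin it to a single logical CPU.
 */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static void *
worker_fn (void *arg)
{
  /* a real worker would loop here; return immediately for the example */
  return 0;
}

static int
launch_pinned_worker (pthread_t * thread, unsigned lcore_id)
{
  cpu_set_t cpuset;

  if (pthread_create (thread, 0, worker_fn, 0))
    return -1;

  /* restrict the new thread's affinity mask to the one requested CPU */
  CPU_ZERO (&cpuset);
  CPU_SET (lcore_id, &cpuset);
  return pthread_setaffinity_np (*thread, sizeof (cpu_set_t), &cpuset);
}

int
main (void)
{
  pthread_t t;

  if (launch_pinned_worker (&t, 2))	/* logical CPU 2 is an example value */
    {
      fprintf (stderr, "failed to launch or pin worker\n");
      return 1;
    }
  pthread_join (t, 0);
  return 0;
}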
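The core of the threads_cli.c change is how "show threads" now derives the
per-thread core and socket IDs: it queries the thread's affinity mask with
pthread_getaffinity_np(), and if the thread is pinned to exactly one logical
CPU it reads that CPU's core_id and physical_package_id from sysfs (a mask
covering several CPUs is shown as "M"). Below is a minimal standalone sketch
of the same technique using plain glibc; read_sysfs_int() is a made-up helper
standing in for vlib_sysfs_read(), and the program inspects its own (main)
thread rather than a VPP worker.

/*
 * Standalone sketch of the "show threads" core/socket discovery.
 */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

/* read a single integer from a sysfs file; -1 on any error */
static int
read_sysfs_int (const char *path)
{
  int value = -1;
  FILE *f = fopen (path, "r");
  if (!f)
    return -1;
  if (fscanf (f, "%d", &value) != 1)
    value = -1;
  fclose (f);
  return value;
}

int
main (void)
{
  cpu_set_t cpuset;
  int lcore = -1, c;

  CPU_ZERO (&cpuset);
  if (pthread_getaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset))
    return 1;

  /* same convention as the patch: -1 = none found, -2 = several CPUs set */
  for (c = 0; c < CPU_SETSIZE; c++)
    if (CPU_ISSET (c, &cpuset))
      {
	if (lcore > -1)
	  {
	    lcore = -2;
	    break;
	  }
	lcore = c;
      }

  if (lcore < 0)
    {
      /* "show threads" prints "M" for a multi-CPU affinity mask */
      printf ("lcore %s  core n/a  socket n/a\n", lcore == -2 ? "M" : "n/a");
      return 0;
    }

  char path[128];
  snprintf (path, sizeof (path),
	    "/sys/devices/system/cpu/cpu%d/topology/core_id", lcore);
  int core_id = read_sysfs_int (path);

  snprintf (path, sizeof (path),
	    "/sys/devices/system/cpu/cpu%d/topology/physical_package_id",
	    lcore);
  int socket_id = read_sysfs_int (path);

  printf ("lcore %d  core %d  socket %d\n", lcore, core_id, socket_id);
  return 0;
}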