From bc20bdf2073accf423bef5bdba735c1c0ca0402c Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 17 Dec 2015 14:28:18 +0100 Subject: [PATCH] Store per-thread node error counters This fixes a performance issue in multi-threaded setups caused by multiple threads sharing the same cacheline Change-Id: I930ee44c17a83d4da350d15b4b97b8bb4633a9b0 Signed-off-by: Damjan Marion --- vlib/vlib/error.c | 62 ++++++++++++++++++++++++++++++++++++++++++----------- vlib/vlib/threads.c | 18 +++++++++++++--- 2 files changed, 65 insertions(+), 15 deletions(-) diff --git a/vlib/vlib/error.c b/vlib/vlib/error.c index 59b89cefc3a..b04ac916d8e 100644 --- a/vlib/vlib/error.c +++ b/vlib/vlib/error.c @@ -151,6 +151,8 @@ void vlib_register_errors (vlib_main_t * vm, vlib_node_t * n = vlib_get_node (vm, node_index); uword l; + ASSERT(os_get_cpu_number() == 0); + /* Free up any previous error strings. */ if (n->n_errors > 0) heap_dealloc (em->error_strings_heap, n->error_heap_handle); @@ -209,26 +211,59 @@ show_errors (vlib_main_t * vm, vlib_node_t * n; u32 code, i, ni; u64 c; + int index = 0; + int verbose = 0; + u64 * sums = 0; + + if (unformat (input, "verbose")) + verbose = 1; + + vec_validate(sums, vec_len(em->counters)); vlib_cli_output (vm, "%=16s%=40s%=20s", "Count", "Node", "Reason"); + foreach_vlib_main(({ + em = &this_vlib_main->error_main; + + if (verbose) + vlib_cli_output(vm, "Thread %u (%v):", index, vlib_worker_threads[index].name); + + for (ni = 0; ni < vec_len (this_vlib_main->node_main.nodes); ni++) + { + n = vlib_get_node (this_vlib_main, ni); + for (code = 0; code < n->n_errors; code++) + { + i = n->error_heap_index + code; + c = em->counters[i]; + if (i < vec_len (em->counters_last_clear)) + c -= em->counters_last_clear[i]; + sums[i] += c; + + if (c == 0 || !verbose) + continue; + + vlib_cli_output (vm, "%16Ld%=40v%s", c, n->name, em->error_strings_heap[i]); + } + } + index++; + })); + + if (verbose) + vlib_cli_output(vm, "Total:"); + for (ni = 0; ni < vec_len 
(vm->node_main.nodes); ni++) { n = vlib_get_node (vm, ni); for (code = 0; code < n->n_errors; code++) { i = n->error_heap_index + code; - c = em->counters[i]; - if (i < vec_len (em->counters_last_clear)) - c -= em->counters_last_clear[i]; - - if (c == 0) - continue; - - vlib_cli_output (vm, "%16Ld%=40v%s", c, n->name, em->error_strings_heap[i]); + if (sums[i]) + vlib_cli_output (vm, "%16Ld%=40v%s", sums[i], n->name, em->error_strings_heap[i]); } } + vec_free(sums); + return 0; } @@ -249,12 +284,15 @@ clear_error_counters (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - vlib_error_main_t * em = &vm->error_main; + vlib_error_main_t * em; u32 i; - vec_validate (em->counters_last_clear, vec_len (em->counters) - 1); - for (i = 0; i < vec_len (em->counters); i++) - em->counters_last_clear[i] = em->counters[i]; + foreach_vlib_main(({ + em = &this_vlib_main->error_main; + vec_validate (em->counters_last_clear, vec_len (em->counters) - 1); + for (i = 0; i < vec_len (em->counters); i++) + em->counters_last_clear[i] = em->counters[i]; + })); return 0; } diff --git a/vlib/vlib/threads.c b/vlib/vlib/threads.c index 4621f843dd5..405a4d644bc 100644 --- a/vlib/vlib/threads.c +++ b/vlib/vlib/threads.c @@ -70,7 +70,7 @@ os_get_cpu_number (void) /* Get any old stack address. */ sp = &sp; - n = ((uword)sp - (uword)vlib_thread_stacks[0]) + n = ((uword)sp - (uword)vlib_thread_stacks[0]) >> VLIB_LOG2_THREAD_STACK_SIZE; /* "processes" have their own stacks, and they always run in thread 0 */ @@ -675,6 +675,11 @@ static clib_error_t * start_workers (vlib_main_t * vm) unix_physmem_init (vm_clone, 0 /* physmem not required */); + vm_clone->error_main.counters = + vec_dup(vlib_mains[0]->error_main.counters); + vm_clone->error_main.counters_last_clear = + vec_dup(vlib_mains[0]->error_main.counters_last_clear); + /* Fork the vlib_buffer_main_t free lists, etc. 
*/ bm_clone = vec_dup (vm_clone->buffer_main); vm_clone->buffer_main = bm_clone; @@ -817,16 +822,23 @@ void vlib_worker_thread_node_runtime_update(void) vlib_node_runtime_t * rt; w = vlib_worker_threads + i; oldheap = clib_mem_set_heap (w->thread_mheap); - + vm_clone = vlib_mains[i]; /* Re-clone error heap */ + u64 * old_counters = vm_clone->error_main.counters; + u64 * old_counters_all_clear = vm_clone->error_main.counters_last_clear; memcpy (&vm_clone->error_main, &vm->error_main, sizeof (vm->error_main)); + j = vec_len(vm->error_main.counters) - 1; + vec_validate_aligned(old_counters, j, CLIB_CACHE_LINE_BYTES); + vec_validate_aligned(old_counters_all_clear, j, CLIB_CACHE_LINE_BYTES); + vm_clone->error_main.counters = old_counters; + vm_clone->error_main.counters_last_clear = old_counters_all_clear; nm_clone = &vm_clone->node_main; vec_free (nm_clone->next_frames); nm_clone->next_frames = vec_dup (nm->next_frames); - + for (j = 0; j < vec_len (nm_clone->next_frames); j++) { vlib_next_frame_t *nf = &nm_clone->next_frames[j]; -- 2.16.6