From 1c6e5cf6905b2bd951982da9b712b13e2cb21295 Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Mon, 5 Feb 2018 17:27:57 +0100 Subject: [PATCH] acl-plugin: an elog-based tracing implementation for troubleshooting the conn cleaner threads interactions This replaces some of the early-stage commented-out printf()s with an elog-based debug collector. It is aimed to be "better than nothing" initial implementation to be available in the field. It will be refined/updated based on use. This initial code is focused on the main/worker threads interactions, hence uses just the worker tracks. This code adds a developer debug CLI "set acl-plugin session table event-trace 1", which allows to gather the events pertaining to connection cleaning. The CLI is deliberately not part of the online help, as the express declaration that the semantics/trace levels, etc. are subject to change without notice. Change-Id: I3536309f737b73e50639cd5780822dcde667fc2c Signed-off-by: Andrew Yourtchenko --- src/plugins/acl/acl.c | 15 ++++++ src/plugins/acl/acl.h | 2 + src/plugins/acl/fa_node.c | 51 +++++++----------- src/plugins/acl/fa_node.h | 117 ++++++++++++++++++++++++++++++++++++++++++ test/test_acl_plugin_conns.py | 3 ++ 5 files changed, 156 insertions(+), 32 deletions(-) diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c index 8e1880a1d6b..1f1517c333d 100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -2522,6 +2522,21 @@ acl_set_aclplugin_fn (vlib_main_t * vm, goto done; } } + if (unformat (input, "event-trace")) + { + if (!unformat (input, "%u", &val)) + { + error = clib_error_return (0, + "expecting trace level, got `%U`", + format_unformat_error, input); + goto done; + } + else + { + am->trace_sessions = val; + goto done; + } + } goto done; } if (unformat (input, "timeout")) diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h index d38c12990de..41e216f089b 100644 --- a/src/plugins/acl/acl.h +++ b/src/plugins/acl/acl.h @@ -225,6 +225,8 @@ typedef struct { uword fa_conn_table_hash_memory_size; u64 fa_conn_table_max_entries; + int trace_sessions; + /* * If the cleaner has to delete more than this number * of connections, it halves the sleep time. diff --git a/src/plugins/acl/fa_node.c b/src/plugins/acl/fa_node.c index 3ac45c297ff..c8e3d2d3a3e 100644 --- a/src/plugins/acl/fa_node.c +++ b/src/plugins/acl/fa_node.c @@ -922,6 +922,7 @@ acl_fa_check_idle_sessions(acl_main_t *am, u16 thread_index, u64 now) && (acl_fa_conn_time_to_check(am, pw, now, thread_index, pw->fa_conn_list_head[tt]))) { fsid.session_index = pw->fa_conn_list_head[tt]; + elog_acl_maybe_trace_X2(am, "acl_fa_check_idle_sessions: expire session %d on thread %d", "i4i4", (u32)fsid.session_index, (u32)thread_index); vec_add1(pw->expired, fsid.session_index); acl_fa_conn_list_delete_session(am, fsid); } @@ -1410,9 +1411,7 @@ acl_fa_worker_conn_cleaner_process(vlib_main_t * vm, u16 thread_index = os_get_thread_index (); acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; int num_expired; -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("\nacl_fa_worker_conn_cleaner: thread index %d now %lu\n\n", thread_index, now); -#endif + elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner interrupt: now %lu", "i8", now); /* allow another interrupt to be queued */ pw->interrupt_is_pending = 0; if (pw->clear_in_process) { @@ -1430,9 +1429,7 @@ acl_fa_worker_conn_cleaner_process(vlib_main_t * vm, */ if ((pw->pending_clear_sw_if_index_bitmap == 0) || (pw->serviced_sw_if_index_bitmap == 0)) { -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("WORKER-CLEAR: someone tried to call clear, but one of the bitmaps are empty"); -#endif + elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner: now %lu, someone tried to call clear but one of the bitmaps are empty", "i8", now); clib_bitmap_zero(pw->pending_clear_sw_if_index_bitmap); } else { #ifdef FA_NODE_VERBOSE_DEBUG @@ -1446,9 +1443,7 @@ acl_fa_worker_conn_cleaner_process(vlib_main_t * vm, if (clib_bitmap_is_zero(pw->pending_clear_sw_if_index_bitmap)) { /* if the cross-section is a zero vector, no need to do anything. */ -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("WORKER: clearing done - nothing to do"); -#endif + elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner: now %lu, clearing done, nothing to do", "i8", now); pw->clear_in_process = 0; } else { #ifdef FA_NODE_VERBOSE_DEBUG @@ -1456,6 +1451,7 @@ acl_fa_worker_conn_cleaner_process(vlib_main_t * vm, format_bitmap_hex, pw->pending_clear_sw_if_index_bitmap, format_bitmap_hex, pw->serviced_sw_if_index_bitmap); #endif + elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner: swiping until %lu", "i8", now); /* swipe through the connection lists until enqueue timestamps become above "now" */ pw->swipe_end_time = now; } @@ -1463,18 +1459,15 @@ acl_fa_worker_conn_cleaner_process(vlib_main_t * vm, } num_expired = acl_fa_check_idle_sessions(am, thread_index, now); // clib_warning("WORKER-CLEAR: checked %d sessions (clear_in_progress: %d)", num_expired, pw->clear_in_process); + elog_acl_maybe_trace_X2(am, "acl_fa_worker_conn_cleaner: checked %d sessions (clear_in_process: %d)", "i4i4", (u32)num_expired, (u32)pw->clear_in_process); if (pw->clear_in_process) { if (0 == num_expired) { /* we were clearing but we could not process any more connections. time to stop. */ clib_bitmap_zero(pw->pending_clear_sw_if_index_bitmap); pw->clear_in_process = 0; -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("WORKER: clearing done, all done"); -#endif + elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner: now %lu, clearing done - all done", "i8", now); } else { -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("WORKER-CLEAR: more work to do, raising interrupt"); -#endif + elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner: now %lu, more work to do - requesting interrupt", "i8", now); /* should continue clearing.. So could they please sent an interrupt again? */ pw->interrupt_is_needed = 1; } @@ -1492,6 +1485,7 @@ acl_fa_worker_conn_cleaner_process(vlib_main_t * vm, pw->interrupt_is_needed = 0; pw->interrupt_is_unwanted = 0; } + elog_acl_maybe_trace_X3(am, "acl_fa_worker_conn_cleaner: now %lu, interrupt needed: %u, interrupt unwanted: %u", "i8i4i4", now, ((u32)pw->interrupt_is_needed), ((u32)pw->interrupt_is_unwanted)); } pw->interrupt_generation = am->fa_interrupt_generation; return 0; @@ -1505,6 +1499,7 @@ send_one_worker_interrupt (vlib_main_t * vm, acl_main_t *am, int thread_index) pw->interrupt_is_pending = 1; vlib_node_set_interrupt_pending (vlib_mains[thread_index], acl_fa_worker_session_cleaner_process_node.index); + elog_acl_maybe_trace_X1(am, "send_one_worker_interrupt: send interrupt to worker %d", "i4", ((u32)thread_index)); /* if the interrupt was requested, mark that done. */ /* pw->interrupt_is_needed = 0; */ } @@ -1562,9 +1557,8 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, for(tt = 0; tt < vec_len(pw->fa_conn_list_head); tt++) { u64 head_expiry = acl_fa_get_list_head_expiry_time(am, pw, now, ti, tt); if ((head_expiry < next_expire) && !pw->interrupt_is_pending) { -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("Head expiry: %lu, now: %lu, next_expire: %lu (worker: %d, tt: %d)", head_expiry, now, next_expire, ti, tt); -#endif + elog_acl_maybe_trace_X3(am, "acl_fa_session_cleaner_process: now %lu, worker: %d tt: %d", "i8i2i2", now, ti, tt); + elog_acl_maybe_trace_X2(am, "acl_fa_session_cleaner_process: head expiry: %lu, is earlier than curr next expire: %lu", "i8i8", head_expiry, next_expire); next_expire = head_expiry; } if (~0 != pw->fa_conn_list_head[tt]) { @@ -1577,6 +1571,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, if (!has_pending_conns && (0 == am->fa_total_enabled_count)) { am->fa_cleaner_cnt_wait_without_timeout++; + elog_acl_maybe_trace_X1(am, "acl_conn_cleaner: now %lu entering wait without timeout", "i8", now); (void) vlib_process_wait_for_event (vm); event_type = vlib_process_get_events (vm, &event_data); } @@ -1591,6 +1586,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, else { am->fa_cleaner_cnt_wait_with_timeout++; + elog_acl_maybe_trace_X2(am, "acl_conn_cleaner: now %lu entering wait with timeout %.6f sec", "i8f8", now, timeout); (void) vlib_process_wait_for_event_or_clock (vm, timeout); event_type = vlib_process_get_events (vm, &event_data); } @@ -1609,17 +1605,12 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, uword *clear_sw_if_index_bitmap = 0; uword *sw_if_index0; int clear_all = 0; -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX received"); -#endif + now = clib_cpu_time_now (); + elog_acl_maybe_trace_X1(am, "acl_fa_session_cleaner_process: now %lu, received ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX", "i8", now); vec_foreach (sw_if_index0, event_data) { am->fa_cleaner_cnt_delete_by_sw_index++; -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning - ("ACL_FA_NODE_CLEAN: ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX: %d", - *sw_if_index0); -#endif + elog_acl_maybe_trace_X1(am, "acl_fa_session_cleaner_process: ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX %d", "i4", *sw_if_index0); if (*sw_if_index0 == ~0) { clear_all = 1; @@ -1639,9 +1630,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, CLIB_MEMORY_BARRIER (); while (pw0->clear_in_process) { CLIB_MEMORY_BARRIER (); -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("ACL_FA_NODE_CLEAN: waiting previous cleaning cycle to finish on %d...", pw0 - am->per_worker_data); -#endif + elog_acl_maybe_trace_X1(am, "ACL_FA_NODE_CLEAN: waiting previous cleaning cycle to finish on %d", "i4", (u32)(pw0 - am->per_worker_data)); vlib_process_suspend(vm, 0.0001); if (pw0->interrupt_is_needed) { send_one_worker_interrupt(vm, am, (pw0 - am->per_worker_data)); @@ -1673,9 +1662,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, CLIB_MEMORY_BARRIER (); while (pw0->clear_in_process) { CLIB_MEMORY_BARRIER (); -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning("ACL_FA_NODE_CLEAN: waiting for my cleaning cycle to finish on %d...", pw0 - am->per_worker_data); -#endif + elog_acl_maybe_trace_X1(am, "ACL_FA_NODE_CLEAN: waiting for my cleaning cycle to finish on %d", "i4", (u32)(pw0 - am->per_worker_data)); vlib_process_suspend(vm, 0.0001); if (pw0->interrupt_is_needed) { send_one_worker_interrupt(vm, am, (pw0 - am->per_worker_data)); diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h index 4f2d39b40e4..7ef558e17ea 100644 --- a/src/plugins/acl/fa_node.h +++ b/src/plugins/acl/fa_node.h @@ -172,4 +172,121 @@ void show_fa_sessions_hash(vlib_main_t * vm, u32 verbose); u8 *format_acl_plugin_5tuple (u8 * s, va_list * args); +/* use like: elog_acl_maybe_trace_X1(am, "foobar: %d", "i4", int32_value); */ + +#define elog_acl_maybe_trace_X1(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1) \ +do { \ + if (am->trace_sessions) { \ + CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1)]; } *static_check); \ + u16 thread_index = os_get_thread_index (); \ + vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \ + ELOG_TYPE_DECLARE (e) = \ + { \ + .format = "(%02d) " acl_elog_trace_format_label, \ + .format_args = "i2" acl_elog_trace_format_args, \ + }; \ + CLIB_PACKED(struct \ + { \ + u16 thread; \ + typeof(acl_elog_val1) val1; \ + }) *ed; \ + ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ + ed->thread = thread_index; \ + ed->val1 = acl_elog_val1; \ + } \ +} while (0) + + +/* use like: elog_acl_maybe_trace_X2(am, "foobar: %d some u64: %lu", "i4i8", int32_value, int64_value); */ + +#define elog_acl_maybe_trace_X2(am, acl_elog_trace_format_label, acl_elog_trace_format_args, \ + acl_elog_val1, acl_elog_val2) \ +do { \ + if (am->trace_sessions) { \ + CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2)]; } *static_check); \ + u16 thread_index = os_get_thread_index (); \ + vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \ + ELOG_TYPE_DECLARE (e) = \ + { \ + .format = "(%02d) " acl_elog_trace_format_label, \ + .format_args = "i2" acl_elog_trace_format_args, \ + }; \ + CLIB_PACKED(struct \ + { \ + u16 thread; \ + typeof(acl_elog_val1) val1; \ + typeof(acl_elog_val2) val2; \ + }) *ed; \ + ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ + ed->thread = thread_index; \ + ed->val1 = acl_elog_val1; \ + ed->val2 = acl_elog_val2; \ + } \ +} while (0) + + +/* use like: elog_acl_maybe_trace_X3(am, "foobar: %d some u64 %lu baz: %d", "i4i8i4", int32_value, u64_value, int_value); */ + +#define elog_acl_maybe_trace_X3(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1, \ + acl_elog_val2, acl_elog_val3) \ +do { \ + if (am->trace_sessions) { \ + CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2) \ + - sizeof(acl_elog_val3)]; } *static_check); \ + u16 thread_index = os_get_thread_index (); \ + vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \ + ELOG_TYPE_DECLARE (e) = \ + { \ + .format = "(%02d) " acl_elog_trace_format_label, \ + .format_args = "i2" acl_elog_trace_format_args, \ + }; \ + CLIB_PACKED(struct \ + { \ + u16 thread; \ + typeof(acl_elog_val1) val1; \ + typeof(acl_elog_val2) val2; \ + typeof(acl_elog_val3) val3; \ + }) *ed; \ + ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ + ed->thread = thread_index; \ + ed->val1 = acl_elog_val1; \ + ed->val2 = acl_elog_val2; \ + ed->val3 = acl_elog_val3; \ + } \ +} while (0) + + +/* use like: elog_acl_maybe_trace_X4(am, "foobar: %d some int %d baz: %d bar: %d", "i4i4i4i4", int32_value, int32_value2, int_value, int_value); */ + +#define elog_acl_maybe_trace_X4(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1, \ + acl_elog_val2, acl_elog_val3, acl_elog_val4) \ +do { \ + if (am->trace_sessions) { \ + CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2) \ + - sizeof(acl_elog_val3) -sizeof(acl_elog_val4)]; } *static_check); \ + u16 thread_index = os_get_thread_index (); \ + vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \ + ELOG_TYPE_DECLARE (e) = \ + { \ + .format = "(%02d) " acl_elog_trace_format_label, \ + .format_args = "i2" acl_elog_trace_format_args, \ + }; \ + CLIB_PACKED(struct \ + { \ + u16 thread; \ + typeof(acl_elog_val1) val1; \ + typeof(acl_elog_val2) val2; \ + typeof(acl_elog_val3) val3; \ + typeof(acl_elog_val4) val4; \ + }) *ed; \ + ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \ + ed->thread = thread_index; \ + ed->val1 = acl_elog_val1; \ + ed->val2 = acl_elog_val2; \ + ed->val3 = acl_elog_val3; \ + ed->val4 = acl_elog_val4; \ + } \ +} while (0) + + #endif diff --git a/test/test_acl_plugin_conns.py b/test/test_acl_plugin_conns.py index 43e8b69f930..ce02a1d3a22 100644 --- a/test/test_acl_plugin_conns.py +++ b/test/test_acl_plugin_conns.py @@ -133,6 +133,8 @@ class ACLPluginConnTestCase(VppTestCase): super(ACLPluginConnTestCase, self).setUpClass() # create pg0 and pg1 self.create_pg_interfaces(range(2)) + cmd = "set acl-plugin session table event-trace 1" + self.logger.info(self.vapi.cli(cmd)) for i in self.pg_interfaces: i.admin_up() i.config_ip4() @@ -151,6 +153,7 @@ class ACLPluginConnTestCase(VppTestCase): self.logger.info(self.vapi.cli("show acl-plugin acl")) self.logger.info(self.vapi.cli("show acl-plugin interface")) self.logger.info(self.vapi.cli("show acl-plugin tables")) + self.logger.info(self.vapi.cli("show event-logger all")) def run_basic_conn_test(self, af, acl_side): """ Basic conn timeout test """ -- 2.16.6