From f988e696149f42828444c69762c036d9684b6bb0 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Mon, 27 Nov 2017 04:34:14 -0500 Subject: [PATCH] tcp: fix retransmissions under buffer shortage - add debugging scaffolding for simulating buffer shortage Change-Id: Ice519d74f9c4e4094c4586c548185135b7bb5f2d Signed-off-by: Florin Coras --- src/vnet/session/session_node.c | 3 +-- src/vnet/tcp/tcp.c | 7 ++++-- src/vnet/tcp/tcp.h | 12 +++++++++ src/vnet/tcp/tcp_debug.h | 34 +++++++++++++++++++++++++ src/vnet/tcp/tcp_input.c | 5 ++-- src/vnet/tcp/tcp_output.c | 56 ++++++++++++++++++++--------------------- src/vnet/util/trajectory.c | 6 ++++- 7 files changed, 88 insertions(+), 35 deletions(-) diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c index 78116178d2b..ce460e9a94c 100644 --- a/src/vnet/session/session_node.c +++ b/src/vnet/session/session_node.c @@ -325,8 +325,7 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, /* *INDENT-ON* */ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); - if (VLIB_BUFFER_TRACE_TRAJECTORY) - b0->pre_data[1] = 3; + tcp_trajectory_add_start (b0, 3); if (PREDICT_FALSE (n_trace > 0)) { diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index a0a5f190fa4..b16b2a7dfb2 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -233,6 +233,8 @@ tcp_connection_reset (tcp_connection_t * tc) tcp_connection_cleanup (tc); break; case TCP_STATE_ESTABLISHED: + stream_session_reset_notify (&tc->connection); + /* fall through */ case TCP_STATE_CLOSE_WAIT: case TCP_STATE_FIN_WAIT_1: case TCP_STATE_FIN_WAIT_2: @@ -242,7 +244,6 @@ tcp_connection_reset (tcp_connection_t * tc) /* Make sure all timers are cleared */ tcp_connection_timers_reset (tc); - stream_session_reset_notify (&tc->connection); /* Wait for cleanup from session layer but not forever */ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); @@ -1319,7 +1320,9 @@ tcp_config_fn (vlib_main_t * vm, unformat_input_t * input) 
&tm->local_endpoints_table_buckets)) ; - + else if (unformat (input, "buffer-fail-fraction %f", + &tm->buffer_fail_fraction)) + ; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index b057b883fd8..1ddfac0ce96 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -410,6 +410,9 @@ typedef struct _tcp_main u8 punt_unknown4; u8 punt_unknown6; + + /** fault-injection */ + f64 buffer_fail_fraction; } tcp_main_t; extern tcp_main_t tcp_main; @@ -432,6 +435,15 @@ tcp_buffer_hdr (vlib_buffer_t * b) + vnet_buffer (b)->tcp.hdr_offset); } +#if (VLIB_BUFFER_TRACE_TRAJECTORY) +#define tcp_trajectory_add_start(b, start) \ +{ \ + (*vlib_buffer_trace_trajectory_cb) (b, start); \ +} +#else +#define tcp_trajectory_add_start(b, start) +#endif + clib_error_t *vnet_tcp_enable_disable (vlib_main_t * vm, u8 is_en); void tcp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add); diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h index eb318cde4b9..5d4f7d6879d 100755 --- a/src/vnet/tcp/tcp_debug.h +++ b/src/vnet/tcp/tcp_debug.h @@ -22,6 +22,7 @@ #define TCP_DEBUG_SM (0) #define TCP_DEBUG_CC (0) #define TCP_DEBUG_CC_STAT (1) +#define TCP_DEBUG_BUFFER_ALLOCATION (0) #define foreach_tcp_dbg_evt \ _(INIT, "") \ @@ -747,6 +748,39 @@ if (_tc->c_cc_stat_tstamp + STATS_INTERVAL < tcp_time_now()) \ } \ } +/* + * Buffer allocation + */ +#if TCP_DEBUG_BUFFER_ALLOCATION + +#define TCP_DBG_BUFFER_ALLOC_MAYBE_FAIL(thread_index) \ +{ \ + static u32 *buffer_fail_counters; \ + if (PREDICT_FALSE (buffer_fail_counters == 0)) \ + { \ + u32 num_threads; \ + vlib_thread_main_t *vtm = vlib_get_thread_main (); \ + num_threads = 1 /* main thread */ + vtm->n_threads; \ + vec_validate (buffer_fail_counters, num_threads - 1); \ + } \ + if (PREDICT_FALSE (tcp_main.buffer_fail_fraction != 0.0)) \ + { \ + if (PREDICT_TRUE (buffer_fail_counters[thread_index] > 0)) \ + { \ + if ((1.0 / (f32) 
(buffer_fail_counters[thread_index])) \ + < tcp_main.buffer_fail_fraction) \ + { \ + buffer_fail_counters[thread_index] = 0.0000001; \ + return -1; \ + } \ + } \ + buffer_fail_counters[thread_index] ++; \ + } \ +} +#else +#define TCP_DBG_BUFFER_ALLOC_MAYBE_FAIL(thread_index) +#endif + #else #define TCP_EVT_CC_STAT_HANDLER(_tc, ...) #endif diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 614b94a4b06..702a94f3a98 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -3001,6 +3001,9 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, goto done; } + vnet_buffer (b0)->tcp.hdr_offset = (u8 *) tcp0 + - (u8 *) vlib_buffer_get_current (b0); + /* Session exists */ if (PREDICT_TRUE (0 != tconn)) { @@ -3014,8 +3017,6 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b0)->tcp.ack_number = clib_net_to_host_u32 (tcp0->ack_number); - vnet_buffer (b0)->tcp.hdr_offset = (u8 *) tcp0 - - (u8 *) vlib_buffer_get_current (b0); vnet_buffer (b0)->tcp.data_offset = n_advance_bytes0; vnet_buffer (b0)->tcp.data_len = n_data_bytes0; diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 3509ad4701d..f377c912073 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -463,6 +463,9 @@ tcp_get_free_buffer_index (tcp_main_t * tm, u32 * bidx) { u32 *my_tx_buffers; u32 thread_index = vlib_get_thread_index (); + + TCP_DBG_BUFFER_ALLOC_MAYBE_FAIL (thread_index); + if (PREDICT_FALSE (vec_len (tm->tx_buffers[thread_index]) == 0)) { if (tcp_alloc_tx_buffers (tm, thread_index, VLIB_FRAME_SIZE)) @@ -504,7 +507,7 @@ tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b) b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; b->total_length_not_including_first_buffer = 0; vnet_buffer (b)->tcp.flags = 0; - + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); /* Leave enough space for headers */ return vlib_buffer_make_headroom (b, MAX_HDRS_LEN); } @@ -590,9 +593,6 @@ tcp_make_syn (tcp_connection_t * tc, 
vlib_buffer_t * b) initial_wnd); vnet_buffer (b)->tcp.connection_index = tc->c_c_index; tcp_options_write ((u8 *) (th + 1), &snd_opts); - - tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN, - tc->rto * TCP_TO_TIMER_TICK); } /** @@ -641,15 +641,11 @@ tcp_enqueue_to_ip_lookup_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, b->error = 0; /* Default FIB for now */ - vnet_buffer (b)->sw_if_index[VLIB_TX] = 0; + vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0; /* Send to IP lookup */ next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index; - if (VLIB_BUFFER_TRACE_TRAJECTORY > 0) - { - b->pre_data[0] = 2; - b->pre_data[1] = next_index; - } + tcp_trajectory_add_start (b, 1); f = tm->ip_lookup_tx_frames[!is_ip4][thread_index]; if (!f) @@ -697,11 +693,7 @@ tcp_enqueue_to_output_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, /* Decide where to send the packet */ next_index = is_ip4 ? tcp4_output_node.index : tcp6_output_node.index; - if (VLIB_BUFFER_TRACE_TRAJECTORY > 0) - { - b->pre_data[0] = 1; - b->pre_data[1] = next_index; - } + tcp_trajectory_add_start (b, 2); /* Get frame to v4/6 output node */ f = tm->tx_frames[!is_ip4][thread_index]; @@ -791,6 +783,7 @@ tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b0, } tcp_reuse_buffer (vm, b0); + tcp_trajectory_add_start (b0, 4); th0 = vlib_buffer_push_tcp_net_order (b0, dst_port, src_port, seq, ack, sizeof (tcp_header_t), flags, 0); @@ -977,6 +970,14 @@ tcp_send_syn (tcp_connection_t * tc) tcp_main_t *tm = vnet_get_tcp_main (); vlib_main_t *vm = vlib_get_main (); + /* + * Set up retransmit and establish timers before requesting buffer + * such that we can return if we've run out. 
+ */ + tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME); + tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN, + tc->rto * TCP_TO_TIMER_TICK); + if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi))) return; @@ -989,9 +990,6 @@ tcp_send_syn (tcp_connection_t * tc) tc->rtt_seq = tc->snd_nxt; tc->rto_boff = 0; - /* Set the connection establishment timer */ - tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME); - tcp_push_ip_hdr (tm, tc, b); tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4); TCP_EVT_DBG (TCP_EVT_SYN_SENT, tc); @@ -1055,6 +1053,7 @@ tcp_send_fin (tcp_connection_t * tc) u32 bi; u8 fin_snt = 0; + tcp_retransmit_timer_force_update (tc); if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi))) return; b = vlib_get_buffer (vm, bi); @@ -1075,7 +1074,6 @@ tcp_send_fin (tcp_connection_t * tc) { tc->snd_nxt = tc->snd_una_max; } - tcp_retransmit_timer_force_update (tc); TCP_EVT_DBG (TCP_EVT_FIN_SENT, tc); } @@ -1378,9 +1376,11 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) if (tc->state >= TCP_STATE_ESTABLISHED) { /* Lost FIN, retransmit and return */ - if (tcp_is_lost_fin (tc)) + if (tc->state == TCP_STATE_FIN_WAIT_1) { tcp_send_fin (tc); + tc->rto_boff += 1; + tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); return; } @@ -1419,12 +1419,6 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) if (n_bytes == 0) { - ASSERT (!b); - if (tc->snd_una == tc->snd_una_max) - return; - ASSERT (tc->rto_boff > 1 && tc->snd_una == tc->snd_congestion); - clib_warning ("retransmit fail: %U", format_tcp_connection, tc, 2); - /* Try again eventually */ tcp_retransmit_timer_set (tc); return; } @@ -1460,6 +1454,9 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) if (tc->rto_boff > TCP_RTO_SYN_RETRIES) tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); + tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN, + tc->rto * TCP_TO_TIMER_TICK); + if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi))) return; @@ -1483,7 +1480,10 @@ 
tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) tc->rtt_ts = 0; if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi))) - return; + { + tcp_retransmit_timer_force_update (tc); + return; + } b = vlib_get_buffer (vm, bi); tcp_make_synack (tc, b); @@ -2037,7 +2037,7 @@ tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } /* Prepare to send to IP lookup */ - vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0; next0 = TCP_RESET_NEXT_IP_LOOKUP; done: diff --git a/src/vnet/util/trajectory.c b/src/vnet/util/trajectory.c index 24b51254ecd..91812dcba58 100644 --- a/src/vnet/util/trajectory.c +++ b/src/vnet/util/trajectory.c @@ -64,7 +64,11 @@ vnet_dump_trajectory_trace (vlib_main_t * vm, u32 bi) void init_trajectory_trace (vlib_buffer_t * b) { - vec_validate (vnet_buffer2 (b)->trajectory_trace, 7); + if (!clib_mem_is_vec (vnet_buffer2 (b)->trajectory_trace)) + { + vnet_buffer2 (b)->trajectory_trace = 0; + vec_validate (vnet_buffer2 (b)->trajectory_trace, 7); + } _vec_len (vnet_buffer2 (b)->trajectory_trace) = 0; } -- 2.16.6