From b11175d797c11880d19442f178db5df526ca1256 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Fri, 9 Nov 2018 14:34:08 -0800 Subject: [PATCH] tcp: handle disconnects after enq notifications Make sure that we notify the app of the data enqueued in the burst before notifying of disconnect. Change-Id: I7747a5cbb4c6bc9132007f849c24ce04b7841273 Signed-off-by: Florin Coras --- src/vnet/tcp/tcp.c | 2 ++ src/vnet/tcp/tcp.h | 61 ++++++++++++++++++++++++++++---------------- src/vnet/tcp/tcp_input.c | 64 ++++++++++++++++++++++++++++++++++++----------- src/vnet/tcp/tcp_output.c | 3 +-- 4 files changed, 93 insertions(+), 37 deletions(-) diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 695f614a91c..2511a176e00 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -1332,11 +1332,13 @@ tcp_main_enable (vlib_main_t * vm) vec_validate (tm->wrk_ctx[thread].postponed_fast_rxt, 255); vec_validate (tm->wrk_ctx[thread].pending_deq_acked, 255); vec_validate (tm->wrk_ctx[thread].pending_acks, 255); + vec_validate (tm->wrk_ctx[thread].pending_disconnects, 255); vec_reset_length (tm->wrk_ctx[thread].pending_fast_rxt); vec_reset_length (tm->wrk_ctx[thread].ongoing_fast_rxt); vec_reset_length (tm->wrk_ctx[thread].postponed_fast_rxt); vec_reset_length (tm->wrk_ctx[thread].pending_deq_acked); vec_reset_length (tm->wrk_ctx[thread].pending_acks); + vec_reset_length (tm->wrk_ctx[thread].pending_disconnects); tm->wrk_ctx[thread].vm = vlib_mains[thread]; /* diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 46262978a83..d4bebeb47b8 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -118,7 +118,7 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler; _(SENT_RCV_WND0, "Sent 0 rcv_wnd") \ _(RECOVERY, "Recovery") \ _(FAST_RECOVERY, "Fast Recovery") \ - _(FR_1_SMSS, "Sent 1 SMSS") \ + _(DCNT_PENDING, "Disconnect pending") \ _(HALF_OPEN_DONE, "Half-open completed") \ _(FINPNDG, "FIN pending") \ _(FRXT_PENDING, "Fast-retransmit pending") \ @@ -352,9 +352,9 @@ struct _tcp_cc_algorithm #define tcp_in_fastrecovery(tc) ((tc)->flags & TCP_CONN_FAST_RECOVERY) #define tcp_in_recovery(tc) ((tc)->flags & (TCP_CONN_RECOVERY)) #define tcp_in_slowstart(tc) (tc->cwnd < tc->ssthresh) -#define tcp_fastrecovery_sent_1_smss(tc) ((tc)->flags & TCP_CONN_FR_1_SMSS) -#define tcp_fastrecovery_1_smss_on(tc) ((tc)->flags |= TCP_CONN_FR_1_SMSS) -#define tcp_fastrecovery_1_smss_off(tc) ((tc)->flags &= ~TCP_CONN_FR_1_SMSS) +#define tcp_disconnect_pending(tc) ((tc)->flags & TCP_CONN_DCNT_PENDING) +#define tcp_disconnect_pending_on(tc) ((tc)->flags |= TCP_CONN_DCNT_PENDING) +#define tcp_disconnect_pending_off(tc) ((tc)->flags &= ~TCP_CONN_DCNT_PENDING) #define tcp_fastrecovery_first(tc) ((tc)->flags & TCP_CONN_FRXT_FIRST) #define tcp_fastrecovery_first_on(tc) ((tc)->flags |= TCP_CONN_FRXT_FIRST) #define tcp_fastrecovery_first_off(tc) ((tc)->flags &= ~TCP_CONN_FRXT_FIRST) @@ -366,7 +366,6 @@ always_inline void tcp_cong_recovery_off (tcp_connection_t * tc) { tc->flags &= ~(TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY); - tcp_fastrecovery_1_smss_off (tc); tcp_fastrecovery_first_off (tc); } @@ -386,26 +385,46 @@ typedef struct _tcp_lookup_dispatch typedef struct tcp_worker_ctx_ { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - u32 time_now; /**< worker time */ - tw_timer_wheel_16t_2w_512sl_t timer_wheel; /**< worker timer wheel */ - u32 *tx_buffers; /**< tx buffer free list */ - vlib_frame_t *tx_frames[2]; /**< tx frames for tcp 4/6 - output nodes */ - vlib_frame_t *ip_lookup_tx_frames[2]; /**< tx frames for ip 4/6 - lookup nodes */ - u32 *pending_fast_rxt; /**< vector of connections - needing fast rxt */ - u32 *ongoing_fast_rxt; /**< vector of connections - now doing fast rxt */ - u32 *postponed_fast_rxt; /**< vector of connections - that will do fast rxt */ + /** worker time */ + u32 time_now; + + /** worker timer wheel */ + tw_timer_wheel_16t_2w_512sl_t timer_wheel; + + /** tx buffer free list */ + u32 *tx_buffers; + + /** tx frames for tcp 4/6 output nodes */ + vlib_frame_t *tx_frames[2]; + + /** tx frames for ip 4/6 lookup nodes */ + vlib_frame_t *ip_lookup_tx_frames[2]; + + /** vector of connections needing fast rxt */ + u32 *pending_fast_rxt; + + /** vector of connections now doing fast rxt */ + u32 *ongoing_fast_rxt; + + /** vector of connections that will do fast rxt */ + u32 *postponed_fast_rxt; + + /** vector of pending ack dequeues */ u32 *pending_deq_acked; + + /** vector of pending acks */ u32 *pending_acks; - vlib_main_t *vm; /**< pointer to vm */ + + /** vector of pending disconnect notifications */ + u32 *pending_disconnects; + + /** convenience pointer to this thread's vlib main */ + vlib_main_t *vm; CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); - u8 cached_opts[40]; /**< cached 'on the wire' - options for bursts */ + + /** cached 'on the wire' options for bursts */ + u8 cached_opts[40]; } tcp_worker_ctx_t; diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 2578b7d1051..0c8706567ae 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -1131,7 +1131,6 @@ tcp_cc_fastrecovery_exit (tcp_connection_t * tc) tc->rtt_ts = 0; tcp_fastrecovery_off (tc); - tcp_fastrecovery_1_smss_off (tc); tcp_fastrecovery_first_off (tc); TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3); @@ -1600,6 +1599,54 @@ process_ack: return 0; } +static void +tcp_program_disconnect (tcp_worker_ctx_t * wrk, tcp_connection_t * tc) +{ + if (!tcp_disconnect_pending (tc)) + { + vec_add1 (wrk->pending_disconnects, tc->c_c_index); + tcp_disconnect_pending_on (tc); + } +} + +static void +tcp_handle_disconnects (tcp_worker_ctx_t * wrk) +{ + u32 thread_index, *pending_disconnects; + tcp_connection_t *tc; + int i; + + if (!vec_len (wrk->pending_disconnects)) + return; + + thread_index = wrk->vm->thread_index; + pending_disconnects = wrk->pending_disconnects; + for (i = 0; i < vec_len (pending_disconnects); i++) + { + tc = tcp_connection_get (pending_disconnects[i], thread_index); + tcp_disconnect_pending_off (tc); + stream_session_disconnect_notify (&tc->connection); + } + _vec_len (wrk->pending_disconnects) = 0; +} + +static void +tcp_rcv_fin (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b, + u32 * error) +{ + /* Enter CLOSE-WAIT and notify session. To avoid lingering + * in CLOSE-WAIT, set timer (reuse WAITCLOSE). */ + /* Account for the FIN if nothing else was received */ + if (vnet_buffer (b)->tcp.data_len == 0) + tc->rcv_nxt += 1; + tcp_program_ack (wrk, tc); + tc->state = TCP_STATE_CLOSE_WAIT; + tcp_program_disconnect (wrk, tc); + tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME); + TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc); + *error = TCP_ERROR_FIN_RCVD; +} + static u8 tcp_sack_vector_is_sane (sack_block_t * sacks) { @@ -2099,19 +2146,7 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* 8: check the FIN bit */ if (PREDICT_FALSE (is_fin)) - { - /* Enter CLOSE-WAIT and notify session. To avoid lingering - * in CLOSE-WAIT, set timer (reuse WAITCLOSE). */ - /* Account for the FIN if nothing else was received */ - if (vnet_buffer (b0)->tcp.data_len == 0) - tc0->rcv_nxt += 1; - tcp_program_ack (wrk, tc0); - tc0->state = TCP_STATE_CLOSE_WAIT; - stream_session_disconnect_notify (&tc0->connection); - tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME); - TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc0); - error0 = TCP_ERROR_FIN_RCVD; - } + tcp_rcv_fin (wrk, tc0, b0, &error0); done: tcp_inc_err_counter (err_counters, error0, 1); @@ -2122,6 +2157,7 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, err_counters[TCP_ERROR_EVENT_FIFO_FULL] = errors; tcp_store_err_counters (established, err_counters); tcp_handle_postponed_dequeues (wrk); + tcp_handle_disconnects (wrk); vlib_buffer_free (vm, first_buffer, frame->n_vectors); return frame->n_vectors; diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index e16095b635d..49156368574 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -1201,8 +1201,7 @@ tcp_push_header (tcp_connection_t * tc, vlib_buffer_t * b) tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, /* compute opts */ 0, /* burst */ 1); tc->snd_una_max = tc->snd_nxt; - ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd - + tcp_fastrecovery_sent_1_smss (tc) * tc->snd_mss)); + ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd)); tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una); /* If not tracking an ACK, start tracking */ if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc)) -- 2.16.6