From 3ec66b023280b1aa4b2e92ae475ceb03e5ed3910 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Thu, 23 Aug 2018 16:27:05 -0700 Subject: [PATCH] tcp: fix cc recovery re-entry and persist timer pop Change-Id: I89e8052f2d2c36dd3de5255c4ee570722dc58227 Signed-off-by: Florin Coras --- src/vnet/session/session.c | 4 ++-- src/vnet/session/session.h | 9 +++++++++ src/vnet/tcp/tcp_input.c | 21 +++++++++++++++------ src/vnet/tcp/tcp_output.c | 31 ++++++++++++++++++++++--------- 4 files changed, 48 insertions(+), 17 deletions(-) diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 6378fe8e299..9790ec25edd 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -491,9 +491,9 @@ session_enqueue_notify (stream_session_t * s, u8 lock) application_t *app; app = application_get_if_valid (s->app_index); - if (PREDICT_FALSE (app == 0)) + if (PREDICT_FALSE (!app)) { - clib_warning ("invalid s->app_index = %d", s->app_index); + TCP_DBG ("invalid s->app_index = %d", s->app_index); return 0; } diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index 5e94c41f927..63ed83d40df 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -571,6 +571,15 @@ void session_register_transport (transport_proto_t transport_proto, const transport_proto_vft_t * vft, u8 is_ip4, u32 output_node); +always_inline void +transport_add_tx_event (transport_connection_t * tc) +{ + stream_session_t *s = session_get (tc->s_index, tc->thread_index); + if (svm_fifo_has_event (s->server_tx_fifo)) + return; + session_send_io_evt_to_thread (s->server_tx_fifo, FIFO_EVENT_APP_TX); +} + clib_error_t *vnet_session_enable_disable (vlib_main_t * vm, u8 is_en); always_inline svm_msg_q_t * diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 1e6a8eb4eae..3f36b6996be 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -453,7 +453,11 @@ tcp_update_rtt (tcp_connection_t * tc, u32 ack) /* Karn's rule, part 1. Don't use retransmitted segments to estimate * RTT because they're ambiguous. */ if (tcp_in_cong_recovery (tc) || tc->sack_sb.sacked_bytes) - goto done; + { + if (tcp_in_recovery (tc)) + return 0; + goto done; + } if (tc->rtt_ts && seq_geq (ack, tc->rtt_seq)) { @@ -479,7 +483,7 @@ done: tc->rtt_ts = 0; /* If we got here something must've been ACKed so make sure boff is 0, - * even if mrrt is not valid since we update the rto lower */ + * even if mrtt is not valid since we update the rto lower */ tc->rto_boff = 0; tcp_update_rto (tc); @@ -1240,7 +1244,10 @@ partial_ack: * Legitimate ACK. 1) See if we can exit recovery */ /* XXX limit this only to first partial ack? */ - tcp_retransmit_timer_update (tc); + if (seq_lt (tc->snd_una, tc->snd_congestion)) + tcp_retransmit_timer_force_update (tc); + else + tcp_retransmit_timer_update (tc); if (seq_geq (tc->snd_una, tc->snd_congestion)) { @@ -1272,6 +1279,7 @@ partial_ack: { tc->cc_algo->rcv_ack (tc); tc->tsecr_last_ack = tc->rcv_opts.tsecr; + transport_add_tx_event (&tc->connection); return; } @@ -1322,10 +1330,11 @@ tcp_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b, /* When we entered recovery, we reset snd_nxt to snd_una. Seems peer * still has the data so accept the ack */ if (tcp_in_recovery (tc) - && seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_congestion) - && seq_geq (vnet_buffer (b)->tcp.ack_number, tc->snd_una)) + && seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_congestion)) { - tc->snd_una_max = tc->snd_nxt = vnet_buffer (b)->tcp.ack_number; + tc->snd_nxt = vnet_buffer (b)->tcp.ack_number; + if (seq_gt (tc->snd_nxt, tc->snd_una_max)) + tc->snd_una_max = tc->snd_nxt; goto process_ack; } diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index b3c39b11b30..f37958e12f2 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -1187,7 +1187,8 @@ tcp_push_header (tcp_connection_t * tc, vlib_buffer_t * b) tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, /* compute opts */ 0, /* burst */ 1); tc->snd_una_max = tc->snd_nxt; - ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd)); + ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd + + tcp_fastrecovery_sent_1_smss (tc) * tc->snd_mss)); tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una); /* If not tracking an ACK, start tracking */ if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc)) @@ -1451,15 +1452,19 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) return; } - /* Shouldn't be here */ + /* Shouldn't be here. This condition is tricky because it has to take + * into account boff > 0 due to persist timeout. */ if ((tc->rto_boff == 0 && tc->snd_una == tc->snd_una_max) - || (tc->rto_boff > 0 && seq_geq (tc->snd_una, tc->snd_congestion))) + || (tc->rto_boff > 0 && seq_geq (tc->snd_una, tc->snd_congestion) + && !tcp_flight_size (tc))) { - tcp_recovery_off (tc); + ASSERT (!tcp_in_recovery (tc)); + tc->rto_boff = 0; return; } - /* We're not in recovery so make sure rto_boff is 0 */ + /* We're not in recovery so make sure rto_boff is 0. Can be non 0 due + * to persist timer timeout */ if (!tcp_in_recovery (tc) && tc->rto_boff > 0) { tc->rto_boff = 0; @@ -1474,10 +1479,15 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) if (tc->rto_boff == 1) tcp_rxt_timeout_cc (tc); + /* If we've sent beyond snd_congestion, update it */ + if (seq_gt (tc->snd_una_max, tc->snd_congestion)) + tc->snd_congestion = tc->snd_una_max; + tc->snd_una_max = tc->snd_nxt = tc->snd_una; tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); - /* Send one segment. Note that n_bytes may be zero due to buffer shortfall */ + /* Send one segment. Note that n_bytes may be zero due to buffer + * shortfall */ n_bytes = tcp_prepare_retransmit_segment (tc, 0, tc->snd_mss, &b); /* TODO be less aggressive about this */ @@ -1485,7 +1495,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) if (n_bytes == 0) { - tcp_retransmit_timer_set (tc); + tcp_retransmit_timer_force_update (tc); return; } @@ -1496,7 +1506,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) tc->snd_rxt_ts = tcp_time_now (); tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); - tcp_retransmit_timer_update (tc); + tcp_retransmit_timer_force_update (tc); } /* Retransmit for SYN */ else if (tc->state == TCP_STATE_SYN_SENT) @@ -1632,7 +1642,10 @@ tcp_timer_persist_handler (u32 index) * Try to force the first unsent segment (or buffer) */ if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi))) - return; + { + tcp_persist_timer_set (tc); + return; + } b = vlib_get_buffer (vm, bi); data = tcp_init_buffer (vm, b); -- 2.16.6