From: Florin Coras Date: Tue, 22 May 2018 18:39:59 +0000 (-0700) Subject: tcp: cc improvements and fixes X-Git-Tag: v18.07-rc1~302 X-Git-Url: https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commitdiff_plain;h=d2aab838ea7875d7f58246b80285e299b137fbf1 tcp: cc improvements and fixes Change-Id: I6615bb612bcc3f795b5f822ea55209bb30ef35b5 Signed-off-by: Florin Coras --- diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index b24e22923f1..6908a568a0f 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -450,19 +450,26 @@ session_get_from_handle_safe (u64 handle) } always_inline u32 -stream_session_max_rx_enqueue (transport_connection_t * tc) +transport_max_rx_enqueue (transport_connection_t * tc) { stream_session_t *s = session_get (tc->s_index, tc->thread_index); return svm_fifo_max_enqueue (s->server_rx_fifo); } always_inline u32 -stream_session_rx_fifo_size (transport_connection_t * tc) +transport_rx_fifo_size (transport_connection_t * tc) { stream_session_t *s = session_get (tc->s_index, tc->thread_index); return s->server_rx_fifo->nitems; } +always_inline u32 +transport_tx_fifo_size (transport_connection_t * tc) +{ + stream_session_t *s = session_get (tc->s_index, tc->thread_index); + return s->server_tx_fifo->nitems; +} + always_inline u32 session_get_index (stream_session_t * s) { diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h index 2e67c84cc33..4af4f2e7052 100755 --- a/src/vnet/tcp/tcp_debug.h +++ b/src/vnet/tcp/tcp_debug.h @@ -626,7 +626,7 @@ if (_av > 0) \ { \ ELOG_TYPE_DECLARE (_e) = \ { \ - .format = "cc: %s wnd %u snd_cong %u rxt_bytes %u", \ + .format = "cc: %s snd_space %u snd_cong %u rxt_bytes %u", \ .format_args = "t4i4i4i4", \ .n_enum_strings = 6, \ .enum_strings = { \ diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 42db82e1214..ef6cb8b8fa2 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -936,6 +936,12 @@ tcp_update_snd_wnd (tcp_connection_t * tc, u32 seq, u32 ack, u32 snd_wnd) } } +/** + * Init loss recovery/fast recovery. + * + * Triggered by dup acks as opposed to timer timeout. Note that cwnd is + * updated in @ref tcp_cc_handle_event after fast retransmit + */ void tcp_cc_init_congestion (tcp_connection_t * tc) { @@ -949,7 +955,6 @@ tcp_cc_init_congestion (tcp_connection_t * tc) static void tcp_cc_recovery_exit (tcp_connection_t * tc) { - /* Deflate rto */ tc->rto_boff = 0; tcp_update_rto (tc); tc->snd_rxt_ts = 0; @@ -999,6 +1004,7 @@ tcp_cc_recover (tcp_connection_t * tc) ASSERT (tcp_in_cong_recovery (tc)); if (tcp_cc_is_spurious_retransmit (tc)) { + clib_warning ("here"); tcp_cc_congestion_undo (tc); return 1; } @@ -1061,28 +1067,24 @@ tcp_cc_handle_event (tcp_connection_t * tc, u32 is_dack) * Duplicate ACK. Check if we should enter fast recovery, or if already in * it account for the bytes that left the network. */ - if (is_dack) + if (is_dack && !tcp_in_recovery (tc)) { + TCP_EVT_DBG (TCP_EVT_DUPACK_RCVD, tc, 1); ASSERT (tc->snd_una != tc->snd_una_max || tc->sack_sb.last_sacked_bytes); tc->rcv_dupacks++; + /* Pure duplicate ack. If some data got acked, it's handled lower */ if (tc->rcv_dupacks > TCP_DUPACK_THRESHOLD && !tc->bytes_acked) { ASSERT (tcp_in_fastrecovery (tc)); - /* Pure duplicate ack. If some data got acked, it's handled lower */ tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK); return; } else if (tcp_should_fastrecover (tc)) { - /* Things are already bad */ - if (tcp_in_cong_recovery (tc)) - { - tc->rcv_dupacks = 0; - goto partial_ack_test; - } + ASSERT (!tcp_in_fastrecovery (tc)); /* If of of the two conditions lower hold, reset dupacks because * we're probably after timeout (RFC6582 heuristics). @@ -1139,12 +1141,12 @@ tcp_cc_handle_event (tcp_connection_t * tc, u32 is_dack) goto partial_ack; } -partial_ack_test: - if (!tc->bytes_acked) return; partial_ack: + TCP_EVT_DBG (TCP_EVT_CC_PACK, tc); + /* * Legitimate ACK. 1) See if we can exit recovery */ @@ -1171,17 +1173,18 @@ partial_ack: /* * Legitimate ACK. 2) If PARTIAL ACK try to retransmit */ - TCP_EVT_DBG (TCP_EVT_CC_PACK, tc); /* RFC6675: If the incoming ACK is a cumulative acknowledgment, - * reset dupacks to 0 */ + * reset dupacks to 0. Also needed if in congestion recovery */ tc->rcv_dupacks = 0; - tcp_retransmit_first_unacked (tc); - /* Post RTO timeout don't try anything fancy */ if (tcp_in_recovery (tc)) - return; + { + tc->cc_algo->rcv_ack (tc); + tc->tsecr_last_ack = tc->rcv_opts.tsecr; + return; + } /* Remove retransmitted bytes that have been delivered */ ASSERT (tc->bytes_acked + tc->sack_sb.snd_una_adv @@ -1262,7 +1265,6 @@ tcp_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b, vnet_buffer (b)->tcp.ack_number); if (tcp_in_fastrecovery (tc) && tc->rcv_dupacks == TCP_DUPACK_THRESHOLD) { - TCP_EVT_DBG (TCP_EVT_DUPACK_RCVD, tc); tcp_cc_handle_event (tc, 1); } /* Don't drop yet */ @@ -1300,7 +1302,6 @@ tcp_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b, if (!tcp_in_cong_recovery (tc)) return 0; *error = TCP_ERROR_ACK_DUP; - TCP_EVT_DBG (TCP_EVT_DUPACK_RCVD, tc, 1); return vnet_buffer (b)->tcp.data_len ? 0 : -1; } diff --git a/src/vnet/tcp/tcp_newreno.c b/src/vnet/tcp/tcp_newreno.c index 7ae7f484b56..0f43d21dfde 100644 --- a/src/vnet/tcp/tcp_newreno.c +++ b/src/vnet/tcp/tcp_newreno.c @@ -44,6 +44,8 @@ newreno_rcv_ack (tcp_connection_t * tc) tc->cwnd += inc * tc->snd_mss; tc->cwnd_acc_bytes -= inc * tc->cwnd; } + tc->cwnd = clib_min (tc->cwnd, + transport_tx_fifo_size (&tc->connection)); } } diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 5932ed406d9..27450654f71 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -163,8 +163,8 @@ tcp_update_rcv_wnd (tcp_connection_t * tc) /* * Figure out how much space we have available */ - available_space = stream_session_max_rx_enqueue (&tc->connection); - max_fifo = stream_session_rx_fifo_size (&tc->connection); + available_space = transport_max_rx_enqueue (&tc->connection); + max_fifo = transport_rx_fifo_size (&tc->connection); ASSERT (tc->rcv_opts.mss < max_fifo); if (available_space < tc->rcv_opts.mss && available_space < max_fifo >> 3) @@ -1347,10 +1347,12 @@ tcp_rtx_timeout_cc (tcp_connection_t * tc) tcp_cc_fastrecovery_exit (tc); /* Start again from the beginning */ - tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss); + tc->cc_algo->congestion (tc); tc->cwnd = tcp_loss_wnd (tc); tc->snd_congestion = tc->snd_una_max; tc->rtt_ts = 0; + tc->cwnd_acc_bytes = 0; + tcp_recovery_on (tc); } @@ -1393,7 +1395,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) } /* Shouldn't be here */ - if (tc->snd_una == tc->snd_una_max) + if (seq_geq (tc->snd_una, tc->snd_congestion)) { tcp_recovery_off (tc); return; @@ -1414,7 +1416,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) if (tc->rto_boff == 1) tcp_rtx_timeout_cc (tc); - tc->snd_nxt = tc->snd_una; + tc->snd_una_max = tc->snd_nxt = tc->snd_una; tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 1);