From: Florin Coras
Date: Sun, 7 Jun 2020 20:06:56 +0000 (+0000)
Subject: tcp: improve rtt estimation with rate sampling
X-Git-Tag: v21.01-rc0~301
X-Git-Url: https://gerrit.fd.io/r/gitweb?a=commitdiff_plain;h=8ddd19bf7bbb351a45be1baf6cf670b9fa3360b0;p=vpp.git

tcp: improve rtt estimation with rate sampling

Type: improvement

Signed-off-by: Florin Coras
Change-Id: I25043da84b2f2b3f37024ecfedac535df3004ea5
---

diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 964afe36751..cc373f5b665 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -426,8 +426,6 @@ acceptable:
  * Note that although the original article, srtt and rttvar are scaled
  * to minimize round-off errors, here we don't. Instead, we rely on
  * better precision time measurements.
- *
- * TODO support us rtt resolution
  */
 static void
 tcp_estimate_rtt (tcp_connection_t * tc, u32 mrtt)
@@ -452,16 +450,28 @@ tcp_estimate_rtt (tcp_connection_t * tc, u32 mrtt)
     }
 }
 
+static inline void
+tcp_estimate_rtt_us (tcp_connection_t * tc, f64 mrtt)
+{
+  tc->mrtt_us = tc->mrtt_us + (mrtt - tc->mrtt_us) * 0.125;
+}
+
 /**
- * Update RTT estimate and RTO timer
+ * Update rtt estimate
  *
- * Measure RTT: We have two sources of RTT measurements: TSOPT and ACK
- * timing. Middle boxes are known to fiddle with TCP options so we
- * should give higher priority to ACK timing.
+ * We have potentially three sources of rtt measurements:
  *
- * This should be called only if previously sent bytes have been acked.
+ * TSOPT        difference between current and echoed timestamp. It has ms
+ *              precision and can be computed per ack
+ * ACK timing   one sequence number is tracked per rtt with us (micro second)
+ *              precision.
+ * rate sample  if enabled, all outstanding bytes are tracked with us
+ *              precision. Every ack and sack are a rtt sample
  *
- * return 1 if valid rtt 0 otherwise
+ * Middle boxes are known to fiddle with TCP options so we give higher
+ * priority to ACK timing.
+ *
+ * For now, rate sample rtts are only used under congestion.
  */
 static int
 tcp_update_rtt (tcp_connection_t * tc, tcp_rate_sample_t * rs, u32 ack)
@@ -473,19 +483,19 @@ tcp_update_rtt (tcp_connection_t * tc, tcp_rate_sample_t * rs, u32 ack)
   if (tcp_in_cong_recovery (tc))
     {
       /* Accept rtt estimates for samples that have not been retransmitted */
-      if ((tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
-          && !(rs->flags & TCP_BTS_IS_RXT))
-        {
-          mrtt = rs->rtt_time * THZ;
-          goto estimate_rtt;
-        }
-      goto done;
+      if (!(tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
+          || (rs->flags & TCP_BTS_IS_RXT))
+        goto done;
+      if (rs->rtt_time)
+        tcp_estimate_rtt_us (tc, rs->rtt_time);
+      mrtt = rs->rtt_time * THZ;
+      goto estimate_rtt;
     }
 
   if (tc->rtt_ts && seq_geq (ack, tc->rtt_seq))
     {
       f64 sample = tcp_time_now_us (tc->c_thread_index) - tc->rtt_ts;
-      tc->mrtt_us = tc->mrtt_us + (sample - tc->mrtt_us) * 0.125;
+      tcp_estimate_rtt_us (tc, sample);
       mrtt = clib_max ((u32) (sample * THZ), 1);
       /* Allow measuring of a new RTT */
       tc->rtt_ts = 0;
@@ -1048,10 +1058,11 @@ process_ack:
   if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
     tcp_bt_sample_delivery_rate (tc, &rs);
 
-  if (tc->bytes_acked)
+  if (tc->bytes_acked + tc->sack_sb.last_sacked_bytes)
     {
-      tcp_program_dequeue (wrk, tc);
       tcp_update_rtt (tc, &rs, vnet_buffer (b)->tcp.ack_number);
+      if (tc->bytes_acked)
+        tcp_program_dequeue (wrk, tc);
     }
 
   TCP_EVT (TCP_EVT_ACK_RCVD, tc);