tcp: do not sample rtt for retransmitted segments 15/8515/3
authorFlorin Coras <fcoras@cisco.com>
Sun, 24 Sep 2017 23:43:08 +0000 (19:43 -0400)
committerDave Barach <openvpp@barachs.net>
Mon, 25 Sep 2017 19:23:24 +0000 (19:23 +0000)
Change-Id: I365c31607332a944ef498369881332b515894ed7
Signed-off-by: Florin Coras <fcoras@cisco.com>
src/vnet/tcp/tcp_debug.h
src/vnet/tcp/tcp_input.c
src/vnet/tcp/tcp_output.c

index 4bc6b42..eb318cd 100755 (executable)
@@ -20,7 +20,7 @@
 
 #define TCP_DEBUG (1)
 #define TCP_DEBUG_SM (0)
-#define TCP_DEBUG_CC (1)
+#define TCP_DEBUG_CC (0)
 #define TCP_DEBUG_CC_STAT (1)
 
 #define foreach_tcp_dbg_evt            \
@@ -411,21 +411,6 @@ typedef enum _tcp_dbg_evt
   ed->data[4] = _tc->snd_wnd;                                          \
 }
 
-#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...)                          \
-{                                                                      \
-  ELOG_TYPE_DECLARE (_e) =                                             \
-  {                                                                    \
-    .format = "dack-tx: rcv_nxt %u rcv_wnd %u snd_nxt %u av_wnd %u snd_wnd %u",\
-    .format_args = "i4i4i4i4i4",                                       \
-  };                                                                   \
-  DECLARE_ETD(_tc, _e, 5);                                             \
-  ed->data[0] = _tc->rcv_nxt - _tc->irs;                               \
-  ed->data[1] = _tc->rcv_wnd;                                          \
-  ed->data[2] = _tc->snd_nxt - _tc->iss;                               \
-  ed->data[3] = tcp_available_snd_wnd(_tc);                            \
-  ed->data[4] = _tc->snd_wnd;                                          \
-}
-
 #define TCP_EVT_ACK_RCVD_HANDLER(_tc, ...)                             \
 {                                                                      \
   ELOG_TYPE_DECLARE (_e) =                                             \
@@ -441,21 +426,6 @@ typedef enum _tcp_dbg_evt
   ed->data[4] = tcp_flight_size(_tc);                                  \
 }
 
-#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...)                          \
-{                                                                      \
-  ELOG_TYPE_DECLARE (_e) =                                             \
-  {                                                                    \
-    .format = "dack-rx: snd_una %u cwnd %u snd_wnd %u flight %u rcv_wnd %u",\
-    .format_args = "i4i4i4i4i4",                                       \
-  };                                                                   \
-  DECLARE_ETD(_tc, _e, 5);                                             \
-  ed->data[0] = _tc->snd_una - _tc->iss;                               \
-  ed->data[1] = _tc->cwnd;                                             \
-  ed->data[2] = _tc->snd_wnd;                                          \
-  ed->data[3] = tcp_flight_size(_tc);                                  \
-  ed->data[4] = _tc->rcv_wnd;                                          \
-}
-
 #define TCP_EVT_PKTIZE_HANDLER(_tc, ...)                               \
 {                                                                      \
   ELOG_TYPE_DECLARE (_e) =                                             \
@@ -601,9 +571,7 @@ if (_av > 0)                                                                \
 }
 #else
 #define TCP_EVT_ACK_SENT_HANDLER(_tc, ...)
-#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...)
 #define TCP_EVT_ACK_RCVD_HANDLER(_tc, ...)
-#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...)
 #define TCP_EVT_PKTIZE_HANDLER(_tc, ...)
 #define TCP_EVT_INPUT_HANDLER(_tc, _type, _len, _written, ...)
 #define TCP_EVT_TIMER_POP_HANDLER(_tc_index, _timer_id, ...)
@@ -649,19 +617,6 @@ if (_av > 0)                                                               \
  */
 
 #if TCP_DEBUG_CC
-#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)              \
-{                                                                      \
-  ELOG_TYPE_DECLARE (_e) =                                             \
-  {                                                                    \
-    .format = "rxt: snd_nxt %u offset %u snd %u rxt %u",               \
-    .format_args = "i4i4i4i4",                                         \
-  };                                                                   \
-  DECLARE_ETD(_tc, _e, 4);                                             \
-  ed->data[0] = _tc->snd_nxt - _tc->iss;                               \
-  ed->data[1] = offset;                                                        \
-  ed->data[2] = n_bytes;                                               \
-  ed->data[3] = _tc->snd_rxt_bytes;                                    \
-}
 
 #define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...)                     \
 {                                                                      \
@@ -669,13 +624,14 @@ if (_av > 0)                                                              \
   {                                                                    \
     .format = "cc: %s wnd %u snd_cong %u rxt_bytes %u",                        \
     .format_args = "t4i4i4i4",                                         \
-    .n_enum_strings = 5,                                               \
+    .n_enum_strings = 6,                                               \
     .enum_strings = {                                                  \
       "fast-rxt",                                                      \
       "rxt-timeout",                                                   \
       "first-rxt",                                                     \
       "recovered",                                                     \
       "congestion",                                                    \
+      "undo",                                                          \
     },                                                                 \
   };                                                                   \
   DECLARE_ETD(_tc, _e, 4);                                             \
@@ -685,6 +641,50 @@ if (_av > 0)                                                               \
   ed->data[3] = _tc->snd_rxt_bytes;                                    \
 }
 
+#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)              \
+{                                                                      \
+  ELOG_TYPE_DECLARE (_e) =                                             \
+  {                                                                    \
+    .format = "rxt: snd_nxt %u offset %u snd %u rxt %u",               \
+    .format_args = "i4i4i4i4",                                         \
+  };                                                                   \
+  DECLARE_ETD(_tc, _e, 4);                                             \
+  ed->data[0] = _tc->snd_nxt - _tc->iss;                               \
+  ed->data[1] = offset;                                                        \
+  ed->data[2] = n_bytes;                                               \
+  ed->data[3] = _tc->snd_rxt_bytes;                                    \
+}
+
+#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...)                          \
+{                                                                      \
+  ELOG_TYPE_DECLARE (_e) =                                             \
+  {                                                                    \
+    .format = "dack-tx: rcv_nxt %u rcv_wnd %u snd_nxt %u av_wnd %u snd_wnd %u",\
+    .format_args = "i4i4i4i4i4",                                       \
+  };                                                                   \
+  DECLARE_ETD(_tc, _e, 5);                                             \
+  ed->data[0] = _tc->rcv_nxt - _tc->irs;                               \
+  ed->data[1] = _tc->rcv_wnd;                                          \
+  ed->data[2] = _tc->snd_nxt - _tc->iss;                               \
+  ed->data[3] = tcp_available_snd_wnd(_tc);                            \
+  ed->data[4] = _tc->snd_wnd;                                          \
+}
+
+#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...)                          \
+{                                                                      \
+  ELOG_TYPE_DECLARE (_e) =                                             \
+  {                                                                    \
+    .format = "dack-rx: snd_una %u cwnd %u snd_wnd %u flight %u rcv_wnd %u",\
+    .format_args = "i4i4i4i4i4",                                       \
+  };                                                                   \
+  DECLARE_ETD(_tc, _e, 5);                                             \
+  ed->data[0] = _tc->snd_una - _tc->iss;                               \
+  ed->data[1] = _tc->cwnd;                                             \
+  ed->data[2] = _tc->snd_wnd;                                          \
+  ed->data[3] = tcp_flight_size(_tc);                                  \
+  ed->data[4] = _tc->rcv_wnd;                                          \
+}
+
 #define TCP_EVT_CC_PACK_HANDLER(_tc, ...)                              \
 {                                                                      \
   ELOG_TYPE_DECLARE (_e) =                                             \
@@ -696,6 +696,13 @@ if (_av > 0)                                                               \
   ed->data[0] = _tc->snd_una - _tc->iss;                               \
   ed->data[1] = _tc->snd_una_max - _tc->iss;                           \
 }
+#else
+#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)
+#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...)
+#define TCP_EVT_CC_PACK_HANDLER(_tc, ...)
+#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...)
+#endif
 
 /*
  * Congestion control stats
@@ -744,13 +751,6 @@ if (_tc->c_cc_stat_tstamp + STATS_INTERVAL < tcp_time_now())               \
 #define TCP_EVT_CC_STAT_HANDLER(_tc, ...)
 #endif
 
-#else
-#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)
-#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...)
-#define TCP_EVT_CC_PACK_HANDLER(_tc, ...)
-#define TCP_EVT_CC_STAT_HANDLER(_tc, ...)
-#endif
-
 #endif /* SRC_VNET_TCP_TCP_DEBUG_H_ */
 /*
  * fd.io coding-style-patch-verification: ON
index bd57eca..0a36d06 100644 (file)
@@ -248,8 +248,8 @@ tcp_update_timestamp (tcp_connection_t * tc, u32 seq, u32 seq_end)
    * then the TSval from the segment is copied to TS.Recent;
    * otherwise, the TSval is ignored.
    */
-  if (tcp_opts_tstamp (&tc->rcv_opts) && tc->tsval_recent
-      && seq_leq (seq, tc->rcv_las) && seq_leq (tc->rcv_las, seq_end))
+  if (tcp_opts_tstamp (&tc->rcv_opts) && seq_leq (seq, tc->rcv_las)
+      && seq_leq (tc->rcv_las, seq_end))
     {
       ASSERT (timestamp_leq (tc->tsval_recent, tc->rcv_opts.tsval));
       tc->tsval_recent = tc->rcv_opts.tsval;
@@ -418,51 +418,53 @@ tcp_update_rto (tcp_connection_t * tc)
   tc->rto = clib_max (tc->rto, TCP_RTO_MIN);
 }
 
-/** Update RTT estimate and RTO timer
+/**
+ * Update RTT estimate and RTO timer
  *
  * Measure RTT: We have two sources of RTT measurements: TSOPT and ACK
  * timing. Middle boxes are known to fiddle with TCP options so we
  * should give higher priority to ACK timing.
  *
+ * This should be called only if previously sent bytes have been acked.
+ *
  * return 1 if valid rtt 0 otherwise
  */
 static int
 tcp_update_rtt (tcp_connection_t * tc, u32 ack)
 {
   u32 mrtt = 0;
-  u8 rtx_acked;
-
-  /* Determine if only rtx bytes are acked. */
-  rtx_acked = tcp_in_cong_recovery (tc) || !tc->bytes_acked;
 
   /* Karn's rule, part 1. Don't use retransmitted segments to estimate
    * RTT because they're ambiguous. */
-  if (tc->rtt_ts && seq_geq (ack, tc->rtt_seq) && !rtx_acked)
+  if (tcp_in_cong_recovery (tc) || tc->sack_sb.sacked_bytes)
+    goto done;
+
+  if (tc->rtt_ts && seq_geq (ack, tc->rtt_seq))
     {
       mrtt = tcp_time_now () - tc->rtt_ts;
     }
   /* As per RFC7323 TSecr can be used for RTTM only if the segment advances
    * snd_una, i.e., the left side of the send window:
-   * seq_lt (tc->snd_una, ack). */
-  else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr
-          && tc->bytes_acked)
+   * seq_lt (tc->snd_una, ack). This is a condition for calling update_rtt */
+  else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr)
     {
       mrtt = tcp_time_now () - tc->rcv_opts.tsecr;
     }
 
-  /* Allow measuring of a new RTT */
-  tc->rtt_ts = 0;
-
-  /* If ACK moves left side of the wnd make sure boff is 0, even if mrtt is
-   * not valid */
-  if (tc->bytes_acked)
-    tc->rto_boff = 0;
-
   /* Ignore dubious measurements */
   if (mrtt == 0 || mrtt > TCP_RTT_MAX)
-    return 0;
+    goto done;
 
   tcp_estimate_rtt (tc, mrtt);
+
+done:
+
+  /* Allow measuring of a new RTT */
+  tc->rtt_ts = 0;
+
+  /* If we got here something must've been ACKed so make sure boff is 0,
+   * even if mrrt is not valid since we update the rto lower */
+  tc->rto_boff = 0;
   tcp_update_rto (tc);
 
   return 0;
@@ -932,10 +934,11 @@ static void
 tcp_cc_recovery_exit (tcp_connection_t * tc)
 {
   /* Deflate rto */
-  tcp_update_rto (tc);
   tc->rto_boff = 0;
+  tcp_update_rto (tc);
   tc->snd_rxt_ts = 0;
   tcp_recovery_off (tc);
+  TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
 }
 
 void
@@ -946,6 +949,7 @@ tcp_cc_fastrecovery_exit (tcp_connection_t * tc)
   tc->rcv_dupacks = 0;
   tcp_fastrecovery_off (tc);
   tcp_fastrecovery_1_smss_off (tc);
+  TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
 }
 
 static void
@@ -958,13 +962,14 @@ tcp_cc_congestion_undo (tcp_connection_t * tc)
   if (tcp_in_recovery (tc))
     tcp_cc_recovery_exit (tc);
   ASSERT (tc->rto_boff == 0);
+  TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 5);
   /* TODO extend for fastrecovery */
 }
 
 static u8
 tcp_cc_is_spurious_retransmit (tcp_connection_t * tc)
 {
-  return (tcp_in_recovery (tc)
+  return (tcp_in_recovery (tc) && tc->rto_boff == 1
          && tc->snd_rxt_ts
          && tcp_opts_tstamp (&tc->rcv_opts)
          && timestamp_lt (tc->rcv_opts.tsecr, tc->snd_rxt_ts));
@@ -988,7 +993,6 @@ tcp_cc_recover (tcp_connection_t * tc)
   ASSERT (tc->rto_boff == 0);
   ASSERT (!tcp_in_cong_recovery (tc));
   ASSERT (tcp_scoreboard_is_sane_post_recovery (tc));
-  TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
   return 0;
 }
 
@@ -2115,7 +2119,6 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
              new_tc0->flags |= TCP_CONN_SNDACK;
 
              /* Update rtt with the syn-ack sample */
-             new_tc0->bytes_acked = 1;
              tcp_update_rtt (new_tc0, vnet_buffer (b0)->tcp.ack_number);
              TCP_EVT_DBG (TCP_EVT_SYNACK_RCVD, new_tc0);
            }
@@ -2352,7 +2355,6 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                }
 
              /* Update rtt and rto */
-             tc0->bytes_acked = 1;
              tcp_update_rtt (tc0, vnet_buffer (b0)->tcp.ack_number);
 
              /* Switch state to ESTABLISHED */
index be29f05..cb1fcc9 100644 (file)
@@ -918,7 +918,24 @@ tcp_send_reset (tcp_connection_t * tc)
   opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
   ASSERT (opts_write_len == tc->snd_opts_len);
   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
-  tcp_enqueue_to_output_now (vm, b, bi, tc->c_is_ip4);
+  if (tc->c_is_ip4)
+    {
+      ip4_header_t *ih4;
+      ih4 = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip.ip4,
+                                 &tc->c_rmt_ip.ip4, IP_PROTOCOL_TCP, 0);
+      th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
+    }
+  else
+    {
+      int bogus = ~0;
+      ip6_header_t *ih6;
+      ih6 = vlib_buffer_push_ip6 (vm, b, &tc->c_lcl_ip.ip6,
+                                 &tc->c_rmt_ip.ip6, IP_PROTOCOL_TCP);
+      th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
+      ASSERT (!bogus);
+    }
+  tcp_enqueue_to_ip_lookup_now (vm, b, bi, tc->c_is_ip4);
+  TCP_EVT_DBG (TCP_EVT_RST_SENT, tc);
 }
 
 void
@@ -1324,7 +1341,7 @@ tcp_rtx_timeout_cc (tcp_connection_t * tc)
   tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss);
   tc->cwnd = tcp_loss_wnd (tc);
   tc->snd_congestion = tc->snd_una_max;
-
+  tc->rtt_ts = 0;
   tcp_recovery_on (tc);
 }