tcp: improve rcv process ack processing 12/18612/8
author Florin Coras <fcoras@cisco.com>
Mon, 1 Apr 2019 00:17:11 +0000 (17:17 -0700)
committer Dave Barach <openvpp@barachs.net>
Tue, 2 Apr 2019 16:48:39 +0000 (16:48 +0000)
- Avoid doing cc in closing states.
- Reset connections closed with unread data

Change-Id: I97d46b0459f03ea5439eeb0f233b6c17d3e06dfd
Signed-off-by: Florin Coras <fcoras@cisco.com>
src/vnet/session/session.h
src/vnet/session/transport.c
src/vnet/tcp/tcp.c
src/vnet/tcp/tcp.h
src/vnet/tcp/tcp_input.c
src/vnet/tcp/tcp_output.c

index a3b84a6..ed42e54 100644 (file)
@@ -388,6 +388,13 @@ transport_max_tx_dequeue (transport_connection_t * tc)
   return svm_fifo_max_dequeue (s->tx_fifo);
 }
 
+always_inline u32
+transport_max_rx_dequeue (transport_connection_t * tc)
+{
+  session_t *s = session_get (tc->s_index, tc->thread_index);
+  return svm_fifo_max_dequeue (s->rx_fifo);
+}
+
 always_inline u32
 transport_rx_fifo_size (transport_connection_t * tc)
 {
index abab086..d83ecfb 100644 (file)
@@ -49,7 +49,7 @@ static double transport_pacer_period;
 
 #define TRANSPORT_PACER_MIN_MSS        1460
 #define TRANSPORT_PACER_MIN_BURST      TRANSPORT_PACER_MIN_MSS
-#define TRANSPORT_PACER_MAX_BURST      (48 * TRANSPORT_PACER_MIN_MSS)
+#define TRANSPORT_PACER_MAX_BURST      (32 * TRANSPORT_PACER_MIN_MSS)
 
 u8 *
 format_transport_proto (u8 * s, va_list * args)
index e262513..09c47d9 100644 (file)
@@ -350,6 +350,15 @@ tcp_connection_close (tcp_connection_t * tc)
       tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_FINWAIT1_TIME);
       break;
     case TCP_STATE_ESTABLISHED:
+      /* If closing with unread data, reset the connection */
+      if (transport_max_rx_dequeue (&tc->connection))
+       {
+         tcp_send_reset (tc);
+         tcp_connection_timers_reset (tc);
+         tcp_connection_set_state (tc, TCP_STATE_CLOSED);
+         tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+         break;
+       }
       if (!transport_max_tx_dequeue (&tc->connection))
        tcp_send_fin (tc);
       else
index 8383d01..46d72b7 100644 (file)
@@ -126,6 +126,7 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
   _(FRXT_FIRST, "Fast-retransmit first again") \
   _(DEQ_PENDING, "Pending dequeue acked")      \
   _(PSH_PENDING, "PSH pending")                        \
+  _(FINRCVD, "FIN received")                   \
 
 typedef enum _tcp_connection_flag_bits
 {
index cc630f8..9ac2d85 100644 (file)
@@ -405,11 +405,26 @@ error:
 }
 
 always_inline int
-tcp_rcv_ack_is_acceptable (tcp_connection_t * tc0, vlib_buffer_t * tb0)
+tcp_rcv_ack_no_cc (tcp_connection_t * tc, vlib_buffer_t * b, u32 * error)
 {
   /* SND.UNA =< SEG.ACK =< SND.NXT */
-  return (seq_leq (tc0->snd_una, vnet_buffer (tb0)->tcp.ack_number)
-         && seq_leq (vnet_buffer (tb0)->tcp.ack_number, tc0->snd_una_max));
+  if (!(seq_leq (tc->snd_una, vnet_buffer (b)->tcp.ack_number)
+       && seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)))
+    {
+      if (seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_una_max))
+       {
+         tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
+         goto acceptable;
+       }
+      *error = TCP_ERROR_ACK_INVALID;
+      return -1;
+    }
+
+acceptable:
+  tc->bytes_acked = vnet_buffer (b)->tcp.ack_number - tc->snd_una;
+  tc->snd_una = vnet_buffer (b)->tcp.ack_number;
+  *error = TCP_ERROR_ACK_OK;
+  return 0;
 }
 
 /**
@@ -2703,24 +2718,24 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
       switch (tc0->state)
        {
        case TCP_STATE_SYN_RCVD:
+
+         /* Make sure the segment is exactly right */
+         if (tc0->rcv_nxt != vnet_buffer (b0)->tcp.seq_number || is_fin0)
+           {
+             tcp_connection_reset (tc0);
+             error0 = TCP_ERROR_SEGMENT_INVALID;
+             goto drop;
+           }
+
          /*
           * If the segment acknowledgment is not acceptable, form a
           * reset segment,
           *  <SEQ=SEG.ACK><CTL=RST>
           * and send it.
           */
-         if (!tcp_rcv_ack_is_acceptable (tc0, b0))
+         if (tcp_rcv_ack_no_cc (tc0, b0, &error0))
            {
              tcp_connection_reset (tc0);
-             error0 = TCP_ERROR_ACK_INVALID;
-             goto drop;
-           }
-
-         /* Make sure the ack is exactly right */
-         if (tc0->rcv_nxt != vnet_buffer (b0)->tcp.seq_number || is_fin0)
-           {
-             tcp_connection_reset (tc0);
-             error0 = TCP_ERROR_SEGMENT_INVALID;
              goto drop;
            }
 
@@ -2774,12 +2789,22 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
          /* If FIN is ACKed */
          else if (tc0->snd_una == tc0->snd_nxt)
            {
-             tcp_connection_set_state (tc0, TCP_STATE_FIN_WAIT_2);
-
              /* Stop all retransmit timers because we have nothing more
-              * to send. Enable waitclose though because we're willing to
-              * wait for peer's FIN but not indefinitely. */
+              * to send. */
              tcp_connection_timers_reset (tc0);
+
+             /* We already have a FIN but didn't transition to CLOSING
+              * because of outstanding tx data. Close the connection. */
+             if (tc0->flags & TCP_CONN_FINRCVD)
+               {
+                 tcp_connection_set_state (tc0, TCP_STATE_CLOSED);
+                 tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+                 goto drop;
+               }
+
+             tcp_connection_set_state (tc0, TCP_STATE_FIN_WAIT_2);
+             /* Enable waitclose because we're willing to wait for peer's
+              * FIN but not indefinitely. */
              tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
 
              /* Don't try to deq the FIN acked */
@@ -2793,7 +2818,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
          /* In addition to the processing for the ESTABLISHED state, if
           * the retransmission queue is empty, the user's CLOSE can be
           * acknowledged ("ok") but do not delete the TCB. */
-         if (tcp_rcv_ack (wrk, tc0, b0, tcp0, &error0))
+         if (tcp_rcv_ack_no_cc (tc0, b0, &error0))
            goto drop;
          tc0->burst_acked = 0;
          break;
@@ -2802,37 +2827,27 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
          if (tcp_rcv_ack (wrk, tc0, b0, tcp0, &error0))
            goto drop;
 
-         if (tc0->flags & TCP_CONN_FINPNDG)
-           {
-             /* TX fifo finally drained */
-             if (!transport_max_tx_dequeue (&tc0->connection))
-               {
-                 tcp_send_fin (tc0);
-                 tcp_connection_timers_reset (tc0);
-                 tcp_connection_set_state (tc0, TCP_STATE_LAST_ACK);
-                 tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
-               }
-           }
+         if (!(tc0->flags & TCP_CONN_FINPNDG))
+           break;
+
+         /* Still have outstanding tx data */
+         if (transport_max_tx_dequeue (&tc0->connection))
+           break;
+
+         tcp_send_fin (tc0);
+         tcp_connection_timers_reset (tc0);
+         tcp_connection_set_state (tc0, TCP_STATE_LAST_ACK);
+         tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
          break;
        case TCP_STATE_CLOSING:
          /* In addition to the processing for the ESTABLISHED state, if
           * the ACK acknowledges our FIN then enter the TIME-WAIT state,
           * otherwise ignore the segment. */
-         if (!tcp_rcv_ack_is_acceptable (tc0, b0))
-           {
-             error0 = TCP_ERROR_ACK_INVALID;
-             goto drop;
-           }
+         if (tcp_rcv_ack_no_cc (tc0, b0, &error0))
+           goto drop;
 
-         error0 = TCP_ERROR_ACK_OK;
-         tc0->snd_una = vnet_buffer (b0)->tcp.ack_number;
-         /* Ack moved snd_una beyond snd_nxt so reprogram fin */
-         if (seq_gt (tc0->snd_una, tc0->snd_nxt))
-           {
-             tc0->snd_nxt = tc0->snd_una;
-             tc0->flags &= ~TCP_CONN_FINSNT;
-             goto drop;
-           }
+         if (tc0->snd_una != tc0->snd_nxt)
+           goto drop;
 
          tcp_connection_timers_reset (tc0);
          tcp_connection_set_state (tc0, TCP_STATE_TIME_WAIT);
@@ -2845,13 +2860,9 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
           * acknowledgment of our FIN. If our FIN is now acknowledged,
           * delete the TCB, enter the CLOSED state, and return. */
 
-         if (!tcp_rcv_ack_is_acceptable (tc0, b0))
-           {
-             error0 = TCP_ERROR_ACK_INVALID;
-             goto drop;
-           }
-         error0 = TCP_ERROR_ACK_OK;
-         tc0->snd_una = vnet_buffer (b0)->tcp.ack_number;
+         if (tcp_rcv_ack_no_cc (tc0, b0, &error0))
+           goto drop;
+
          /* Apparently our ACK for the peer's FIN was lost */
          if (is_fin0 && tc0->snd_una != tc0->snd_nxt)
            {
@@ -2875,7 +2886,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
           * retransmission of the remote FIN. Acknowledge it, and restart
           * the 2 MSL timeout. */
 
-         if (tcp_rcv_ack (wrk, tc0, b0, tcp0, &error0))
+         if (tcp_rcv_ack_no_cc (tc0, b0, &error0))
            goto drop;
 
          if (!is_fin0)
@@ -2943,26 +2954,17 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
          break;
        case TCP_STATE_FIN_WAIT_1:
          tc0->rcv_nxt += 1;
-         tcp_connection_set_state (tc0, TCP_STATE_CLOSING);
+         /* If data is outstanding stay in FIN_WAIT_1 and try to finish
+          * sending it. */
          if (tc0->flags & TCP_CONN_FINPNDG)
            {
-             /* Drop all outstanding tx data. */
-             session_tx_fifo_dequeue_drop (&tc0->connection,
-                                           transport_max_tx_dequeue
-                                           (&tc0->connection));
-             /* Make it look as if we've recovered, if needed */
-             if (tcp_in_cong_recovery (tc0))
-               {
-                 scoreboard_clear (&tc0->sack_sb);
-                 tcp_fastrecovery_off (tc0);
-                 tcp_recovery_off (tc0);
-                 tcp_connection_timers_reset (tc0);
-                 tc0->snd_nxt = tc0->snd_una;
-               }
-             tcp_send_fin (tc0);
+             tc0->flags |= TCP_CONN_FINRCVD;
            }
          else
-           tcp_program_ack (wrk, tc0);
+           {
+             tcp_connection_set_state (tc0, TCP_STATE_CLOSING);
+             tcp_program_ack (wrk, tc0);
+           }
          /* Wait for ACK for our FIN but not forever */
          tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
          break;
index 518a80d..03caa07 100644 (file)
@@ -1132,7 +1132,6 @@ tcp_session_push_header (transport_connection_t * tconn, vlib_buffer_t * b)
   tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0, /* burst */ 1,
                  /* update_snd_nxt */ 1);
   tc->snd_una_max = seq_max (tc->snd_nxt, tc->snd_una_max);
-  ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd));
   tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
   /* If not tracking an ACK, start tracking */
   if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))