tcp: handle disconnects after enq notifications 45/15845/4
authorFlorin Coras <fcoras@cisco.com>
Fri, 9 Nov 2018 22:34:08 +0000 (14:34 -0800)
committerDave Barach <openvpp@barachs.net>
Mon, 12 Nov 2018 16:29:46 +0000 (16:29 +0000)
Make sure that we notify the app of the data enqueued in the burst
before notifying of disconnect.

Change-Id: I7747a5cbb4c6bc9132007f849c24ce04b7841273
Signed-off-by: Florin Coras <fcoras@cisco.com>
src/vnet/tcp/tcp.c
src/vnet/tcp/tcp.h
src/vnet/tcp/tcp_input.c
src/vnet/tcp/tcp_output.c

index 695f614..2511a17 100644 (file)
@@ -1332,11 +1332,13 @@ tcp_main_enable (vlib_main_t * vm)
       vec_validate (tm->wrk_ctx[thread].postponed_fast_rxt, 255);
       vec_validate (tm->wrk_ctx[thread].pending_deq_acked, 255);
       vec_validate (tm->wrk_ctx[thread].pending_acks, 255);
+      vec_validate (tm->wrk_ctx[thread].pending_disconnects, 255);
       vec_reset_length (tm->wrk_ctx[thread].pending_fast_rxt);
       vec_reset_length (tm->wrk_ctx[thread].ongoing_fast_rxt);
       vec_reset_length (tm->wrk_ctx[thread].postponed_fast_rxt);
       vec_reset_length (tm->wrk_ctx[thread].pending_deq_acked);
       vec_reset_length (tm->wrk_ctx[thread].pending_acks);
+      vec_reset_length (tm->wrk_ctx[thread].pending_disconnects);
       tm->wrk_ctx[thread].vm = vlib_mains[thread];
 
       /*
index 4626297..d4bebeb 100644 (file)
@@ -118,7 +118,7 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
   _(SENT_RCV_WND0, "Sent 0 rcv_wnd")           \
   _(RECOVERY, "Recovery")                      \
   _(FAST_RECOVERY, "Fast Recovery")            \
-  _(FR_1_SMSS, "Sent 1 SMSS")                  \
+  _(DCNT_PENDING, "Disconnect pending")                \
   _(HALF_OPEN_DONE, "Half-open completed")     \
   _(FINPNDG, "FIN pending")                    \
   _(FRXT_PENDING, "Fast-retransmit pending")   \
@@ -352,9 +352,9 @@ struct _tcp_cc_algorithm
 #define tcp_in_fastrecovery(tc) ((tc)->flags & TCP_CONN_FAST_RECOVERY)
 #define tcp_in_recovery(tc) ((tc)->flags & (TCP_CONN_RECOVERY))
 #define tcp_in_slowstart(tc) (tc->cwnd < tc->ssthresh)
-#define tcp_fastrecovery_sent_1_smss(tc) ((tc)->flags & TCP_CONN_FR_1_SMSS)
-#define tcp_fastrecovery_1_smss_on(tc) ((tc)->flags |= TCP_CONN_FR_1_SMSS)
-#define tcp_fastrecovery_1_smss_off(tc) ((tc)->flags &= ~TCP_CONN_FR_1_SMSS)
+#define tcp_disconnect_pending(tc) ((tc)->flags & TCP_CONN_DCNT_PENDING)
+#define tcp_disconnect_pending_on(tc) ((tc)->flags |= TCP_CONN_DCNT_PENDING)
+#define tcp_disconnect_pending_off(tc) ((tc)->flags &= ~TCP_CONN_DCNT_PENDING)
 #define tcp_fastrecovery_first(tc) ((tc)->flags & TCP_CONN_FRXT_FIRST)
 #define tcp_fastrecovery_first_on(tc) ((tc)->flags |= TCP_CONN_FRXT_FIRST)
 #define tcp_fastrecovery_first_off(tc) ((tc)->flags &= ~TCP_CONN_FRXT_FIRST)
@@ -366,7 +366,6 @@ always_inline void
 tcp_cong_recovery_off (tcp_connection_t * tc)
 {
   tc->flags &= ~(TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY);
-  tcp_fastrecovery_1_smss_off (tc);
   tcp_fastrecovery_first_off (tc);
 }
 
@@ -386,26 +385,46 @@ typedef struct _tcp_lookup_dispatch
 typedef struct tcp_worker_ctx_
 {
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-  u32 time_now;                                        /**< worker time */
-  tw_timer_wheel_16t_2w_512sl_t timer_wheel;   /**< worker timer wheel */
-  u32 *tx_buffers;                             /**< tx buffer free list */
-  vlib_frame_t *tx_frames[2];                  /**< tx frames for tcp 4/6
-                                                    output nodes */
-  vlib_frame_t *ip_lookup_tx_frames[2];                /**< tx frames for ip 4/6
-                                                    lookup nodes */
-  u32 *pending_fast_rxt;                       /**< vector of connections
-                                                    needing fast rxt */
-  u32 *ongoing_fast_rxt;                       /**< vector of connections
-                                                    now doing fast rxt */
-  u32 *postponed_fast_rxt;                     /**< vector of connections
-                                                    that will do fast rxt */
+  /** worker time */
+  u32 time_now;
+
+  /** worker timer wheel */
+  tw_timer_wheel_16t_2w_512sl_t timer_wheel;
+
+  /** tx buffer free list */
+  u32 *tx_buffers;
+
+  /** tx frames for tcp 4/6 output nodes */
+  vlib_frame_t *tx_frames[2];
+
+  /** tx frames for ip 4/6 lookup nodes */
+  vlib_frame_t *ip_lookup_tx_frames[2];
+
+  /** vector of connections needing fast rxt */
+  u32 *pending_fast_rxt;
+
+  /** vector of connections now doing fast rxt */
+  u32 *ongoing_fast_rxt;
+
+  /** vector of connections that will do fast rxt */
+  u32 *postponed_fast_rxt;
+
+  /** vector of pending ack dequeues */
   u32 *pending_deq_acked;
+
+  /** vector of pending acks */
   u32 *pending_acks;
-  vlib_main_t *vm;                             /**< pointer to vm */
+
+  /** vector of pending disconnect notifications */
+  u32 *pending_disconnects;
+
+  /** convenience pointer to this thread's vlib main */
+  vlib_main_t *vm;
 
     CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
-  u8 cached_opts[40];                          /**< cached 'on the wire'
-                                                    options for bursts */
+
+  /** cached 'on the wire' options for bursts */
+  u8 cached_opts[40];
 
 } tcp_worker_ctx_t;
 
index 2578b7d..0c87065 100644 (file)
@@ -1131,7 +1131,6 @@ tcp_cc_fastrecovery_exit (tcp_connection_t * tc)
   tc->rtt_ts = 0;
 
   tcp_fastrecovery_off (tc);
-  tcp_fastrecovery_1_smss_off (tc);
   tcp_fastrecovery_first_off (tc);
 
   TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
@@ -1600,6 +1599,54 @@ process_ack:
   return 0;
 }
 
+static void
+tcp_program_disconnect (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
+{
+  if (!tcp_disconnect_pending (tc))
+    {
+      vec_add1 (wrk->pending_disconnects, tc->c_c_index);
+      tcp_disconnect_pending_on (tc);
+    }
+}
+
+static void
+tcp_handle_disconnects (tcp_worker_ctx_t * wrk)
+{
+  u32 thread_index, *pending_disconnects;
+  tcp_connection_t *tc;
+  int i;
+
+  if (!vec_len (wrk->pending_disconnects))
+    return;
+
+  thread_index = wrk->vm->thread_index;
+  pending_disconnects = wrk->pending_disconnects;
+  for (i = 0; i < vec_len (pending_disconnects); i++)
+    {
+      tc = tcp_connection_get (pending_disconnects[i], thread_index);
+      tcp_disconnect_pending_off (tc);
+      stream_session_disconnect_notify (&tc->connection);
+    }
+  _vec_len (wrk->pending_disconnects) = 0;
+}
+
+static void
+tcp_rcv_fin (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b,
+            u32 * error)
+{
+  /* Enter CLOSE-WAIT and notify session. To avoid lingering
+   * in CLOSE-WAIT, set timer (reuse WAITCLOSE). */
+  /* Account for the FIN if nothing else was received */
+  if (vnet_buffer (b)->tcp.data_len == 0)
+    tc->rcv_nxt += 1;
+  tcp_program_ack (wrk, tc);
+  tc->state = TCP_STATE_CLOSE_WAIT;
+  tcp_program_disconnect (wrk, tc);
+  tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+  TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc);
+  *error = TCP_ERROR_FIN_RCVD;
+}
+
 static u8
 tcp_sack_vector_is_sane (sack_block_t * sacks)
 {
@@ -2099,19 +2146,7 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 
       /* 8: check the FIN bit */
       if (PREDICT_FALSE (is_fin))
-       {
-         /* Enter CLOSE-WAIT and notify session. To avoid lingering
-          * in CLOSE-WAIT, set timer (reuse WAITCLOSE). */
-         /* Account for the FIN if nothing else was received */
-         if (vnet_buffer (b0)->tcp.data_len == 0)
-           tc0->rcv_nxt += 1;
-         tcp_program_ack (wrk, tc0);
-         tc0->state = TCP_STATE_CLOSE_WAIT;
-         stream_session_disconnect_notify (&tc0->connection);
-         tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
-         TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc0);
-         error0 = TCP_ERROR_FIN_RCVD;
-       }
+       tcp_rcv_fin (wrk, tc0, b0, &error0);
 
     done:
       tcp_inc_err_counter (err_counters, error0, 1);
@@ -2122,6 +2157,7 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
   err_counters[TCP_ERROR_EVENT_FIFO_FULL] = errors;
   tcp_store_err_counters (established, err_counters);
   tcp_handle_postponed_dequeues (wrk);
+  tcp_handle_disconnects (wrk);
   vlib_buffer_free (vm, first_buffer, frame->n_vectors);
 
   return frame->n_vectors;
index e16095b..4915636 100644 (file)
@@ -1201,8 +1201,7 @@ tcp_push_header (tcp_connection_t * tc, vlib_buffer_t * b)
   tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, /* compute opts */ 0,
                  /* burst */ 1);
   tc->snd_una_max = tc->snd_nxt;
-  ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd
-                  + tcp_fastrecovery_sent_1_smss (tc) * tc->snd_mss));
+  ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd));
   tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
   /* If not tracking an ACK, start tracking */
   if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))