tcp: fix retransmissions under buffer shortage 79/9579/6
author Florin Coras <fcoras@cisco.com>
Mon, 27 Nov 2017 09:34:14 +0000 (04:34 -0500)
committer Dave Barach <openvpp@barachs.net>
Tue, 28 Nov 2017 13:21:26 +0000 (13:21 +0000)
- add debugging scaffolding for simulating buffer shortage

Change-Id: Ice519d74f9c4e4094c4586c548185135b7bb5f2d
Signed-off-by: Florin Coras <fcoras@cisco.com>
src/vnet/session/session_node.c
src/vnet/tcp/tcp.c
src/vnet/tcp/tcp.h
src/vnet/tcp/tcp_debug.h
src/vnet/tcp/tcp_input.c
src/vnet/tcp/tcp_output.c
src/vnet/util/trajectory.c

index 7811617..ce460e9 100644 (file)
@@ -325,8 +325,7 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
          /* *INDENT-ON* */
 
          VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
-         if (VLIB_BUFFER_TRACE_TRAJECTORY)
-           b0->pre_data[1] = 3;
+         tcp_trajectory_add_start (b0, 3);
 
          if (PREDICT_FALSE (n_trace > 0))
            {
index a0a5f19..b16b2a7 100644 (file)
@@ -233,6 +233,8 @@ tcp_connection_reset (tcp_connection_t * tc)
       tcp_connection_cleanup (tc);
       break;
     case TCP_STATE_ESTABLISHED:
+      stream_session_reset_notify (&tc->connection);
+      /* fall through */
     case TCP_STATE_CLOSE_WAIT:
     case TCP_STATE_FIN_WAIT_1:
     case TCP_STATE_FIN_WAIT_2:
@@ -242,7 +244,6 @@ tcp_connection_reset (tcp_connection_t * tc)
 
       /* Make sure all timers are cleared */
       tcp_connection_timers_reset (tc);
-      stream_session_reset_notify (&tc->connection);
 
       /* Wait for cleanup from session layer but not forever */
       tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
@@ -1319,7 +1320,9 @@ tcp_config_fn (vlib_main_t * vm, unformat_input_t * input)
                         &tm->local_endpoints_table_buckets))
        ;
 
-
+      else if (unformat (input, "buffer-fail-fraction %f",
+                        &tm->buffer_fail_fraction))
+       ;
       else
        return clib_error_return (0, "unknown input `%U'",
                                  format_unformat_error, input);
index b057b88..1ddfac0 100644 (file)
@@ -410,6 +410,9 @@ typedef struct _tcp_main
 
   u8 punt_unknown4;
   u8 punt_unknown6;
+
+  /** fault-injection */
+  f64 buffer_fail_fraction;
 } tcp_main_t;
 
 extern tcp_main_t tcp_main;
@@ -432,6 +435,15 @@ tcp_buffer_hdr (vlib_buffer_t * b)
                           + vnet_buffer (b)->tcp.hdr_offset);
 }
 
+#if (VLIB_BUFFER_TRACE_TRAJECTORY)
+#define tcp_trajectory_add_start(b, start)                     \
+{                                                              \
+    (*vlib_buffer_trace_trajectory_cb) (b, start);             \
+}
+#else
+#define tcp_trajectory_add_start(b, start)
+#endif
+
 clib_error_t *vnet_tcp_enable_disable (vlib_main_t * vm, u8 is_en);
 
 void tcp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
index eb318cd..5d4f7d6 100755 (executable)
@@ -22,6 +22,7 @@
 #define TCP_DEBUG_SM (0)
 #define TCP_DEBUG_CC (0)
 #define TCP_DEBUG_CC_STAT (1)
+#define TCP_DEBUG_BUFFER_ALLOCATION (0)
 
 #define foreach_tcp_dbg_evt            \
   _(INIT, "")                          \
@@ -747,6 +748,39 @@ if (_tc->c_cc_stat_tstamp + STATS_INTERVAL < tcp_time_now())               \
 }                                                                      \
 }
 
+/* Buffer allocation fault injection: when buffer_fail_fraction != 0, force
+ * the caller to return -1 once the per-thread count of calls since the last
+ * forced failure exceeds 1 / buffer_fail_fraction. No-op when compiled out. */
+#if TCP_DEBUG_BUFFER_ALLOCATION
+
+#define TCP_DBG_BUFFER_ALLOC_MAYBE_FAIL(thread_index)                  \
+{                                                                      \
+  static u32 *buffer_fail_counters;                                    \
+  if (PREDICT_FALSE (buffer_fail_counters == 0))                               \
+    {                                                                  \
+      u32 num_threads;                                                 \
+      vlib_thread_main_t *vtm = vlib_get_thread_main ();                       \
+      num_threads = 1 /* main thread */  + vtm->n_threads;             \
+      vec_validate (buffer_fail_counters, num_threads - 1);            \
+    }                                                                  \
+  if (PREDICT_FALSE (tcp_main.buffer_fail_fraction != 0.0))            \
+    {                                                                  \
+      if (PREDICT_TRUE (buffer_fail_counters[thread_index] > 0))               \
+        {                                                              \
+          if ((1.0 / (f32) (buffer_fail_counters[thread_index]))               \
+              < tcp_main.buffer_fail_fraction)                         \
+            {                                                          \
+              buffer_fail_counters[thread_index] = 0; /* u32: restart */ \
+              return -1;                                                       \
+            }                                                          \
+        }                                                              \
+      buffer_fail_counters[thread_index] ++;                           \
+    }                                                                  \
+}
+#else
+#define TCP_DBG_BUFFER_ALLOC_MAYBE_FAIL(thread_index)
+#endif
+
 #else
 #define TCP_EVT_CC_STAT_HANDLER(_tc, ...)
 #endif
index 614b94a..702a94f 100644 (file)
@@ -3001,6 +3001,9 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
              goto done;
            }
 
+         vnet_buffer (b0)->tcp.hdr_offset = (u8 *) tcp0
+           - (u8 *) vlib_buffer_get_current (b0);
+
          /* Session exists */
          if (PREDICT_TRUE (0 != tconn))
            {
@@ -3014,8 +3017,6 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
              vnet_buffer (b0)->tcp.ack_number =
                clib_net_to_host_u32 (tcp0->ack_number);
 
-             vnet_buffer (b0)->tcp.hdr_offset = (u8 *) tcp0
-               - (u8 *) vlib_buffer_get_current (b0);
              vnet_buffer (b0)->tcp.data_offset = n_advance_bytes0;
              vnet_buffer (b0)->tcp.data_len = n_data_bytes0;
 
index 3509ad4..f377c91 100644 (file)
@@ -463,6 +463,9 @@ tcp_get_free_buffer_index (tcp_main_t * tm, u32 * bidx)
 {
   u32 *my_tx_buffers;
   u32 thread_index = vlib_get_thread_index ();
+
+  TCP_DBG_BUFFER_ALLOC_MAYBE_FAIL (thread_index);
+
   if (PREDICT_FALSE (vec_len (tm->tx_buffers[thread_index]) == 0))
     {
       if (tcp_alloc_tx_buffers (tm, thread_index, VLIB_FRAME_SIZE))
@@ -504,7 +507,7 @@ tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b)
   b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
   b->total_length_not_including_first_buffer = 0;
   vnet_buffer (b)->tcp.flags = 0;
-
+  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
   /* Leave enough space for headers */
   return vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
 }
@@ -590,9 +593,6 @@ tcp_make_syn (tcp_connection_t * tc, vlib_buffer_t * b)
                             initial_wnd);
   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
   tcp_options_write ((u8 *) (th + 1), &snd_opts);
-
-  tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN,
-                   tc->rto * TCP_TO_TIMER_TICK);
 }
 
 /**
@@ -641,15 +641,11 @@ tcp_enqueue_to_ip_lookup_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
   b->error = 0;
 
   /* Default FIB for now */
-  vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
+  vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
 
   /* Send to IP lookup */
   next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
-  if (VLIB_BUFFER_TRACE_TRAJECTORY > 0)
-    {
-      b->pre_data[0] = 2;
-      b->pre_data[1] = next_index;
-    }
+  tcp_trajectory_add_start (b, 1);
 
   f = tm->ip_lookup_tx_frames[!is_ip4][thread_index];
   if (!f)
@@ -697,11 +693,7 @@ tcp_enqueue_to_output_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
 
   /* Decide where to send the packet */
   next_index = is_ip4 ? tcp4_output_node.index : tcp6_output_node.index;
-  if (VLIB_BUFFER_TRACE_TRAJECTORY > 0)
-    {
-      b->pre_data[0] = 1;
-      b->pre_data[1] = next_index;
-    }
+  tcp_trajectory_add_start (b, 2);
 
   /* Get frame to v4/6 output node */
   f = tm->tx_frames[!is_ip4][thread_index];
@@ -791,6 +783,7 @@ tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b0,
     }
 
   tcp_reuse_buffer (vm, b0);
+  tcp_trajectory_add_start (b0, 4);
   th0 = vlib_buffer_push_tcp_net_order (b0, dst_port, src_port, seq, ack,
                                        sizeof (tcp_header_t), flags, 0);
 
@@ -977,6 +970,14 @@ tcp_send_syn (tcp_connection_t * tc)
   tcp_main_t *tm = vnet_get_tcp_main ();
   vlib_main_t *vm = vlib_get_main ();
 
+  /*
+   * Set up retransmit and establish timers before requesting buffer
+   * such that we can return if we've run out.
+   */
+  tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME);
+  tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN,
+                   tc->rto * TCP_TO_TIMER_TICK);
+
   if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
     return;
 
@@ -989,9 +990,6 @@ tcp_send_syn (tcp_connection_t * tc)
   tc->rtt_seq = tc->snd_nxt;
   tc->rto_boff = 0;
 
-  /* Set the connection establishment timer */
-  tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME);
-
   tcp_push_ip_hdr (tm, tc, b);
   tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4);
   TCP_EVT_DBG (TCP_EVT_SYN_SENT, tc);
@@ -1055,6 +1053,7 @@ tcp_send_fin (tcp_connection_t * tc)
   u32 bi;
   u8 fin_snt = 0;
 
+  tcp_retransmit_timer_force_update (tc);
   if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
     return;
   b = vlib_get_buffer (vm, bi);
@@ -1075,7 +1074,6 @@ tcp_send_fin (tcp_connection_t * tc)
     {
       tc->snd_nxt = tc->snd_una_max;
     }
-  tcp_retransmit_timer_force_update (tc);
   TCP_EVT_DBG (TCP_EVT_FIN_SENT, tc);
 }
 
@@ -1378,9 +1376,11 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
   if (tc->state >= TCP_STATE_ESTABLISHED)
     {
       /* Lost FIN, retransmit and return */
-      if (tcp_is_lost_fin (tc))
+      if (tc->state == TCP_STATE_FIN_WAIT_1)
        {
          tcp_send_fin (tc);
+         tc->rto_boff += 1;
+         tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
          return;
        }
 
@@ -1419,12 +1419,6 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
 
       if (n_bytes == 0)
        {
-         ASSERT (!b);
-         if (tc->snd_una == tc->snd_una_max)
-           return;
-         ASSERT (tc->rto_boff > 1 && tc->snd_una == tc->snd_congestion);
-         clib_warning ("retransmit fail: %U", format_tcp_connection, tc, 2);
-         /* Try again eventually */
          tcp_retransmit_timer_set (tc);
          return;
        }
@@ -1460,6 +1454,9 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
       if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
        tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
 
+      tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN,
+                       tc->rto * TCP_TO_TIMER_TICK);
+
       if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
        return;
 
@@ -1483,7 +1480,10 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
       tc->rtt_ts = 0;
 
       if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
-       return;
+       {
+         tcp_retransmit_timer_force_update (tc);
+         return;
+       }
 
       b = vlib_get_buffer (vm, bi);
       tcp_make_synack (tc, b);
@@ -2037,7 +2037,7 @@ tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
            }
 
          /* Prepare to send to IP lookup */
-         vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0;
+         vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
          next0 = TCP_RESET_NEXT_IP_LOOKUP;
 
        done:
index 24b5125..91812dc 100644 (file)
@@ -64,7 +64,11 @@ vnet_dump_trajectory_trace (vlib_main_t * vm, u32 bi)
 void
 init_trajectory_trace (vlib_buffer_t * b)
 {
-  vec_validate (vnet_buffer2 (b)->trajectory_trace, 7);
+  if (!clib_mem_is_vec (vnet_buffer2 (b)->trajectory_trace))
+    {
+      vnet_buffer2 (b)->trajectory_trace = 0;
+      vec_validate (vnet_buffer2 (b)->trajectory_trace, 7);
+    }
   _vec_len (vnet_buffer2 (b)->trajectory_trace) = 0;
 }