tls: fix connection failures/interrupts at scale (VPP-1464) 53/15353/5
author Florin Coras <fcoras@cisco.com>
Wed, 17 Oct 2018 21:53:11 +0000 (14:53 -0700)
committer Florin Coras <florin.coras@gmail.com>
Thu, 18 Oct 2018 18:47:20 +0000 (18:47 +0000)
Change-Id: I0bc4062c1fd3202ee201acb36a2bb14fc6ee1543
Signed-off-by: Florin Coras <fcoras@cisco.com>
src/plugins/unittest/tcp_test.c
src/vnet/session-apps/echo_client.c
src/vnet/session/session.c
src/vnet/tcp/tcp_input.c
src/vnet/tls/tls.c

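diff --git a/src/plugins/unittest/tcp_test.c b/src/plugins/unittest/tcp_test.c
index 33a7fd6..c19d0f0 100644
--- a/src/plugins/unittest/tcp_test.c
+++ b/src/plugins/unittest/tcp_test.c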
@@ -1559,13 +1559,14 @@ tcp_test_lookup (vlib_main_t * vm, unformat_input_t * input)
   tcp_connection_t *tc;
   stream_session_t *s, *s1;
   u8 cmp = 0, is_filtered = 0;
+  u32 sidx;
 
   /*
    * Allocate fake session and connection 1
    */
   pool_get (smm->sessions[0], s);
   memset (s, 0, sizeof (*s));
-  s->session_index = s - smm->sessions[0];
+  s->session_index = sidx = s - smm->sessions[0];
 
   pool_get (tm->connections[0], tc);
   memset (tc, 0, sizeof (*tc));
@@ -1606,7 +1607,7 @@ tcp_test_lookup (vlib_main_t * vm, unformat_input_t * input)
    * Confirm that connection lookup works
    */
 
-  s1 = pool_elt_at_index (smm->sessions[0], 0);
+  s1 = pool_elt_at_index (smm->sessions[0], sidx);
   session_lookup_add_connection (tc1, session_handle (s1));
   tconn = session_lookup_connection_wt4 (0, &tc1->lcl_ip.ip4,
                                         &tc1->rmt_ip.ip4,
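diff --git a/src/vnet/session-apps/echo_client.c b/src/vnet/session-apps/echo_client.c
index 1fd7ad0..1ece019 100644
--- a/src/vnet/session-apps/echo_client.c
+++ b/src/vnet/session-apps/echo_client.c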
@@ -352,16 +352,6 @@ echo_clients_init (vlib_main_t * vm)
   return 0;
 }
 
-static void
-echo_clients_session_disconnect (stream_session_t * s)
-{
-  echo_client_main_t *ecm = &echo_client_main;
-  vnet_disconnect_args_t _a, *a = &_a;
-  a->handle = session_handle (s);
-  a->app_index = ecm->app_index;
-  vnet_disconnect_session (a);
-}
-
 static int
 echo_clients_session_connected_callback (u32 app_index, u32 api_context,
                                         stream_session_t * s, u8 is_fail)
@@ -377,6 +367,7 @@ echo_clients_session_connected_callback (u32 app_index, u32 api_context,
   if (is_fail)
     {
       clib_warning ("connection %d failed!", api_context);
+      ecm->run_test = ECHO_CLIENTS_EXITING;
       signal_evt_to_cli (-1);
       return 0;
     }
@@ -454,6 +445,16 @@ echo_clients_session_disconnect_callback (stream_session_t * s)
   return;
 }
 
+void
+echo_clients_session_disconnect (stream_session_t * s)
+{
+  echo_client_main_t *ecm = &echo_client_main;
+  vnet_disconnect_args_t _a, *a = &_a;
+  a->handle = session_handle (s);
+  a->app_index = ecm->app_index;
+  vnet_disconnect_session (a);
+}
+
 static int
 echo_clients_rx_callback (stream_session_t * s)
 {
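diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index 23d258f..81c9306 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c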
@@ -814,18 +814,22 @@ stream_session_delete_notify (transport_connection_t * tc)
        * from the app, do the whole disconnect since we might still
        * have lingering events */
       stream_session_disconnect (s);
+      s->session_state = SESSION_STATE_CLOSED;
       break;
     case SESSION_STATE_CLOSING:
       /* Cleanup lookup table. Transport needs to still be valid */
       session_lookup_del_session (s);
+      s->session_state = SESSION_STATE_CLOSED;
       break;
     case SESSION_STATE_CLOSED:
     case SESSION_STATE_ACCEPTING:
       stream_session_delete (s);
       break;
+    default:
+      /* Assume connection was not yet added to the lookup table */
+      session_free_w_fifos (s);
+      break;
     }
-
-  s->session_state = SESSION_STATE_CLOSED;
 }
 
 /**
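diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 85aaa16..d03388e 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c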
@@ -2675,9 +2675,12 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 
              tc0->state = TCP_STATE_CLOSED;
              TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
-             /* Delete the connection/session since the pipes should be
-              * clear by now */
-             tcp_connection_del (tc0);
+
+             /* Don't free the connection from the data path since
+              * we can't ensure that we have no packets already enqueued
+              * to output. Rely instead on the waitclose timer */
+             tcp_connection_timers_reset (tc0);
+             tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, 1);
 
              goto drop;
 
@@ -3489,6 +3492,7 @@ do {                                                              \
   _(LAST_ACK, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
     TCP_ERROR_NONE);
   _(LAST_ACK, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+  _(LAST_ACK, TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
   _(TIME_WAIT, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
   _(TIME_WAIT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
     TCP_ERROR_NONE);
diff --git a/src/vnet/tls/tls.c b/src/vnet/tls/tls.c
index 9a82360..f4814a3 100644
--- a/src/vnet/tls/tls.c
+++ b/src/vnet/tls/tls.c
@@ -26,6 +26,18 @@ static tls_engine_vft_t *tls_vfts;
 
 void tls_disconnect (u32 ctx_handle, u32 thread_index);
 
+static void
+tls_disconnect_transport (tls_ctx_t * ctx)
+{
+  vnet_disconnect_args_t a = {
+    .handle = ctx->tls_session_handle,
+    .app_index = tls_main.app_index,
+  };
+
+  if (vnet_disconnect_session (&a))
+    clib_warning ("disconnect returned");
+}
+
 tls_engine_type_t
 tls_get_available_engine (void)
 {
@@ -91,6 +103,8 @@ tls_listener_ctx_alloc (void)
 void
 tls_listener_ctx_free (tls_ctx_t * ctx)
 {
+  if (CLIB_DEBUG)
+    memset (ctx, 0xfb, sizeof (*ctx));
   pool_put (tls_main.listener_ctx_pool, ctx);
 }
 
@@ -226,7 +240,7 @@ tls_notify_app_connected (tls_ctx_t * ctx, u8 is_failed)
   app_wrk = app_worker_get_if_valid (ctx->parent_app_index);
   if (!app_wrk)
     {
-      tls_disconnect (ctx->tls_ctx_handle, vlib_get_thread_index ());
+      tls_disconnect_transport (ctx);
       return -1;
     }
 
@@ -249,16 +263,16 @@ tls_notify_app_connected (tls_ctx_t * ctx, u8 is_failed)
 
   ctx->app_session_handle = session_handle (app_session);
   ctx->c_s_index = app_session->session_index;
-  app_session->session_state = SESSION_STATE_READY;
+  app_session->session_state = SESSION_STATE_CONNECTING;
   if (cb_fn (ctx->parent_app_index, ctx->parent_app_api_context,
             app_session, 0 /* not failed */ ))
     {
       TLS_DBG (1, "failed to notify app");
       tls_disconnect (ctx->tls_ctx_handle, vlib_get_thread_index ());
-      session_free_w_fifos (app_session);
       return -1;
     }
 
+  app_session->session_state = SESSION_STATE_READY;
   session_lookup_add_connection (&ctx->connection,
                                 session_handle (app_session));
 
@@ -553,15 +567,7 @@ tls_disconnect (u32 ctx_handle, u32 thread_index)
   TLS_DBG (1, "Disconnecting %x", ctx_handle);
 
   ctx = tls_ctx_get (ctx_handle);
-
-  vnet_disconnect_args_t a = {
-    .handle = ctx->tls_session_handle,
-    .app_index = tls_main.app_index,
-  };
-
-  if (vnet_disconnect_session (&a))
-    clib_warning ("disconnect returned");
-
+  tls_disconnect_transport (ctx);
   stream_session_delete_notify (&ctx->connection);
   tls_ctx_free (ctx);
 }