VPP-659 TCP improvements (change ref 24/5624/14)
author Florin Coras <fcoras@cisco.com>
Sat, 4 Mar 2017 13:37:52 +0000 (05:37 -0800)
committer Damjan Marion <dmarion.lists@gmail.com>
Fri, 10 Mar 2017 19:24:57 +0000 (19:24 +0000)
- builtin test echo server
- fix SYN-ACK retransmit canceling
- avoid sending spurious ACK if in LAST_ACK
- improved client dummy test app
- renamed tx fifo dequeuing and sending functions to avoid confusion
- improved RST handling

Change-Id: Ia14aad3df319540dcf6e6a4e18a9f8d423a4b83b
Signed-off-by: Florin Coras <fcoras@cisco.com>
Signed-off-by: Dave Barach <dave@barachs.net>
22 files changed:
src/scripts/vnet/uri/afp_setup.cli [moved from src/scripts/vnet/uri/tcp_server with 100% similarity]
src/scripts/vnet/uri/dummy_app.py
src/scripts/vnet/uri/tap_setup.cli [new file with mode: 0644]
src/uri/uri_tcp_test.c
src/vnet/session/application.c
src/vnet/session/application.h
src/vnet/session/application_interface.c
src/vnet/session/application_interface.h
src/vnet/session/node.c
src/vnet/session/session.c
src/vnet/session/session.h
src/vnet/session/session_api.c
src/vnet/session/transport.h
src/vnet/tcp/builtin_server.c
src/vnet/tcp/tcp.c
src/vnet/tcp/tcp.h
src/vnet/tcp/tcp_error.def
src/vnet/tcp/tcp_input.c
src/vnet/tcp/tcp_output.c
src/vnet/udp/builtin_server.c
src/vnet/udp/udp_input.c
src/vnet/unix/tapcli.c

index b80fbb2..5033392 100644 (file)
@@ -2,7 +2,7 @@
 
 import socket
 import sys
-import bitstring
+import time
 
 # action can be reflect or drop 
 action = "drop"
@@ -22,6 +22,7 @@ def handle_connection (connection, client_address):
 def run_server(ip, port):
     print("Starting server {}:{}".format(repr(ip), repr(port)))
     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
     server_address = (ip, int(port))
     sock.bind(server_address)
     sock.listen(1)
@@ -39,12 +40,31 @@ def prepare_data():
 def run_client(ip, port):
     print("Starting client {}:{}".format(repr(ip), repr(port)))
     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-    server_address = ("6.0.1.1", 1234)
+    server_address = (ip, port)
     sock.connect(server_address)
     
     data = prepare_data()
+    n_rcvd = 0
+    n_sent = len (data)
     try:
         sock.sendall(data)
+        
+        timeout = time.time() + 2
+        while n_rcvd < n_sent and time.time() < timeout:
+            tmp = sock.recv(1500)
+            tmp = bytearray (tmp)
+            n_read = len(tmp)
+            for i in range(n_read):
+                if (data[n_rcvd + i] != tmp[i]):
+                    print("Difference at byte {}. Sent {} got {}"
+                          .format(n_rcvd + i, data[n_rcvd + i], tmp[i]))
+            n_rcvd += n_read
+            
+        if (n_rcvd < n_sent or n_rcvd > n_sent):
+            print("Sent {} and got back {}".format(n_sent, n_rcvd))
+        else:
+            print("Got back what we've sent!!");
+            
     finally:
         sock.close()
     
@@ -62,4 +82,4 @@ if __name__ == "__main__":
     if (len(sys.argv) == 5):
         action = sys.argv[4]
 
-    run (sys.argv[1], sys.argv[2], sys.argv[3])
+    run (sys.argv[1], sys.argv[2], int(sys.argv[3]))
diff --git a/src/scripts/vnet/uri/tap_setup.cli b/src/scripts/vnet/uri/tap_setup.cli
new file mode 100644 (file)
index 0000000..1d9a1b3
--- /dev/null
@@ -0,0 +1,5 @@
+tap connect tap0 address 6.0.1.2/24
+set int ip addr tap-0 6.0.1.1/24
+set int state tap-0 up
+trace add tapcli-rx 10
+session enable
index 6c9cf1d..261fd28 100644 (file)
@@ -287,6 +287,7 @@ vl_api_reset_session_t_handler (vl_api_reset_session_t * mp)
       session = pool_elt_at_index (utm->sessions, p[0]);
       hash_unset (utm->session_index_by_vpp_handles, key);
       pool_put (utm->sessions, session);
+      utm->time_to_stop = 1;
     }
   else
     {
@@ -296,7 +297,7 @@ vl_api_reset_session_t_handler (vl_api_reset_session_t * mp)
 
   rmp = vl_msg_api_alloc (sizeof (*rmp));
   memset (rmp, 0, sizeof (*rmp));
-  rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY);
+  rmp->_vl_msg_id = ntohs (VL_API_RESET_SESSION_REPLY);
   rmp->retval = rv;
   rmp->session_index = mp->session_index;
   rmp->session_thread_index = mp->session_thread_index;
@@ -734,7 +735,7 @@ server_handle_fifo_event_rx (uri_tcp_test_main_t * utm,
            {
              rv = svm_fifo_enqueue_nowait (tx_fifo, 0, n_read, utm->rx_buf);
            }
-         while (rv == -2);
+         while (rv == -2 && !utm->time_to_stop);
 
          /* Fabricate TX event, send to vpp */
          evt.fifo = tx_fifo;
@@ -750,7 +751,7 @@ server_handle_fifo_event_rx (uri_tcp_test_main_t * utm,
       if (n_read > 0)
        bytes -= n_read;
     }
-  while (n_read < 0 || bytes > 0);
+  while ((n_read < 0 || bytes > 0) && !utm->time_to_stop);
 }
 
 void
index a542eeb..513e5fa 100644 (file)
@@ -92,6 +92,19 @@ application_del (application_t * app)
   pool_put (app_pool, app);
 }
 
+static void
+application_verify_cb_fns (application_type_t type, session_cb_vft_t * cb_fns)
+{
+  if (type == APP_SERVER && cb_fns->session_accept_callback == 0)
+    clib_warning ("No accept callback function provided");
+  if (type == APP_CLIENT && cb_fns->session_connected_callback == 0)
+    clib_warning ("No session connected callback function provided");
+  if (cb_fns->session_disconnect_callback == 0)
+    clib_warning ("No session disconnect callback function provided");
+  if (cb_fns->session_reset_callback == 0)
+    clib_warning ("No session reset callback function provided");
+}
+
 application_t *
 application_new (application_type_t type, session_type_t sst,
                 u32 api_client_index, u32 flags, session_cb_vft_t * cb_fns)
@@ -142,6 +155,9 @@ application_new (application_type_t type, session_type_t sst,
   app->flags = flags;
   app->cb_fns = *cb_fns;
 
+  /* Check that the obvious things are properly set up */
+  application_verify_cb_fns (type, cb_fns);
+
   /* Add app to lookup by api_client_index table */
   application_table_add (app);
 
index 480828f..a60a8b8 100644 (file)
@@ -45,7 +45,8 @@ typedef struct _stream_session_cb_vft
   void (*session_reset_callback) (stream_session_t * s);
 
   /* Direct RX callback, for built-in servers */
-  int (*builtin_server_rx_callback) (stream_session_t * session);
+  int (*builtin_server_rx_callback) (stream_session_t * session,
+                                    session_fifo_event_t * ep);
 
   /* Redirect connection to local server */
   int (*redirect_connect_callback) (u32 api_client_index, void *mp);
index 6ddfb70..4b30bd8 100644 (file)
@@ -98,7 +98,7 @@ vnet_bind_i (u32 api_client_index, ip46_address_t * ip46, u16 port_host_order,
 
   if (application_lookup (api_client_index))
     {
-      clib_warning ("Only one bind supported for now");
+      clib_warning ("Only one connection supported for now");
       return VNET_API_ERROR_ADDRESS_IN_USE;
     }
 
@@ -364,8 +364,7 @@ vnet_connect_uri (vnet_connect_args_t * a)
 }
 
 int
-vnet_disconnect_session (u32 client_index, u32 session_index,
-                        u32 thread_index)
+vnet_disconnect_session (u32 session_index, u32 thread_index)
 {
   stream_session_t *session;
 
index 8d87c06..a5f2b9a 100644 (file)
@@ -112,9 +112,7 @@ typedef enum
 int vnet_bind_uri (vnet_bind_args_t *);
 int vnet_unbind_uri (char *uri, u32 api_client_index);
 int vnet_connect_uri (vnet_connect_args_t * a);
-int
-vnet_disconnect_session (u32 client_index, u32 session_index,
-                        u32 thread_index);
+int vnet_disconnect_session (u32 session_index, u32 thread_index);
 
 int vnet_bind (vnet_bind_args_t * a);
 int vnet_connect (vnet_connect_args_t * a);
index 399077d..7fd7e0b 100644 (file)
@@ -78,10 +78,11 @@ static u32 session_type_to_next[] = {
 };
 
 always_inline int
-session_fifo_rx_i (vlib_main_t * vm, vlib_node_runtime_t * node,
-                  session_manager_main_t * smm, session_fifo_event_t * e0,
-                  stream_session_t * s0, u32 thread_index, int *n_tx_packets,
-                  u8 peek_data)
+session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
+                               session_manager_main_t * smm,
+                               session_fifo_event_t * e0,
+                               stream_session_t * s0, u32 thread_index,
+                               int *n_tx_packets, u8 peek_data)
 {
   u32 n_trace = vlib_get_trace_count (vm, node);
   u32 left_to_snd0, max_len_to_snd0, len_to_deq0, n_bufs, snd_space0;
@@ -120,7 +121,7 @@ session_fifo_rx_i (vlib_main_t * vm, vlib_node_runtime_t * node,
   if (peek_data)
     {
       /* Offset in rx fifo from where to peek data  */
-      rx_offset = transport_vft->rx_fifo_offset (tc0);
+      rx_offset = transport_vft->tx_fifo_offset (tc0);
     }
 
   /* TODO check if transport is willing to send len_to_snd0
@@ -194,25 +195,27 @@ session_fifo_rx_i (vlib_main_t * vm, vlib_node_runtime_t * node,
              t0->server_thread_index = s0->thread_index;
            }
 
+         len_to_deq0 = (left_to_snd0 < snd_mss0) ? left_to_snd0 : snd_mss0;
+
          /* *INDENT-OFF* */
          if (1)
            {
              ELOG_TYPE_DECLARE (e) = {
-                 .format = "evt-dequeue: id %d length %d",
-                 .format_args = "i4i4",
+                 .format = "evt-deq: id %d len %d rd %d wnd %d",
+                 .format_args = "i4i4i4i4",
              };
              struct
              {
-               u32 data[2];
+               u32 data[4];
              } *ed;
              ed = ELOG_DATA (&vm->elog_main, e);
              ed->data[0] = e0->event_id;
              ed->data[1] = e0->enqueue_length;
+             ed->data[2] = len_to_deq0;
+             ed->data[3] = left_to_snd0;
            }
          /* *INDENT-ON* */
 
-         len_to_deq0 = (left_to_snd0 < snd_mss0) ? left_to_snd0 : snd_mss0;
-
          /* Make room for headers */
          data0 = vlib_buffer_make_headroom (b0, MAX_HDRS_LEN);
 
@@ -276,22 +279,25 @@ dequeue_fail:
 }
 
 int
-session_fifo_rx_peek (vlib_main_t * vm, vlib_node_runtime_t * node,
-                     session_manager_main_t * smm, session_fifo_event_t * e0,
-                     stream_session_t * s0, u32 thread_index, int *n_tx_pkts)
+session_tx_fifo_peek_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node,
+                             session_manager_main_t * smm,
+                             session_fifo_event_t * e0,
+                             stream_session_t * s0, u32 thread_index,
+                             int *n_tx_pkts)
 {
-  return session_fifo_rx_i (vm, node, smm, e0, s0, thread_index, n_tx_pkts,
-                           1);
+  return session_tx_fifo_read_and_snd_i (vm, node, smm, e0, s0, thread_index,
+                                        n_tx_pkts, 1);
 }
 
 int
-session_fifo_rx_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node,
-                        session_manager_main_t * smm,
-                        session_fifo_event_t * e0, stream_session_t * s0,
-                        u32 thread_index, int *n_tx_pkts)
+session_tx_fifo_dequeue_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node,
+                                session_manager_main_t * smm,
+                                session_fifo_event_t * e0,
+                                stream_session_t * s0, u32 thread_index,
+                                int *n_tx_pkts)
 {
-  return session_fifo_rx_i (vm, node, smm, e0, s0, thread_index, n_tx_pkts,
-                           0);
+  return session_tx_fifo_read_and_snd_i (vm, node, smm, e0, s0, thread_index,
+                                        n_tx_pkts, 0);
 }
 
 static uword
@@ -369,12 +375,16 @@ skip_dequeue:
 
       s0 = stream_session_get_if_valid (server_session_index0,
                                        my_thread_index);
-      if (!s0)
+
+      if (CLIB_DEBUG && !s0)
        {
-         clib_warning ("It's dead Jim!");
+         clib_warning ("It's dead, Jim!");
          continue;
        }
 
+      if (PREDICT_FALSE (s0->session_state == SESSION_STATE_CLOSED))
+       continue;
+
       ASSERT (s0->thread_index == my_thread_index);
 
       switch (e0->event_type)
index b5a168c..8867e79 100644 (file)
@@ -373,7 +373,7 @@ stream_session_lookup_transport6 (ip6_address_t * lcl, ip6_address_t * rmt,
   /* Finally, try half-open connections */
   rv = clib_bihash_search_inline_48_8 (&smm->v6_half_open_hash, &kv6);
   if (rv == 0)
-    return tp_vfts[s->session_type].get_half_open (kv6.value & 0xFFFFFFFF);
+    return tp_vfts[proto].get_half_open (kv6.value & 0xFFFFFFFF);
 
   return 0;
 }
@@ -617,7 +617,10 @@ again:
          goto again;
        }
       else
-       return SESSION_ERROR_NO_SPACE;
+       {
+         clib_warning ("No space to allocate fifos!");
+         return SESSION_ERROR_NO_SPACE;
+       }
     }
   return 0;
 }
@@ -806,6 +809,10 @@ stream_session_enqueue_notify (stream_session_t * s, u8 block)
   evt.event_id = serial_number++;
   evt.enqueue_length = svm_fifo_max_dequeue (s->server_rx_fifo);
 
+  /* Built-in server? Hand event to the callback... */
+  if (app->cb_fns.builtin_server_rx_callback)
+    return app->cb_fns.builtin_server_rx_callback (s, &evt);
+
   /* Add event to server's event queue */
   q = app->event_queue;
 
@@ -1043,13 +1050,9 @@ stream_session_delete (stream_session_t * s)
   session_manager_main_t *smm = vnet_get_session_manager_main ();
   svm_fifo_segment_private_t *fifo_segment;
   application_t *app;
-  int rv;
 
-  /* delete from the main lookup table */
-  rv = stream_session_table_del (smm, s);
-
-  if (rv)
-    clib_warning ("hash delete error, rv %d", rv);
+  /* Delete from the main lookup table. */
+  stream_session_table_del (smm, s);
 
   /* Cleanup fifo segments */
   fifo_segment = svm_fifo_get_segment (s->server_segment_index);
@@ -1197,18 +1200,30 @@ stream_session_open (u8 sst, ip46_address_t * addr, u16 port_host_byte_order,
 void
 stream_session_disconnect (stream_session_t * s)
 {
-  tp_vfts[s->session_type].close (s->connection_index, s->thread_index);
   s->session_state = SESSION_STATE_CLOSED;
+  tp_vfts[s->session_type].close (s->connection_index, s->thread_index);
 }
 
 /**
  * Cleanup transport and session state.
+ *
+ * Notify transport of the cleanup, wait for a delete notify to actually
+ * remove the session state.
  */
 void
 stream_session_cleanup (stream_session_t * s)
 {
+  session_manager_main_t *smm = &session_manager_main;
+  int rv;
+
+  s->session_state = SESSION_STATE_CLOSED;
+
+  /* Delete from the main lookup table to avoid more enqueues */
+  rv = stream_session_table_del (smm, s);
+  if (rv)
+    clib_warning ("hash delete error, rv %d", rv);
+
   tp_vfts[s->session_type].cleanup (s->connection_index, s->thread_index);
-  stream_session_delete (s);
 }
 
 void
@@ -1221,7 +1236,8 @@ session_register_transport (u8 type, const transport_proto_vft_t * vft)
 
   /* If an offset function is provided, then peek instead of dequeue */
   smm->session_rx_fns[type] =
-    (vft->rx_fifo_offset) ? session_fifo_rx_peek : session_fifo_rx_dequeue;
+    (vft->tx_fifo_offset) ? session_tx_fifo_peek_and_snd :
+    session_tx_fifo_dequeue_and_snd;
 }
 
 transport_proto_vft_t *
index 46e5ce2..1b712e2 100644 (file)
@@ -102,33 +102,33 @@ typedef CLIB_PACKED (struct
 
 typedef struct _stream_session_t
 {
+  /** fifo pointers. Once allocated, these do not move */
+  svm_fifo_t *server_rx_fifo;
+  svm_fifo_t *server_tx_fifo;
+
   /** Type */
   u8 session_type;
 
   /** State */
   u8 session_state;
 
+  u8 thread_index;
+
+  /** used during unbind processing */
+  u8 is_deleted;
+
+  /** To avoid n**2 "one event per frame" check */
+  u8 enqueue_epoch;
+
   /** Session index in per_thread pool */
   u32 session_index;
 
   /** Transport specific */
   u32 connection_index;
 
-  u8 thread_index;
-
   /** Application specific */
   u32 pid;
 
-  /** fifo pointers. Once allocated, these do not move */
-  svm_fifo_t *server_rx_fifo;
-  svm_fifo_t *server_tx_fifo;
-
-  /** To avoid n**2 "one event per frame" check */
-  u8 enqueue_epoch;
-
-  /** used during unbind processing */
-  u8 is_deleted;
-
   /** stream server pool index */
   u32 app_index;
 
@@ -162,8 +162,8 @@ typedef int
                        session_fifo_event_t * e0, stream_session_t * s0,
                        u32 thread_index, int *n_tx_pkts);
 
-extern session_fifo_rx_fn session_fifo_rx_peek;
-extern session_fifo_rx_fn session_fifo_rx_dequeue;
+extern session_fifo_rx_fn session_tx_fifo_peek_and_snd;
+extern session_fifo_rx_fn session_tx_fifo_dequeue_and_snd;
 
 struct _session_manager_main
 {
index 8852fc6..9c38428 100644 (file)
@@ -130,6 +130,27 @@ send_session_disconnect_uri_callback (stream_session_t * s)
   vl_msg_api_send_shmem (q, (u8 *) & mp);
 }
 
+static void
+send_session_reset_uri_callback (stream_session_t * s)
+{
+  vl_api_reset_session_t *mp;
+  unix_shared_memory_queue_t *q;
+  application_t *app = application_get (s->app_index);
+
+  q = vl_api_client_index_to_input_queue (app->api_client_index);
+
+  if (!q)
+    return;
+
+  mp = vl_msg_api_alloc (sizeof (*mp));
+  memset (mp, 0, sizeof (*mp));
+  mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_RESET_SESSION);
+
+  mp->session_thread_index = s->thread_index;
+  mp->session_index = s->session_index;
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
 static int
 send_session_connected_uri_callback (u32 api_client_index,
                                     stream_session_t * s, u8 is_fail)
@@ -347,6 +368,26 @@ send_session_disconnect_callback (stream_session_t * s)
   vl_msg_api_send_shmem (q, (u8 *) & mp);
 }
 
+static void
+send_session_reset_callback (stream_session_t * s)
+{
+  vl_api_reset_sock_t *mp;
+  unix_shared_memory_queue_t *q;
+  application_t *app = application_get (s->app_index);
+
+  q = vl_api_client_index_to_input_queue (app->api_client_index);
+
+  if (!q)
+    return;
+
+  mp = vl_msg_api_alloc (sizeof (*mp));
+  memset (mp, 0, sizeof (*mp));
+  mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_RESET_SOCK);
+
+  mp->handle = make_session_handle (s);
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
 /**
  * Redirect a connect_uri message to the indicated server.
  * Only sent if the server has bound the related port with
@@ -414,6 +455,7 @@ static session_cb_vft_t uri_session_cb_vft = {
   .session_accept_callback = send_session_accept_uri_callback,
   .session_disconnect_callback = send_session_disconnect_uri_callback,
   .session_connected_callback = send_session_connected_uri_callback,
+  .session_reset_callback = send_session_reset_uri_callback,
   .add_segment_callback = send_add_segment_callback,
   .redirect_connect_callback = redirect_connect_uri_callback
 };
@@ -422,6 +464,7 @@ static session_cb_vft_t session_cb_vft = {
   .session_accept_callback = send_session_accept_callback,
   .session_disconnect_callback = send_session_disconnect_callback,
   .session_connected_callback = send_session_connected_callback,
+  .session_reset_callback = send_session_reset_callback,
   .add_segment_callback = send_add_segment_callback,
   .redirect_connect_callback = redirect_connect_callback
 };
@@ -548,8 +591,8 @@ vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp)
 
   rv = api_session_not_valid (mp->session_index, mp->session_thread_index);
   if (!rv)
-    rv = vnet_disconnect_session (mp->client_index, mp->session_index,
-                                 mp->session_thread_index);
+    rv =
+      vnet_disconnect_session (mp->session_index, mp->session_thread_index);
 
   REPLY_MACRO (VL_API_DISCONNECT_SESSION_REPLY);
 }
@@ -572,8 +615,7 @@ vl_api_disconnect_session_reply_t_handler (vl_api_disconnect_session_reply_t *
     }
 
   /* Disconnect has been confirmed. Confirm close to transport */
-  vnet_disconnect_session (mp->client_index, mp->session_index,
-                          mp->session_thread_index);
+  vnet_disconnect_session (mp->session_index, mp->session_thread_index);
 }
 
 static void
index f486dbb..0da3026 100644 (file)
@@ -74,7 +74,7 @@ typedef struct _transport_proto_vft
     u32 (*push_header) (transport_connection_t * tconn, vlib_buffer_t * b);
     u16 (*send_mss) (transport_connection_t * tc);
     u32 (*send_space) (transport_connection_t * tc);
-    u32 (*rx_fifo_offset) (transport_connection_t * tc);
+    u32 (*tx_fifo_offset) (transport_connection_t * tc);
 
   /*
    * Connection retrieval
@@ -92,39 +92,39 @@ typedef struct _transport_proto_vft
 
 } transport_proto_vft_t;
 
+/* *INDENT-OFF* */
 /* 16 octets */
-typedef CLIB_PACKED (struct
-                    {
-                    union
-                    {
-                    struct
-                    {
-                    ip4_address_t src; ip4_address_t dst;
-                    u16 src_port;
-                    u16 dst_port;
-                    /* align by making this 4 octets even though its a 1-bit field
-                     * NOTE: avoid key overlap with other transports that use 5 tuples for
-                     * session identification.
-                     */
-                    u32 proto;
-                    };
-                    u64 as_u64[2];
-                    };
-                    }) v4_connection_key_t;
-
-typedef CLIB_PACKED (struct
-                    {
-                    union
-                    {
-                    struct
-                    {
-                    /* 48 octets */
-                    ip6_address_t src; ip6_address_t dst;
-                    u16 src_port;
-                    u16 dst_port; u32 proto; u8 unused_for_now[8];
-                    }; u64 as_u64[6];
-                    };
-                    }) v6_connection_key_t;
+typedef CLIB_PACKED (struct {
+  union
+    {
+      struct
+       {
+         ip4_address_t src; ip4_address_t dst;
+         u16 src_port;
+         u16 dst_port;
+         /* align by making this 4 octets even though its a 1-bit field
+          * NOTE: avoid key overlap with other transports that use 5 tuples for
+          * session identification.
+          */
+         u32 proto;
+       };
+      u64 as_u64[2];
+    };
+}) v4_connection_key_t;
+
+typedef CLIB_PACKED (struct {
+  union
+    {
+      struct
+       {
+         /* 48 octets */
+         ip6_address_t src; ip6_address_t dst;
+         u16 src_port;
+         u16 dst_port; u32 proto; u8 unused_for_now[8];
+       }; u64 as_u64[6];
+    };
+}) v6_connection_key_t;
+/* *INDENT-ON* */
 
 typedef clib_bihash_kv_16_8_t session_kv4_t;
 typedef clib_bihash_kv_48_8_t session_kv6_t;
index be65642..9b697a0 100644 (file)
 #include <vnet/session/application.h>
 #include <vnet/session/application_interface.h>
 
+typedef struct
+{
+  u8 *rx_buf;
+  unix_shared_memory_queue_t **vpp_queue;
+  vlib_main_t *vlib_main;
+} builtin_server_main_t;
+
+builtin_server_main_t builtin_server_main;
+
+
 int
 builtin_session_accept_callback (stream_session_t * s)
 {
+  builtin_server_main_t *bsm = &builtin_server_main;
   clib_warning ("called...");
+
+  bsm->vpp_queue[s->thread_index] =
+    session_manager_get_vpp_event_queue (s->thread_index);
   s->session_state = SESSION_STATE_READY;
   return 0;
 }
@@ -30,8 +44,19 @@ void
 builtin_session_disconnect_callback (stream_session_t * s)
 {
   clib_warning ("called...");
+
+  vnet_disconnect_session (s->session_index, s->thread_index);
 }
 
+void
+builtin_session_reset_callback (stream_session_t * s)
+{
+  clib_warning ("called.. ");
+
+  stream_session_cleanup (s);
+}
+
+
 int
 builtin_session_connected_callback (u32 client_index,
                                    stream_session_t * s, u8 is_fail)
@@ -56,9 +81,57 @@ builtin_redirect_connect_callback (u32 client_index, void *mp)
 }
 
 int
-builtin_server_rx_callback (stream_session_t * s)
+builtin_server_rx_callback (stream_session_t * s, session_fifo_event_t * e)
 {
-  clib_warning ("called...");
+  int n_written, bytes, total_copy_bytes;
+  int n_read;
+  svm_fifo_t *tx_fifo;
+  builtin_server_main_t *bsm = &builtin_server_main;
+  session_fifo_event_t evt;
+  static int serial_number = 0;
+
+  bytes = e->enqueue_length;
+  if (PREDICT_FALSE (bytes <= 0))
+    {
+      clib_warning ("bizarre rx callback: bytes %d", bytes);
+      return 0;
+    }
+
+  tx_fifo = s->server_tx_fifo;
+
+  /* Number of bytes we're going to copy */
+  total_copy_bytes = (bytes < (tx_fifo->nitems - tx_fifo->cursize)) ? bytes :
+    tx_fifo->nitems - tx_fifo->cursize;
+
+  if (PREDICT_FALSE (total_copy_bytes <= 0))
+    {
+      clib_warning ("no space in tx fifo, event had %d bytes", bytes);
+      return 0;
+    }
+
+  vec_validate (bsm->rx_buf, total_copy_bytes - 1);
+  _vec_len (bsm->rx_buf) = total_copy_bytes;
+
+  n_read = svm_fifo_dequeue_nowait (s->server_rx_fifo, 0, total_copy_bytes,
+                                   bsm->rx_buf);
+  ASSERT (n_read == total_copy_bytes);
+
+  /*
+   * Echo back
+   */
+
+  n_written = svm_fifo_enqueue_nowait (tx_fifo, 0, n_read, bsm->rx_buf);
+  ASSERT (n_written == total_copy_bytes);
+
+  /* Fabricate TX event, send to vpp */
+  evt.fifo = tx_fifo;
+  evt.event_type = FIFO_EVENT_SERVER_TX;
+  evt.enqueue_length = total_copy_bytes;
+  evt.event_id = serial_number++;
+
+  unix_shared_memory_queue_add (bsm->vpp_queue[s->thread_index], (u8 *) & evt,
+                               0 /* do wait for mutex */ );
+
   return 0;
 }
 
@@ -68,7 +141,8 @@ static session_cb_vft_t builtin_session_cb_vft = {
   .session_connected_callback = builtin_session_connected_callback,
   .add_segment_callback = builtin_add_segment_callback,
   .redirect_connect_callback = builtin_redirect_connect_callback,
-  .builtin_server_rx_callback = builtin_server_rx_callback
+  .builtin_server_rx_callback = builtin_server_rx_callback,
+  .session_reset_callback = builtin_session_reset_callback
 };
 
 static int
@@ -77,6 +151,11 @@ server_create (vlib_main_t * vm)
   vnet_bind_args_t _a, *a = &_a;
   u64 options[SESSION_OPTIONS_N_OPTIONS];
   char segment_name[128];
+  u32 num_threads;
+  vlib_thread_main_t *vtm = vlib_get_thread_main ();
+
+  num_threads = 1 /* main thread */  + vtm->n_threads;
+  vec_validate (builtin_server_main.vpp_queue, num_threads - 1);
 
   memset (a, 0, sizeof (*a));
   memset (options, 0, sizeof (options));
@@ -110,6 +189,7 @@ server_create_command_fn (vlib_main_t * vm,
     }
 #endif
 
+  vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
   rv = server_create (vm);
   switch (rv)
     {
@@ -121,10 +201,14 @@ server_create_command_fn (vlib_main_t * vm,
   return 0;
 }
 
+/* *INDENT-OFF* */
 VLIB_CLI_COMMAND (server_create_command, static) =
 {
-.path = "test server",.short_help = "test server",.function =
-    server_create_command_fn,};
+  .path = "test server",
+  .short_help = "test server",
+  .function = server_create_command_fn,
+};
+/* *INDENT-ON* */
 
 /*
 * fd.io coding-style-patch-verification: ON
index 69433e2..d2df5c3 100644 (file)
@@ -139,6 +139,20 @@ tcp_connection_del (tcp_connection_t * tc)
   tcp_connection_cleanup (tc);
 }
 
+/** Notify session that connection has been reset.
+ *
+ * Switch state to closed and wait for session to call cleanup.
+ */
+void
+tcp_connection_reset (tcp_connection_t * tc)
+{
+  if (tc->state == TCP_STATE_CLOSED)
+    return;
+
+  tc->state = TCP_STATE_CLOSED;
+  stream_session_reset_notify (&tc->connection);
+}
+
 /**
  * Begin connection closing procedure.
  *
@@ -149,6 +163,8 @@ tcp_connection_del (tcp_connection_t * tc)
  * calls cleanup.
  * 2) TIME_WAIT (active close) whereby after 2MSL the 2MSL timer triggers
  * and cleanup is called.
+ *
+ * N.B. Half-close connections are not supported
  */
 void
 tcp_connection_close (tcp_connection_t * tc)
@@ -166,9 +182,9 @@ tcp_connection_close (tcp_connection_t * tc)
   else if (tc->state == TCP_STATE_CLOSE_WAIT)
     tc->state = TCP_STATE_LAST_ACK;
 
-  /* Half-close connections are not supported XXX */
-
-  if (tc->state == TCP_STATE_CLOSED)
+  /* If in CLOSED and WAITCLOSE timer is not set, delete connection now */
+  if (tc->timers[TCP_TIMER_WAITCLOSE] == TCP_TIMER_HANDLE_INVALID
+      && tc->state == TCP_STATE_CLOSED)
     tcp_connection_del (tc);
 }
 
@@ -185,7 +201,10 @@ tcp_session_cleanup (u32 conn_index, u32 thread_index)
 {
   tcp_connection_t *tc;
   tc = tcp_connection_get (conn_index, thread_index);
-  tcp_connection_cleanup (tc);
+
+  /* Wait for the session tx events to clear */
+  tc->state = TCP_STATE_CLOSED;
+  tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
 }
 
 void *
@@ -227,7 +246,8 @@ tcp_allocate_local_port (tcp_main_t * tm, ip46_address_t * ip)
 {
   transport_endpoint_t *tep;
   u32 time_now, tei;
-  u16 min = 1024, max = 65535, tries;  /* XXX configurable ? */
+  u16 min = 1024, max = 65535; /* XXX configurable ? */
+  int tries;
 
   tries = max - min;
   time_now = tcp_time_now ();
@@ -505,10 +525,10 @@ tcp_session_send_space (transport_connection_t * trans_conn)
 }
 
 u32
-tcp_session_rx_fifo_offset (transport_connection_t * trans_conn)
+tcp_session_tx_fifo_offset (transport_connection_t * trans_conn)
 {
   tcp_connection_t *tc = (tcp_connection_t *) trans_conn;
-  return (tc->snd_una_max - tc->snd_una);
+  return (tc->snd_nxt - tc->snd_una);
 }
 
 /* *INDENT-OFF* */
@@ -524,7 +544,7 @@ const static transport_proto_vft_t tcp4_proto = {
   .cleanup = tcp_session_cleanup,
   .send_mss = tcp_session_send_mss,
   .send_space = tcp_session_send_space,
-  .rx_fifo_offset = tcp_session_rx_fifo_offset,
+  .tx_fifo_offset = tcp_session_tx_fifo_offset,
   .format_connection = format_tcp_session_ip4,
   .format_listener = format_tcp_listener_session_ip4,
   .format_half_open = format_tcp_half_open_session_ip4
@@ -542,7 +562,7 @@ const static transport_proto_vft_t tcp6_proto = {
   .cleanup = tcp_session_cleanup,
   .send_mss = tcp_session_send_mss,
   .send_space = tcp_session_send_space,
-  .rx_fifo_offset = tcp_session_rx_fifo_offset,
+  .tx_fifo_offset = tcp_session_tx_fifo_offset,
   .format_connection = format_tcp_session_ip6,
   .format_listener = format_tcp_listener_session_ip6,
   .format_half_open = format_tcp_half_open_session_ip6
@@ -579,13 +599,32 @@ tcp_timer_establish_handler (u32 conn_index)
 }
 
 void
-tcp_timer_2msl_handler (u32 conn_index)
+tcp_timer_waitclose_handler (u32 conn_index)
 {
   u32 cpu_index = os_get_cpu_number ();
   tcp_connection_t *tc;
 
   tc = tcp_connection_get (conn_index, cpu_index);
-  tc->timers[TCP_TIMER_2MSL] = TCP_TIMER_HANDLE_INVALID;
+  tc->timers[TCP_TIMER_WAITCLOSE] = TCP_TIMER_HANDLE_INVALID;
+
+  /* Session didn't come back with a close(). Send FIN either way
+   * and switch to LAST_ACK. */
+  if (tc->state == TCP_STATE_CLOSE_WAIT)
+    {
+      if (tc->flags & TCP_CONN_FINSNT)
+       {
+         clib_warning ("FIN was sent and still in CLOSE WAIT. Weird!");
+       }
+
+      tcp_send_fin (tc);
+      tc->state = TCP_STATE_LAST_ACK;
+
+      /* Make sure we don't wait in LAST ACK forever */
+      tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+
+      /* Don't delete the connection yet */
+      return;
+    }
 
   tcp_connection_del (tc);
 }
@@ -597,7 +636,7 @@ static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] =
     tcp_timer_delack_handler,
     0,
     tcp_timer_keep_handler,
-    tcp_timer_2msl_handler,
+    tcp_timer_waitclose_handler,
     tcp_timer_retransmit_syn_handler,
     tcp_timer_establish_handler
 };
index 7d44343..3b3d8fc 100644 (file)
@@ -63,8 +63,8 @@ format_function_t format_tcp_state;
   _(DELACK, "DELAYED ACK")              \
   _(PERSIST, "PERSIST")                 \
   _(KEEP, "KEEP")                       \
-  _(2MSL, "2MSL")                       \
-  _(RETRANSMIT_SYN, "RETRANSMIT_SYN")   \
+  _(WAITCLOSE, "WAIT CLOSE")            \
+  _(RETRANSMIT_SYN, "RETRANSMIT SYN")   \
   _(ESTABLISH, "ESTABLISH")
 
 typedef enum _tcp_timers
@@ -89,6 +89,8 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
 #define TCP_DELACK_TIME         1      /* 0.1s */
 #define TCP_ESTABLISH_TIME      750    /* 75s */
 #define TCP_2MSL_TIME           300    /* 30s */
+#define TCP_CLOSEWAIT_TIME     1       /* 0.1s */
+#define TCP_CLEANUP_TIME       5       /* 0.5s Time to wait before cleanup */
 
 #define TCP_RTO_MAX 60 * THZ   /* Min max RTO (60s) as per RFC6298 */
 #define TCP_RTT_MAX 30 * THZ   /* 30s (probably too much) */
@@ -102,6 +104,7 @@ void tcp_update_time (f64 now, u32 thread_index);
   _(DELACK, "Delay ACK")                        \
   _(SNDACK, "Send ACK")                         \
   _(BURSTACK, "Burst ACK set")                  \
+  _(FINSNT, "FIN sent")                                \
   _(SENT_RCV_WND0, "Sent 0 receive window")     \
   _(RECOVERY, "Recovery on")                    \
   _(FAST_RECOVERY, "Fast Recovery on")
@@ -331,6 +334,8 @@ clib_error_t *vnet_tcp_enable_disable (vlib_main_t * vm, u8 is_en);
 always_inline tcp_connection_t *
 tcp_connection_get (u32 conn_index, u32 thread_index)
 {
+  if (pool_is_free_index (tcp_main.connections[thread_index], conn_index))
+    return 0;
   return pool_elt_at_index (tcp_main.connections[thread_index], conn_index);
 }
 
@@ -347,6 +352,7 @@ tcp_connection_get_if_valid (u32 conn_index, u32 thread_index)
 void tcp_connection_close (tcp_connection_t * tc);
 void tcp_connection_cleanup (tcp_connection_t * tc);
 void tcp_connection_del (tcp_connection_t * tc);
+void tcp_connection_reset (tcp_connection_t * tc);
 
 always_inline tcp_connection_t *
 tcp_listener_get (u32 tli)
@@ -361,7 +367,7 @@ tcp_half_open_connection_get (u32 conn_index)
 }
 
 void tcp_make_ack (tcp_connection_t * ts, vlib_buffer_t * b);
-void tcp_make_finack (tcp_connection_t * tc, vlib_buffer_t * b);
+void tcp_make_fin (tcp_connection_t * tc, vlib_buffer_t * b);
 void tcp_make_synack (tcp_connection_t * ts, vlib_buffer_t * b);
 void tcp_send_reset (vlib_buffer_t * pkt, u8 is_ip4);
 void tcp_send_syn (tcp_connection_t * tc);
@@ -467,7 +473,7 @@ tcp_timer_set (tcp_connection_t * tc, u8 timer_id, u32 interval)
 }
 
 always_inline void
-tcp_retransmit_timer_set (tcp_main_t * tm, tcp_connection_t * tc)
+tcp_retransmit_timer_set (tcp_connection_t * tc)
 {
   /* XXX Switch to faster TW */
   tcp_timer_set (tc, TCP_TIMER_RETRANSMIT,
index cff5ec1..2dbdd9b 100644 (file)
@@ -17,13 +17,13 @@ tcp_error (NONE, "no error")
 tcp_error (NO_LISTENER, "no listener for dst port")
 tcp_error (LOOKUP_DROPS, "lookup drops")
 tcp_error (DISPATCH, "Dispatch error")
-tcp_error (ENQUEUED, "Packets pushed into rx fifo")                              
+tcp_error (ENQUEUED, "Packets pushed into rx fifo") 
 tcp_error (PURE_ACK, "Pure acks")
 tcp_error (SYNS_RCVD, "SYNs received")
 tcp_error (SYN_ACKS_RCVD, "SYN-ACKs received")
-tcp_error (NOT_READY, "Session not ready for packets")                               
-tcp_error (FIFO_FULL, "Packets dropped for lack of rx fifo space")               
-tcp_error (EVENT_FIFO_FULL, "Events not sent for lack of event fifo space")      
+tcp_error (NOT_READY, "Session not ready for packets") 
+tcp_error (FIFO_FULL, "Packets dropped for lack of rx fifo space") 
+tcp_error (EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") 
 tcp_error (API_QUEUE_FULL, "Sessions not created for lack of API queue space")
 tcp_error (CREATE_SESSION_FAIL, "Sessions couldn't be allocated")
 tcp_error (SEGMENT_INVALID, "Invalid segment")
@@ -32,4 +32,5 @@ tcp_error (ACK_DUP, "Duplicate ACK")
 tcp_error (ACK_OLD, "Old ACK")
 tcp_error (PKTS_SENT, "Packets sent")
 tcp_error (FILTERED_DUPACKS, "Filtered duplicate ACKs")
-tcp_error (RST_SENT, "Resets sent")
\ No newline at end of file
+tcp_error (RST_SENT, "Resets sent")
+tcp_error (INVALID_CONNECTION, "Invalid connection")
index 0a907d0..f19fbf8 100644 (file)
@@ -274,10 +274,7 @@ tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0,
   /* 2nd: check the RST bit */
   if (tcp_rst (th0))
     {
-      /* Notify session that connection has been reset. Switch
-       * state to closed and await for session to do the cleanup. */
-      stream_session_reset_notify (&tc0->connection);
-      tc0->state = TCP_STATE_CLOSED;
+      tcp_connection_reset (tc0);
       return -1;
     }
 
@@ -1023,6 +1020,12 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
          tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index,
                                    my_thread_index);
 
+         if (PREDICT_FALSE (tc0 == 0))
+           {
+             error0 = TCP_ERROR_INVALID_CONNECTION;
+             goto drop;
+           }
+
          /* Checksum computed by ipx_local no need to compute again */
 
          if (is_ip4)
@@ -1072,12 +1075,12 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
          /* 8: check the FIN bit */
          if (tcp_fin (th0))
            {
-             /* Send ACK and enter CLOSE-WAIT */
-             tcp_make_ack (tc0, b0);
-             tcp_connection_force_ack (tc0, b0);
-             next0 = tcp_next_output (tc0->c_is_ip4);
+             /* Enter CLOSE-WAIT and notify session. Don't send ACK, instead
+              * wait for session to call close. To avoid lingering
+              * in CLOSE-WAIT, set timer (reuse WAITCLOSE). */
              tc0->state = TCP_STATE_CLOSE_WAIT;
              stream_session_disconnect_notify (&tc0->connection);
+             tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
            }
 
        drop:
@@ -1468,7 +1471,7 @@ VLIB_REGISTER_NODE (tcp6_syn_sent_node) =
 
 VLIB_NODE_FUNCTION_MULTIARCH (tcp6_syn_sent_node, tcp6_syn_sent_rcv);
 /**
- * Handles reception for all states except LISTEN, SYN-SEND and ESTABLISHED
+ * Handles reception for all states except LISTEN, SYN-SENT and ESTABLISHED
  * as per RFC793 p. 64
  */
 always_inline uword
@@ -1511,6 +1514,11 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
          b0 = vlib_get_buffer (vm, bi0);
          tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index,
                                    my_thread_index);
+         if (PREDICT_FALSE (tc0 == 0))
+           {
+             error0 = TCP_ERROR_INVALID_CONNECTION;
+             goto drop;
+           }
 
          /* Checksum computed by ipx_local no need to compute again */
 
@@ -1587,7 +1595,8 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
              /* Shoulder tap the server */
              stream_session_accept_notify (&tc0->connection);
 
-             tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT_SYN);
+             /* Reset SYN-ACK retransmit timer */
+             tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT);
              break;
            case TCP_STATE_ESTABLISHED:
              /* We can get packets in established state here because they
@@ -1602,9 +1611,14 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
               * continue processing in that state. */
              if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0))
                goto drop;
-             tc0->state = TCP_STATE_FIN_WAIT_2;
-             /* Stop all timers, 2MSL will be set lower */
-             tcp_connection_timers_reset (tc0);
+
+             /* If FIN is ACKed */
+             if (tc0->snd_una == tc0->snd_una_max)
+               {
+                 tc0->state = TCP_STATE_FIN_WAIT_2;
+                 /* Stop all timers, 2MSL will be set lower */
+                 tcp_connection_timers_reset (tc0);
+               }
              break;
            case TCP_STATE_FIN_WAIT_2:
              /* In addition to the processing for the ESTABLISHED state, if
@@ -1639,7 +1653,17 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
              if (!tcp_rcv_ack_is_acceptable (tc0, b0))
                goto drop;
 
-             tcp_connection_del (tc0);
+             tc0->state = TCP_STATE_CLOSED;
+
+             /* Don't delete the connection/session yet. Instead, wait a
+              * reasonable amount of time until the pipes are cleared. In
+              * particular, this makes sure that we won't have dead sessions
+              * when processing events on the tx path */
+             tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+
+             /* Stop retransmit */
+             tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT);
+
              goto drop;
 
              break;
@@ -1684,7 +1708,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
            case TCP_STATE_SYN_RCVD:
              /* Send FIN-ACK notify app and enter CLOSE-WAIT */
              tcp_connection_timers_reset (tc0);
-             tcp_make_finack (tc0, b0);
+             tcp_make_fin (tc0, b0);
              next0 = tcp_next_output (tc0->c_is_ip4);
              stream_session_disconnect_notify (&tc0->connection);
              tc0->state = TCP_STATE_CLOSE_WAIT;
@@ -1697,12 +1721,12 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
            case TCP_STATE_FIN_WAIT_1:
              tc0->state = TCP_STATE_TIME_WAIT;
              tcp_connection_timers_reset (tc0);
-             tcp_timer_set (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME);
+             tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
              break;
            case TCP_STATE_FIN_WAIT_2:
              /* Got FIN, send ACK! */
              tc0->state = TCP_STATE_TIME_WAIT;
-             tcp_timer_set (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME);
+             tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
              tcp_make_ack (tc0, b0);
              next0 = tcp_next_output (is_ip4);
              break;
@@ -1710,7 +1734,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
              /* Remain in the TIME-WAIT state. Restart the 2 MSL time-wait
               * timeout.
               */
-             tcp_timer_update (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME);
+             tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
              break;
            }
 
@@ -2113,6 +2137,7 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
          n_left_to_next -= 1;
 
          b0 = vlib_get_buffer (vm, bi0);
+         vnet_buffer (b0)->tcp.flags = 0;
 
          if (is_ip4)
            {
@@ -2168,7 +2193,6 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
              /* Send reset */
              next0 = TCP_INPUT_NEXT_RESET;
              error0 = TCP_ERROR_NO_LISTENER;
-             vnet_buffer (b0)->tcp.flags = 0;
            }
 
          b0->error = error0 ? node->errors[error0] : 0;
@@ -2288,6 +2312,7 @@ do {                                                              \
   _(ESTABLISHED, TCP_FLAG_FIN, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
   _(ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED,
     TCP_ERROR_NONE);
+  _(ESTABLISHED, TCP_FLAG_RST, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
   /* ACK or FIN-ACK to our FIN */
   _(FIN_WAIT_1, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
   _(FIN_WAIT_1, TCP_FLAG_ACK | TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS,
index 7e431cd..aa43e9f 100644 (file)
@@ -396,6 +396,7 @@ tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b)
 
   /* Leave enough space for headers */
   vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
+  vnet_buffer (b)->tcp.flags = 0;
 }
 
 /**
@@ -443,16 +444,22 @@ tcp_make_ack (tcp_connection_t * tc, vlib_buffer_t * b)
  * Convert buffer to FIN-ACK
  */
 void
-tcp_make_finack (tcp_connection_t * tc, vlib_buffer_t * b)
+tcp_make_fin (tcp_connection_t * tc, vlib_buffer_t * b)
 {
   tcp_main_t *tm = vnet_get_tcp_main ();
   vlib_main_t *vm = tm->vlib_main;
+  u8 flags = 0;
 
   tcp_reuse_buffer (vm, b);
-  tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK | TCP_FLAG_FIN);
+
+  if (tc->rcv_las == tc->rcv_nxt)
+    flags = TCP_FLAG_FIN;
+  else
+    flags = TCP_FLAG_FIN | TCP_FLAG_ACK;
+
+  tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, flags);
 
   /* Reset flags, make sure ack is sent */
-  tc->flags = TCP_CONN_SNDACK;
   vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK;
 
   tc->snd_nxt += 1;
@@ -500,7 +507,7 @@ tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b)
   vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK;
 
   /* Init retransmit timer */
-  tcp_retransmit_timer_set (tm, tc);
+  tcp_retransmit_timer_set (tc);
 }
 
 always_inline void
@@ -818,9 +825,9 @@ tcp_send_fin (tcp_connection_t * tc)
   /* Leave enough space for headers */
   vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
 
-  tcp_make_finack (tc, b);
-
+  tcp_make_fin (tc, b);
   tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
+  tc->flags |= TCP_CONN_FINSNT;
 }
 
 always_inline u8
@@ -1038,7 +1045,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
       tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
 
       /* Re-enable retransmit timer */
-      tcp_retransmit_timer_set (tm, tc);
+      tcp_retransmit_timer_set (tc);
     }
   else
     {
@@ -1139,7 +1146,6 @@ tcp46_output_inline (vlib_main_t * vm,
                     vlib_node_runtime_t * node,
                     vlib_frame_t * from_frame, int is_ip4)
 {
-  tcp_main_t *tm = vnet_get_tcp_main ();
   u32 n_left_from, next_index, *from, *to_next;
   u32 my_thread_index = vm->cpu_index;
 
@@ -1172,6 +1178,13 @@ tcp46_output_inline (vlib_main_t * vm,
          b0 = vlib_get_buffer (vm, bi0);
          tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index,
                                    my_thread_index);
+         if (PREDICT_FALSE (tc0 == 0 || tc0->state == TCP_STATE_CLOSED))
+           {
+             error0 = TCP_ERROR_INVALID_CONNECTION;
+             next0 = TCP_OUTPUT_NEXT_DROP;
+             goto done;
+           }
+
          th0 = vlib_buffer_get_current (b0);
 
          if (is_ip4)
@@ -1229,6 +1242,22 @@ tcp46_output_inline (vlib_main_t * vm,
                  tc0->rtt_ts = tcp_time_now ();
                  tc0->rtt_seq = tc0->snd_nxt;
                }
+
+             if (1)
+               {
+                 ELOG_TYPE_DECLARE (e) =
+                 {
+                 .format =
+                     "output: snd_una %u snd_una_max %u",.format_args =
+                     "i4i4",};
+                 struct
+                 {
+                   u32 data[2];
+                 } *ed;
+                 ed = ELOG_DATA (&vm->elog_main, e);
+                 ed->data[0] = tc0->snd_una - tc0->iss;
+                 ed->data[1] = tc0->snd_una_max - tc0->iss;
+               }
            }
 
          /* Set the retransmit timer if not set already and not
@@ -1236,7 +1265,7 @@ tcp46_output_inline (vlib_main_t * vm,
          if (!tcp_timer_is_active (tc0, TCP_TIMER_RETRANSMIT)
              && tc0->snd_nxt != tc0->snd_una)
            {
-             tcp_retransmit_timer_set (tm, tc0);
+             tcp_retransmit_timer_set (tc0);
              tc0->rto_boff = 0;
            }
 
index afa66ba..46c8e73 100644 (file)
@@ -39,7 +39,7 @@ builtin_session_disconnect_callback (stream_session_t * s)
 }
 
 static int
-builtin_server_rx_callback (stream_session_t * s)
+builtin_server_rx_callback (stream_session_t * s, session_fifo_event_t * ep)
 {
   svm_fifo_t *rx_fifo, *tx_fifo;
   u32 this_transfer;
index 4d50933..8827873 100644 (file)
@@ -244,19 +244,19 @@ udp4_uri_input_node_fn (vlib_main_t * vm,
       /* Get session's server */
       server0 = application_get (s0->app_index);
 
-      /* Built-in server? Deliver the goods... */
-      if (server0->cb_fns.builtin_server_rx_callback)
-       {
-         server0->cb_fns.builtin_server_rx_callback (s0);
-         continue;
-       }
-
       /* Fabricate event */
       evt.fifo = s0->server_rx_fifo;
       evt.event_type = FIFO_EVENT_SERVER_RX;
       evt.event_id = serial_number++;
       evt.enqueue_length = svm_fifo_max_dequeue (s0->server_rx_fifo);
 
+      /* Built-in server? Deliver the goods... */
+      if (server0->cb_fns.builtin_server_rx_callback)
+       {
+         server0->cb_fns.builtin_server_rx_callback (s0, &evt);
+         continue;
+       }
+
       /* Add event to server's event queue */
       q = server0->event_queue;
 
index 496f388..fb1a8ba 100644 (file)
@@ -1435,7 +1435,8 @@ done:
 
 VLIB_CLI_COMMAND (tap_connect_command, static) = {
     .path = "tap connect",
-    .short_help = "tap connect <intfc-name> [hwaddr <addr>]",
+    .short_help =
+       "tap connect <intfc-name> [address <ip-addr>/mw] [hwaddr <addr>]",
     .function = tap_connect_command_fn,
 };