Add extern to *_main global variable declarations in header files.
[vpp.git] / src / vnet / tcp / builtin_client.c
index 9e8e156..76c5c12 100644 (file)
 
 #include <vlibapi/api.h>
 #include <vlibmemory/api.h>
-#include <vlibsocket/api.h>
 #include <vpp/app/version.h>
 
-/* define message IDs */
-#include <vpp/api/vpe_msg_enum.h>
+tclient_main_t tclient_main;
 
-/* define message structures */
-#define vl_typedefs
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_typedefs
+#define TCP_BUILTIN_CLIENT_DBG (0)
 
-/* define generated endian-swappers */
-#define vl_endianfun
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_endianfun
-
-/* instantiate all the print functions we know about */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define vl_printfun
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_printfun
+static void
+signal_evt_to_cli_i (int *code)
+{
+  tclient_main_t *tm = &tclient_main;
+  ASSERT (vlib_get_thread_index () == 0);
+  vlib_process_signal_event (tm->vlib_main, tm->cli_node_index, *code, 0);
+}
 
-#define TCP_BUILTIN_CLIENT_DBG (1)
-#define TCP_BUILTIN_CLIENT_VPP_THREAD (0)
-#define TCP_BUILTIN_CLIENT_PTHREAD (!TCP_BUILTIN_CLIENT_VPP_THREAD)
+static void
+signal_evt_to_cli (int code)
+{
+  if (vlib_get_thread_index () != 0)
+    vl_api_rpc_call_main_thread (signal_evt_to_cli_i, (u8 *) & code,
+                                sizeof (code));
+  else
+    signal_evt_to_cli_i (&code);
+}
 
 static void
 send_test_chunk (tclient_main_t * tm, session_t * s)
@@ -54,52 +52,58 @@ send_test_chunk (tclient_main_t * tm, session_t * s)
   int test_buf_offset;
   u32 bytes_this_chunk;
   session_fifo_event_t evt;
-  static int serial_number = 0;
+  svm_fifo_t *txf;
   int rv;
+
+  ASSERT (vec_len (test_data) > 0);
+
   test_buf_offset = s->bytes_sent % vec_len (test_data);
   bytes_this_chunk = vec_len (test_data) - test_buf_offset;
 
   bytes_this_chunk = bytes_this_chunk < s->bytes_to_send
     ? bytes_this_chunk : s->bytes_to_send;
 
-  rv = svm_fifo_enqueue_nowait (s->server_tx_fifo, 0 /*pid */ ,
-                               bytes_this_chunk,
+  txf = s->server_tx_fifo;
+  rv = svm_fifo_enqueue_nowait (txf, bytes_this_chunk,
                                test_data + test_buf_offset);
 
   /* If we managed to enqueue data... */
   if (rv > 0)
     {
+      /* Account for it... */
+      s->bytes_to_send -= rv;
+      s->bytes_sent += rv;
+
       if (TCP_BUILTIN_CLIENT_DBG)
        {
           /* *INDENT-OFF* */
           ELOG_TYPE_DECLARE (e) =
             {
-              .format = "tx-enq: %d bytes",
-              .format_args = "i4",
+              .format = "tx-enq: xfer %d bytes, sent %u remain %u",
+              .format_args = "i4i4i4",
             };
           /* *INDENT-ON* */
          struct
          {
-           u32 data[1];
+           u32 data[3];
          } *ed;
          ed = ELOG_DATA (&vlib_global_main.elog_main, e);
          ed->data[0] = rv;
+         ed->data[1] = s->bytes_sent;
+         ed->data[2] = s->bytes_to_send;
        }
 
-      /* Account for it... */
-      s->bytes_to_send -= rv;
-      s->bytes_sent += rv;
-
-      /* Poke the TCP state machine */
-      if (svm_fifo_set_event (s->server_tx_fifo))
+      /* Poke the session layer */
+      if (svm_fifo_set_event (txf))
        {
          /* Fabricate TX event, send to vpp */
-         evt.fifo = s->server_tx_fifo;
-         evt.event_type = FIFO_EVENT_SERVER_TX;
-         evt.event_id = serial_number++;
+         evt.fifo = txf;
+         evt.event_type = FIFO_EVENT_APP_TX;
 
-         unix_shared_memory_queue_add (tm->vpp_event_queue, (u8 *) & evt,
-                                       0 /* do wait for mutex */ );
+         if (unix_shared_memory_queue_add
+             (tm->vpp_event_queue[txf->master_thread_index], (u8 *) & evt,
+              0 /* do wait for mutex */ ))
+           clib_warning ("could not enqueue event");
        }
     }
 }
@@ -109,12 +113,23 @@ receive_test_chunk (tclient_main_t * tm, session_t * s)
 {
   svm_fifo_t *rx_fifo = s->server_rx_fifo;
   int n_read, test_bytes = 0;
+  u32 my_thread_index = vlib_get_thread_index ();
 
   /* Allow enqueuing of new event */
   // svm_fifo_unset_event (rx_fifo);
 
-  n_read = svm_fifo_dequeue_nowait (rx_fifo, 0, vec_len (tm->rx_buf),
-                                   tm->rx_buf);
+  if (test_bytes)
+    {
+      n_read = svm_fifo_dequeue_nowait (rx_fifo,
+                                       vec_len (tm->rx_buf[my_thread_index]),
+                                       tm->rx_buf[my_thread_index]);
+    }
+  else
+    {
+      n_read = svm_fifo_max_dequeue (rx_fifo);
+      svm_fifo_dequeue_drop (rx_fifo, n_read);
+    }
+
   if (n_read > 0)
     {
       if (TCP_BUILTIN_CLIENT_DBG)
@@ -139,10 +154,12 @@ receive_test_chunk (tclient_main_t * tm, session_t * s)
          int i;
          for (i = 0; i < n_read; i++)
            {
-             if (tm->rx_buf[i] != ((s->bytes_received + i) & 0xff))
+             if (tm->rx_buf[my_thread_index][i]
+                 != ((s->bytes_received + i) & 0xff))
                {
                  clib_warning ("read %d error at byte %lld, 0x%x not 0x%x",
-                               n_read, s->bytes_received + i, tm->rx_buf[i],
+                               n_read, s->bytes_received + i,
+                               tm->rx_buf[my_thread_index][i],
                                ((s->bytes_received + i) & 0xff));
                }
            }
@@ -152,315 +169,626 @@ receive_test_chunk (tclient_main_t * tm, session_t * s)
     }
 }
 
-#if TCP_BUILTIN_CLIENT_VPP_THREAD
-static void
-#else
-static void *
-#endif
-tclient_thread_fn (void *arg)
+static uword
+builtin_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+                       vlib_frame_t * frame)
 {
   tclient_main_t *tm = &tclient_main;
-  vl_api_disconnect_session_t *dmp;
+  int my_thread_index = vlib_get_thread_index ();
   session_t *sp;
-  struct timespec ts, tsrem;
   int i;
-  int try_tx, try_rx;
-  u32 *session_indices = 0;
-
-  /* stats thread wants no signals. */
-  {
-    sigset_t s;
-    sigfillset (&s);
-    pthread_sigmask (SIG_SETMASK, &s, 0);
-  }
-
-  clib_per_cpu_mheaps[vlib_get_thread_index ()] = clib_per_cpu_mheaps[0];
+  int delete_session;
+  u32 *connection_indices;
+  u32 *connections_this_batch;
+  u32 nconnections_this_batch;
+
+  connection_indices = tm->connection_index_by_thread[my_thread_index];
+  connections_this_batch =
+    tm->connections_this_batch_by_thread[my_thread_index];
+
+  if ((tm->run_test == 0) ||
+      ((vec_len (connection_indices) == 0)
+       && vec_len (connections_this_batch) == 0))
+    return 0;
+
+  /* Grab another pile of connections */
+  if (PREDICT_FALSE (vec_len (connections_this_batch) == 0))
+    {
+      nconnections_this_batch =
+       clib_min (tm->connections_per_batch, vec_len (connection_indices));
+
+      ASSERT (nconnections_this_batch > 0);
+      vec_validate (connections_this_batch, nconnections_this_batch - 1);
+      clib_memcpy (connections_this_batch,
+                  connection_indices + vec_len (connection_indices)
+                  - nconnections_this_batch,
+                  nconnections_this_batch * sizeof (u32));
+      _vec_len (connection_indices) -= nconnections_this_batch;
+    }
 
-  while (1)
+  if (PREDICT_FALSE (tm->prev_conns != tm->connections_per_batch
+                    && tm->prev_conns == vec_len (connections_this_batch)))
     {
-      /* Wait until we're told to get busy */
-      while (tm->run_test == 0
-            || (tm->ready_connections != tm->expected_connections))
+      tm->repeats++;
+      tm->prev_conns = vec_len (connections_this_batch);
+      if (tm->repeats == 500000)
        {
-         ts.tv_sec = 0;
-         ts.tv_nsec = 100000000;
-         while (nanosleep (&ts, &tsrem) < 0)
-           ts = tsrem;
+         clib_warning ("stuck clients");
        }
-      tm->run_test = 0;
+    }
+  else
+    {
+      tm->prev_conns = vec_len (connections_this_batch);
+      tm->repeats = 0;
+    }
+
+  for (i = 0; i < vec_len (connections_this_batch); i++)
+    {
+      delete_session = 1;
 
-      clib_warning ("Run %d iterations", tm->n_iterations);
+      sp = pool_elt_at_index (tm->sessions, connections_this_batch[i]);
 
-      for (i = 0; i < tm->n_iterations; i++)
+      if (sp->bytes_to_send > 0)
+       {
+         send_test_chunk (tm, sp);
+         delete_session = 0;
+       }
+      if (sp->bytes_to_receive > 0)
        {
-         session_t *sp;
+         receive_test_chunk (tm, sp);
+         delete_session = 0;
+       }
+      if (PREDICT_FALSE (delete_session == 1))
+       {
+         u32 index, thread_index;
+         stream_session_t *s;
+
+         __sync_fetch_and_add (&tm->tx_total, sp->bytes_sent);
+         __sync_fetch_and_add (&tm->rx_total, sp->bytes_received);
+
+         session_parse_handle (sp->vpp_session_handle,
+                               &index, &thread_index);
+         s = session_get_if_valid (index, thread_index);
 
-         do
+         if (s)
            {
-             try_tx = try_rx = 0;
-
-             /* *INDENT-OFF* */
-             pool_foreach (sp, tm->sessions, ({
-                if (sp->bytes_to_send > 0)
-                  {
-                    send_test_chunk (tm, sp);
-                    try_tx = 1;
-                  }
-             }));
-             pool_foreach (sp, tm->sessions, ({
-               if (sp->bytes_to_receive > 0)
-                  {
-                    receive_test_chunk (tm, sp);
-                    try_rx = 1;
-                  }
-              }));
-             /* *INDENT-ON* */
+             vnet_disconnect_args_t _a, *a = &_a;
+             a->handle = session_handle (s);
+             a->app_index = tm->app_index;
+             vnet_disconnect_session (a);
+
+             vec_delete (connections_this_batch, 1, i);
+             i--;
+             __sync_fetch_and_add (&tm->ready_connections, -1);
+           }
+         else
+           clib_warning ("session AWOL?");
 
+         /* Kick the debug CLI process */
+         if (tm->ready_connections == 0)
+           {
+             signal_evt_to_cli (2);
            }
-         while (try_tx || try_rx);
        }
-      clib_warning ("Done %d iterations", tm->n_iterations);
+    }
 
-      /* Disconnect sessions... */
-      vec_reset_length (session_indices);
+  tm->connection_index_by_thread[my_thread_index] = connection_indices;
+  tm->connections_this_batch_by_thread[my_thread_index] =
+    connections_this_batch;
+  return 0;
+}
 
-      /* *INDENT-OFF* */
-      pool_foreach (sp, tm->sessions, ({
-       vec_add1 (session_indices, sp - tm->sessions);
-      }));
-      /* *INDENT-ON* */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (builtin_client_node) =
+{
+  .function = builtin_client_node_fn,
+  .name = "builtin-tcp-client",
+  .type = VLIB_NODE_TYPE_INPUT,
+  .state = VLIB_NODE_STATE_DISABLED,
+};
+/* *INDENT-ON* */
 
-      for (i = 0; i < vec_len (session_indices); i++)
-       {
-         sp = pool_elt_at_index (tm->sessions, session_indices[i]);
-         dmp = vl_msg_api_alloc_as_if_client (sizeof (*dmp));
-         memset (dmp, 0, sizeof (*dmp));
-         dmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION);
-         dmp->client_index = tm->my_client_index;
-         dmp->session_index = sp->vpp_session_index;
-         dmp->session_thread_index = sp->vpp_session_thread;
-         vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & dmp);
-         pool_put (tm->sessions, sp);
-       }
-    }
-  /* NOTREACHED */
-#if TCP_BUILTIN_CLIENT_PTHREAD
+static int
+create_api_loopback (tclient_main_t * tm)
+{
+  api_main_t *am = &api_main;
+  vl_shmem_hdr_t *shmem_hdr;
+
+  shmem_hdr = am->shmem_hdr;
+  tm->vl_input_queue = shmem_hdr->vl_input_queue;
+  tm->my_client_index =
+    vl_api_memclnt_create_internal ("tcp_test_client", tm->vl_input_queue);
   return 0;
-#endif
 }
 
-/* So we don't get "no handler for... " msgs */
-static void
-vl_api_memclnt_create_reply_t_handler (vl_api_memclnt_create_reply_t * mp)
+static int
+tcp_test_clients_init (vlib_main_t * vm)
 {
   tclient_main_t *tm = &tclient_main;
+  vlib_thread_main_t *vtm = vlib_get_thread_main ();
+  u32 num_threads;
+  int i;
+
+  if (create_api_loopback (tm))
+    return -1;
+
+  num_threads = 1 /* main thread */  + vtm->n_threads;
+
+  /* Init test data. Big buffer */
+  vec_validate (tm->connect_test_data, 1024 * 1024 - 1);
+  for (i = 0; i < vec_len (tm->connect_test_data); i++)
+    tm->connect_test_data[i] = i & 0xff;
+
+  vec_validate (tm->rx_buf, num_threads - 1);
+  for (i = 0; i < num_threads; i++)
+    vec_validate (tm->rx_buf[i], vec_len (tm->connect_test_data) - 1);
 
-  tm->my_client_index = mp->index;
+  tm->is_init = 1;
+
+  vec_validate (tm->connection_index_by_thread, vtm->n_vlib_mains);
+  vec_validate (tm->connections_this_batch_by_thread, vtm->n_vlib_mains);
+  vec_validate (tm->vpp_event_queue, vtm->n_vlib_mains);
+
+  return 0;
 }
 
-static void
-vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp)
+static int
+builtin_session_connected_callback (u32 app_index, u32 api_context,
+                                   stream_session_t * s, u8 is_fail)
 {
   tclient_main_t *tm = &tclient_main;
   session_t *session;
   u32 session_index;
-  u64 key;
-  i32 retval = /* clib_net_to_host_u32 ( */ mp->retval /*) */ ;
+  u8 thread_index = vlib_get_thread_index ();
 
-  if (retval < 0)
+  if (is_fail)
     {
-      clib_warning ("connection failed: retval %d", retval);
-      return;
+      clib_warning ("connection %d failed!", api_context);
+      signal_evt_to_cli (-1);
+      return 0;
     }
 
-  tm->our_event_queue = (unix_shared_memory_queue_t *)
-    mp->vpp_event_queue_address;
+  ASSERT (s->thread_index == thread_index);
 
-  tm->vpp_event_queue = (unix_shared_memory_queue_t *)
-    mp->vpp_event_queue_address;
+  if (!tm->vpp_event_queue[thread_index])
+    tm->vpp_event_queue[thread_index] =
+      session_manager_get_vpp_event_queue (thread_index);
 
   /*
    * Setup session
    */
+  clib_spinlock_lock_if_init (&tm->sessions_lock);
   pool_get (tm->sessions, session);
+  clib_spinlock_unlock_if_init (&tm->sessions_lock);
+
   memset (session, 0, sizeof (*session));
   session_index = session - tm->sessions;
-  session->bytes_to_receive = session->bytes_to_send = tm->bytes_to_send;
-
-  session->server_rx_fifo = (svm_fifo_t *) mp->server_rx_fifo;
+  session->bytes_to_send = tm->bytes_to_send;
+  session->bytes_to_receive = tm->no_return ? 0ULL : tm->bytes_to_send;
+  session->server_rx_fifo = s->server_rx_fifo;
   session->server_rx_fifo->client_session_index = session_index;
-  session->server_tx_fifo = (svm_fifo_t *) mp->server_tx_fifo;
+  session->server_tx_fifo = s->server_tx_fifo;
   session->server_tx_fifo->client_session_index = session_index;
+  session->vpp_session_handle = session_handle (s);
 
-  session->vpp_session_index = mp->session_index;
-  session->vpp_session_thread = mp->session_thread_index;
-
-  /* Add it to the session lookup table */
-  key = (((u64) mp->session_thread_index) << 32) | (u64) mp->session_index;
-  hash_set (tm->session_index_by_vpp_handles, key, session_index);
+  vec_add1 (tm->connection_index_by_thread[thread_index], session_index);
+  __sync_fetch_and_add (&tm->ready_connections, 1);
+  if (tm->ready_connections == tm->expected_connections)
+    {
+      tm->run_test = 1;
+      /* Signal the CLI process that the action is starting... */
+      signal_evt_to_cli (1);
+    }
 
-  tm->ready_connections++;
+  return 0;
 }
 
 static void
-create_api_loopback (tclient_main_t * tm)
+builtin_session_reset_callback (stream_session_t * s)
 {
-  vl_api_memclnt_create_t _m, *mp = &_m;
-  extern void vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t *);
-  api_main_t *am = &api_main;
-  vl_shmem_hdr_t *shmem_hdr;
+  if (s->session_state == SESSION_STATE_READY)
+    clib_warning ("Reset active connection %U", format_stream_session, s, 2);
+  stream_session_cleanup (s);
+  return;
+}
 
-  /*
-   * Create a "loopback" API client connection
-   * Don't do things like this unless you know what you're doing...
-   */
+static int
+builtin_session_create_callback (stream_session_t * s)
+{
+  return 0;
+}
 
-  shmem_hdr = am->shmem_hdr;
-  tm->vl_input_queue = shmem_hdr->vl_input_queue;
-  memset (mp, 0, sizeof (*mp));
-  mp->_vl_msg_id = VL_API_MEMCLNT_CREATE;
-  mp->context = 0xFEEDFACE;
-  mp->input_queue = (u64) tm->vl_input_queue;
-  strncpy ((char *) mp->name, "tcp_tester", sizeof (mp->name) - 1);
+static void
+builtin_session_disconnect_callback (stream_session_t * s)
+{
+  tclient_main_t *tm = &tclient_main;
+  vnet_disconnect_args_t _a, *a = &_a;
+  a->handle = session_handle (s);
+  a->app_index = tm->app_index;
+  vnet_disconnect_session (a);
+  return;
+}
 
-  vl_api_memclnt_create_t_handler (mp);
+static int
+builtin_server_rx_callback (stream_session_t * s)
+{
+  return 0;
 }
 
-#define foreach_tclient_static_api_msg         \
-_(MEMCLNT_CREATE_REPLY, memclnt_create_reply)   \
-_(CONNECT_URI_REPLY, connect_uri_reply)
+/* *INDENT-OFF* */
+static session_cb_vft_t builtin_clients = {
+  .session_reset_callback = builtin_session_reset_callback,
+  .session_connected_callback = builtin_session_connected_callback,
+  .session_accept_callback = builtin_session_create_callback,
+  .session_disconnect_callback = builtin_session_disconnect_callback,
+  .builtin_server_rx_callback = builtin_server_rx_callback
+};
+/* *INDENT-ON* */
 
 static clib_error_t *
-tclient_api_hookup (vlib_main_t * vm)
+attach_builtin_test_clients_app (u8 * appns_id, u64 appns_flags,
+                                u64 appns_secret)
 {
   tclient_main_t *tm = &tclient_main;
-  vl_msg_api_msg_config_t _c, *c = &_c;
-  int i;
+  vnet_app_attach_args_t _a, *a = &_a;
+  u8 segment_name[128];
+  u32 segment_name_length, prealloc_fifos;
+  u64 options[16];
+  clib_error_t *error = 0;
+
+  segment_name_length = ARRAY_LEN (segment_name);
+
+  memset (a, 0, sizeof (*a));
+  memset (options, 0, sizeof (options));
+
+  a->api_client_index = tm->my_client_index;
+  a->segment_name = segment_name;
+  a->segment_name_length = segment_name_length;
+  a->session_cb_vft = &builtin_clients;
+
+  prealloc_fifos = tm->prealloc_fifos ? tm->expected_connections : 1;
+
+  options[SESSION_OPTIONS_ACCEPT_COOKIE] = 0x12345678;
+  options[SESSION_OPTIONS_SEGMENT_SIZE] = (2ULL << 32);
+  options[SESSION_OPTIONS_RX_FIFO_SIZE] = tm->fifo_size;
+  options[SESSION_OPTIONS_TX_FIFO_SIZE] = tm->fifo_size;
+  options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = tm->private_segment_count;
+  options[APP_OPTIONS_PRIVATE_SEGMENT_SIZE] = tm->private_segment_size;
+  options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = prealloc_fifos;
+
+  options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP;
+  if (appns_id)
+    {
+      options[APP_OPTIONS_FLAGS] |= appns_flags;
+      options[APP_OPTIONS_NAMESPACE_SECRET] = appns_secret;
+    }
+  a->options = options;
+  a->namespace_id = appns_id;
 
-  /* Init test data */
-  vec_validate (tm->connect_test_data, 64 * 1024 - 1);
-  for (i = 0; i < vec_len (tm->connect_test_data); i++)
-    tm->connect_test_data[i] = i & 0xff;
+  if ((error = vnet_application_attach (a)))
+    return error;
 
-  tm->session_index_by_vpp_handles = hash_create (0, sizeof (uword));
-  vec_validate (tm->rx_buf, vec_len (tm->connect_test_data) - 1);
-
-  /* Hook up client-side static APIs to our handlers */
-#define _(N,n) do {                                             \
-    c->id = VL_API_##N;                                         \
-    c->name = #n;                                               \
-    c->handler = vl_api_##n##_t_handler;                        \
-    c->cleanup = vl_noop_handler;                               \
-    c->endian = vl_api_##n##_t_endian;                          \
-    c->print = vl_api_##n##_t_print;                            \
-    c->size = sizeof(vl_api_##n##_t);                           \
-    c->traced = 1; /* trace, so these msgs print */             \
-    c->replay = 0; /* don't replay client create/delete msgs */ \
-    c->message_bounce = 0; /* don't bounce this message */     \
-    vl_msg_api_config(c);} while (0);
-
-  foreach_tclient_static_api_msg;
-#undef _
+  tm->app_index = a->app_index;
+  return 0;
+}
 
+static void *
+tclient_thread_fn (void *arg)
+{
   return 0;
 }
 
-VLIB_API_INIT_FUNCTION (tclient_api_hookup);
+/** Start a transmit thread */
+int
+start_tx_pthread (tclient_main_t * tm)
+{
+  if (tm->client_thread_handle == 0)
+    {
+      int rv = pthread_create (&tm->client_thread_handle,
+                              NULL /*attr */ ,
+                              tclient_thread_fn, 0);
+      if (rv)
+       {
+         tm->client_thread_handle = 0;
+         return -1;
+       }
+    }
+  return 0;
+}
+
+clib_error_t *
+clients_connect (vlib_main_t * vm, u8 * uri, u32 n_clients)
+{
+  tclient_main_t *tm = &tclient_main;
+  vnet_connect_args_t _a, *a = &_a;
+  clib_error_t *error = 0;
+  int i;
+  for (i = 0; i < n_clients; i++)
+    {
+      memset (a, 0, sizeof (*a));
+
+      a->uri = (char *) uri;
+      a->api_context = i;
+      a->app_index = tm->app_index;
+      a->mp = 0;
+
+      if ((error = vnet_connect_uri (a)))
+       return error;
+
+
+      /* Crude pacing for call setups  */
+      if ((i % 4) == 0)
+       vlib_process_suspend (vm, 10e-6);
+      ASSERT (i + 1 >= tm->ready_connections);
+      while (i + 1 - tm->ready_connections > 1000)
+       {
+         vlib_process_suspend (vm, 100e-6);
+       }
+    }
+  return 0;
+}
 
 static clib_error_t *
 test_tcp_clients_command_fn (vlib_main_t * vm,
                             unformat_input_t * input,
                             vlib_cli_command_t * cmd)
 {
-  u8 *connect_uri = (u8 *) "tcp://6.0.1.1/1234";
-  u8 *uri;
   tclient_main_t *tm = &tclient_main;
-  int i;
+  vlib_thread_main_t *thread_main = vlib_get_thread_main ();
+  uword *event_data = 0, event_type;
+  u8 *default_connect_uri = (u8 *) "tcp://6.0.1.1/1234", *uri, *appns_id;
+  u64 tmp, total_bytes, appns_flags = 0, appns_secret = 0;
+  f64 test_timeout = 20.0, syn_timeout = 20.0, delta;
+  f64 time_before_connects;
   u32 n_clients = 1;
+  int preallocate_sessions = 0;
+  char *transfer_type;
+  clib_error_t *error = 0;
+  int i;
 
   tm->bytes_to_send = 8192;
-  tm->n_iterations = 1;
+  tm->no_return = 0;
+  tm->fifo_size = 64 << 10;
+  tm->connections_per_batch = 1000;
+  tm->private_segment_count = 0;
+  tm->private_segment_size = 0;
+  tm->vlib_main = vm;
+  if (thread_main->n_vlib_mains > 1)
+    clib_spinlock_init (&tm->sessions_lock);
   vec_free (tm->connect_uri);
 
   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
     {
       if (unformat (input, "nclients %d", &n_clients))
        ;
-      else if (unformat (input, "iterations %d", &tm->n_iterations))
-       ;
+      else if (unformat (input, "mbytes %lld", &tmp))
+       tm->bytes_to_send = tmp << 20;
+      else if (unformat (input, "gbytes %lld", &tmp))
+       tm->bytes_to_send = tmp << 30;
       else if (unformat (input, "bytes %lld", &tm->bytes_to_send))
        ;
       else if (unformat (input, "uri %s", &tm->connect_uri))
        ;
+      else if (unformat (input, "test-timeout %f", &test_timeout))
+       ;
+      else if (unformat (input, "syn-timeout %f", &syn_timeout))
+       ;
+      else if (unformat (input, "no-return"))
+       tm->no_return = 1;
+      else if (unformat (input, "fifo-size %d", &tm->fifo_size))
+       tm->fifo_size <<= 10;
+      else if (unformat (input, "private-segment-count %d",
+                        &tm->private_segment_count))
+       ;
+      else if (unformat (input, "private-segment-size %U",
+                        unformat_memory_size, &tmp))
+       {
+         if (tmp >= 0x100000000ULL)
+           return clib_error_return
+             (0, "private segment size %lld (%llu) too large", tmp, tmp);
+         tm->private_segment_size = tmp;
+       }
+      else if (unformat (input, "preallocate-fifos"))
+       tm->prealloc_fifos = 1;
+      else if (unformat (input, "preallocate-sessions"))
+       preallocate_sessions = 1;
+      else
+       if (unformat (input, "client-batch %d", &tm->connections_per_batch))
+       ;
+      else if (unformat (input, "appns %_%v%_", &appns_id))
+       ;
+      else if (unformat (input, "all-scope"))
+       appns_flags |= (APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE
+                       | APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE);
+      else if (unformat (input, "local-scope"))
+       appns_flags = APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE;
+      else if (unformat (input, "global-scope"))
+       appns_flags = APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
+      else if (unformat (input, "secret %lu", &appns_secret))
+       ;
       else
        return clib_error_return (0, "unknown input `%U'",
                                  format_unformat_error, input);
     }
 
+  /* Store cli process node index for signalling */
+  tm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index;
+
+  if (tm->is_init == 0)
+    {
+      if (tcp_test_clients_init (vm))
+       return clib_error_return (0, "failed init");
+    }
+
+
   tm->ready_connections = 0;
   tm->expected_connections = n_clients;
-  uri = connect_uri;
+  tm->rx_total = 0;
+  tm->tx_total = 0;
+
+  uri = default_connect_uri;
   if (tm->connect_uri)
     uri = tm->connect_uri;
 
-  create_api_loopback (tm);
-
 #if TCP_BUILTIN_CLIENT_PTHREAD
-  /* Start a transmit thread */
-  if (tm->client_thread_handle == 0)
+  start_tx_pthread ();
+#endif
+
+  vlib_worker_thread_barrier_sync (vm);
+  vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
+  vlib_worker_thread_barrier_release (vm);
+
+  if (tm->test_client_attached == 0)
     {
-      int rv = pthread_create (&tm->client_thread_handle,
-                              NULL /*attr */ ,
-                              tclient_thread_fn, 0);
-      if (rv)
+      if ((error = attach_builtin_test_clients_app (appns_id, appns_flags,
+                                                   appns_secret)))
        {
-         tm->client_thread_handle = 0;
-         return clib_error_return (0, "pthread_create returned %d", rv);
+         vec_free (appns_id);
+         clib_error_report (error);
+         return error;
        }
+      vec_free (appns_id);
     }
-#endif
-  vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
+  tm->test_client_attached = 1;
 
-  /* Fire off connect requests, in something approaching a normal manner */
-  for (i = 0; i < n_clients; i++)
+  /* Turn on the builtin client input nodes */
+  for (i = 0; i < thread_main->n_vlib_mains; i++)
+    vlib_node_set_state (vlib_mains[i], builtin_client_node.index,
+                        VLIB_NODE_STATE_POLLING);
+
+  if (preallocate_sessions)
+    {
+      session_t *sp __attribute__ ((unused));
+      for (i = 0; i < n_clients; i++)
+       pool_get (tm->sessions, sp);
+      for (i = 0; i < n_clients; i++)
+       pool_put_index (tm->sessions, i);
+    }
+
+  /* Fire off connect requests */
+  time_before_connects = vlib_time_now (vm);
+  if ((error = clients_connect (vm, uri, n_clients)))
+    return error;
+
+  /* Park until the sessions come up, or syn_timeout seconds elapse... */
+  vlib_process_wait_for_event_or_clock (vm, syn_timeout);
+  event_type = vlib_process_get_events (vm, &event_data);
+  switch (event_type)
+    {
+    case ~0:
+      vlib_cli_output (vm, "Timeout with only %d sessions active...",
+                      tm->ready_connections);
+      goto cleanup;
+
+    case 1:
+      delta = vlib_time_now (vm) - time_before_connects;
+
+      if (delta != 0.0)
+       {
+         vlib_cli_output
+           (vm, "%d three-way handshakes in %.2f seconds, %.2f/sec",
+            n_clients, delta, ((f64) n_clients) / delta);
+       }
+
+      tm->test_start_time = vlib_time_now (tm->vlib_main);
+      vlib_cli_output (vm, "Test started at %.6f", tm->test_start_time);
+      break;
+
+    default:
+      vlib_cli_output (vm, "unexpected event(1): %d", event_type);
+      goto cleanup;
+    }
+
+  /* Now wait for the sessions to finish... */
+  vlib_process_wait_for_event_or_clock (vm, test_timeout);
+  event_type = vlib_process_get_events (vm, &event_data);
+  switch (event_type)
+    {
+    case ~0:
+      vlib_cli_output (vm, "Timeout with %d sessions still active...",
+                      tm->ready_connections);
+      goto cleanup;
+
+    case 2:
+      tm->test_end_time = vlib_time_now (vm);
+      vlib_cli_output (vm, "Test finished at %.6f", tm->test_end_time);
+      break;
+
+    default:
+      vlib_cli_output (vm, "unexpected event(2): %d", event_type);
+      goto cleanup;
+    }
+
+  delta = tm->test_end_time - tm->test_start_time;
+
+  if (delta != 0.0)
     {
-      vl_api_connect_uri_t *cmp;
-      cmp = vl_msg_api_alloc_as_if_client (sizeof (*cmp));
-      memset (cmp, 0, sizeof (*cmp));
-
-      cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI);
-      cmp->client_index = tm->my_client_index;
-      cmp->context = ntohl (0xfeedface);
-      memcpy (cmp->uri, uri, strlen ((char *) uri) + 1);
-      vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & cmp);
+      total_bytes = (tm->no_return ? tm->tx_total : tm->rx_total);
+      transfer_type = tm->no_return ? "half-duplex" : "full-duplex";
+      vlib_cli_output (vm,
+                      "%lld bytes (%lld mbytes, %lld gbytes) in %.2f seconds",
+                      total_bytes, total_bytes / (1ULL << 20),
+                      total_bytes / (1ULL << 30), delta);
+      vlib_cli_output (vm, "%.2f bytes/second %s",
+                      ((f64) total_bytes) / (delta), transfer_type);
+      vlib_cli_output (vm, "%.4f gbit/second %s",
+                      (((f64) total_bytes * 8.0) / delta / 1e9),
+                      transfer_type);
     }
+  else
+    vlib_cli_output (vm, "zero delta-t?");
+
+cleanup:
+  tm->run_test = 0;
+  for (i = 0; i < vec_len (tm->connection_index_by_thread); i++)
+    {
+      vec_reset_length (tm->connection_index_by_thread[i]);
+      vec_reset_length (tm->connections_this_batch_by_thread[i]);
+    }
+
+  pool_free (tm->sessions);
+
+  /* Detach the application, so we can use different fifo sizes next time */
+  if (tm->test_client_attached)
+    {
+      vnet_app_detach_args_t _da, *da = &_da;
+      int rv;
 
-  tm->run_test = 1;
+      da->app_index = tm->app_index;
 
+      rv = vnet_application_detach (da);
+      if (rv)
+       vlib_cli_output (vm, "WARNING: app detach failed...");
+      tm->test_client_attached = 0;
+      tm->app_index = ~0;
+    }
   return 0;
 }
 
-#if TCP_BUILTIN_CLIENT_VPP_THREAD
-/* *INDENT-OFF* */
-VLIB_REGISTER_THREAD (builtin_client_reg, static) = {
-  .name = "tcp-builtin-client",
-  .function = tclient_thread_fn,
-  .fixed_count = 1,
-  .count = 1,
-  .no_data_structure_clone = 1,
-};
-/* *INDENT-ON* */
-#endif
-
 /* *INDENT-OFF* */
 VLIB_CLI_COMMAND (test_clients_command, static) =
 {
   .path = "test tcp clients",
-  .short_help = "test tcp clients [nclients %d] [iterations %d] [bytes %d] [uri tcp://1.2.3.4/1234]",
+  .short_help = "test tcp clients [nclients %d] [[m|g]bytes <bytes>] "
+      "[test-timeout <time>][syn-timeout <time>][no-return][fifo-size <size>]"
+      "[private-segment-count <count>][private-segment-size <bytes>[m|g]]"
+      "[preallocate-fifos][preallocate-sessions][client-batch <batch-size>]"
+      "[uri <tcp://ip/port>]",
   .function = test_tcp_clients_command_fn,
+  .is_mp_safe = 1,
 };
 /* *INDENT-ON* */
 
+clib_error_t *
+tcp_test_clients_main_init (vlib_main_t * vm)
+{
+  tclient_main_t *tm = &tclient_main;
+  tm->is_init = 0;
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (tcp_test_clients_main_init);
+
 /*
  * fd.io coding-style-patch-verification: ON
  *