Add extern to *_main global variable declarations in header files.
[vpp.git] / src / vnet / tcp / builtin_client.c
index 7238cda..76c5c12 100644 (file)
 
 #include <vlibapi/api.h>
 #include <vlibmemory/api.h>
-#include <vlibsocket/api.h>
 #include <vpp/app/version.h>
 
-/* define message IDs */
-#include <vpp/api/vpe_msg_enum.h>
+tclient_main_t tclient_main;
 
-/* define message structures */
-#define vl_typedefs
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_typedefs
+#define TCP_BUILTIN_CLIENT_DBG (0)
 
-/* define generated endian-swappers */
-#define vl_endianfun
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_endianfun
-
-/* instantiate all the print functions we know about */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define vl_printfun
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_printfun
+static void
+signal_evt_to_cli_i (int *code)
+{  /*
+    * Post event type *code, with event data 0, to the process node whose
+    * index is stored in tm->cli_node_index, using tm->vlib_main.
+    * Asserts that the caller is running on thread index 0; callers on other
+    * threads go through signal_evt_to_cli.
+    */
+  tclient_main_t *tm = &tclient_main;
+  ASSERT (vlib_get_thread_index () == 0);
+  vlib_process_signal_event (tm->vlib_main, tm->cli_node_index, *code, 0);
+}
 
-#define TCP_BUILTIN_CLIENT_DBG (1)
+static void
+signal_evt_to_cli (int code)
+{  /*
+    * Deliver event type `code` to the CLI process from any thread.
+    * On thread index 0 the event is posted directly via
+    * signal_evt_to_cli_i. On any other thread the request is handed to
+    * vl_api_rpc_call_main_thread together with the address and size of
+    * `code`.
+    * NOTE(review): presumably the RPC helper copies the argument bytes
+    * before this stack frame unwinds - confirm against its implementation.
+    * Codes used in this file: 1 (all expected connections ready),
+    * 2 (ready_connections dropped to 0), -1 (a connect failed).
+    */
+  if (vlib_get_thread_index () != 0)
+    vl_api_rpc_call_main_thread (signal_evt_to_cli_i, (u8 *) & code,
+                                sizeof (code));
+  else
+    signal_evt_to_cli_i (&code);
+}
 
 static void
 send_test_chunk (tclient_main_t * tm, session_t * s)
@@ -52,7 +52,7 @@ send_test_chunk (tclient_main_t * tm, session_t * s)
   int test_buf_offset;
   u32 bytes_this_chunk;
   session_fifo_event_t evt;
-  static int serial_number = 0;
+  svm_fifo_t *txf;
   int rv;
 
   ASSERT (vec_len (test_data) > 0);
@@ -63,7 +63,8 @@ send_test_chunk (tclient_main_t * tm, session_t * s)
   bytes_this_chunk = bytes_this_chunk < s->bytes_to_send
     ? bytes_this_chunk : s->bytes_to_send;
 
-  rv = svm_fifo_enqueue_nowait (s->server_tx_fifo, bytes_this_chunk,
+  txf = s->server_tx_fifo;
+  rv = svm_fifo_enqueue_nowait (txf, bytes_this_chunk,
                                test_data + test_buf_offset);
 
   /* If we managed to enqueue data... */
@@ -92,16 +93,17 @@ send_test_chunk (tclient_main_t * tm, session_t * s)
          ed->data[2] = s->bytes_to_send;
        }
 
-      /* Poke the TCP state machine */
-      if (svm_fifo_set_event (s->server_tx_fifo))
+      /* Poke the session layer */
+      if (svm_fifo_set_event (txf))
        {
          /* Fabricate TX event, send to vpp */
-         evt.fifo = s->server_tx_fifo;
+         evt.fifo = txf;
          evt.event_type = FIFO_EVENT_APP_TX;
-         evt.event_id = serial_number++;
 
-         unix_shared_memory_queue_add (tm->vpp_event_queue, (u8 *) & evt,
-                                       0 /* do wait for mutex */ );
+         if (unix_shared_memory_queue_add
+             (tm->vpp_event_queue[txf->master_thread_index], (u8 *) & evt,
+              0 /* do wait for mutex */ ))
+           clib_warning ("could not enqueue event");
        }
     }
 }
@@ -111,14 +113,16 @@ receive_test_chunk (tclient_main_t * tm, session_t * s)
 {
   svm_fifo_t *rx_fifo = s->server_rx_fifo;
   int n_read, test_bytes = 0;
+  u32 my_thread_index = vlib_get_thread_index ();
 
   /* Allow enqueuing of new event */
   // svm_fifo_unset_event (rx_fifo);
 
   if (test_bytes)
     {
-      n_read = svm_fifo_dequeue_nowait (rx_fifo, vec_len (tm->rx_buf),
-                                       tm->rx_buf);
+      n_read = svm_fifo_dequeue_nowait (rx_fifo,
+                                       vec_len (tm->rx_buf[my_thread_index]),
+                                       tm->rx_buf[my_thread_index]);
     }
   else
     {
@@ -150,10 +154,12 @@ receive_test_chunk (tclient_main_t * tm, session_t * s)
          int i;
          for (i = 0; i < n_read; i++)
            {
-             if (tm->rx_buf[i] != ((s->bytes_received + i) & 0xff))
+             if (tm->rx_buf[my_thread_index][i]
+                 != ((s->bytes_received + i) & 0xff))
                {
                  clib_warning ("read %d error at byte %lld, 0x%x not 0x%x",
-                               n_read, s->bytes_received + i, tm->rx_buf[i],
+                               n_read, s->bytes_received + i,
+                               tm->rx_buf[my_thread_index][i],
                                ((s->bytes_received + i) & 0xff));
                }
            }
@@ -169,71 +175,106 @@ builtin_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
 {
   tclient_main_t *tm = &tclient_main;
   int my_thread_index = vlib_get_thread_index ();
-  vl_api_disconnect_session_t *dmp;
   session_t *sp;
   int i;
   int delete_session;
   u32 *connection_indices;
-  u32 tx_quota = 0;
-  u32 delta, prev_bytes_received_this_session;
+  u32 *connections_this_batch;
+  u32 nconnections_this_batch;
 
   connection_indices = tm->connection_index_by_thread[my_thread_index];
+  connections_this_batch =
+    tm->connections_this_batch_by_thread[my_thread_index];
 
-  if (tm->run_test == 0 || vec_len (connection_indices) == 0)
+  if ((tm->run_test == 0) ||
+      ((vec_len (connection_indices) == 0)
+       && vec_len (connections_this_batch) == 0))
     return 0;
 
-  for (i = 0; i < vec_len (connection_indices); i++)
+  /* Grab another pile of connections */
+  if (PREDICT_FALSE (vec_len (connections_this_batch) == 0))
+    {
+      nconnections_this_batch =
+       clib_min (tm->connections_per_batch, vec_len (connection_indices));
+
+      ASSERT (nconnections_this_batch > 0);
+      vec_validate (connections_this_batch, nconnections_this_batch - 1);
+      clib_memcpy (connections_this_batch,
+                  connection_indices + vec_len (connection_indices)
+                  - nconnections_this_batch,
+                  nconnections_this_batch * sizeof (u32));
+      _vec_len (connection_indices) -= nconnections_this_batch;
+    }
+
+  if (PREDICT_FALSE (tm->prev_conns != tm->connections_per_batch
+                    && tm->prev_conns == vec_len (connections_this_batch)))
+    {
+      tm->repeats++;
+      tm->prev_conns = vec_len (connections_this_batch);
+      if (tm->repeats == 500000)
+       {
+         clib_warning ("stuck clients");
+       }
+    }
+  else
+    {
+      tm->prev_conns = vec_len (connections_this_batch);
+      tm->repeats = 0;
+    }
+
+  for (i = 0; i < vec_len (connections_this_batch); i++)
     {
       delete_session = 1;
 
-      sp = pool_elt_at_index (tm->sessions, connection_indices[i]);
+      sp = pool_elt_at_index (tm->sessions, connections_this_batch[i]);
 
-      if (tx_quota < 60 && sp->bytes_to_send > 0)
+      if (sp->bytes_to_send > 0)
        {
          send_test_chunk (tm, sp);
          delete_session = 0;
-         tx_quota++;
        }
       if (sp->bytes_to_receive > 0)
        {
-         prev_bytes_received_this_session = sp->bytes_received;
          receive_test_chunk (tm, sp);
-         delta = sp->bytes_received - prev_bytes_received_this_session;
-         if (delta > 0)
-           tx_quota--;
          delete_session = 0;
        }
       if (PREDICT_FALSE (delete_session == 1))
        {
+         u32 index, thread_index;
+         stream_session_t *s;
+
+         __sync_fetch_and_add (&tm->tx_total, sp->bytes_sent);
          __sync_fetch_and_add (&tm->rx_total, sp->bytes_received);
-         dmp = vl_msg_api_alloc_as_if_client (sizeof (*dmp));
-         memset (dmp, 0, sizeof (*dmp));
-         dmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION);
-         dmp->client_index = tm->my_client_index;
-         dmp->handle = sp->vpp_session_handle;
-//        vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & dmp);
-         if (!unix_shared_memory_queue_add (tm->vl_input_queue, (u8 *) & dmp,
-                                            1))
+
+         session_parse_handle (sp->vpp_session_handle,
+                               &index, &thread_index);
+         s = session_get_if_valid (index, thread_index);
+
+         if (s)
            {
-             vec_delete (connection_indices, 1, i);
-             tm->connection_index_by_thread[my_thread_index] =
-               connection_indices;
+             vnet_disconnect_args_t _a, *a = &_a;
+             a->handle = session_handle (s);
+             a->app_index = tm->app_index;
+             vnet_disconnect_session (a);
+
+             vec_delete (connections_this_batch, 1, i);
+             i--;
              __sync_fetch_and_add (&tm->ready_connections, -1);
            }
          else
-           {
-             vl_msg_api_free (dmp);
-           }
+           clib_warning ("session AWOL?");
 
          /* Kick the debug CLI process */
          if (tm->ready_connections == 0)
            {
-             tm->test_end_time = vlib_time_now (vm);
-             vlib_process_signal_event (vm, tm->cli_node_index,
-                                        2, 0 /* data */ );
+             signal_evt_to_cli (2);
            }
        }
     }
+
+  tm->connection_index_by_thread[my_thread_index] = connection_indices;
+  tm->connections_this_batch_by_thread[my_thread_index] =
+    connections_this_batch;
   return 0;
 }
 
@@ -247,156 +288,16 @@ VLIB_REGISTER_NODE (builtin_client_node) =
 };
 /* *INDENT-ON* */
 
-
-/* So we don't get "no handler for... " msgs */
-static void
-vl_api_memclnt_create_reply_t_handler (vl_api_memclnt_create_reply_t * mp)
-{
-  vlib_main_t *vm = vlib_get_main ();
-  tclient_main_t *tm = &tclient_main;
-  tm->my_client_index = mp->index;
-  vlib_process_signal_event (vm, tm->node_index, 1 /* evt */ ,
-                            0 /* data */ );
-}
-
-static void
-vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp)
-{
-  tclient_main_t *tm = &tclient_main;
-  session_t *session;
-  u32 session_index;
-  i32 retval = /* clib_net_to_host_u32 ( */ mp->retval /*) */ ;
-  int i;
-
-  if (retval < 0)
-    {
-      clib_warning ("connection failed: retval %d", retval);
-      return;
-    }
-
-  tm->our_event_queue =
-    uword_to_pointer (mp->vpp_event_queue_address,
-                     unix_shared_memory_queue_t *);
-  tm->vpp_event_queue =
-    uword_to_pointer (mp->vpp_event_queue_address,
-                     unix_shared_memory_queue_t *);
-
-  /*
-   * Setup session
-   */
-  pool_get (tm->sessions, session);
-  memset (session, 0, sizeof (*session));
-  session_index = session - tm->sessions;
-  session->bytes_to_receive = session->bytes_to_send = tm->bytes_to_send;
-
-  session->server_rx_fifo =
-    uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *);
-  session->server_rx_fifo->client_session_index = session_index;
-  session->server_tx_fifo =
-    uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *);
-  session->server_tx_fifo->client_session_index = session_index;
-  session->vpp_session_handle = mp->handle;
-
-  /* Add it to the session lookup table */
-  hash_set (tm->session_index_by_vpp_handles, mp->handle, session_index);
-
-  if (tm->ready_connections == tm->expected_connections - 1)
-    {
-      vlib_thread_main_t *thread_main = vlib_get_thread_main ();
-      int thread_index;
-
-      thread_index = 0;
-      for (i = 0; i < pool_elts (tm->sessions); i++)
-       {
-         vec_add1 (tm->connection_index_by_thread[thread_index], i);
-         thread_index++;
-         if (thread_index == thread_main->n_vlib_mains)
-           thread_index = 0;
-       }
-    }
-  __sync_fetch_and_add (&tm->ready_connections, 1);
-  if (tm->ready_connections == tm->expected_connections)
-    {
-      tm->run_test = 1;
-      tm->test_start_time = vlib_time_now (tm->vlib_main);
-      /* Signal the CLI process that the action is starting... */
-      vlib_process_signal_event (tm->vlib_main, tm->cli_node_index,
-                                1, 0 /* data */ );
-    }
-}
-
 static int
 create_api_loopback (tclient_main_t * tm)
 {
-  vlib_main_t *vm = vlib_get_main ();
-  vl_api_memclnt_create_t _m, *mp = &_m;
-  extern void vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t *);
   api_main_t *am = &api_main;
   vl_shmem_hdr_t *shmem_hdr;
-  uword *event_data = 0, event_type;
-  int resolved = 0;
-
-  /*
-   * Create a "loopback" API client connection
-   * Don't do things like this unless you know what you're doing...
-   */
 
   shmem_hdr = am->shmem_hdr;
   tm->vl_input_queue = shmem_hdr->vl_input_queue;
-  memset (mp, 0, sizeof (*mp));
-  mp->_vl_msg_id = VL_API_MEMCLNT_CREATE;
-  mp->context = 0xFEEDFACE;
-  mp->input_queue = pointer_to_uword (tm->vl_input_queue);
-  strncpy ((char *) mp->name, "tcp_tester", sizeof (mp->name) - 1);
-
-  vl_api_memclnt_create_t_handler (mp);
-
-  /* Wait for reply */
-  tm->node_index = vlib_get_current_process (vm)->node_runtime.node_index;
-  vlib_process_wait_for_event_or_clock (vm, 1.0);
-  event_type = vlib_process_get_events (vm, &event_data);
-  switch (event_type)
-    {
-    case 1:
-      resolved = 1;
-      break;
-    case ~0:
-      /* timed out */
-      break;
-    default:
-      clib_warning ("unknown event_type %d", event_type);
-    }
-  if (!resolved)
-    return -1;
-  return 0;
-}
-
-#define foreach_tclient_static_api_msg         \
-_(MEMCLNT_CREATE_REPLY, memclnt_create_reply)   \
-_(CONNECT_URI_REPLY, connect_uri_reply)
-
-static clib_error_t *
-tclient_api_hookup (vlib_main_t * vm)
-{
-  vl_msg_api_msg_config_t _c, *c = &_c;
-
-  /* Hook up client-side static APIs to our handlers */
-#define _(N,n) do {                                             \
-    c->id = VL_API_##N;                                         \
-    c->name = #n;                                               \
-    c->handler = vl_api_##n##_t_handler;                        \
-    c->cleanup = vl_noop_handler;                               \
-    c->endian = vl_api_##n##_t_endian;                          \
-    c->print = vl_api_##n##_t_print;                            \
-    c->size = sizeof(vl_api_##n##_t);                           \
-    c->traced = 1; /* trace, so these msgs print */             \
-    c->replay = 0; /* don't replay client create/delete msgs */ \
-    c->message_bounce = 0; /* don't bounce this message */     \
-    vl_msg_api_config(c);} while (0);
-
-  foreach_tclient_static_api_msg;
-#undef _
-
+  tm->my_client_index =
+    vl_api_memclnt_create_internal ("tcp_test_client", tm->vl_input_queue);
   return 0;
 }
 
@@ -404,25 +305,30 @@ static int
 tcp_test_clients_init (vlib_main_t * vm)
 {
   tclient_main_t *tm = &tclient_main;
-  vlib_thread_main_t *thread_main = vlib_get_thread_main ();
+  vlib_thread_main_t *vtm = vlib_get_thread_main ();
+  u32 num_threads;
   int i;
 
-  tclient_api_hookup (vm);
   if (create_api_loopback (tm))
     return -1;
 
-  /* Init test data */
-  vec_validate (tm->connect_test_data, 64 * 1024 - 1);
+  num_threads = 1 /* main thread */  + vtm->n_threads;
+
+  /* Init test data. Big buffer */
+  vec_validate (tm->connect_test_data, 1024 * 1024 - 1);
   for (i = 0; i < vec_len (tm->connect_test_data); i++)
     tm->connect_test_data[i] = i & 0xff;
 
-  tm->session_index_by_vpp_handles = hash_create (0, sizeof (uword));
-  vec_validate (tm->rx_buf, vec_len (tm->connect_test_data) - 1);
+  vec_validate (tm->rx_buf, num_threads - 1);
+  for (i = 0; i < num_threads; i++)
+    vec_validate (tm->rx_buf[i], vec_len (tm->connect_test_data) - 1);
 
   tm->is_init = 1;
-  tm->vlib_main = vm;
 
-  vec_validate (tm->connection_index_by_thread, thread_main->n_vlib_mains);
+  vec_validate (tm->connection_index_by_thread, vtm->n_vlib_mains);
+  vec_validate (tm->connections_this_batch_by_thread, vtm->n_vlib_mains);
+  vec_validate (tm->vpp_event_queue, vtm->n_vlib_mains);
+
   return 0;
 }
 
@@ -430,43 +336,59 @@ static int
 builtin_session_connected_callback (u32 app_index, u32 api_context,
                                    stream_session_t * s, u8 is_fail)
 {
-  vl_api_connect_uri_reply_t _m, *mp = &_m;
-  unix_shared_memory_queue_t *q;
-  application_t *app;
-  unix_shared_memory_queue_t *vpp_queue;
-
-  app = application_get (app_index);
-  q = vl_api_client_index_to_input_queue (app->api_client_index);
-
-  if (!q)
-    return -1;
+  tclient_main_t *tm = &tclient_main;
+  session_t *session;
+  u32 session_index;
+  u8 thread_index = vlib_get_thread_index ();
 
-  memset (mp, 0, sizeof (*mp));
-  mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_CONNECT_URI_REPLY);
-  mp->context = api_context;
-  if (!is_fail)
+  if (is_fail)
     {
-      vpp_queue = session_manager_get_vpp_event_queue (s->thread_index);
-      mp->server_rx_fifo = pointer_to_uword (s->server_rx_fifo);
-      mp->server_tx_fifo = pointer_to_uword (s->server_tx_fifo);
-      mp->handle = stream_session_handle (s);
-      mp->vpp_event_queue_address = pointer_to_uword (vpp_queue);
-      mp->retval = 0;
-      s->session_state = SESSION_STATE_READY;
+      clib_warning ("connection %d failed!", api_context);
+      signal_evt_to_cli (-1);
+      return 0;
     }
-  else
+
+  ASSERT (s->thread_index == thread_index);
+
+  if (!tm->vpp_event_queue[thread_index])
+    tm->vpp_event_queue[thread_index] =
+      session_manager_get_vpp_event_queue (thread_index);
+
+  /*
+   * Setup session
+   */
+  clib_spinlock_lock_if_init (&tm->sessions_lock);
+  pool_get (tm->sessions, session);
+  clib_spinlock_unlock_if_init (&tm->sessions_lock);
+
+  memset (session, 0, sizeof (*session));
+  session_index = session - tm->sessions;
+  session->bytes_to_send = tm->bytes_to_send;
+  session->bytes_to_receive = tm->no_return ? 0ULL : tm->bytes_to_send;
+  session->server_rx_fifo = s->server_rx_fifo;
+  session->server_rx_fifo->client_session_index = session_index;
+  session->server_tx_fifo = s->server_tx_fifo;
+  session->server_tx_fifo->client_session_index = session_index;
+  session->vpp_session_handle = session_handle (s);
+
+  vec_add1 (tm->connection_index_by_thread[thread_index], session_index);
+  __sync_fetch_and_add (&tm->ready_connections, 1);
+  if (tm->ready_connections == tm->expected_connections)
     {
-      mp->retval = clib_host_to_net_u32 (VNET_API_ERROR_SESSION_CONNECT_FAIL);
+      tm->run_test = 1;
+      /* Signal the CLI process that the action is starting... */
+      signal_evt_to_cli (1);
     }
 
-  vl_api_connect_uri_reply_t_handler (mp);
-
   return 0;
 }
 
 static void
 builtin_session_reset_callback (stream_session_t * s)
 {
+  if (s->session_state == SESSION_STATE_READY)
+    clib_warning ("Reset active connection %U", format_stream_session, s, 2);
+  stream_session_cleanup (s);
   return;
 }
 
@@ -479,6 +401,11 @@ builtin_session_create_callback (stream_session_t * s)
 static void
 builtin_session_disconnect_callback (stream_session_t * s)
 {
+  tclient_main_t *tm = &tclient_main;
+  vnet_disconnect_args_t _a, *a = &_a;
+  a->handle = session_handle (s);
+  a->app_index = tm->app_index;
+  vnet_disconnect_session (a);
   return;
 }
 
@@ -489,24 +416,25 @@ builtin_server_rx_callback (stream_session_t * s)
 }
 
 /* *INDENT-OFF* */
-static session_cb_vft_t builtin_clients =
-  {
-    .session_reset_callback = builtin_session_reset_callback,
-    .session_connected_callback = builtin_session_connected_callback,
-    .session_accept_callback = builtin_session_create_callback,
-    .session_disconnect_callback = builtin_session_disconnect_callback,
-    .builtin_server_rx_callback = builtin_server_rx_callback
-  };
+static session_cb_vft_t builtin_clients = {
+  .session_reset_callback = builtin_session_reset_callback,
+  .session_connected_callback = builtin_session_connected_callback,
+  .session_accept_callback = builtin_session_create_callback,
+  .session_disconnect_callback = builtin_session_disconnect_callback,
+  .builtin_server_rx_callback = builtin_server_rx_callback
+};
 /* *INDENT-ON* */
 
-static int
-attach_builtin_test_clients ()
+static clib_error_t *
+attach_builtin_test_clients_app (u8 * appns_id, u64 appns_flags,
+                                u64 appns_secret)
 {
   tclient_main_t *tm = &tclient_main;
   vnet_app_attach_args_t _a, *a = &_a;
   u8 segment_name[128];
-  u32 segment_name_length;
+  u32 segment_name_length, prealloc_fifos;
   u64 options[16];
+  clib_error_t *error = 0;
 
   segment_name_length = ARRAY_LEN (segment_name);
 
@@ -518,13 +446,86 @@ attach_builtin_test_clients ()
   a->segment_name_length = segment_name_length;
   a->session_cb_vft = &builtin_clients;
 
+  prealloc_fifos = tm->prealloc_fifos ? tm->expected_connections : 1;
+
   options[SESSION_OPTIONS_ACCEPT_COOKIE] = 0x12345678;
-  options[SESSION_OPTIONS_SEGMENT_SIZE] = (2 << 30);   /*$$$$ config / arg */
-  options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP;
+  options[SESSION_OPTIONS_SEGMENT_SIZE] = (2ULL << 32);
+  options[SESSION_OPTIONS_RX_FIFO_SIZE] = tm->fifo_size;
+  options[SESSION_OPTIONS_TX_FIFO_SIZE] = tm->fifo_size;
+  options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = tm->private_segment_count;
+  options[APP_OPTIONS_PRIVATE_SEGMENT_SIZE] = tm->private_segment_size;
+  options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = prealloc_fifos;
 
+  options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP;
+  if (appns_id)
+    {
+      options[APP_OPTIONS_FLAGS] |= appns_flags;
+      options[APP_OPTIONS_NAMESPACE_SECRET] = appns_secret;
+    }
   a->options = options;
+  a->namespace_id = appns_id;
+
+  if ((error = vnet_application_attach (a)))
+    return error;
 
-  return vnet_application_attach (a);
+  tm->app_index = a->app_index;
+  return 0;
+}
+
+static void *
+tclient_thread_fn (void *arg)
+{  /*
+    * Thread start routine handed to pthread_create by start_tx_pthread.
+    * Currently a stub: it ignores arg and returns 0 straight away.
+    */
+  return 0;
+}
+
+/** Start a transmit thread
+ *
+ * A thread is created only while tm->client_thread_handle is 0, so calling
+ * this again after a successful start does nothing. The new thread runs
+ * tclient_thread_fn with a null argument and default attributes.
+ *
+ * @param tm client main structure; its client_thread_handle is written by
+ *           pthread_create and reset to 0 if creation fails
+ * @return 0 if a thread already existed or was created, -1 if
+ *         pthread_create reported an error
+ *
+ * NOTE(review): the call site guarded by TCP_BUILTIN_CLIENT_PTHREAD in
+ * test_tcp_clients_command_fn invokes start_tx_pthread () with no argument,
+ * which does not match this signature - confirm and pass tm there.
+ */
+int
+start_tx_pthread (tclient_main_t * tm)
+{
+  if (tm->client_thread_handle == 0)
+    {
+      int rv = pthread_create (&tm->client_thread_handle,
+                              NULL /*attr */ ,
+                              tclient_thread_fn, 0);
+      if (rv)
+       {
+         tm->client_thread_handle = 0;
+         return -1;
+       }
+    }
+  return 0;
+}
+
+clib_error_t *
+clients_connect (vlib_main_t * vm, u8 * uri, u32 n_clients)
+{  /*
+    * Issue n_clients connect requests to `uri` on behalf of tm->app_index.
+    * Each request is zero-initialised and carries the loop index as its
+    * api_context. Returns the first error reported by vnet_connect_uri
+    * (requests already issued are left as they are), or 0 once every
+    * request has been issued.
+    * Calls vlib_process_suspend, so this is expected to run in a vlib
+    * process context - NOTE(review): confirm for all callers.
+    */
+  tclient_main_t *tm = &tclient_main;
+  vnet_connect_args_t _a, *a = &_a;
+  clib_error_t *error = 0;
+  int i;
+  for (i = 0; i < n_clients; i++)
+    {
+      memset (a, 0, sizeof (*a));
+
+      a->uri = (char *) uri;
+      a->api_context = i;
+      a->app_index = tm->app_index;
+      a->mp = 0;
+
+      if ((error = vnet_connect_uri (a)))
+       return error;
+
+
+      /* Crude pacing for call setups: suspend briefly on every fourth
+       * request, then keep suspending for as long as the number of requests
+       * issued so far exceeds tm->ready_connections by more than 1000.
+       * The ASSERT checks that ready_connections never runs ahead of the
+       * number of requests issued. */
+      if ((i % 4) == 0)
+       vlib_process_suspend (vm, 10e-6);
+      ASSERT (i + 1 >= tm->ready_connections);
+      while (i + 1 - tm->ready_connections > 1000)
+       {
+         vlib_process_suspend (vm, 100e-6);
+       }
+    }
+  return 0;
+}
 
 static clib_error_t *
@@ -534,17 +535,26 @@ test_tcp_clients_command_fn (vlib_main_t * vm,
 {
   tclient_main_t *tm = &tclient_main;
   vlib_thread_main_t *thread_main = vlib_get_thread_main ();
-  uword *event_data = 0;
-  uword event_type;
-  u8 *connect_uri = (u8 *) "tcp://6.0.1.1/1234";
-  u8 *uri;
+  uword *event_data = 0, event_type;
+  u8 *default_connect_uri = (u8 *) "tcp://6.0.1.1/1234", *uri, *appns_id;
+  u64 tmp, total_bytes, appns_flags = 0, appns_secret = 0;
+  f64 test_timeout = 20.0, syn_timeout = 20.0, delta;
+  f64 time_before_connects;
   u32 n_clients = 1;
+  int preallocate_sessions = 0;
+  char *transfer_type;
+  clib_error_t *error = 0;
   int i;
-  u64 tmp;
-  f64 cli_timeout = 20.0;
-  f64 delta;
 
   tm->bytes_to_send = 8192;
+  tm->no_return = 0;
+  tm->fifo_size = 64 << 10;
+  tm->connections_per_batch = 1000;
+  tm->private_segment_count = 0;
+  tm->private_segment_size = 0;
+  tm->vlib_main = vm;
+  if (thread_main->n_vlib_mains > 1)
+    clib_spinlock_init (&tm->sessions_lock);
   vec_free (tm->connect_uri);
 
   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
@@ -559,44 +569,86 @@ test_tcp_clients_command_fn (vlib_main_t * vm,
        ;
       else if (unformat (input, "uri %s", &tm->connect_uri))
        ;
-      else if (unformat (input, "cli-timeout %f", &cli_timeout))
+      else if (unformat (input, "test-timeout %f", &test_timeout))
+       ;
+      else if (unformat (input, "syn-timeout %f", &syn_timeout))
+       ;
+      else if (unformat (input, "no-return"))
+       tm->no_return = 1;
+      else if (unformat (input, "fifo-size %d", &tm->fifo_size))
+       tm->fifo_size <<= 10;
+      else if (unformat (input, "private-segment-count %d",
+                        &tm->private_segment_count))
+       ;
+      else if (unformat (input, "private-segment-size %U",
+                        unformat_memory_size, &tmp))
+       {
+         if (tmp >= 0x100000000ULL)
+           return clib_error_return
+             (0, "private segment size %lld (%llu) too large", tmp, tmp);
+         tm->private_segment_size = tmp;
+       }
+      else if (unformat (input, "preallocate-fifos"))
+       tm->prealloc_fifos = 1;
+      else if (unformat (input, "preallocate-sessions"))
+       preallocate_sessions = 1;
+      else
+       if (unformat (input, "client-batch %d", &tm->connections_per_batch))
+       ;
+      else if (unformat (input, "appns %_%v%_", &appns_id))
+       ;
+      else if (unformat (input, "all-scope"))
+       appns_flags |= (APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE
+                       | APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE);
+      else if (unformat (input, "local-scope"))
+       appns_flags = APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE;
+      else if (unformat (input, "global-scope"))
+       appns_flags = APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
+      else if (unformat (input, "secret %lu", &appns_secret))
        ;
       else
        return clib_error_return (0, "unknown input `%U'",
                                  format_unformat_error, input);
     }
 
+  /* Store cli process node index for signalling */
+  tm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index;
+
   if (tm->is_init == 0)
     {
       if (tcp_test_clients_init (vm))
        return clib_error_return (0, "failed init");
     }
 
+
   tm->ready_connections = 0;
   tm->expected_connections = n_clients;
   tm->rx_total = 0;
+  tm->tx_total = 0;
 
-  uri = connect_uri;
+  uri = default_connect_uri;
   if (tm->connect_uri)
     uri = tm->connect_uri;
 
 #if TCP_BUILTIN_CLIENT_PTHREAD
-  /* Start a transmit thread */
-  if (tm->client_thread_handle == 0)
+  start_tx_pthread ();
+#endif
+
+  vlib_worker_thread_barrier_sync (vm);
+  vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
+  vlib_worker_thread_barrier_release (vm);
+
+  if (tm->test_client_attached == 0)
     {
-      int rv = pthread_create (&tm->client_thread_handle,
-                              NULL /*attr */ ,
-                              tclient_thread_fn, 0);
-      if (rv)
+      if ((error = attach_builtin_test_clients_app (appns_id, appns_flags,
+                                                   appns_secret)))
        {
-         tm->client_thread_handle = 0;
-         return clib_error_return (0, "pthread_create returned %d", rv);
+         vec_free (appns_id);
+         clib_error_report (error);
+         return error;
        }
+      vec_free (appns_id);
     }
-#endif
-  vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
-  if (tm->test_client_attached == 0)
-    attach_builtin_test_clients ();
   tm->test_client_attached = 1;
 
   /* Turn on the builtin client input nodes */
@@ -604,30 +656,23 @@ test_tcp_clients_command_fn (vlib_main_t * vm,
     vlib_node_set_state (vlib_mains[i], builtin_client_node.index,
                         VLIB_NODE_STATE_POLLING);
 
-  tm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index;
-
-  /* Fire off connect requests */
-  for (i = 0; i < n_clients; i++)
+  if (preallocate_sessions)
     {
-      vl_api_connect_uri_t _cmp, *cmp = &_cmp;
-      void vl_api_connect_uri_t_handler (vl_api_connect_uri_t * cmp);
-
-      memset (cmp, 0, sizeof (*cmp));
-
-      cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI);
-      cmp->client_index = tm->my_client_index;
-      cmp->context = ntohl (0xfeedface);
-      memcpy (cmp->uri, uri, strlen ((char *) uri) + 1);
-
-      vl_api_connect_uri_t_handler (cmp);
-      /* Crude pacing for call setups, 100k/sec  */
-      vlib_process_suspend (vm, 10e-6);
+      session_t *sp __attribute__ ((unused));
+      for (i = 0; i < n_clients; i++)
+       pool_get (tm->sessions, sp);
+      for (i = 0; i < n_clients; i++)
+       pool_put_index (tm->sessions, i);
     }
 
+  /* Fire off connect requests */
+  time_before_connects = vlib_time_now (vm);
+  if ((error = clients_connect (vm, uri, n_clients)))
+    return error;
+
   /* Park until the sessions come up, or ten seconds elapse... */
-  vlib_process_wait_for_event_or_clock (vm, 10.0 /* timeout, seconds */ );
+  vlib_process_wait_for_event_or_clock (vm, syn_timeout);
   event_type = vlib_process_get_events (vm, &event_data);
-
   switch (event_type)
     {
     case ~0:
@@ -636,6 +681,16 @@ test_tcp_clients_command_fn (vlib_main_t * vm,
       goto cleanup;
 
     case 1:
+      delta = vlib_time_now (vm) - time_before_connects;
+
+      if (delta != 0.0)
+       {
+         vlib_cli_output
+           (vm, "%d three-way handshakes in %.2f seconds, %.2f/sec",
+            n_clients, delta, ((f64) n_clients) / delta);
+       }
+
+      tm->test_start_time = vlib_time_now (tm->vlib_main);
       vlib_cli_output (vm, "Test started at %.6f", tm->test_start_time);
       break;
 
@@ -645,9 +700,8 @@ test_tcp_clients_command_fn (vlib_main_t * vm,
     }
 
   /* Now wait for the sessions to finish... */
-  vlib_process_wait_for_event_or_clock (vm, cli_timeout);
+  vlib_process_wait_for_event_or_clock (vm, test_timeout);
   event_type = vlib_process_get_events (vm, &event_data);
-
   switch (event_type)
     {
     case ~0:
@@ -656,6 +710,7 @@ test_tcp_clients_command_fn (vlib_main_t * vm,
       goto cleanup;
 
     case 2:
+      tm->test_end_time = vlib_time_now (vm);
       vlib_cli_output (vm, "Test finished at %.6f", tm->test_end_time);
       break;
 
@@ -668,23 +723,45 @@ test_tcp_clients_command_fn (vlib_main_t * vm,
 
   if (delta != 0.0)
     {
+      total_bytes = (tm->no_return ? tm->tx_total : tm->rx_total);
+      transfer_type = tm->no_return ? "half-duplex" : "full-duplex";
       vlib_cli_output (vm,
                       "%lld bytes (%lld mbytes, %lld gbytes) in %.2f seconds",
-                      tm->rx_total, tm->rx_total / (1ULL << 20),
-                      tm->rx_total / (1ULL << 30), delta);
-      vlib_cli_output (vm, "%.2f bytes/second full-duplex",
-                      ((f64) tm->rx_total) / (delta));
-      vlib_cli_output (vm, "%.4f gbit/second full-duplex",
-                      (((f64) tm->rx_total * 8.0) / delta / 1e9));
+                      total_bytes, total_bytes / (1ULL << 20),
+                      total_bytes / (1ULL << 30), delta);
+      vlib_cli_output (vm, "%.2f bytes/second %s",
+                      ((f64) total_bytes) / (delta), transfer_type);
+      vlib_cli_output (vm, "%.4f gbit/second %s",
+                      (((f64) total_bytes * 8.0) / delta / 1e9),
+                      transfer_type);
     }
   else
     vlib_cli_output (vm, "zero delta-t?");
 
 cleanup:
-  pool_free (tm->sessions);
+  tm->run_test = 0;
   for (i = 0; i < vec_len (tm->connection_index_by_thread); i++)
-    vec_reset_length (tm->connection_index_by_thread[i]);
+    {
+      vec_reset_length (tm->connection_index_by_thread[i]);
+      vec_reset_length (tm->connections_this_batch_by_thread[i]);
+    }
 
+  pool_free (tm->sessions);
+
+  /* Detach the application, so we can use different fifo sizes next time */
+  if (tm->test_client_attached)
+    {
+      vnet_app_detach_args_t _da, *da = &_da;
+      int rv;
+
+      da->app_index = tm->app_index;
+
+      rv = vnet_application_detach (da);
+      if (rv)
+       vlib_cli_output (vm, "WARNING: app detach failed...");
+      tm->test_client_attached = 0;
+      tm->app_index = ~0;
+    }
   return 0;
 }
 
@@ -692,9 +769,13 @@ cleanup:
 VLIB_CLI_COMMAND (test_clients_command, static) =
 {
   .path = "test tcp clients",
-  .short_help = "test tcp clients [nclients %d]"
-  "[iterations %d] [bytes %d] [uri tcp://6.0.1.1/1234]",
+  .short_help = "test tcp clients [nclients %d] [[m|g]bytes <bytes>] "
+      "[test-timeout <time>][syn-timeout <time>][no-return][fifo-size <size>]"
+      "[private-segment-count <count>][private-segment-size <bytes>[m|g]]"
+      "[preallocate-fifos][preallocate-sessions][client-batch <batch-size>]"
+      "[uri <tcp://ip/port>]",
   .function = test_tcp_clients_command_fn,
+  .is_mp_safe = 1,
 };
 /* *INDENT-ON* */