session: first approximation implementation of tls
[vpp.git] / src / vnet / session / session_node.c
index cd52742..9cd0ef1 100644 (file)
 #include <math.h>
 #include <vlib/vlib.h>
 #include <vnet/vnet.h>
-#include <vnet/tcp/tcp.h>
 #include <vppinfra/elog.h>
+#include <vnet/session/transport.h>
 #include <vnet/session/application.h>
 #include <vnet/session/session_debug.h>
-#include <vlibmemory/unix_shared_memory_queue.h>
+#include <svm/queue.h>
 
 vlib_node_registration_t session_queue_node;
 
@@ -64,13 +64,6 @@ static char *session_queue_error_strings[] = {
 #undef _
 };
 
-static u32 session_type_to_next[] = {
-  SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT,
-  SESSION_QUEUE_NEXT_IP4_LOOKUP,
-  SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT,
-  SESSION_QUEUE_NEXT_IP6_LOOKUP,
-};
-
 always_inline void
 session_tx_fifo_chain_tail (session_manager_main_t * smm, vlib_main_t * vm,
                            u8 thread_index, svm_fifo_t * fifo,
@@ -143,6 +136,7 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
   u32 n_bufs_per_evt, n_frames_per_evt, n_bufs_per_frame;
   transport_connection_t *tc0;
   transport_proto_vft_t *transport_vft;
+  transport_proto_t tp;
   u32 next_index, next0, *to_next, n_left_to_next, bi0;
   vlib_buffer_t *b0;
   u32 tx_offset = 0, max_dequeue0, n_bytes_per_seg, left_for_seg;
@@ -152,9 +146,10 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
   u32 n_bytes_per_buf, deq_per_buf, deq_per_first_buf;
   u32 buffers_allocated, buffers_allocated_this_call;
 
-  next_index = next0 = session_type_to_next[s0->session_type];
+  next_index = next0 = smm->session_type_to_next[s0->session_type];
 
-  transport_vft = session_get_transport_vft (s0->session_type);
+  tp = session_get_transport_proto (s0);
+  transport_vft = transport_protocol_get_vft (tp);
   tc0 = transport_vft->get_connection (s0->connection_index, thread_index);
 
   /* Make sure we have space to send and there's something to dequeue */
@@ -168,21 +163,25 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
       return 0;
     }
 
+  /* Allow enqueuing of a new event */
+  svm_fifo_unset_event (s0->server_tx_fifo);
+
+  /* Check how much we can pull. */
+  max_dequeue0 = svm_fifo_max_dequeue (s0->server_tx_fifo);
+
   if (peek_data)
     {
-      /* Offset in rx fifo from where to peek data  */
+      /* Offset in rx fifo from where to peek data */
       tx_offset = transport_vft->tx_fifo_offset (tc0);
+      if (PREDICT_FALSE (tx_offset >= max_dequeue0))
+       max_dequeue0 = 0;
+      else
+       max_dequeue0 -= tx_offset;
     }
 
-  /* Check how much we can pull. If buffering, subtract the offset */
-  max_dequeue0 = svm_fifo_max_dequeue (s0->server_tx_fifo) - tx_offset;
-
   /* Nothing to read return */
   if (max_dequeue0 == 0)
-    {
-      svm_fifo_unset_event (s0->server_tx_fifo);
-      return 0;
-    }
+    return 0;
 
   /* Ensure we're not writing more than transport window allows */
   if (max_dequeue0 < snd_space0)
@@ -245,8 +244,6 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
            }
          ASSERT (n_bufs >= n_bufs_per_frame);
        }
-      /* Allow enqueuing of a new event */
-      svm_fifo_unset_event (s0->server_tx_fifo);
 
       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
       while (left_to_snd0 && n_left_to_next)
@@ -267,7 +264,7 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
 
          b0 = vlib_get_buffer (vm, bi0);
          b0->error = 0;
-         b0->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED;
+         b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
          b0->current_data = 0;
          b0->total_length_not_including_first_buffer = 0;
 
@@ -277,6 +274,8 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
            {
              n_bytes_read = svm_fifo_peek (s0->server_tx_fifo, tx_offset,
                                            len_to_deq0, data0);
+             if (n_bytes_read <= 0)
+               goto dequeue_fail;
              /* Keep track of progress locally, transport is also supposed to
               * increment it independently when pushing the header */
              tx_offset += n_bytes_read;
@@ -285,11 +284,10 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
            {
              n_bytes_read = svm_fifo_dequeue_nowait (s0->server_tx_fifo,
                                                      len_to_deq0, data0);
+             if (n_bytes_read <= 0)
+               goto dequeue_fail;
            }
 
-         if (n_bytes_read <= 0)
-           goto dequeue_fail;
-
          b0->current_length = n_bytes_read;
 
          left_to_snd0 -= n_bytes_read;
@@ -314,15 +312,15 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
 
          /* *INDENT-OFF* */
          SESSION_EVT_DBG(SESSION_EVT_DEQ, s0, ({
-             ed->data[0] = e0->event_id;
+             ed->data[0] = e0->event_type;
              ed->data[1] = max_dequeue0;
              ed->data[2] = len_to_deq0;
              ed->data[3] = left_to_snd0;
          }));
          /* *INDENT-ON* */
 
-
          VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+
          if (PREDICT_FALSE (n_trace > 0))
            {
              session_queue_trace_t *t0;
@@ -391,12 +389,24 @@ session_tx_fifo_dequeue_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node,
                                         n_tx_pkts, 0);
 }
 
+int /* tx handler that defers to the app's builtin tx callback and returns its result */
+session_tx_fifo_dequeue_internal (vlib_main_t * vm,
+                                 vlib_node_runtime_t * node,
+                                 session_manager_main_t * smm,
+                                 session_fifo_event_t * e0,
+                                 stream_session_t * s0, u32 thread_index,
+                                 int *n_tx_pkts)
+{ /* only s0 is used; vm, node, smm, e0, thread_index and n_tx_pkts are not read here */
+  application_t *app;
+  app = application_get (s0->opaque); /* NOTE(review): app is taken from s0->opaque and not NULL-checked -- confirm application_get cannot fail here */
+  svm_fifo_unset_event (s0->server_tx_fifo); /* allow enqueuing of a new event before the callback runs */
+  return app->cb_fns.builtin_app_tx_callback (s0); /* the callback's return value is handed back unchanged */
+}
+
 always_inline stream_session_t *
 session_event_get_session (session_fifo_event_t * e, u8 thread_index)
 {
-  ASSERT (e->fifo->master_thread_index == thread_index);
-  return stream_session_get_if_valid (e->fifo->master_session_index,
-                                     thread_index);
+  return session_get_if_valid (e->fifo->master_session_index, thread_index); /* presumably NULL when the session no longer exists; the event handlers test !s0 */
 }
 
 void
@@ -410,7 +420,7 @@ dump_thread_0_event_queue (void)
   int i, index;
   i8 *headp;
 
-  unix_shared_memory_queue_t *q;
+  svm_queue_t *q;
   q = smm->vpp_event_queues[my_thread_index];
 
   index = q->head;
@@ -428,7 +438,7 @@ dump_thread_0_event_queue (void)
          break;
 
        case FIFO_EVENT_DISCONNECT:
-         s0 = stream_session_get_from_handle (e->session_handle);
+         s0 = session_get_from_handle (e->session_handle);
          fformat (stdout, "[%04d] disconnect session %d\n", i,
                   s0->session_index);
          break;
@@ -471,7 +481,7 @@ session_node_cmp_event (session_fifo_event_t * e, svm_fifo_t * f)
     case FIFO_EVENT_DISCONNECT:
       break;
     case FIFO_EVENT_RPC:
-      s = stream_session_get_from_handle (e->session_handle);
+      s = session_get_from_handle (e->session_handle);
       if (!s)
        {
          clib_warning ("session has event but doesn't exist!");
@@ -490,7 +500,7 @@ u8
 session_node_lookup_fifo_event (svm_fifo_t * f, session_fifo_event_t * e)
 {
   session_manager_main_t *smm = vnet_get_session_manager_main ();
-  unix_shared_memory_queue_t *q;
+  svm_queue_t *q;
   session_fifo_event_t *pending_event_vector, *evt;
   int i, index, found = 0;
   i8 *headp;
@@ -509,7 +519,7 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_fifo_event_t * e)
       clib_memcpy (e, headp, q->elsize);
       found = session_node_cmp_event (e, f);
       if (found)
-       break;
+       return 1;
       if (++index == q->maxsize)
        index = 0;
     }
@@ -534,10 +544,10 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
                       vlib_frame_t * frame)
 {
   session_manager_main_t *smm = vnet_get_session_manager_main ();
-  session_fifo_event_t *my_pending_event_vector, *e;
+  session_fifo_event_t *my_pending_event_vector, *pending_disconnects, *e;
   session_fifo_event_t *my_fifo_events;
   u32 n_to_dequeue, n_events;
-  unix_shared_memory_queue_t *q;
+  svm_queue_t *q;
   application_t *app;
   int n_tx_packets = 0;
   u32 my_thread_index = vm->thread_index;
@@ -548,9 +558,9 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
   SESSION_EVT_DBG (SESSION_EVT_POLL_GAP_TRACK, smm, my_thread_index);
 
   /*
-   *  Update TCP time
+   *  Update transport time
    */
-  tcp_update_time (now, my_thread_index);
+  transport_update_time (now, my_thread_index);
 
   /*
    * Get vpp queue events
@@ -564,8 +574,10 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
   /* min number of events we can dequeue without blocking */
   n_to_dequeue = q->cursize;
   my_pending_event_vector = smm->pending_event_vector[my_thread_index];
+  pending_disconnects = smm->pending_disconnects[my_thread_index];
 
-  if (n_to_dequeue == 0 && vec_len (my_pending_event_vector) == 0)
+  if (!n_to_dequeue && !vec_len (my_pending_event_vector)
+      && !vec_len (pending_disconnects))
     return 0;
 
   SESSION_EVT_DBG (SESSION_EVT_DEQ_NODE, 0);
@@ -588,7 +600,7 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
   for (i = 0; i < n_to_dequeue; i++)
     {
       vec_add2 (my_fifo_events, e, 1);
-      unix_shared_memory_queue_sub_raw (q, (u8 *) e);
+      svm_queue_sub_raw (q, (u8 *) e);
     }
 
   /* The other side of the connection is not polling */
@@ -597,9 +609,11 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
   pthread_mutex_unlock (&q->mutex);
 
   vec_append (my_fifo_events, my_pending_event_vector);
+  vec_append (my_fifo_events, smm->pending_disconnects[my_thread_index]);
 
   _vec_len (my_pending_event_vector) = 0;
   smm->pending_event_vector[my_thread_index] = my_pending_event_vector;
+  _vec_len (smm->pending_disconnects[my_thread_index]) = 0;
 
 skip_dequeue:
   n_events = vec_len (my_fifo_events);
@@ -615,7 +629,7 @@ skip_dequeue:
        case FIFO_EVENT_APP_TX:
          s0 = session_event_get_session (e0, my_thread_index);
 
-         if (CLIB_DEBUG && !s0)
+         if (PREDICT_FALSE (!s0))
            {
              clib_warning ("It's dead, Jim!");
              continue;
@@ -623,7 +637,10 @@ skip_dequeue:
          /* Can retransmit for closed sessions but can't do anything if
           * session is not ready or closed */
          if (PREDICT_FALSE (s0->session_state < SESSION_STATE_READY))
-           continue;
+           {
+             vec_add1 (smm->pending_event_vector[my_thread_index], *e0);
+             continue;
+           }
          /* Spray packets in per session type frames, since they go to
           * different nodes */
          rv = (smm->session_tx_fns[s0->session_type]) (vm, node, smm, e0, s0,
@@ -638,14 +655,23 @@ skip_dequeue:
            }
          break;
        case FIFO_EVENT_DISCONNECT:
-         s0 = stream_session_get_from_handle (e0->session_handle);
-         stream_session_disconnect (s0);
+         /* Make sure disconnects run after the pending list is drained */
+         if (!e0->postponed)
+           {
+             e0->postponed = 1;
+             vec_add1 (smm->pending_disconnects[my_thread_index], *e0);
+             continue;
+           }
+         s0 = session_get_from_handle (e0->session_handle);
+         stream_session_disconnect_transport (s0);
          break;
        case FIFO_EVENT_BUILTIN_RX:
          s0 = session_event_get_session (e0, my_thread_index);
+         if (PREDICT_FALSE (!s0))
+           continue;
          svm_fifo_unset_event (s0->server_rx_fifo);
          app = application_get (s0->app_index);
-         app->cb_fns.builtin_server_rx_callback (s0);
+         app->cb_fns.builtin_app_rx_callback (s0);
          break;
        case FIFO_EVENT_RPC:
          fp = e0->rpc_args.fp;
@@ -677,19 +703,29 @@ VLIB_REGISTER_NODE (session_queue_node) =
   .type = VLIB_NODE_TYPE_INPUT,
   .n_errors = ARRAY_LEN (session_queue_error_strings),
   .error_strings = session_queue_error_strings,
-  .n_next_nodes = SESSION_QUEUE_N_NEXT,
   .state = VLIB_NODE_STATE_DISABLED,
-  .next_nodes =
-  {
-      [SESSION_QUEUE_NEXT_DROP] = "error-drop",
-      [SESSION_QUEUE_NEXT_IP4_LOOKUP] = "ip4-lookup",
-      [SESSION_QUEUE_NEXT_IP6_LOOKUP] = "ip6-lookup",
-      [SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT] = "tcp4-output",
-      [SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT] = "tcp6-output",
-  },
 };
 /* *INDENT-ON* */
 
+static clib_error_t * /* main-loop exit hook: disables the session nodes; returns 0 on every path */
+session_queue_exit (vlib_main_t * vm)
+{
+  if (vec_len (vlib_mains) < 2) /* fewer than two vlib mains: presumably no worker threads, so nothing to shut off */
+    return 0;
+
+  /*
+   * Shut off (especially) worker-thread session nodes.
+   * Otherwise, vpp can crash as the main thread unmaps the
+   * API segment.
+   */
+  vlib_worker_thread_barrier_sync (vm); /* the disable below runs between barrier sync and release */
+  session_node_enable_disable (0 /* is_enable */ );
+  vlib_worker_thread_barrier_release (vm);
+  return 0;
+}
+
+VLIB_MAIN_LOOP_EXIT_FUNCTION (session_queue_exit); /* registers session_queue_exit as a main-loop exit function */
+
 /*
  * fd.io coding-style-patch-verification: ON
  *