session: add support for application namespacing
[vpp.git] / src / vnet / session / session_node.c
index 8d703b0..d2291fa 100644 (file)
@@ -75,20 +75,25 @@ always_inline void
 session_tx_fifo_chain_tail (session_manager_main_t * smm, vlib_main_t * vm,
                            u8 thread_index, svm_fifo_t * fifo,
                            vlib_buffer_t * b0, u32 bi0, u8 n_bufs_per_seg,
-                           u32 * left_to_snd0, u16 * n_bufs, u32 * rx_offset,
-                           u16 deq_per_buf, u8 peek_data)
+                           u32 left_from_seg, u32 * left_to_snd0,
+                           u16 * n_bufs, u32 * tx_offset, u16 deq_per_buf,
+                           u8 peek_data)
 {
   vlib_buffer_t *chain_b0, *prev_b0;
-  u32 chain_bi0;
+  u32 chain_bi0, to_deq;
   u16 len_to_deq0, n_bytes_read;
   u8 *data0, j;
 
+  b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+  b0->total_length_not_including_first_buffer = 0;
+
   chain_bi0 = bi0;
   chain_b0 = b0;
+  to_deq = left_from_seg;
   for (j = 1; j < n_bufs_per_seg; j++)
     {
       prev_b0 = chain_b0;
-      len_to_deq0 = clib_min (*left_to_snd0, deq_per_buf);
+      len_to_deq0 = clib_min (to_deq, deq_per_buf);
 
       *n_bufs -= 1;
       chain_bi0 = smm->tx_buffers[thread_index][*n_bufs];
@@ -99,8 +104,8 @@ session_tx_fifo_chain_tail (session_manager_main_t * smm, vlib_main_t * vm,
       data0 = vlib_buffer_get_current (chain_b0);
       if (peek_data)
        {
-         n_bytes_read = svm_fifo_peek (fifo, *rx_offset, len_to_deq0, data0);
-         *rx_offset += n_bytes_read;
+         n_bytes_read = svm_fifo_peek (fifo, *tx_offset, len_to_deq0, data0);
+         *tx_offset += n_bytes_read;
        }
       else
        {
@@ -117,10 +122,13 @@ session_tx_fifo_chain_tail (session_manager_main_t * smm, vlib_main_t * vm,
       /* update current buffer */
       chain_b0->next_buffer = 0;
 
-      *left_to_snd0 -= n_bytes_read;
-      if (*left_to_snd0 == 0)
+      to_deq -= n_bytes_read;
+      if (to_deq == 0)
        break;
     }
+  ASSERT (to_deq == 0
+         && b0->total_length_not_including_first_buffer == left_from_seg);
+  *left_to_snd0 -= left_from_seg;
 }
 
 always_inline int
@@ -132,16 +140,16 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
 {
   u32 n_trace = vlib_get_trace_count (vm, node);
   u32 left_to_snd0, max_len_to_snd0, len_to_deq0, snd_space0;
-  u32 n_bufs_per_evt, n_frames_per_evt;
+  u32 n_bufs_per_evt, n_frames_per_evt, n_bufs_per_frame;
   transport_connection_t *tc0;
   transport_proto_vft_t *transport_vft;
   u32 next_index, next0, *to_next, n_left_to_next, bi0;
   vlib_buffer_t *b0;
-  u32 rx_offset = 0, max_dequeue0, n_bytes_per_seg;
+  u32 tx_offset = 0, max_dequeue0, n_bytes_per_seg, left_for_seg;
   u16 snd_mss0, n_bufs_per_seg, n_bufs;
   u8 *data0;
   int i, n_bytes_read;
-  u32 n_bytes_per_buf, deq_per_buf;
+  u32 n_bytes_per_buf, deq_per_buf, deq_per_first_buf;
   u32 buffers_allocated, buffers_allocated_this_call;
 
   next_index = next0 = session_type_to_next[s0->session_type];
@@ -160,15 +168,19 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
       return 0;
     }
 
+  /* Check how much we can pull. */
+  max_dequeue0 = svm_fifo_max_dequeue (s0->server_tx_fifo);
+
   if (peek_data)
     {
-      /* Offset in rx fifo from where to peek data  */
-      rx_offset = transport_vft->tx_fifo_offset (tc0);
+      /* Offset in tx fifo from where to peek data */
+      tx_offset = transport_vft->tx_fifo_offset (tc0);
+      if (PREDICT_FALSE (tx_offset >= max_dequeue0))
+       max_dequeue0 = 0;
+      else
+       max_dequeue0 -= tx_offset;
     }
 
-  /* Check how much we can pull. If buffering, subtract the offset */
-  max_dequeue0 = svm_fifo_max_dequeue (s0->server_tx_fifo) - rx_offset;
-
   /* Nothing to read return */
   if (max_dequeue0 == 0)
     {
@@ -186,57 +198,62 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
     }
   else
     {
+      /* Expectation is that snd_space0 is already a multiple of snd_mss */
       max_len_to_snd0 = snd_space0;
     }
 
   n_bytes_per_buf = vlib_buffer_free_list_buffer_size
     (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+  ASSERT (n_bytes_per_buf > MAX_HDRS_LEN);
   n_bytes_per_seg = MAX_HDRS_LEN + snd_mss0;
   n_bufs_per_seg = ceil ((double) n_bytes_per_seg / n_bytes_per_buf);
-  n_bufs_per_evt = (ceil ((double) max_len_to_snd0 / n_bytes_per_seg))
-    * n_bufs_per_seg;
+  n_bufs_per_evt = ceil ((double) max_len_to_snd0 / n_bytes_per_seg);
   n_frames_per_evt = ceil ((double) n_bufs_per_evt / VLIB_FRAME_SIZE);
+  n_bufs_per_frame = n_bufs_per_seg * VLIB_FRAME_SIZE;
 
   deq_per_buf = clib_min (snd_mss0, n_bytes_per_buf);
+  deq_per_first_buf = clib_min (snd_mss0, n_bytes_per_buf - MAX_HDRS_LEN);
 
   n_bufs = vec_len (smm->tx_buffers[thread_index]);
   left_to_snd0 = max_len_to_snd0;
   for (i = 0; i < n_frames_per_evt; i++)
     {
       /* Make sure we have at least one full frame of buffers ready */
-      if (PREDICT_FALSE (n_bufs < VLIB_FRAME_SIZE))
+      if (PREDICT_FALSE (n_bufs < n_bufs_per_frame))
        {
          vec_validate (smm->tx_buffers[thread_index],
-                       n_bufs + 2 * VLIB_FRAME_SIZE - 1);
-
+                       n_bufs + n_bufs_per_frame - 1);
          buffers_allocated = 0;
          do
            {
-             buffers_allocated_this_call =
-               vlib_buffer_alloc
-               (vm,
-                &smm->tx_buffers[thread_index][n_bufs + buffers_allocated],
-                2 * VLIB_FRAME_SIZE - buffers_allocated);
+             buffers_allocated_this_call = vlib_buffer_alloc (vm,
+                                                              &smm->tx_buffers
+                                                              [thread_index]
+                                                              [n_bufs +
+                                                               buffers_allocated],
+                                                              n_bufs_per_frame
+                                                              -
+                                                              buffers_allocated);
              buffers_allocated += buffers_allocated_this_call;
            }
          while (buffers_allocated_this_call > 0
-                && ((buffers_allocated + n_bufs < VLIB_FRAME_SIZE)));
+                && ((buffers_allocated + n_bufs < n_bufs_per_frame)));
 
          n_bufs += buffers_allocated;
-
          _vec_len (smm->tx_buffers[thread_index]) = n_bufs;
 
-         if (PREDICT_FALSE (n_bufs < VLIB_FRAME_SIZE))
+         if (PREDICT_FALSE (n_bufs < n_bufs_per_frame))
            {
              vec_add1 (smm->pending_event_vector[thread_index], *e0);
              return -1;
            }
+         ASSERT (n_bufs >= n_bufs_per_frame);
        }
       /* Allow enqueuing of a new event */
       svm_fifo_unset_event (s0->server_tx_fifo);
 
       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-      while (left_to_snd0 && n_left_to_next >= n_bufs_per_seg)
+      while (left_to_snd0 && n_left_to_next)
        {
          /*
           * Handle first buffer in chain separately
@@ -245,7 +262,6 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
          /* Get free buffer */
          ASSERT (n_bufs >= 1);
          bi0 = smm->tx_buffers[thread_index][--n_bufs];
-         ASSERT (bi0);
          _vec_len (smm->tx_buffers[thread_index]) = n_bufs;
 
          /* usual speculation, or the enqueue_x1 macro will barf */
@@ -255,31 +271,30 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
 
          b0 = vlib_get_buffer (vm, bi0);
          b0->error = 0;
-         b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID
-           | VNET_BUFFER_F_LOCALLY_ORIGINATED;
+         b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
          b0->current_data = 0;
          b0->total_length_not_including_first_buffer = 0;
 
-         len_to_deq0 = clib_min (left_to_snd0, deq_per_buf);
-
+         len_to_deq0 = clib_min (left_to_snd0, deq_per_first_buf);
          data0 = vlib_buffer_make_headroom (b0, MAX_HDRS_LEN);
          if (peek_data)
            {
-             n_bytes_read = svm_fifo_peek (s0->server_tx_fifo, rx_offset,
+             n_bytes_read = svm_fifo_peek (s0->server_tx_fifo, tx_offset,
                                            len_to_deq0, data0);
+             if (n_bytes_read <= 0)
+               goto dequeue_fail;
              /* Keep track of progress locally, transport is also supposed to
               * increment it independently when pushing the header */
-             rx_offset += n_bytes_read;
+             tx_offset += n_bytes_read;
            }
          else
            {
              n_bytes_read = svm_fifo_dequeue_nowait (s0->server_tx_fifo,
                                                      len_to_deq0, data0);
+             if (n_bytes_read <= 0)
+               goto dequeue_fail;
            }
 
-         if (n_bytes_read <= 0)
-           goto dequeue_fail;
-
          b0->current_length = n_bytes_read;
 
          left_to_snd0 -= n_bytes_read;
@@ -288,12 +303,15 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
          /*
           * Fill in the remaining buffers in the chain, if any
           */
-         if (PREDICT_FALSE (n_bufs_per_seg > 1))
-           session_tx_fifo_chain_tail (smm, vm, thread_index,
-                                       s0->server_tx_fifo, b0, bi0,
-                                       n_bufs_per_seg, &left_to_snd0,
-                                       &n_bufs, &rx_offset, deq_per_buf,
-                                       peek_data);
+         if (PREDICT_FALSE (n_bufs_per_seg > 1 && left_to_snd0))
+           {
+             left_for_seg = clib_min (snd_mss0 - n_bytes_read, left_to_snd0);
+             session_tx_fifo_chain_tail (smm, vm, thread_index,
+                                         s0->server_tx_fifo, b0, bi0,
+                                         n_bufs_per_seg, left_for_seg,
+                                         &left_to_snd0, &n_bufs, &tx_offset,
+                                         deq_per_buf, peek_data);
+           }
 
          /* Ask transport to push header after current_length and
           * total_length_not_including_first_buffer are updated */
@@ -308,8 +326,10 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
          }));
          /* *INDENT-ON* */
 
-
          VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+         if (VLIB_BUFFER_TRACE_TRAJECTORY)
+           b0->pre_data[1] = 3;
+
          if (PREDICT_FALSE (n_trace > 0))
            {
              session_queue_trace_t *t0;
@@ -381,7 +401,6 @@ session_tx_fifo_dequeue_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node,
 always_inline stream_session_t *
 session_event_get_session (session_fifo_event_t * e, u8 thread_index)
 {
-  ASSERT (e->fifo->master_thread_index == thread_index);
   return stream_session_get_if_valid (e->fifo->master_session_index,
                                      thread_index);
 }
@@ -415,7 +434,7 @@ dump_thread_0_event_queue (void)
          break;
 
        case FIFO_EVENT_DISCONNECT:
-         s0 = stream_session_get_from_handle (e->session_handle);
+         s0 = session_get_from_handle (e->session_handle);
          fformat (stdout, "[%04d] disconnect session %d\n", i,
                   s0->session_index);
          break;
@@ -458,7 +477,7 @@ session_node_cmp_event (session_fifo_event_t * e, svm_fifo_t * f)
     case FIFO_EVENT_DISCONNECT:
       break;
     case FIFO_EVENT_RPC:
-      s = stream_session_get_from_handle (e->session_handle);
+      s = session_get_from_handle (e->session_handle);
       if (!s)
        {
          clib_warning ("session has event but doesn't exist!");
@@ -602,13 +621,14 @@ skip_dequeue:
        case FIFO_EVENT_APP_TX:
          s0 = session_event_get_session (e0, my_thread_index);
 
-         if (CLIB_DEBUG && !s0)
+         if (PREDICT_FALSE (!s0))
            {
              clib_warning ("It's dead, Jim!");
              continue;
            }
-
-         if (PREDICT_FALSE (s0->session_state == SESSION_STATE_CLOSED))
+         /* Can retransmit for closed sessions but can't do anything if
+          * the session state is still below SESSION_STATE_READY */
+         if (PREDICT_FALSE (s0->session_state < SESSION_STATE_READY))
            continue;
          /* Spray packets in per session type frames, since they go to
           * different nodes */
@@ -624,11 +644,13 @@ skip_dequeue:
            }
          break;
        case FIFO_EVENT_DISCONNECT:
-         s0 = stream_session_get_from_handle (e0->session_handle);
+         s0 = session_get_from_handle (e0->session_handle);
          stream_session_disconnect (s0);
          break;
        case FIFO_EVENT_BUILTIN_RX:
          s0 = session_event_get_session (e0, my_thread_index);
+         if (PREDICT_FALSE (!s0))
+           continue;
          svm_fifo_unset_event (s0->server_rx_fifo);
          app = application_get (s0->app_index);
          app->cb_fns.builtin_server_rx_callback (s0);