rdma: ring db only once per vector on tx
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index e9d0704..7103227 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -140,6 +140,42 @@ vlib_buffer_copy_indices (u32 * dst, u32 * src, u32 n_indices)
     }
 }
 
+always_inline void
+vlib_buffer_copy_indices_from_ring (u32 * dst, u32 * ring, u32 start,
+                                   u32 ring_size, u32 n_buffers)
+{
+  ASSERT (n_buffers <= ring_size);
+
+  if (PREDICT_TRUE (start + n_buffers <= ring_size))
+    {
+      vlib_buffer_copy_indices (dst, ring + start, n_buffers);
+    }
+  else
+    {
+      u32 n = ring_size - start;
+      vlib_buffer_copy_indices (dst, ring + start, n);
+      vlib_buffer_copy_indices (dst + n, ring, n_buffers - n);
+    }
+}
+
+always_inline void
+vlib_buffer_copy_indices_to_ring (u32 * ring, u32 * src, u32 start,
+                                 u32 ring_size, u32 n_buffers)
+{
+  ASSERT (n_buffers <= ring_size);
+
+  if (PREDICT_TRUE (start + n_buffers <= ring_size))
+    {
+      vlib_buffer_copy_indices (ring + start, src, n_buffers);
+    }
+  else
+    {
+      u32 n = ring_size - start;
+      vlib_buffer_copy_indices (ring + start, src, n);
+      vlib_buffer_copy_indices (ring, src + n, n_buffers - n);
+    }
+}
+
 STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, template_end, 64);
 static_always_inline void
 vlib_buffer_copy_template (vlib_buffer_t * b, vlib_buffer_t * bt)
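
The two helpers above reduce a copy out of (or into) a circular ring to at
most two linear vlib_buffer_copy_indices calls: one run from `start` to the
physical end of the ring, and one from slot 0 for the wrapped remainder. The
ASSERT guards the only unrepresentable case, a request larger than the ring
itself. A minimal caller sketch, assuming a hypothetical driver queue `txq`
whose `bufs` array of `size` entries holds buffer indices (neither name is
part of this header):

    /* sketch only: free n completed tx buffers starting at ring slot `slot` */
    u32 to_free[VLIB_FRAME_SIZE];
    vlib_buffer_copy_indices_from_ring (to_free, txq->bufs, slot, txq->size, n);
    vlib_buffer_free (vm, to_free, n);

The vlib_buffer_free_from_ring helpers further down in this header fold the
same split directly into the free path.
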
@@ -474,6 +510,13 @@ vlib_buffer_is_known (vlib_main_t * vm, u32 buffer_index)
 u8 *vlib_validate_buffer (vlib_main_t * vm, u32 buffer_index,
                          uword follow_chain);
 
+u8 *vlib_validate_buffers (vlib_main_t * vm,
+                          u32 * buffers,
+                          uword next_buffer_stride,
+                          uword n_buffers,
+                          vlib_buffer_known_state_t known_state,
+                          uword follow_buffer_next);
+
 static_always_inline vlib_buffer_pool_t *
 vlib_get_buffer_pool (vlib_main_t * vm, u8 buffer_pool_index)
 {
@@ -491,19 +534,19 @@ vlib_buffer_pool_get (vlib_main_t * vm, u8 buffer_pool_index, u32 * buffers,
   ASSERT (bp->buffers);
 
   clib_spinlock_lock (&bp->lock);
-  len = vec_len (bp->buffers);
+  len = bp->n_avail;
   if (PREDICT_TRUE (n_buffers < len))
     {
       len -= n_buffers;
       vlib_buffer_copy_indices (buffers, bp->buffers + len, n_buffers);
-      _vec_len (bp->buffers) = len;
+      bp->n_avail = len;
       clib_spinlock_unlock (&bp->lock);
       return n_buffers;
     }
   else
     {
       vlib_buffer_copy_indices (buffers, bp->buffers, len);
-      _vec_len (bp->buffers) = 0;
+      bp->n_avail = 0;
       clib_spinlock_unlock (&bp->lock);
       return len;
     }
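
vlib_buffer_pool_get now works against a preallocated index array with an
explicit bp->n_avail counter instead of a vector, so the fast path no longer
rewrites a vector header through _vec_len. The contract is unchanged: the
return value is the number of indices actually written, which is smaller
than n_buffers once the global pool runs dry. A caller-side sketch:

    /* sketch only: handle a partial return from the global pool */
    u32 bufs[64];
    u32 n = vlib_buffer_pool_get (vm, buffer_pool_index, bufs, 64);
    if (n < 64)
      ;   /* pool exhausted: drop, retry, or proceed with n buffers */
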
@@ -533,14 +576,26 @@ vlib_buffer_alloc_from_pool (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
 
   dst = buffers;
   n_left = n_buffers;
-  len = vec_len (bpt->cached_buffers);
+  len = bpt->n_cached;
 
   /* per-thread cache contains enough buffers */
   if (len >= n_buffers)
     {
       src = bpt->cached_buffers + len - n_buffers;
       vlib_buffer_copy_indices (dst, src, n_buffers);
-      _vec_len (bpt->cached_buffers) -= n_buffers;
+      bpt->n_cached -= n_buffers;
+
+      if (CLIB_DEBUG > 0)
+       vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
+                                        VLIB_BUFFER_KNOWN_FREE);
+      return n_buffers;
+    }
+
+  /* alloc bigger than cache - take buffers directly from main pool */
+  if (n_buffers >= VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ)
+    {
+      n_buffers = vlib_buffer_pool_get (vm, buffer_pool_index, buffers,
+                                       n_buffers);
 
       if (CLIB_DEBUG > 0)
        vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
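
vlib_buffer_alloc_from_pool now splits into three cases: the per-thread
cache alone covers the request; the request is at least
VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ and bypasses the cache entirely, so one
oversized allocation cannot evict every cached index; otherwise the cache is
drained and refilled from the global pool (next hunk). A worked example,
assuming a 512-entry cache (the value this series uses for
VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ):

    /* assuming VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ == 512:
       n_buffers = 600                  -> bypass, one vlib_buffer_pool_get
       n_buffers = 256, n_cached = 300  -> cache hit, n_cached drops to 44
       n_buffers = 256, n_cached = 100  -> drain 100, refill, serve 156 more */
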
@@ -552,23 +607,22 @@ vlib_buffer_alloc_from_pool (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
   if (len)
     {
       vlib_buffer_copy_indices (dst, bpt->cached_buffers, len);
-      _vec_len (bpt->cached_buffers) = 0;
+      bpt->n_cached = 0;
       dst += len;
       n_left -= len;
     }
 
   len = round_pow2 (n_left, 32);
-  vec_validate_aligned (bpt->cached_buffers, len - 1, CLIB_CACHE_LINE_BYTES);
   len = vlib_buffer_pool_get (vm, buffer_pool_index, bpt->cached_buffers,
                              len);
-  _vec_len (bpt->cached_buffers) = len;
+  bpt->n_cached = len;
 
   if (len)
     {
       u32 n_copy = clib_min (len, n_left);
       src = bpt->cached_buffers + len - n_copy;
       vlib_buffer_copy_indices (dst, src, n_copy);
-      _vec_len (bpt->cached_buffers) -= n_copy;
+      bpt->n_cached -= n_copy;
       n_left -= n_copy;
     }
 
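
The refill no longer grows a vector with vec_validate_aligned;
cached_buffers is now a fixed-size array, which is safe here because this
path is only reached with n_left below the cache size, so the rounded
request still fits (provided the cache size is a multiple of 32).
round_pow2 pads the shortfall up to a multiple of 32 so the global pool is
visited in bursts and the surplus primes the cache:

    /* round_pow2 (n_left, 32) rounds up to the next multiple of 32:
       n_left = 156 -> ask the pool for 160; if all 160 arrive, the caller
       receives 156 and n_cached is left at 4 */
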
@@ -681,26 +735,33 @@ vlib_buffer_pool_put (vlib_main_t * vm, u8 buffer_pool_index,
                      u32 * buffers, u32 n_buffers)
 {
   vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
-  vlib_buffer_pool_thread_t *bpt =
-    vec_elt_at_index (bp->threads, vm->thread_index);
+  vlib_buffer_pool_thread_t *bpt = vec_elt_at_index (bp->threads,
+                                                    vm->thread_index);
+  u32 n_cached, n_empty;
 
   if (CLIB_DEBUG > 0)
     vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
                                     VLIB_BUFFER_KNOWN_ALLOCATED);
 
-  vec_add_aligned (bpt->cached_buffers, buffers, n_buffers,
-                  CLIB_CACHE_LINE_BYTES);
-
-  if (vec_len (bpt->cached_buffers) > 4 * VLIB_FRAME_SIZE)
+  n_cached = bpt->n_cached;
+  n_empty = VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ - n_cached;
+  if (n_buffers <= n_empty)
     {
-      clib_spinlock_lock (&bp->lock);
-      /* keep last stored buffers, as they are more likely hot in the cache */
-      vec_add_aligned (bp->buffers, bpt->cached_buffers, VLIB_FRAME_SIZE,
-                      CLIB_CACHE_LINE_BYTES);
-      vec_delete (bpt->cached_buffers, VLIB_FRAME_SIZE, 0);
-      bpt->n_alloc -= VLIB_FRAME_SIZE;
-      clib_spinlock_unlock (&bp->lock);
+      vlib_buffer_copy_indices (bpt->cached_buffers + n_cached,
+                               buffers, n_buffers);
+      bpt->n_cached = n_cached + n_buffers;
+      return;
     }
+
+  vlib_buffer_copy_indices (bpt->cached_buffers + n_cached,
+                           buffers + n_buffers - n_empty, n_empty);
+  bpt->n_cached = VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ;
+
+  clib_spinlock_lock (&bp->lock);
+  vlib_buffer_copy_indices (bp->buffers + bp->n_avail, buffers,
+                           n_buffers - n_empty);
+  bp->n_avail += n_buffers - n_empty;
+  clib_spinlock_unlock (&bp->lock);
 }
 
 static_always_inline void
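
vlib_buffer_pool_put previously appended everything to a growable per-thread
vector and occasionally flushed VLIB_FRAME_SIZE of the oldest entries under
the lock. The rewrite bounds the cache instead: whatever fits in the
n_empty free slots is cached without locking, and the spinlock is taken only
for the overflow. Note which half goes where: the tail of the incoming
batch, the most recently used and likeliest cache-warm indices, stays in the
thread cache, while the head of the batch returns to the global pool. A
worked example, again assuming a 512-entry cache:

    /* n_cached = 500, freeing n_buffers = 64 -> n_empty = 12:
       buffers[52..63] land in bpt->cached_buffers (cache now full),
       buffers[0..51]  go to bp->buffers under bp->lock */
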
@@ -838,7 +899,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
          n_queue = 0;
        }
 
-      if (flags & VLIB_BUFFER_NEXT_PRESENT)
+      if (maybe_next && (flags & VLIB_BUFFER_NEXT_PRESENT))
        {
          bi = next;
          goto next_in_chain;
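
maybe_next is a compile-time constant argument of vlib_buffer_free_inline,
so guarding the VLIB_BUFFER_NEXT_PRESENT test with it lets the compiler drop
the whole chain-following branch in the no-next specialization:

    /* Paraphrased from the entry points defined later in this header:
       vlib_buffer_free (...)         -> vlib_buffer_free_inline (..., maybe_next = 1)
       vlib_buffer_free_no_next (...) -> vlib_buffer_free_inline (..., maybe_next = 0)
       With maybe_next = 0 the "goto next_in_chain" branch above is dead code. */
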
@@ -954,13 +1015,18 @@ vlib_buffer_free_from_ring_no_next (vlib_main_t * vm, u32 * ring, u32 start,
 int vlib_buffer_add_data (vlib_main_t * vm, u32 * buffer_index, void *data,
                          u32 n_data_bytes);
 
+/* Define vlib_buffer and vnet_buffer flags bits preserved for copy/clone */
+#define VLIB_BUFFER_COPY_CLONE_FLAGS_MASK                      \
+  (VLIB_BUFFER_NEXT_PRESENT | VLIB_BUFFER_TOTAL_LENGTH_VALID | \
+   VLIB_BUFFER_IS_TRACED | ~VLIB_BUFFER_FLAGS_ALL)
+
 /* duplicate all buffers in chain */
 always_inline vlib_buffer_t *
 vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b)
 {
   vlib_buffer_t *s, *d, *fd;
   uword n_alloc, n_buffers = 1;
-  u32 flag_mask = VLIB_BUFFER_NEXT_PRESENT | VLIB_BUFFER_TOTAL_LENGTH_VALID;
+  u32 flag_mask = VLIB_BUFFER_COPY_CLONE_FLAGS_MASK;
   int i;
 
   s = b;
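
VLIB_BUFFER_COPY_CLONE_FLAGS_MASK centralizes what used to be an ad-hoc mask
local to vlib_buffer_copy: keep the three vlib-level bits that stay
meaningful on a copy or clone (NEXT_PRESENT, TOTAL_LENGTH_VALID, IS_TRACED)
plus every bit outside VLIB_BUFFER_FLAGS_ALL, i.e. the vnet/user flag space.
Everything else inside VLIB_BUFFER_FLAGS_ALL is dropped, which is why the
clone hunk below can delete its explicit clearing of
VLIB_BUFFER_EXT_HDR_VALID. A sanity sketch, assuming EXT_HDR_VALID is one of
the bits covered by VLIB_BUFFER_FLAGS_ALL, as that deletion implies:

    /* sketch, not in the tree */
    STATIC_ASSERT ((VLIB_BUFFER_EXT_HDR_VALID
                    & VLIB_BUFFER_COPY_CLONE_FLAGS_MASK) == 0,
                   "copies must not inherit EXT_HDR_VALID");
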
@@ -987,6 +1053,7 @@ vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b)
   d->current_data = s->current_data;
   d->current_length = s->current_length;
   d->flags = s->flags & flag_mask;
+  d->trace_handle = s->trace_handle;
   d->total_length_not_including_first_buffer =
     s->total_length_not_including_first_buffer;
   clib_memcpy_fast (d->opaque, s->opaque, sizeof (s->opaque));
@@ -1131,8 +1198,9 @@ vlib_buffer_clone_256 (vlib_main_t * vm, u32 src_buffer, u32 * buffers,
          d->total_length_not_including_first_buffer +=
            s->total_length_not_including_first_buffer;
        }
-      d->flags = s->flags | VLIB_BUFFER_NEXT_PRESENT;
-      d->flags &= ~VLIB_BUFFER_EXT_HDR_VALID;
+      d->flags = (s->flags & VLIB_BUFFER_COPY_CLONE_FLAGS_MASK) |
+       VLIB_BUFFER_NEXT_PRESENT;
+      d->trace_handle = s->trace_handle;
       clib_memcpy_fast (d->opaque, s->opaque, sizeof (s->opaque));
       clib_memcpy_fast (d->opaque2, s->opaque2, sizeof (s->opaque2));
       clib_memcpy_fast (vlib_buffer_get_current (d),
@@ -1307,7 +1375,7 @@ vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm,
 void vlib_buffer_chain_validate (vlib_main_t * vm, vlib_buffer_t * first);
 
 format_function_t format_vlib_buffer, format_vlib_buffer_and_data,
-  format_vlib_buffer_contents;
+  format_vlib_buffer_contents, format_vlib_buffer_no_chain;
 
 typedef struct
 {
@@ -1416,13 +1484,19 @@ vlib_buffer_chain_linearize (vlib_main_t * vm, vlib_buffer_t * b)
 
       if (dst_left == 0)
        {
-         if (db != first)
-           db->current_data = 0;
          db->current_length = dp - (u8 *) vlib_buffer_get_current (db);
          ASSERT (db->flags & VLIB_BUFFER_NEXT_PRESENT);
          db = vlib_get_buffer (vm, db->next_buffer);
          dst_left = data_size;
-         dp = db->data;
+         if (db->current_data > 0)
+           {
+             db->current_data = 0;
+           }
+         else
+           {
+             dst_left += -db->current_data;
+           }
+         dp = vlib_buffer_get_current (db);
        }
 
       while (src_left == 0)
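
The rewritten destination-advance step in vlib_buffer_chain_linearize stops
forcing current_data to 0 unconditionally. A positive current_data is still
squashed to reclaim the leading gap, but a negative one (data already begins
in the headroom) is preserved, and the extra bytes are credited to dst_left
so the copy can use them; dp accordingly starts at vlib_buffer_get_current
rather than db->data. A worked example, with data_size assumed to be 2048:

    /* db->current_data ==  64 -> reset to 0:  dst_left = 2048, dp = db->data
       db->current_data == -64 -> preserved:   dst_left = 2112, dp = db->data - 64 */
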