#include <vppinfra/hash.h>
#include <vppinfra/fifo.h>
+#include <vlib/buffer.h>
+#include <vlib/physmem_funcs.h>
+#include <vlib/main.h>
+#include <vlib/node.h>
/** \file
vlib buffer access methods.
*/
+typedef void (vlib_buffer_enqueue_to_next_fn_t) (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ u32 *buffers, u16 *nexts,
+ uword count);
+typedef void (vlib_buffer_enqueue_to_single_next_fn_t) (
+ vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 next_index,
+ u32 count);
+
+typedef u32 (vlib_buffer_enqueue_to_thread_fn_t) (
+ vlib_main_t *vm, vlib_node_runtime_t *node, u32 frame_queue_index,
+ u32 *buffer_indices, u16 *thread_indices, u32 n_packets,
+ int drop_on_congestion);
+
+typedef u32 (vlib_frame_queue_dequeue_fn_t) (vlib_main_t *vm,
+ vlib_frame_queue_main_t *fqm);
+
+typedef struct
+{
+ vlib_buffer_enqueue_to_next_fn_t *buffer_enqueue_to_next_fn;
+ vlib_buffer_enqueue_to_single_next_fn_t *buffer_enqueue_to_single_next_fn;
+ vlib_buffer_enqueue_to_thread_fn_t *buffer_enqueue_to_thread_fn;
+ vlib_frame_queue_dequeue_fn_t *frame_queue_dequeue_fn;
+} vlib_buffer_func_main_t;
+
+extern vlib_buffer_func_main_t vlib_buffer_func_main;
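+
+/* A minimal usage sketch (hypothetical node code; "bufs", "nexts" and
+ "n_left" stand for the per-frame state a typical node maintains):
+
+ vlib_buffer_func_main_t *bfm = &vlib_buffer_func_main;
+ bfm->buffer_enqueue_to_next_fn (vm, node, bufs, nexts, n_left);
+
+ The indirection lets an arch-optimized variant be installed at startup. */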
+
always_inline void
vlib_buffer_validate (vlib_main_t * vm, vlib_buffer_t * b)
{
static_always_inline void
vlib_buffer_copy_indices (u32 * dst, u32 * src, u32 n_indices)
{
- clib_memcpy_fast (dst, src, n_indices * sizeof (u32));
+ clib_memcpy_u32 (dst, src, n_indices);
+}
+
+always_inline void
+vlib_buffer_copy_indices_from_ring (u32 * dst, u32 * ring, u32 start,
+ u32 ring_size, u32 n_buffers)
+{
+ ASSERT (n_buffers <= ring_size);
+
+ if (PREDICT_TRUE (start + n_buffers <= ring_size))
+ {
+ vlib_buffer_copy_indices (dst, ring + start, n_buffers);
+ }
+ else
+ {
+ u32 n = ring_size - start;
+ vlib_buffer_copy_indices (dst, ring + start, n);
+ vlib_buffer_copy_indices (dst + n, ring, n_buffers - n);
+ }
+}
+
+always_inline void
+vlib_buffer_copy_indices_to_ring (u32 * ring, u32 * src, u32 start,
+ u32 ring_size, u32 n_buffers)
+{
+ ASSERT (n_buffers <= ring_size);
+
+ if (PREDICT_TRUE (start + n_buffers <= ring_size))
+ {
+ vlib_buffer_copy_indices (ring + start, src, n_buffers);
+ }
+ else
+ {
+ u32 n = ring_size - start;
+ vlib_buffer_copy_indices (ring + start, src, n);
+ vlib_buffer_copy_indices (ring, src + n, n_buffers - n);
+ }
}
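+
+/* Example (sketch): drain "n" indices from a 256-slot ring that may wrap;
+ "ring" and "head" are hypothetical caller state:
+
+ u32 bi[64];
+ vlib_buffer_copy_indices_from_ring (bi, ring, head & 0xff, 256, n);
+ head = (head + n) & 0xff;
+*/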
STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, template_end, 64);
always_inline u8
vlib_buffer_pool_get_default_for_numa (vlib_main_t * vm, u32 numa_node)
{
- ASSERT (numa_node < vm->buffer_main->n_numa_nodes);
- return numa_node;
+ ASSERT (numa_node < VLIB_BUFFER_MAX_NUMA_NODES);
+ return vm->buffer_main->default_buffer_pool_index_for_numa[numa_node];
}
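+
+/* e.g. the default pool for the calling thread's numa node (sketch):
+ u8 bpi = vlib_buffer_pool_get_default_for_numa (vm, vm->numa_node); */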
/** \brief Translate array of buffer indices into buffer pointers with offset
i32 offset)
{
uword buffer_mem_start = vm->buffer_main->buffer_mem_start;
-#ifdef CLIB_HAVE_VEC256
- u64x4 off = u64x4_splat (buffer_mem_start + offset);
+#ifdef CLIB_HAVE_VEC512
+ u64x8 of8 = u64x8_splat (buffer_mem_start + offset);
+ u64x4 off = u64x8_extract_lo (of8);
/* if count is not const, compiler will not unroll while loop
so we maintain hand-unrolled variants */
+ while (count >= 32)
+ {
+ u64x8 b0 = u64x8_from_u32x8 (u32x8_load_unaligned (bi));
+ u64x8 b1 = u64x8_from_u32x8 (u32x8_load_unaligned (bi + 8));
+ u64x8 b2 = u64x8_from_u32x8 (u32x8_load_unaligned (bi + 16));
+ u64x8 b3 = u64x8_from_u32x8 (u32x8_load_unaligned (bi + 24));
+ /* shift and add to get vlib_buffer_t pointer */
+ u64x8_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b);
+ u64x8_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b + 8);
+ u64x8_store_unaligned ((b2 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b + 16);
+ u64x8_store_unaligned ((b3 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b + 24);
+ b += 32;
+ bi += 32;
+ count -= 32;
+ }
while (count >= 8)
{
- u64x4 b0 = u32x4_extend_to_u64x4 (u32x4_load_unaligned (bi));
- u64x4 b1 = u32x4_extend_to_u64x4 (u32x4_load_unaligned (bi + 4));
+ u64x8 b0 = u64x8_from_u32x8 (u32x8_load_unaligned (bi));
/* shift and add to get vlib_buffer_t pointer */
- u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
- u64x4_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 4);
+ u64x8_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b);
b += 8;
bi += 8;
count -= 8;
}
+#elif defined CLIB_HAVE_VEC256
+ u64x4 off = u64x4_splat (buffer_mem_start + offset);
+ /* if count is not const, compiler will not unroll while loop
+ so we maintain hand-unrolled variants */
+ while (count >= 32)
+ {
+ u64x4 b0 = u64x4_from_u32x4 (u32x4_load_unaligned (bi));
+ u64x4 b1 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 4));
+ u64x4 b2 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 8));
+ u64x4 b3 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 12));
+ u64x4 b4 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 16));
+ u64x4 b5 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 20));
+ u64x4 b6 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 24));
+ u64x4 b7 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 28));
+ /* shift and add to get vlib_buffer_t pointer */
+ u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
+ u64x4_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 4);
+ u64x4_store_unaligned ((b2 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 8);
+ u64x4_store_unaligned ((b3 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 12);
+ u64x4_store_unaligned ((b4 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 16);
+ u64x4_store_unaligned ((b5 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 20);
+ u64x4_store_unaligned ((b6 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 24);
+ u64x4_store_unaligned ((b7 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 28);
+ b += 32;
+ bi += 32;
+ count -= 32;
+ }
#endif
while (count >= 4)
{
#ifdef CLIB_HAVE_VEC256
- u64x4 b0 = u32x4_extend_to_u64x4 (u32x4_load_unaligned (bi));
+ u64x4 b0 = u64x4_from_u32x4 (u32x4_load_unaligned (bi));
/* shift and add to get vlib_buffer_t pointer */
u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
#elif defined (CLIB_HAVE_VEC128)
u64x2 off = u64x2_splat (buffer_mem_start + offset);
u32x4 bi4 = u32x4_load_unaligned (bi);
- u64x2 b0 = u32x4_extend_to_u64x2 ((u32x4) bi4);
+ u64x2 b0 = u64x2_from_u32x4 ((u32x4) bi4);
#if defined (__aarch64__)
- u64x2 b1 = u32x4_extend_to_u64x2_high ((u32x4) bi4);
+ u64x2 b1 = u64x2_from_u32x4_high ((u32x4) bi4);
#else
bi4 = u32x4_shuffle (bi4, 2, 3, 0, 1);
- u64x2 b1 = u32x4_extend_to_u64x2 ((u32x4) bi4);
+ u64x2 b1 = u64x2_from_u32x4 ((u32x4) bi4);
#endif
u64x2_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
u64x2_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 2);
u8 *vlib_validate_buffer (vlib_main_t * vm, u32 buffer_index,
uword follow_chain);
+u8 *vlib_validate_buffers (vlib_main_t * vm,
+ u32 * buffers,
+ uword next_buffer_stride,
+ uword n_buffers,
+ vlib_buffer_known_state_t known_state,
+ uword follow_buffer_next);
+
static_always_inline vlib_buffer_pool_t *
vlib_get_buffer_pool (vlib_main_t * vm, u8 buffer_pool_index)
{
return vec_elt_at_index (bm->buffer_pools, buffer_pool_index);
}
-static_always_inline uword
+static_always_inline __clib_warn_unused_result uword
vlib_buffer_pool_get (vlib_main_t * vm, u8 buffer_pool_index, u32 * buffers,
u32 n_buffers)
{
ASSERT (bp->buffers);
clib_spinlock_lock (&bp->lock);
- len = vec_len (bp->buffers);
+ len = bp->n_avail;
if (PREDICT_TRUE (n_buffers < len))
{
len -= n_buffers;
vlib_buffer_copy_indices (buffers, bp->buffers + len, n_buffers);
- _vec_len (bp->buffers) = len;
+ bp->n_avail = len;
clib_spinlock_unlock (&bp->lock);
return n_buffers;
}
else
{
vlib_buffer_copy_indices (buffers, bp->buffers, len);
- _vec_len (bp->buffers) = 0;
+ bp->n_avail = 0;
clib_spinlock_unlock (&bp->lock);
return len;
}
less than the number requested or zero
*/
-always_inline u32
+always_inline __clib_warn_unused_result u32
vlib_buffer_alloc_from_pool (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
u8 buffer_pool_index)
{
vlib_buffer_pool_thread_t *bpt;
u32 *src, *dst, len, n_left;
+ /* If buffer allocation fault injection is configured */
+ if (VLIB_BUFFER_ALLOC_FAULT_INJECTOR > 0)
+ {
+ u32 vlib_buffer_alloc_may_fail (vlib_main_t *, u32);
+
+ /* See how many buffers we're willing to allocate */
+ n_buffers = vlib_buffer_alloc_may_fail (vm, n_buffers);
+ if (n_buffers == 0)
+ return (n_buffers);
+ }
+
bp = vec_elt_at_index (bm->buffer_pools, buffer_pool_index);
bpt = vec_elt_at_index (bp->threads, vm->thread_index);
dst = buffers;
n_left = n_buffers;
- len = vec_len (bpt->cached_buffers);
+ len = bpt->n_cached;
/* per-thread cache contains enough buffers */
if (len >= n_buffers)
{
src = bpt->cached_buffers + len - n_buffers;
vlib_buffer_copy_indices (dst, src, n_buffers);
- _vec_len (bpt->cached_buffers) -= n_buffers;
+ bpt->n_cached -= n_buffers;
+ goto done;
+ }
- if (CLIB_DEBUG > 0)
- vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
- VLIB_BUFFER_KNOWN_FREE);
- return n_buffers;
+ /* alloc bigger than cache - take buffers directly from main pool */
+ if (n_buffers >= VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ)
+ {
+ n_buffers = vlib_buffer_pool_get (vm, buffer_pool_index, buffers,
+ n_buffers);
+ goto done;
}
/* take everything available in the cache */
if (len)
{
vlib_buffer_copy_indices (dst, bpt->cached_buffers, len);
- _vec_len (bpt->cached_buffers) = 0;
+ bpt->n_cached = 0;
dst += len;
n_left -= len;
}
len = round_pow2 (n_left, 32);
- vec_validate_aligned (bpt->cached_buffers, len - 1, CLIB_CACHE_LINE_BYTES);
len = vlib_buffer_pool_get (vm, buffer_pool_index, bpt->cached_buffers,
len);
- _vec_len (bpt->cached_buffers) = len;
+ bpt->n_cached = len;
if (len)
{
u32 n_copy = clib_min (len, n_left);
src = bpt->cached_buffers + len - n_copy;
vlib_buffer_copy_indices (dst, src, n_copy);
- _vec_len (bpt->cached_buffers) -= n_copy;
+ bpt->n_cached -= n_copy;
n_left -= n_copy;
}
n_buffers -= n_left;
+done:
/* Verify that buffers are known free. */
if (CLIB_DEBUG > 0)
vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
VLIB_BUFFER_KNOWN_FREE);
-
+ if (PREDICT_FALSE (bm->alloc_callback_fn != 0))
+ bm->alloc_callback_fn (vm, buffer_pool_index, buffers, n_buffers);
return n_buffers;
}
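+
+/* With __clib_warn_unused_result the caller must consume the return value;
+ a checked allocation looks like this (sketch, "bpi" hypothetical):
+
+ u32 bi[VLIB_FRAME_SIZE];
+ u32 n = vlib_buffer_alloc_from_pool (vm, bi, VLIB_FRAME_SIZE, bpi);
+ if (PREDICT_FALSE (n != VLIB_FRAME_SIZE))
+ vlib_buffer_free (vm, bi, n); // partial alloc: return what we got
+*/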
@return - (u32) number of buffers actually allocated, may be
less than the number requested or zero
*/
-always_inline u32
+always_inline __clib_warn_unused_result u32
vlib_buffer_alloc_on_numa (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
u32 numa_node)
{
less than the number requested or zero
*/
-always_inline u32
+always_inline __clib_warn_unused_result u32
vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
{
return vlib_buffer_alloc_on_numa (vm, buffers, n_buffers, vm->numa_node);
@return - (u32) number of buffers actually allocated, may be
less than the number requested or zero
*/
-always_inline u32
+always_inline __clib_warn_unused_result u32
vlib_buffer_alloc_to_ring (vlib_main_t * vm, u32 * ring, u32 start,
u32 ring_size, u32 n_buffers)
{
@return - (u32) number of buffers actually allocated, may be
less than the number requested or zero
*/
-always_inline u32
+always_inline __clib_warn_unused_result u32
vlib_buffer_alloc_to_ring_from_pool (vlib_main_t * vm, u32 * ring, u32 start,
u32 ring_size, u32 n_buffers,
u8 buffer_pool_index)
vlib_buffer_pool_put (vlib_main_t * vm, u8 buffer_pool_index,
u32 * buffers, u32 n_buffers)
{
+ vlib_buffer_main_t *bm = vm->buffer_main;
vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
- vlib_buffer_pool_thread_t *bpt =
- vec_elt_at_index (bp->threads, vm->thread_index);
+ vlib_buffer_pool_thread_t *bpt = vec_elt_at_index (bp->threads,
+ vm->thread_index);
+ u32 n_cached, n_empty;
if (CLIB_DEBUG > 0)
vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
VLIB_BUFFER_KNOWN_ALLOCATED);
+ if (PREDICT_FALSE (bm->free_callback_fn != 0))
+ bm->free_callback_fn (vm, buffer_pool_index, buffers, n_buffers);
- vec_add_aligned (bpt->cached_buffers, buffers, n_buffers,
- CLIB_CACHE_LINE_BYTES);
-
- if (vec_len (bpt->cached_buffers) > 4 * VLIB_FRAME_SIZE)
+ n_cached = bpt->n_cached;
+ n_empty = VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ - n_cached;
+ if (n_buffers <= n_empty)
{
- clib_spinlock_lock (&bp->lock);
- /* keep last stored buffers, as they are more likely hot in the cache */
- vec_add_aligned (bp->buffers, bpt->cached_buffers, VLIB_FRAME_SIZE,
- CLIB_CACHE_LINE_BYTES);
- vec_delete (bpt->cached_buffers, VLIB_FRAME_SIZE, 0);
- bpt->n_alloc -= VLIB_FRAME_SIZE;
- clib_spinlock_unlock (&bp->lock);
+ vlib_buffer_copy_indices (bpt->cached_buffers + n_cached,
+ buffers, n_buffers);
+ bpt->n_cached = n_cached + n_buffers;
+ return;
}
+
+ vlib_buffer_copy_indices (bpt->cached_buffers + n_cached,
+ buffers + n_buffers - n_empty, n_empty);
+ bpt->n_cached = VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ;
+
+ clib_spinlock_lock (&bp->lock);
+ vlib_buffer_copy_indices (bp->buffers + bp->n_avail, buffers,
+ n_buffers - n_empty);
+ bp->n_avail += n_buffers - n_empty;
+ clib_spinlock_unlock (&bp->lock);
}
static_always_inline void
u8 buffer_pool_index = ~0;
u32 n_queue = 0, queue[queue_size + 4];
vlib_buffer_t bt = { };
-#if defined(CLIB_HAVE_VEC128) && !__aarch64__
+#if defined(CLIB_HAVE_VEC128)
vlib_buffer_t bpi_mask = {.buffer_pool_index = ~0 };
- vlib_buffer_t bpi_vec = {.buffer_pool_index = ~0 };
+ vlib_buffer_t bpi_vec = {};
vlib_buffer_t flags_refs_mask = {
.flags = VLIB_BUFFER_NEXT_PRESENT,
- .ref_count = ~0
+ .ref_count = ~1
};
#endif
+ if (PREDICT_FALSE (n_buffers == 0))
+ return;
+
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffers[0]);
+ buffer_pool_index = b->buffer_pool_index;
+ bp = vlib_get_buffer_pool (vm, buffer_pool_index);
+ vlib_buffer_copy_template (&bt, &bp->buffer_template);
+#if defined(CLIB_HAVE_VEC128)
+ bpi_vec.buffer_pool_index = buffer_pool_index;
+#endif
+
while (n_buffers)
{
vlib_buffer_t *b[8];
u32 bi, sum = 0, flags, next;
- if (n_buffers < 12)
+ if (n_buffers < 4)
goto one_by_one;
vlib_get_buffers (vm, buffers, b, 4);
- vlib_get_buffers (vm, buffers + 8, b + 4, 4);
- vlib_prefetch_buffer_header (b[4], LOAD);
- vlib_prefetch_buffer_header (b[5], LOAD);
- vlib_prefetch_buffer_header (b[6], LOAD);
- vlib_prefetch_buffer_header (b[7], LOAD);
+ if (n_buffers >= 12)
+ {
+ vlib_get_buffers (vm, buffers + 8, b + 4, 4);
+ vlib_prefetch_buffer_header (b[4], LOAD);
+ vlib_prefetch_buffer_header (b[5], LOAD);
+ vlib_prefetch_buffer_header (b[6], LOAD);
+ vlib_prefetch_buffer_header (b[7], LOAD);
+ }
-#if defined(CLIB_HAVE_VEC128) && !__aarch64__
+#if defined(CLIB_HAVE_VEC128)
u8x16 p0, p1, p2, p3, r;
p0 = u8x16_load_unaligned (b[0]);
p1 = u8x16_load_unaligned (b[1]);
if (PREDICT_FALSE (buffer_pool_index != b[0]->buffer_pool_index))
{
- buffer_pool_index = b[0]->buffer_pool_index;
-#if defined(CLIB_HAVE_VEC128) && !__aarch64__
- bpi_vec.buffer_pool_index = buffer_pool_index;
-#endif
- bp = vlib_get_buffer_pool (vm, buffer_pool_index);
- vlib_buffer_copy_template (&bt, &bp->buffer_template);
if (n_queue)
{
vlib_buffer_pool_put (vm, buffer_pool_index, queue, n_queue);
n_queue = 0;
}
+
+ buffer_pool_index = b[0]->buffer_pool_index;
+#if defined(CLIB_HAVE_VEC128)
+ bpi_vec.buffer_pool_index = buffer_pool_index;
+#endif
+ bp = vlib_get_buffer_pool (vm, buffer_pool_index);
+ vlib_buffer_copy_template (&bt, &bp->buffer_template);
}
vlib_buffer_validate (vm, b[0]);
n_queue = 0;
}
- if (flags & VLIB_BUFFER_NEXT_PRESENT)
+ if (maybe_next && (flags & VLIB_BUFFER_NEXT_PRESENT))
{
bi = next;
goto next_in_chain;
int vlib_buffer_add_data (vlib_main_t * vm, u32 * buffer_index, void *data,
u32 n_data_bytes);
+/* vlib_buffer and vnet_buffer flag bits preserved for copy/clone */
+#define VLIB_BUFFER_COPY_CLONE_FLAGS_MASK \
+ (VLIB_BUFFER_NEXT_PRESENT | VLIB_BUFFER_TOTAL_LENGTH_VALID | \
+ VLIB_BUFFER_IS_TRACED | ~VLIB_BUFFER_FLAGS_ALL)
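+/* note: ~VLIB_BUFFER_FLAGS_ALL additionally preserves the flag bits not
+ owned by vlib, i.e. the vnet_buffer flags */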
+
/* duplicate all buffers in chain */
always_inline vlib_buffer_t *
vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b)
{
vlib_buffer_t *s, *d, *fd;
uword n_alloc, n_buffers = 1;
- u32 flag_mask = VLIB_BUFFER_NEXT_PRESENT | VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ u32 flag_mask = VLIB_BUFFER_COPY_CLONE_FLAGS_MASK;
int i;
s = b;
d->current_data = s->current_data;
d->current_length = s->current_length;
d->flags = s->flags & flag_mask;
+ d->trace_handle = s->trace_handle;
d->total_length_not_including_first_buffer =
s->total_length_not_including_first_buffer;
clib_memcpy_fast (d->opaque, s->opaque, sizeof (s->opaque));
return fd;
}
+/* duplicate first buffer in chain */
+always_inline vlib_buffer_t *
+vlib_buffer_copy_no_chain (vlib_main_t * vm, vlib_buffer_t * b, u32 * di)
+{
+ vlib_buffer_t *d;
+
+ if ((vlib_buffer_alloc (vm, di, 1)) != 1)
+ return 0;
+
+ d = vlib_get_buffer (vm, *di);
+ /* 1st segment */
+ d->current_data = b->current_data;
+ d->current_length = b->current_length;
+ clib_memcpy_fast (d->opaque, b->opaque, sizeof (b->opaque));
+ clib_memcpy_fast (d->opaque2, b->opaque2, sizeof (b->opaque2));
+ clib_memcpy_fast (vlib_buffer_get_current (d),
+ vlib_buffer_get_current (b), b->current_length);
+
+ return d;
+}
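+
+/* Usage sketch (hypothetical "b0"): duplicate only the first segment; the
+ copy is freshly allocated, so it never carries VLIB_BUFFER_NEXT_PRESENT:
+
+ u32 di;
+ vlib_buffer_t *d = vlib_buffer_copy_no_chain (vm, b0, &di);
+ if (PREDICT_FALSE (d == 0))
+ return; // buffer allocation failure
+*/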
+
+/** \brief Move packet from its current position to an offset position in
+ the buffer.
+ Only works for small packets using a single buffer with room to fit the move
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param b - (vlib_buffer_t *) pointer to buffer
+ @param offset - (i16) position within the buffer to move the packet to
+ */
+always_inline void
+vlib_buffer_move (vlib_main_t * vm, vlib_buffer_t * b, i16 offset)
+{
+ ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
+ ASSERT (offset + VLIB_BUFFER_PRE_DATA_SIZE >= 0);
+ ASSERT (offset + b->current_length <
+ vlib_buffer_get_default_data_size (vm));
+
+ u8 *source = vlib_buffer_get_current (b);
+ b->current_data = offset;
+ u8 *destination = vlib_buffer_get_current (b);
+ u16 length = b->current_length;
+
+ if (source + length <= destination) /* no overlap */
+ clib_memcpy_fast (destination, source, length);
+ else
+ memmove (destination, source, length);
+}
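+
+/* Example (sketch, single-segment packet): ensure 64 bytes of extra
+ headroom precede the packet before prepending an encap header:
+
+ if (b0->current_data < 64)
+ vlib_buffer_move (vm, b0, 64);
+*/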
+
/** \brief Create a maximum of 256 clones of buffer and store them
in the supplied array
@param n_buffers - (u16) number of buffer clones requested (<=256)
@param head_end_offset - (u16) offset relative to current position
where packet head ends
+ @param offset - (i16) copy the packet head at the current position if 0,
+ otherwise at the offset position, changing the headroom accordingly
@return - (u16) number of buffers actually cloned, may be
less than the number requested or zero
*/
always_inline u16
vlib_buffer_clone_256 (vlib_main_t * vm, u32 src_buffer, u32 * buffers,
- u16 n_buffers, u16 head_end_offset)
+ u16 n_buffers, u16 head_end_offset, i16 offset)
{
u16 i;
vlib_buffer_t *s = vlib_get_buffer (vm, src_buffer);
ASSERT (s->ref_count == 1);
ASSERT (n_buffers);
ASSERT (n_buffers <= 256);
+ ASSERT (offset + VLIB_BUFFER_PRE_DATA_SIZE >= 0);
+ ASSERT ((offset + head_end_offset) <
+ vlib_buffer_get_default_data_size (vm));
if (s->current_length <= head_end_offset + CLIB_CACHE_LINE_BYTES * 2)
{
buffers[0] = src_buffer;
+ if (offset)
+ vlib_buffer_move (vm, s, offset);
+
for (i = 1; i < n_buffers; i++)
{
vlib_buffer_t *d;
return n_buffers;
}
- if (PREDICT_FALSE (n_buffers == 1))
+ if (PREDICT_FALSE ((n_buffers == 1) && (offset == 0)))
{
buffers[0] = src_buffer;
return 1;
for (i = 0; i < n_buffers; i++)
{
vlib_buffer_t *d = vlib_get_buffer (vm, buffers[i]);
- d->current_data = s->current_data;
+ if (offset)
+ d->current_data = offset;
+ else
+ d->current_data = s->current_data;
+
d->current_length = head_end_offset;
ASSERT (d->buffer_pool_index == s->buffer_pool_index);
d->total_length_not_including_first_buffer +=
s->total_length_not_including_first_buffer;
}
- d->flags = s->flags | VLIB_BUFFER_NEXT_PRESENT;
- d->flags &= ~VLIB_BUFFER_EXT_HDR_VALID;
+ d->flags = (s->flags & VLIB_BUFFER_COPY_CLONE_FLAGS_MASK) |
+ VLIB_BUFFER_NEXT_PRESENT;
+ d->trace_handle = s->trace_handle;
clib_memcpy_fast (d->opaque, s->opaque, sizeof (s->opaque));
clib_memcpy_fast (d->opaque2, s->opaque2, sizeof (s->opaque2));
clib_memcpy_fast (vlib_buffer_get_current (d),
d->next_buffer = src_buffer;
}
vlib_buffer_advance (s, head_end_offset);
- s->ref_count = n_buffers;
+ s->ref_count = n_buffers ? n_buffers : s->ref_count;
while (s->flags & VLIB_BUFFER_NEXT_PRESENT)
{
s = vlib_get_buffer (vm, s->next_buffer);
- s->ref_count = n_buffers;
+ s->ref_count = n_buffers ? n_buffers : s->ref_count;
}
return n_buffers;
@param n_buffers - (u16) number of buffer clones requested (<=256)
@param head_end_offset - (u16) offset relative to current position
where packet head ends
+ @param offset - (i16) copy the packet head at the current position if 0,
+ otherwise at the offset position, changing the headroom accordingly
@return - (u16) number of buffers actually cloned, may be
less than the number requested or zero
*/
always_inline u16
-vlib_buffer_clone (vlib_main_t * vm, u32 src_buffer, u32 * buffers,
- u16 n_buffers, u16 head_end_offset)
+vlib_buffer_clone_at_offset (vlib_main_t * vm, u32 src_buffer, u32 * buffers,
+ u16 n_buffers, u16 head_end_offset, i16 offset)
{
vlib_buffer_t *s = vlib_get_buffer (vm, src_buffer);
u16 n_cloned = 0;
n_cloned += vlib_buffer_clone_256 (vm,
vlib_get_buffer_index (vm, copy),
(buffers + n_cloned),
- 256, head_end_offset);
+ 256, head_end_offset, offset);
n_buffers -= 256;
}
n_cloned += vlib_buffer_clone_256 (vm, src_buffer,
buffers + n_cloned,
- n_buffers, head_end_offset);
+ n_buffers, head_end_offset, offset);
return n_cloned;
}
+/** \brief Create multiple clones of buffer and store them
+ in the supplied array
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param src_buffer - (u32) source buffer index
+ @param buffers - (u32 * ) buffer index array
+ @param n_buffers - (u16) number of buffer clones requested (<=256)
+ @param head_end_offset - (u16) offset relative to current position
+ where packet head ends
+ @return - (u16) number of buffers actually cloned, may be
+ less than the number requested or zero
+*/
+always_inline u16
+vlib_buffer_clone (vlib_main_t * vm, u32 src_buffer, u32 * buffers,
+ u16 n_buffers, u16 head_end_offset)
+{
+ return vlib_buffer_clone_at_offset (vm, src_buffer, buffers, n_buffers,
+ head_end_offset, 0);
+}
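+
+/* Usage sketch: fan one packet out twice, with the first 64 bytes as the
+ per-clone private head; "bi0" is a hypothetical source buffer index:
+
+ u32 clones[2];
+ u16 n = vlib_buffer_clone (vm, bi0, clones, 2, 64);
+ if (n < 2)
+ ; // handle partial clone
+*/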
+
/** \brief Attach cloned tail to the buffer
@param vm - (vlib_main_t *) vlib main data structure pointer
void vlib_buffer_chain_validate (vlib_main_t * vm, vlib_buffer_t * first);
format_function_t format_vlib_buffer, format_vlib_buffer_and_data,
- format_vlib_buffer_contents;
+ format_vlib_buffer_contents, format_vlib_buffer_no_chain;
typedef struct
{
vec_free (t->packet_data);
}
-/**
- * @brief compress buffer chain in a way where the first buffer is at least
- * VLIB_BUFFER_CLONE_HEAD_SIZE long
- *
- * @param[in] vm - vlib_main
- * @param[in,out] first - first buffer in chain
- * @param[in,out] discard_vector - vector of buffer indexes which were removed
- * from the chain
- */
-always_inline void
-vlib_buffer_chain_compress (vlib_main_t * vm,
- vlib_buffer_t * first, u32 ** discard_vector)
+always_inline u32
+vlib_buffer_space_left_at_end (vlib_main_t * vm, vlib_buffer_t * b)
{
- if (first->current_length >= VLIB_BUFFER_CLONE_HEAD_SIZE ||
- !(first->flags & VLIB_BUFFER_NEXT_PRESENT))
- {
- /* this is already big enough or not a chain */
- return;
- }
+ return b->data + vlib_buffer_get_default_data_size (vm) -
+ ((u8 *) vlib_buffer_get_current (b) + b->current_length);
+}
+
+#define VLIB_BUFFER_LINEARIZE_MAX 64
+
+always_inline u32
+vlib_buffer_chain_linearize (vlib_main_t * vm, vlib_buffer_t * b)
+{
+ vlib_buffer_t *dst_b;
+ u32 n_buffers = 1, to_free = 0;
+ u16 rem_len, dst_len, data_size, src_len = 0;
+ u8 *dst, *src = 0;
+
+ if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0))
+ return 1;
+
+ ASSERT (1 == b->ref_count);
+ if (PREDICT_FALSE (1 != b->ref_count))
+ return 0;
+
+ data_size = vlib_buffer_get_default_data_size (vm);
+ rem_len = vlib_buffer_length_in_chain (vm, b) - b->current_length;
- u32 want_first_size = clib_min (VLIB_BUFFER_CLONE_HEAD_SIZE,
- vlib_buffer_get_default_data_size (vm) -
- first->current_data);
- do
+ dst_b = b;
+ dst = vlib_buffer_get_tail (dst_b);
+ dst_len = vlib_buffer_space_left_at_end (vm, dst_b);
+
+ b->total_length_not_including_first_buffer -= dst_len;
+
+ while (rem_len > 0)
{
- vlib_buffer_t *second = vlib_get_buffer (vm, first->next_buffer);
- u32 need = want_first_size - first->current_length;
- u32 amount_to_copy = clib_min (need, second->current_length);
- clib_memcpy_fast (((u8 *) vlib_buffer_get_current (first)) +
- first->current_length,
- vlib_buffer_get_current (second), amount_to_copy);
- first->current_length += amount_to_copy;
- second->current_data += amount_to_copy;
- second->current_length -= amount_to_copy;
- if (first->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID)
+ u16 copy_len;
+
+ while (0 == src_len)
{
- first->total_length_not_including_first_buffer -= amount_to_copy;
+ ASSERT (b->flags & VLIB_BUFFER_NEXT_PRESENT);
+ if (PREDICT_FALSE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)))
+ break; /* malformed chained buffer */
+
+ b = vlib_get_buffer (vm, b->next_buffer);
+ src = vlib_buffer_get_current (b);
+ src_len = b->current_length;
}
- if (!second->current_length)
+
+ if (0 == dst_len)
{
- vec_add1 (*discard_vector, first->next_buffer);
- if (second->flags & VLIB_BUFFER_NEXT_PRESENT)
+ ASSERT (dst_b->flags & VLIB_BUFFER_NEXT_PRESENT);
+ if (PREDICT_FALSE (!(dst_b->flags & VLIB_BUFFER_NEXT_PRESENT)))
+ break; /* malformed chained buffer */
+
+ vlib_buffer_t *next_dst_b = vlib_get_buffer (vm, dst_b->next_buffer);
+
+ if (PREDICT_TRUE (1 == next_dst_b->ref_count))
{
- first->next_buffer = second->next_buffer;
+ /* normal case: buffer is not cloned, just use it */
+ dst_b = next_dst_b;
}
else
{
- first->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+ /* cloned buffer, build a new dest chain from there */
+ vlib_buffer_t *bufs[VLIB_BUFFER_LINEARIZE_MAX];
+ u32 bis[VLIB_BUFFER_LINEARIZE_MAX + 1];
+ const int n = (rem_len + data_size - 1) / data_size;
+ int n_alloc;
+ int i;
+
+ ASSERT (n <= VLIB_BUFFER_LINEARIZE_MAX);
+ if (PREDICT_FALSE (n > VLIB_BUFFER_LINEARIZE_MAX))
+ return 0;
+
+ n_alloc = vlib_buffer_alloc (vm, bis, n);
+ if (PREDICT_FALSE (n_alloc != n))
+ {
+ vlib_buffer_free (vm, bis, n_alloc);
+ return 0;
+ }
+
+ vlib_get_buffers (vm, bis, bufs, n);
+
+ for (i = 0; i < n - 1; i++)
+ {
+ bufs[i]->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ bufs[i]->next_buffer = bis[i + 1];
+ }
+
+ to_free = dst_b->next_buffer;
+ dst_b->next_buffer = bis[0];
+ dst_b = bufs[0];
}
- second->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+
+ n_buffers++;
+
+ dst_b->current_data = clib_min (0, dst_b->current_data);
+ dst_b->current_length = 0;
+
+ dst = dst_b->data + dst_b->current_data;
+ dst_len = data_size - dst_b->current_data;
}
- }
- while ((first->current_length < want_first_size) &&
- (first->flags & VLIB_BUFFER_NEXT_PRESENT));
-}
-/**
- * @brief linearize buffer chain - the first buffer is filled, if needed,
- * buffers are allocated and filled, returns free space in last buffer or
- * negative on failure
- *
- * @param[in] vm - vlib_main
- * @param[in,out] first - first buffer in chain
- */
-always_inline int
-vlib_buffer_chain_linearize (vlib_main_t * vm, vlib_buffer_t * first)
-{
- vlib_buffer_t *b = first;
- u32 buf_len = vlib_buffer_get_default_data_size (vm);
- // free buffer chain starting from the second buffer
- int free_count = (b->flags & VLIB_BUFFER_NEXT_PRESENT) != 0;
- u32 chain_to_free = b->next_buffer;
-
- u32 len = vlib_buffer_length_in_chain (vm, b);
- u32 free_len = buf_len - b->current_data - b->current_length;
- int alloc_len = clib_max (len - free_len, 0); //use the free len in the first buffer
- int n_buffers = (alloc_len + buf_len - 1) / buf_len;
- u32 new_buffers[n_buffers];
+ copy_len = clib_min (src_len, dst_len);
- u32 n_alloc = vlib_buffer_alloc (vm, new_buffers, n_buffers);
- if (n_alloc != n_buffers)
- {
- vlib_buffer_free_no_next (vm, new_buffers, n_alloc);
- return -1;
+ if (PREDICT_TRUE (src == dst))
+ {
+ /* nothing to do */
+ }
+ else if (src + copy_len > dst && dst + copy_len > src)
+ {
+ /* src and dst overlap */
+ ASSERT (b == dst_b);
+ memmove (dst, src, copy_len);
+ }
+ else
+ {
+ clib_memcpy_fast (dst, src, copy_len);
+ }
+
+ dst_b->current_length += copy_len;
+
+ dst += copy_len;
+ src += copy_len;
+ dst_len -= copy_len;
+ src_len -= copy_len;
+ rem_len -= copy_len;
}
- vlib_buffer_t *s = b;
- while (s->flags & VLIB_BUFFER_NEXT_PRESENT)
+ /* in case of a malformed buffer chain, we'll exit early from the loop. */
+ ASSERT (0 == rem_len);
+ b->total_length_not_including_first_buffer -= rem_len;
+
+ if (to_free)
+ vlib_buffer_free_one (vm, to_free);
+
+ if (dst_b->flags & VLIB_BUFFER_NEXT_PRESENT)
{
- s = vlib_get_buffer (vm, s->next_buffer);
- int d_free_len = buf_len - b->current_data - b->current_length;
- ASSERT (d_free_len >= 0);
- // chain buf and split write
- u32 copy_len = clib_min (d_free_len, s->current_length);
- u8 *d = vlib_buffer_put_uninit (b, copy_len);
- clib_memcpy (d, vlib_buffer_get_current (s), copy_len);
- int rest = s->current_length - copy_len;
- if (rest > 0)
+ /* the resulting chain is smaller than the original, cut it there */
+ dst_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+ vlib_buffer_free_one (vm, dst_b->next_buffer);
+ if (1 == n_buffers)
{
- //prev buf is full
- ASSERT (vlib_buffer_get_tail (b) == b->data + buf_len);
- ASSERT (n_buffers > 0);
- b = vlib_buffer_chain_buffer (vm, b, new_buffers[--n_buffers]);
- //make full use of the new buffers
- b->current_data = 0;
- d = vlib_buffer_put_uninit (b, rest);
- clib_memcpy (d, vlib_buffer_get_current (s) + copy_len, rest);
+ /* no longer a chained buffer */
+ dst_b->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ dst_b->total_length_not_including_first_buffer = 0;
}
}
- vlib_buffer_free (vm, &chain_to_free, free_count);
- b->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
- if (b == first) /* no buffers addeed */
- b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
- ASSERT (len == vlib_buffer_length_in_chain (vm, first));
- ASSERT (n_buffers == 0);
- return buf_len - b->current_data - b->current_length;
+
+ return n_buffers;
}
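+
+/* Usage sketch: flatten a chain before handing the packet to code that
+ cannot walk segments; the return value is the resulting segment count,
+ 0 on failure (out of buffers, or the chain is cloned):
+
+ if (PREDICT_FALSE (0 == vlib_buffer_chain_linearize (vm, b0)))
+ ; // drop the packet
+*/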
#endif /* included_vlib_buffer_funcs_h */