X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlib%2Fbuffer_funcs.h;h=d15ef5702f8b92993a64b1ee1f34364df2bf58e8;hb=178cf493d009995b28fdf220f04c98860ff79a9b;hp=78bf93176987d87026336e6d31386123a184baa8;hpb=b6a8ed7fa0709bbf8e091826803f50e6330689cf;p=vpp.git diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h index 78bf9317698..d15ef5702f8 100644 --- a/src/vlib/buffer_funcs.h +++ b/src/vlib/buffer_funcs.h @@ -41,6 +41,7 @@ #define included_vlib_buffer_funcs_h #include +#include /** \file vlib buffer access methods. @@ -56,13 +57,92 @@ always_inline vlib_buffer_t * vlib_get_buffer (vlib_main_t * vm, u32 buffer_index) { - vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_main_t *bm = &buffer_main; uword offset = ((uword) buffer_index) << CLIB_LOG2_CACHE_LINE_BYTES; ASSERT (offset < bm->buffer_mem_size); return uword_to_pointer (bm->buffer_mem_start + offset, void *); } +/** \brief Translate array of buffer indices into buffer pointers with offset + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param bi - (u32 *) array of buffer indices + @param b - (void **) array to store buffer pointers + @param count - (uword) number of elements + @param offset - (i32) offset applied to each pointer +*/ +static_always_inline void +vlib_get_buffers_with_offset (vlib_main_t * vm, u32 * bi, void **b, int count, + i32 offset) +{ +#ifdef CLIB_HAVE_VEC256 + u64x4 off = u64x4_splat (buffer_main.buffer_mem_start + offset); + /* if count is not const, compiler will not unroll while loop + se we maintain two-in-parallel variant */ + while (count >= 8) + { + u64x4 b0 = u32x4_extend_to_u64x4 (u32x4_load_unaligned (bi)); + u64x4 b1 = u32x4_extend_to_u64x4 (u32x4_load_unaligned (bi + 4)); + /* shift and add to get vlib_buffer_t pointer */ + u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b); + u64x4_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 4); + b += 8; + bi += 8; + count -= 8; + } +#endif + while (count >= 4) + { +#ifdef CLIB_HAVE_VEC256 + u64x4 b0 = u32x4_extend_to_u64x4 (u32x4_load_unaligned (bi)); + /* shift and add to get vlib_buffer_t pointer */ + u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b); +#elif defined (CLIB_HAVE_VEC128) + u64x2 off = u64x2_splat (buffer_main.buffer_mem_start + offset); + u32x4 bi4 = u32x4_load_unaligned (bi); + u64x2 b0 = u32x4_extend_to_u64x2 ((u32x4) bi4); +#if defined (__aarch64__) + u64x2 b1 = u32x4_extend_to_u64x2_high ((u32x4) bi4); +#else + bi4 = u32x4_shuffle (bi4, 2, 3, 0, 1); + u64x2 b1 = u32x4_extend_to_u64x2 ((u32x4) bi4); +#endif + u64x2_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b); + u64x2_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 2); +#else + b[0] = ((u8 *) vlib_get_buffer (vm, bi[0])) + offset; + b[1] = ((u8 *) vlib_get_buffer (vm, bi[1])) + offset; + b[2] = ((u8 *) vlib_get_buffer (vm, bi[2])) + offset; + b[3] = ((u8 *) vlib_get_buffer (vm, bi[3])) + offset; +#endif + b += 4; + bi += 4; + count -= 4; + } + while (count) + { + b[0] = ((u8 *) vlib_get_buffer (vm, bi[0])) + offset; + b += 1; + bi += 1; + count -= 1; + } +} + +/** \brief Translate array of buffer indices into buffer pointers + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param bi - (u32 *) array of buffer indices + @param b - (vlib_buffer_t **) array to store buffer pointers + @param count - (uword) number of elements +*/ + +static_always_inline void +vlib_get_buffers (vlib_main_t * vm, u32 * bi, vlib_buffer_t ** b, int count) +{ + vlib_get_buffers_with_offset (vm, bi, (void **) b, count, 0); +} + /** \brief Translate buffer pointer into buffer index @param vm - (vlib_main_t *) vlib main data structure pointer @@ -73,7 +153,7 @@ vlib_get_buffer (vlib_main_t * vm, u32 buffer_index) always_inline u32 vlib_get_buffer_index (vlib_main_t * vm, void *p) { - vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_main_t *bm = &buffer_main; uword offset = pointer_to_uword (p) - bm->buffer_mem_start; ASSERT (pointer_to_uword (p) >= bm->buffer_mem_start); ASSERT (offset < bm->buffer_mem_size); @@ -81,6 +161,82 @@ vlib_get_buffer_index (vlib_main_t * vm, void *p) return offset >> CLIB_LOG2_CACHE_LINE_BYTES; } +/** \brief Translate array of buffer pointers into buffer indices with offset + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param b - (void **) array of buffer pointers + @param bi - (u32 *) array to store buffer indices + @param count - (uword) number of elements + @param offset - (i32) offset applied to each pointer +*/ +static_always_inline void +vlib_get_buffer_indices_with_offset (vlib_main_t * vm, void **b, u32 * bi, + uword count, i32 offset) +{ +#ifdef CLIB_HAVE_VEC256 + u32x8 mask = { 0, 2, 4, 6, 1, 3, 5, 7 }; + u64x4 off4 = u64x4_splat (buffer_main.buffer_mem_start - offset); + + while (count >= 8) + { + /* load 4 pointers into 256-bit register */ + u64x4 v0 = u64x4_load_unaligned (b); + u64x4 v1 = u64x4_load_unaligned (b + 4); + u32x8 v2, v3; + + v0 -= off4; + v1 -= off4; + + v0 >>= CLIB_LOG2_CACHE_LINE_BYTES; + v1 >>= CLIB_LOG2_CACHE_LINE_BYTES; + + /* permute 256-bit register so lower u32s of each buffer index are + * placed into lower 128-bits */ + v2 = u32x8_permute ((u32x8) v0, mask); + v3 = u32x8_permute ((u32x8) v1, mask); + + /* extract lower 128-bits and save them to the array of buffer indices */ + u32x4_store_unaligned (u32x8_extract_lo (v2), bi); + u32x4_store_unaligned (u32x8_extract_lo (v3), bi + 4); + bi += 8; + b += 8; + count -= 8; + } +#endif + while (count >= 4) + { + /* equivalent non-nector implementation */ + bi[0] = vlib_get_buffer_index (vm, ((u8 *) b[0]) + offset); + bi[1] = vlib_get_buffer_index (vm, ((u8 *) b[1]) + offset); + bi[2] = vlib_get_buffer_index (vm, ((u8 *) b[2]) + offset); + bi[3] = vlib_get_buffer_index (vm, ((u8 *) b[3]) + offset); + bi += 4; + b += 4; + count -= 4; + } + while (count) + { + bi[0] = vlib_get_buffer_index (vm, ((u8 *) b[0]) + offset); + bi += 1; + b += 1; + count -= 1; + } +} + +/** \brief Translate array of buffer pointers into buffer indices + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param b - (vlib_buffer_t **) array of buffer pointers + @param bi - (u32 *) array to store buffer indices + @param count - (uword) number of elements +*/ +static_always_inline void +vlib_get_buffer_indices (vlib_main_t * vm, vlib_buffer_t ** b, u32 * bi, + uword count) +{ + vlib_get_buffer_indices_with_offset (vm, (void **) b, bi, count, 0); +} + /** \brief Get next buffer in buffer linklist, or zero for end of list. @param vm - (vlib_main_t *) vlib main data structure pointer @@ -148,7 +304,7 @@ vlib_buffer_contents (vlib_main_t * vm, u32 buffer_index, u8 * contents) { b = vlib_get_buffer (vm, buffer_index); l = b->current_length; - clib_memcpy (contents + content_len, b->data + b->current_data, l); + clib_memcpy_fast (contents + content_len, b->data + b->current_data, l); content_len += l; if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)) break; @@ -158,15 +314,16 @@ vlib_buffer_contents (vlib_main_t * vm, u32 buffer_index, u8 * contents) return content_len; } -/* Return physical address of buffer->data start. */ -always_inline u64 -vlib_get_buffer_data_physical_address (vlib_main_t * vm, u32 buffer_index) +always_inline uword +vlib_buffer_get_pa (vlib_main_t * vm, vlib_buffer_t * b) { - return vlib_physmem_offset_to_physical (&vm->physmem_main, - (((uword) buffer_index) << - CLIB_LOG2_CACHE_LINE_BYTES) + - STRUCT_OFFSET_OF (vlib_buffer_t, - data)); + return vlib_physmem_get_pa (vm, b->data); +} + +always_inline uword +vlib_buffer_get_current_pa (vlib_main_t * vm, vlib_buffer_t * b) +{ + return vlib_buffer_get_pa (vm, b) + b->current_data; } /** \brief Prefetch buffer metadata by buffer index @@ -215,10 +372,15 @@ typedef enum VLIB_BUFFER_KNOWN_ALLOCATED, } vlib_buffer_known_state_t; +void vlib_buffer_validate_alloc_free (vlib_main_t * vm, u32 * buffers, + uword n_buffers, + vlib_buffer_known_state_t + expected_state); + always_inline vlib_buffer_known_state_t -vlib_buffer_is_known (vlib_main_t * vm, u32 buffer_index) +vlib_buffer_is_known (u32 buffer_index) { - vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_main_t *bm = &buffer_main; clib_spinlock_lock (&bm->buffer_known_hash_lockp); uword *p = hash_get (bm->buffer_known_hash, buffer_index); @@ -227,11 +389,11 @@ vlib_buffer_is_known (vlib_main_t * vm, u32 buffer_index) } always_inline void -vlib_buffer_set_known_state (vlib_main_t * vm, - u32 buffer_index, +vlib_buffer_set_known_state (u32 buffer_index, vlib_buffer_known_state_t state) { - vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_main_t *bm = &buffer_main; + clib_spinlock_lock (&bm->buffer_known_hash_lockp); hash_set (bm->buffer_known_hash, buffer_index, state); clib_spinlock_unlock (&bm->buffer_known_hash_lockp); @@ -242,46 +404,32 @@ vlib_buffer_set_known_state (vlib_main_t * vm, u8 *vlib_validate_buffer (vlib_main_t * vm, u32 buffer_index, uword follow_chain); -/** \brief Allocate buffers into supplied array - - @param vm - (vlib_main_t *) vlib main data structure pointer - @param buffers - (u32 * ) buffer index array - @param n_buffers - (u32) number of buffers requested - @return - (u32) number of buffers actually allocated, may be - less than the number requested or zero -*/ -always_inline u32 -vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - - ASSERT (bm->cb.vlib_buffer_alloc_cb); - - return bm->cb.vlib_buffer_alloc_cb (vm, buffers, n_buffers); -} - always_inline u32 vlib_buffer_round_size (u32 size) { return round_pow2 (size, sizeof (vlib_buffer_t)); } -always_inline u32 +always_inline vlib_buffer_free_list_index_t vlib_buffer_get_free_list_index (vlib_buffer_t * b) { - return b->flags & VLIB_BUFFER_FREE_LIST_INDEX_MASK; + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NON_DEFAULT_FREELIST)) + return b->free_list_index; + + return 0; } always_inline void -vlib_buffer_set_free_list_index (vlib_buffer_t * b, u32 index) +vlib_buffer_set_free_list_index (vlib_buffer_t * b, + vlib_buffer_free_list_index_t index) { - /* if there is an need for more free lists we should consider - storig data in the 2nd cacheline */ - ASSERT (VLIB_BUFFER_FREE_LIST_INDEX_MASK & 1); - ASSERT (index <= VLIB_BUFFER_FREE_LIST_INDEX_MASK); - - b->flags &= ~VLIB_BUFFER_FREE_LIST_INDEX_MASK; - b->flags |= index & VLIB_BUFFER_FREE_LIST_INDEX_MASK; + if (PREDICT_FALSE (index)) + { + b->flags |= VLIB_BUFFER_NON_DEFAULT_FREELIST; + b->free_list_index = index; + } + else + b->flags &= ~VLIB_BUFFER_NON_DEFAULT_FREELIST; } /** \brief Allocate buffers from specific freelist into supplied array @@ -295,14 +443,96 @@ vlib_buffer_set_free_list_index (vlib_buffer_t * b, u32 index) always_inline u32 vlib_buffer_alloc_from_free_list (vlib_main_t * vm, u32 * buffers, - u32 n_buffers, u32 free_list_index) + u32 n_buffers, + vlib_buffer_free_list_index_t index) +{ + vlib_buffer_main_t *bm = &buffer_main; + vlib_buffer_free_list_t *fl; + u32 *src; + uword len; + + ASSERT (bm->cb.vlib_buffer_fill_free_list_cb); + + fl = pool_elt_at_index (vm->buffer_free_list_pool, index); + + len = vec_len (fl->buffers); + + if (PREDICT_FALSE (len < n_buffers)) + { + bm->cb.vlib_buffer_fill_free_list_cb (vm, fl, n_buffers); + if (PREDICT_FALSE ((len = vec_len (fl->buffers)) == 0)) + return 0; + + /* even if fill free list didn't manage to refill free list + we should give what we have */ + n_buffers = clib_min (len, n_buffers); + + /* following code is intentionaly duplicated to allow compiler + to optimize fast path when n_buffers is constant value */ + src = fl->buffers + len - n_buffers; + clib_memcpy_fast (buffers, src, n_buffers * sizeof (u32)); + _vec_len (fl->buffers) -= n_buffers; + + /* Verify that buffers are known free. */ + vlib_buffer_validate_alloc_free (vm, buffers, n_buffers, + VLIB_BUFFER_KNOWN_FREE); + + return n_buffers; + } + + src = fl->buffers + len - n_buffers; + clib_memcpy_fast (buffers, src, n_buffers * sizeof (u32)); + _vec_len (fl->buffers) -= n_buffers; + + /* Verify that buffers are known free. */ + vlib_buffer_validate_alloc_free (vm, buffers, n_buffers, + VLIB_BUFFER_KNOWN_FREE); + + return n_buffers; +} + +/** \brief Allocate buffers into supplied array + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param buffers - (u32 * ) buffer index array + @param n_buffers - (u32) number of buffers requested + @return - (u32) number of buffers actually allocated, may be + less than the number requested or zero +*/ +always_inline u32 +vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) { - vlib_buffer_main_t *bm = vm->buffer_main; + return vlib_buffer_alloc_from_free_list (vm, buffers, n_buffers, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); +} + +/** \brief Allocate buffers into ring + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param buffers - (u32 * ) buffer index ring + @param start - (u32) first slot in the ring + @param ring_size - (u32) ring size + @param n_buffers - (u32) number of buffers requested + @return - (u32) number of buffers actually allocated, may be + less than the number requested or zero +*/ +always_inline u32 +vlib_buffer_alloc_to_ring (vlib_main_t * vm, u32 * ring, u32 start, + u32 ring_size, u32 n_buffers) +{ + u32 n_alloc; + + ASSERT (n_buffers <= ring_size); - ASSERT (bm->cb.vlib_buffer_alloc_from_free_list_cb); + if (PREDICT_TRUE (start + n_buffers <= ring_size)) + return vlib_buffer_alloc (vm, ring + start, n_buffers); - return bm->cb.vlib_buffer_alloc_from_free_list_cb (vm, buffers, n_buffers, - free_list_index); + n_alloc = vlib_buffer_alloc (vm, ring + start, ring_size - start); + + if (PREDICT_TRUE (n_alloc == ring_size - start)) + n_alloc += vlib_buffer_alloc (vm, ring, n_buffers - n_alloc); + + return n_alloc; } /** \brief Free buffers @@ -320,7 +550,7 @@ vlib_buffer_free (vlib_main_t * vm, /* number of buffers to free */ u32 n_buffers) { - vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_main_t *bm = &buffer_main; ASSERT (bm->cb.vlib_buffer_free_cb); @@ -341,7 +571,7 @@ vlib_buffer_free_no_next (vlib_main_t * vm, /* number of buffers to free */ u32 n_buffers) { - vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_main_t *bm = &buffer_main; ASSERT (bm->cb.vlib_buffer_free_no_next_cb); @@ -360,61 +590,94 @@ vlib_buffer_free_one (vlib_main_t * vm, u32 buffer_index) vlib_buffer_free (vm, &buffer_index, /* n_buffers */ 1); } +/** \brief Free buffers from ring + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param buffers - (u32 * ) buffer index ring + @param start - (u32) first slot in the ring + @param ring_size - (u32) ring size + @param n_buffers - (u32) number of buffers +*/ +always_inline void +vlib_buffer_free_from_ring (vlib_main_t * vm, u32 * ring, u32 start, + u32 ring_size, u32 n_buffers) +{ + ASSERT (n_buffers <= ring_size); + + if (PREDICT_TRUE (start + n_buffers <= ring_size)) + { + vlib_buffer_free (vm, ring + start, n_buffers); + } + else + { + vlib_buffer_free (vm, ring + start, ring_size - start); + vlib_buffer_free (vm, ring, n_buffers - (ring_size - start)); + } +} + +/** \brief Free buffers from ring without freeing tail buffers + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param buffers - (u32 * ) buffer index ring + @param start - (u32) first slot in the ring + @param ring_size - (u32) ring size + @param n_buffers - (u32) number of buffers +*/ +always_inline void +vlib_buffer_free_from_ring_no_next (vlib_main_t * vm, u32 * ring, u32 start, + u32 ring_size, u32 n_buffers) +{ + ASSERT (n_buffers <= ring_size); + + if (PREDICT_TRUE (start + n_buffers <= ring_size)) + { + vlib_buffer_free_no_next (vm, ring + start, n_buffers); + } + else + { + vlib_buffer_free_no_next (vm, ring + start, ring_size - start); + vlib_buffer_free_no_next (vm, ring, n_buffers - (ring_size - start)); + } +} + /* Add/delete buffer free lists. */ -u32 vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes, - char *fmt, ...); +vlib_buffer_free_list_index_t vlib_buffer_create_free_list (vlib_main_t * vm, + u32 n_data_bytes, + char *fmt, ...); always_inline void -vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) +vlib_buffer_delete_free_list (vlib_main_t * vm, + vlib_buffer_free_list_index_t free_list_index) { - vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_main_t *bm = &buffer_main; ASSERT (bm->cb.vlib_buffer_delete_free_list_cb); bm->cb.vlib_buffer_delete_free_list_cb (vm, free_list_index); } -/* Find already existing public free list with given size or create one. */ -u32 vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes, - char *fmt, ...); - -/* Merge two free lists */ -void vlib_buffer_merge_free_lists (vlib_buffer_free_list_t * dst, - vlib_buffer_free_list_t * src); - /* Make sure we have at least given number of unaligned buffers. */ void vlib_buffer_free_list_fill_unaligned (vlib_main_t * vm, vlib_buffer_free_list_t * free_list, uword n_unaligned_buffers); -always_inline u32 -vlib_buffer_get_free_list_with_size (vlib_main_t * vm, u32 size) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - - size = vlib_buffer_round_size (size); - uword *p = hash_get (bm->free_list_by_size, size); - return p ? p[0] : ~0; -} - always_inline vlib_buffer_free_list_t * vlib_buffer_get_buffer_free_list (vlib_main_t * vm, vlib_buffer_t * b, - u32 * index) + vlib_buffer_free_list_index_t * index) { - vlib_buffer_main_t *bm = vm->buffer_main; - u32 i; + vlib_buffer_free_list_index_t i; *index = i = vlib_buffer_get_free_list_index (b); - return pool_elt_at_index (bm->buffer_free_list_pool, i); + return pool_elt_at_index (vm->buffer_free_list_pool, i); } always_inline vlib_buffer_free_list_t * -vlib_buffer_get_free_list (vlib_main_t * vm, u32 free_list_index) +vlib_buffer_get_free_list (vlib_main_t * vm, + vlib_buffer_free_list_index_t free_list_index) { - vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *f; - f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index); + f = pool_elt_at_index (vm->buffer_free_list_pool, free_list_index); /* Sanity: indices must match. */ ASSERT (f->index == free_list_index); @@ -423,10 +686,10 @@ vlib_buffer_get_free_list (vlib_main_t * vm, u32 free_list_index) } always_inline u32 -vlib_buffer_free_list_buffer_size (vlib_main_t * vm, u32 free_list_index) +vlib_buffer_free_list_buffer_size (vlib_main_t * vm, + vlib_buffer_free_list_index_t index) { - vlib_buffer_free_list_t *f = - vlib_buffer_get_free_list (vm, free_list_index); + vlib_buffer_free_list_t *f = vlib_buffer_get_free_list (vm, index); return f->n_data_bytes; } @@ -455,46 +718,9 @@ vlib_copy_buffers (u32 * dst, u32 * src, u32 n) } } -always_inline void * -vlib_physmem_alloc_aligned (vlib_main_t * vm, clib_error_t ** error, - uword n_bytes, uword alignment) -{ - void *r = - vm->os_physmem_alloc_aligned (&vm->physmem_main, n_bytes, alignment); - if (!r) - *error = - clib_error_return (0, "failed to allocate %wd bytes of I/O memory", - n_bytes); - else - *error = 0; - return r; -} - -/* By default allocate I/O memory with cache line alignment. */ -always_inline void * -vlib_physmem_alloc (vlib_main_t * vm, clib_error_t ** error, uword n_bytes) -{ - return vlib_physmem_alloc_aligned (vm, error, n_bytes, - CLIB_CACHE_LINE_BYTES); -} - -always_inline void -vlib_physmem_free (vlib_main_t * vm, void *mem) -{ - return vm->os_physmem_free (mem); -} - -always_inline u64 -vlib_physmem_virtual_to_physical (vlib_main_t * vm, void *mem) -{ - vlib_physmem_main_t *pm = &vm->physmem_main; - uword o = pointer_to_uword (mem) - pm->virtual.start; - return vlib_physmem_offset_to_physical (pm, o); -} - /* Append given data to end of buffer, possibly allocating new buffers. */ u32 vlib_buffer_add_data (vlib_main_t * vm, - u32 free_list_index, + vlib_buffer_free_list_index_t free_list_index, u32 buffer_index, void *data, u32 n_data_bytes); /* duplicate all buffers in chain */ @@ -532,9 +758,10 @@ vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b) d->flags = s->flags & flag_mask; d->total_length_not_including_first_buffer = s->total_length_not_including_first_buffer; - clib_memcpy (d->opaque, s->opaque, sizeof (s->opaque)); - clib_memcpy (vlib_buffer_get_current (d), - vlib_buffer_get_current (s), s->current_length); + clib_memcpy_fast (d->opaque, s->opaque, sizeof (s->opaque)); + clib_memcpy_fast (d->opaque2, s->opaque2, sizeof (s->opaque2)); + clib_memcpy_fast (vlib_buffer_get_current (d), + vlib_buffer_get_current (s), s->current_length); /* next segments */ for (i = 1; i < n_buffers; i++) @@ -546,35 +773,36 @@ vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b) d = vlib_get_buffer (vm, new_buffers[i]); d->current_data = s->current_data; d->current_length = s->current_length; - clib_memcpy (vlib_buffer_get_current (d), - vlib_buffer_get_current (s), s->current_length); + clib_memcpy_fast (vlib_buffer_get_current (d), + vlib_buffer_get_current (s), s->current_length); d->flags = s->flags & flag_mask; } return fd; } -/** \brief Create multiple clones of buffer and store them in the supplied array +/** \brief Create a maximum of 256 clones of buffer and store them + in the supplied array @param vm - (vlib_main_t *) vlib main data structure pointer @param src_buffer - (u32) source buffer index @param buffers - (u32 * ) buffer index array - @param n_buffers - (u8) number of buffer clones requested + @param n_buffers - (u16) number of buffer clones requested (<=256) @param head_end_offset - (u16) offset relative to current position where packet head ends - @return - (u8) number of buffers actually cloned, may be + @return - (u16) number of buffers actually cloned, may be less than the number requested or zero */ - -always_inline u8 -vlib_buffer_clone (vlib_main_t * vm, u32 src_buffer, u32 * buffers, - u8 n_buffers, u16 head_end_offset) +always_inline u16 +vlib_buffer_clone_256 (vlib_main_t * vm, u32 src_buffer, u32 * buffers, + u16 n_buffers, u16 head_end_offset) { - u8 i; + u16 i; vlib_buffer_t *s = vlib_get_buffer (vm, src_buffer); ASSERT (s->n_add_refs == 0); ASSERT (n_buffers); + ASSERT (n_buffers <= 256); if (s->current_length <= head_end_offset + CLIB_CACHE_LINE_BYTES * 2) { @@ -591,15 +819,16 @@ vlib_buffer_clone (vlib_main_t * vm, u32 src_buffer, u32 * buffers, return n_buffers; } - n_buffers = vlib_buffer_alloc_from_free_list (vm, buffers, n_buffers, - vlib_buffer_get_free_list_index - (s)); - if (PREDICT_FALSE (n_buffers == 0)) + if (PREDICT_FALSE (n_buffers == 1)) { buffers[0] = src_buffer; return 1; } + n_buffers = vlib_buffer_alloc_from_free_list (vm, buffers, n_buffers, + vlib_buffer_get_free_list_index + (s)); + for (i = 0; i < n_buffers; i++) { vlib_buffer_t *d = vlib_get_buffer (vm, buffers[i]); @@ -607,14 +836,20 @@ vlib_buffer_clone (vlib_main_t * vm, u32 src_buffer, u32 * buffers, d->current_length = head_end_offset; vlib_buffer_set_free_list_index (d, vlib_buffer_get_free_list_index (s)); - d->total_length_not_including_first_buffer = - s->total_length_not_including_first_buffer + s->current_length - + + d->total_length_not_including_first_buffer = s->current_length - head_end_offset; + if (PREDICT_FALSE (s->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + d->total_length_not_including_first_buffer += + s->total_length_not_including_first_buffer; + } d->flags = s->flags | VLIB_BUFFER_NEXT_PRESENT; d->flags &= ~VLIB_BUFFER_EXT_HDR_VALID; - clib_memcpy (d->opaque, s->opaque, sizeof (s->opaque)); - clib_memcpy (vlib_buffer_get_current (d), vlib_buffer_get_current (s), - head_end_offset); + clib_memcpy_fast (d->opaque, s->opaque, sizeof (s->opaque)); + clib_memcpy_fast (d->opaque2, s->opaque2, sizeof (s->opaque2)); + clib_memcpy_fast (vlib_buffer_get_current (d), + vlib_buffer_get_current (s), head_end_offset); d->next_buffer = src_buffer; } vlib_buffer_advance (s, head_end_offset); @@ -628,6 +863,42 @@ vlib_buffer_clone (vlib_main_t * vm, u32 src_buffer, u32 * buffers, return n_buffers; } +/** \brief Create multiple clones of buffer and store them + in the supplied array + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param src_buffer - (u32) source buffer index + @param buffers - (u32 * ) buffer index array + @param n_buffers - (u16) number of buffer clones requested (<=256) + @param head_end_offset - (u16) offset relative to current position + where packet head ends + @return - (u16) number of buffers actually cloned, may be + less than the number requested or zero +*/ +always_inline u16 +vlib_buffer_clone (vlib_main_t * vm, u32 src_buffer, u32 * buffers, + u16 n_buffers, u16 head_end_offset) +{ + vlib_buffer_t *s = vlib_get_buffer (vm, src_buffer); + u16 n_cloned = 0; + + while (n_buffers > 256) + { + vlib_buffer_t *copy; + copy = vlib_buffer_copy (vm, s); + n_cloned += vlib_buffer_clone_256 (vm, + vlib_get_buffer_index (vm, copy), + (buffers + n_cloned), + 256, head_end_offset); + n_buffers -= 256; + } + n_cloned += vlib_buffer_clone_256 (vm, src_buffer, + buffers + n_cloned, + n_buffers, head_end_offset); + + return n_cloned; +} + /** \brief Attach cloned tail to the buffer @param vm - (vlib_main_t *) vlib main data structure pointer @@ -652,7 +923,7 @@ vlib_buffer_attach_clone (vlib_main_t * vm, vlib_buffer_t * head, tail->total_length_not_including_first_buffer; next_segment: - __sync_add_and_fetch (&tail->n_add_refs, 1); + clib_atomic_add_fetch (&tail->n_add_refs, 1); if (tail->flags & VLIB_BUFFER_NEXT_PRESENT) { @@ -673,9 +944,7 @@ vlib_buffer_chain_init (vlib_buffer_t * first) /* The provided next_bi buffer index is appended to the end of the packet. */ always_inline vlib_buffer_t * -vlib_buffer_chain_buffer (vlib_main_t * vm, - vlib_buffer_t * first, - vlib_buffer_t * last, u32 next_bi) +vlib_buffer_chain_buffer (vlib_main_t * vm, vlib_buffer_t * last, u32 next_bi) { vlib_buffer_t *next_buffer = vlib_get_buffer (vm, next_bi); last->next_buffer = next_bi; @@ -703,7 +972,7 @@ vlib_buffer_chain_increase_length (vlib_buffer_t * first, * Returns the number of copied bytes. */ always_inline u16 vlib_buffer_chain_append_data (vlib_main_t * vm, - u32 free_list_index, + vlib_buffer_free_list_index_t free_list_index, vlib_buffer_t * first, vlib_buffer_t * last, void *data, u16 data_len) { @@ -713,8 +982,8 @@ vlib_buffer_chain_append_data (vlib_main_t * vm, u16 len = clib_min (data_len, n_buffer_bytes - last->current_length - last->current_data); - clib_memcpy (vlib_buffer_get_current (last) + last->current_length, data, - len); + clib_memcpy_fast (vlib_buffer_get_current (last) + last->current_length, + data, len); vlib_buffer_chain_increase_length (first, last, len); return len; } @@ -726,10 +995,11 @@ vlib_buffer_chain_append_data (vlib_main_t * vm, * chained and points to the last buffer in the chain. */ u16 vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm, - u32 free_list_index, + vlib_buffer_free_list_index_t + free_list_index, vlib_buffer_t * first, - vlib_buffer_t ** last, - void *data, u16 data_len); + vlib_buffer_t ** last, void *data, + u16 data_len); void vlib_buffer_chain_validate (vlib_main_t * vm, vlib_buffer_t * first); format_function_t format_vlib_buffer, format_vlib_buffer_and_data, @@ -740,12 +1010,11 @@ typedef struct /* Vector of packet data. */ u8 *packet_data; - /* Number of buffers to allocate in each call to physmem - allocator. */ - u32 min_n_buffers_each_physmem_alloc; + /* Number of buffers to allocate in each call to allocator. */ + u32 min_n_buffers_each_alloc; /* Buffer free list for this template. */ - u32 free_list_index; + vlib_buffer_free_list_index_t free_list_index; u32 *free_buffers; } vlib_packet_template_t; @@ -757,7 +1026,7 @@ void vlib_packet_template_init (vlib_main_t * vm, vlib_packet_template_t * t, void *packet_data, uword n_packet_data_bytes, - uword min_n_buffers_each_physmem_alloc, + uword min_n_buffers_each_alloc, char *fmt, ...); void *vlib_packet_template_get_packet (vlib_main_t * vm, @@ -819,13 +1088,14 @@ vlib_buffer_init_for_free_list (vlib_buffer_t * dst, /* Make sure buffer template is sane. */ ASSERT (fl->index == vlib_buffer_get_free_list_index (src)); - clib_memcpy (STRUCT_MARK_PTR (dst, template_start), - STRUCT_MARK_PTR (src, template_start), - STRUCT_OFFSET_OF (vlib_buffer_t, template_end) - - STRUCT_OFFSET_OF (vlib_buffer_t, template_start)); + clib_memcpy_fast (STRUCT_MARK_PTR (dst, template_start), + STRUCT_MARK_PTR (src, template_start), + STRUCT_OFFSET_OF (vlib_buffer_t, template_end) - + STRUCT_OFFSET_OF (vlib_buffer_t, template_start)); /* Not in the first 16 octets. */ dst->n_add_refs = src->n_add_refs; + vlib_buffer_set_free_list_index (dst, fl->index); /* Make sure it really worked. */ #define _(f) ASSERT (dst->f == src->f); @@ -847,6 +1117,7 @@ vlib_buffer_add_to_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f, u32 buffer_index, u8 do_init) { + vlib_buffer_pool_t *bp = vlib_buffer_pool_get (f->buffer_pool_index); vlib_buffer_t *b; b = vlib_get_buffer (vm, buffer_index); if (PREDICT_TRUE (do_init)) @@ -855,54 +1126,16 @@ vlib_buffer_add_to_free_list (vlib_main_t * vm, if (vec_len (f->buffers) > 4 * VLIB_FRAME_SIZE) { - vlib_buffer_free_list_t *mf; - mf = vlib_buffer_get_free_list (vlib_mains[0], f->index); - clib_spinlock_lock (&mf->global_buffers_lock); + clib_spinlock_lock (&bp->lock); /* keep last stored buffers, as they are more likely hot in the cache */ - vec_add_aligned (mf->global_buffers, f->buffers, VLIB_FRAME_SIZE, + vec_add_aligned (bp->buffers, f->buffers, VLIB_FRAME_SIZE, CLIB_CACHE_LINE_BYTES); vec_delete (f->buffers, VLIB_FRAME_SIZE, 0); - clib_spinlock_unlock (&mf->global_buffers_lock); + f->n_alloc -= VLIB_FRAME_SIZE; + clib_spinlock_unlock (&bp->lock); } } -always_inline void -vlib_buffer_init_two_for_free_list (vlib_buffer_t * dst0, - vlib_buffer_t * dst1, - vlib_buffer_free_list_t * fl) -{ - vlib_buffer_t *src = &fl->buffer_init_template; - - /* Make sure buffer template is sane. */ - ASSERT (fl->index == vlib_buffer_get_free_list_index (src)); - - clib_memcpy (STRUCT_MARK_PTR (dst0, template_start), - STRUCT_MARK_PTR (src, template_start), - STRUCT_OFFSET_OF (vlib_buffer_t, template_end) - - STRUCT_OFFSET_OF (vlib_buffer_t, template_start)); - - clib_memcpy (STRUCT_MARK_PTR (dst1, template_start), - STRUCT_MARK_PTR (src, template_start), - STRUCT_OFFSET_OF (vlib_buffer_t, template_end) - - STRUCT_OFFSET_OF (vlib_buffer_t, template_start)); - - /* Not in the first 16 octets. */ - dst0->n_add_refs = src->n_add_refs; - dst1->n_add_refs = src->n_add_refs; - - /* Make sure it really worked. */ -#define _(f) ASSERT (dst0->f == src->f); ASSERT( dst1->f == src->f) - _(current_data); - _(current_length); - _(flags); -#undef _ - - ASSERT (dst0->total_length_not_including_first_buffer == 0); - ASSERT (dst1->total_length_not_including_first_buffer == 0); - ASSERT (dst0->n_add_refs == 0); - ASSERT (dst1->n_add_refs == 0); -} - #if CLIB_DEBUG > 0 extern u32 *vlib_buffer_state_validation_lock; extern uword *vlib_buffer_state_validation_hash; @@ -918,7 +1151,7 @@ vlib_validate_buffer_in_use (vlib_buffer_t * b, u32 expected) oldheap = clib_mem_set_heap (vlib_buffer_state_heap); - while (__sync_lock_test_and_set (vlib_buffer_state_validation_lock, 1)) + while (clib_atomic_test_and_set (vlib_buffer_state_validation_lock)) ; p = hash_get (vlib_buffer_state_validation_hash, b); @@ -961,7 +1194,7 @@ vlib_validate_buffer_set_in_use (vlib_buffer_t * b, u32 expected) oldheap = clib_mem_set_heap (vlib_buffer_state_heap); - while (__sync_lock_test_and_set (vlib_buffer_state_validation_lock, 1)) + while (clib_atomic_test_and_set (vlib_buffer_state_validation_lock)) ; hash_set (vlib_buffer_state_validation_hash, b, expected); @@ -972,6 +1205,68 @@ vlib_validate_buffer_set_in_use (vlib_buffer_t * b, u32 expected) #endif } +/** minimum data size of first buffer in a buffer chain */ +#define VLIB_BUFFER_CHAIN_MIN_FIRST_DATA_SIZE (256) + +/** + * @brief compress buffer chain in a way where the first buffer is at least + * VLIB_BUFFER_CHAIN_MIN_FIRST_DATA_SIZE long + * + * @param[in] vm - vlib_main + * @param[in,out] first - first buffer in chain + * @param[in,out] discard_vector - vector of buffer indexes which were removed + * from the chain + */ +always_inline void +vlib_buffer_chain_compress (vlib_main_t * vm, + vlib_buffer_t * first, u32 ** discard_vector) +{ + if (first->current_length >= VLIB_BUFFER_CHAIN_MIN_FIRST_DATA_SIZE || + !(first->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + /* this is already big enough or not a chain */ + return; + } + /* probe free list to find allocated buffer size to avoid overfill */ + vlib_buffer_free_list_index_t index; + vlib_buffer_free_list_t *free_list = + vlib_buffer_get_buffer_free_list (vm, first, &index); + + u32 want_first_size = clib_min (VLIB_BUFFER_CHAIN_MIN_FIRST_DATA_SIZE, + free_list->n_data_bytes - + first->current_data); + do + { + vlib_buffer_t *second = vlib_get_buffer (vm, first->next_buffer); + u32 need = want_first_size - first->current_length; + u32 amount_to_copy = clib_min (need, second->current_length); + clib_memcpy_fast (((u8 *) vlib_buffer_get_current (first)) + + first->current_length, + vlib_buffer_get_current (second), amount_to_copy); + first->current_length += amount_to_copy; + vlib_buffer_advance (second, amount_to_copy); + if (first->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID) + { + first->total_length_not_including_first_buffer -= amount_to_copy; + } + if (!second->current_length) + { + vec_add1 (*discard_vector, first->next_buffer); + if (second->flags & VLIB_BUFFER_NEXT_PRESENT) + { + first->next_buffer = second->next_buffer; + } + else + { + first->flags &= ~VLIB_BUFFER_NEXT_PRESENT; + } + second->flags &= ~VLIB_BUFFER_NEXT_PRESENT; + } + } + while ((first->current_length < want_first_size) && + (first->flags & VLIB_BUFFER_NEXT_PRESENT)); +} + #endif /* included_vlib_buffer_funcs_h */ /*