X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlib%2Fbuffer.c;h=d024aba1e0faae2e1b78935f3130c77e12de7e5a;hb=671e60e65635b8d030bf303c88411192c747b59e;hp=4bf6d125b2116b7f3c7045fef1ca7d95fb3565c2;hpb=7cd468a3d7dee7d6c92f69a0bb7061ae208ec727;p=vpp.git diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 4bf6d125b21..d024aba1e0f 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -38,49 +38,25 @@ */ /** - * @cond (!DPDK) * @file * * Allocate/free network buffers. */ -#if DPDK > 0 -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#endif - #include +#include + +vlib_buffer_callbacks_t *vlib_buffer_callbacks = 0; + +/* when running unpriviledged we are limited by RLIMIT_MEMLOCK which is + typically set to 16MB so setting default size for buffer memory to 14MB + */ +static u32 vlib_buffer_physmem_sz = 14 << 20; + +vlib_buffer_main_t buffer_main; -#if DPDK > 0 -#pragma weak rte_mem_virt2phy -#pragma weak rte_eal_has_hugepages -#pragma weak rte_socket_id -#pragma weak rte_pktmbuf_pool_create -#endif +/* logging */ +static vlib_log_class_t buffer_log_default; uword vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm, @@ -103,11 +79,17 @@ u8 * format_vlib_buffer (u8 * s, va_list * args) { vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *); -#if DPDK > 0 - uword indent = format_get_indent (s); - - s = format (s, "current data %d, length %d, free-list %d", - b->current_data, b->current_length, b->free_list_index); + u32 indent = format_get_indent (s); + u8 *a = 0; + +#define _(bit, name, v) \ + if (v && (b->flags & VLIB_BUFFER_##name)) \ + a = format (a, "%s ", v); + foreach_vlib_buffer_flag +#undef _ + s = format (s, "current data %d, length %d, free-list %d, clone-count %u", + b->current_data, b->current_length, + vlib_buffer_get_free_list_index (b), b->n_add_refs); if (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID) s = format (s, ", totlen-nifb %d", @@ -116,28 +98,22 @@ format_vlib_buffer (u8 * s, va_list * args) if (b->flags & VLIB_BUFFER_IS_TRACED) s = format (s, ", trace 0x%x", b->trace_index); + if (a) + s = format (s, "\n%U%v", format_white_space, indent, a); + vec_free (a); + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) { vlib_main_t *vm = vlib_get_main (); u32 next_buffer = b->next_buffer; b = vlib_get_buffer (vm, next_buffer); - s = format (s, "\n%Unext-buffer 0x%x, segment length %d", - format_white_space, indent, next_buffer, b->current_length); + s = + format (s, "\n%Unext-buffer 0x%x, segment length %d, clone-count %u", + format_white_space, indent, next_buffer, b->current_length, + b->n_add_refs); } -#else - - s = format (s, "current data %d, length %d, free-list %d", - b->current_data, b->current_length, b->free_list_index); - - if (b->flags & VLIB_BUFFER_IS_TRACED) - s = format (s, ", trace 0x%x", b->trace_index); - - if (b->flags & VLIB_BUFFER_NEXT_PRESENT) - s = format (s, ", next-buffer 0x%x", b->next_buffer); -#endif - return s; } @@ -153,7 +129,6 @@ format_vlib_buffer_and_data (u8 * s, va_list * args) return s; } -#if DPDK == 0 static u8 * format_vlib_buffer_known_state (u8 * s, va_list * args) { @@ -181,7 +156,6 @@ format_vlib_buffer_known_state (u8 * s, va_list * args) return format (s, "%s", t); } -#endif u8 * format_vlib_buffer_contents (u8 * s, va_list * va) @@ -200,35 +174,36 @@ format_vlib_buffer_contents (u8 * s, va_list * va) return s; } -#if DPDK == 0 static u8 * vlib_validate_buffer_helper (vlib_main_t * vm, u32 bi, uword follow_buffer_next, uword ** unique_hash) { vlib_buffer_t *b = vlib_get_buffer (vm, bi); - vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *fl; - if (pool_is_free_index (bm->buffer_free_list_pool, b->free_list_index)) - return format (0, "unknown free list 0x%x", b->free_list_index); + if (pool_is_free_index + (vm->buffer_free_list_pool, vlib_buffer_get_free_list_index (b))) + return format (0, "unknown free list 0x%x", + vlib_buffer_get_free_list_index (b)); - fl = pool_elt_at_index (bm->buffer_free_list_pool, b->free_list_index); + fl = + pool_elt_at_index (vm->buffer_free_list_pool, + vlib_buffer_get_free_list_index (b)); if ((signed) b->current_data < (signed) -VLIB_BUFFER_PRE_DATA_SIZE) return format (0, "current data %d before pre-data", b->current_data); -#if DPDK == 0 + if (b->current_data + b->current_length > fl->n_data_bytes) return format (0, "%d-%d beyond end of buffer %d", b->current_data, b->current_length, fl->n_data_bytes); -#endif if (follow_buffer_next && (b->flags & VLIB_BUFFER_NEXT_PRESENT)) { vlib_buffer_known_state_t k; u8 *msg, *result; - k = vlib_buffer_is_known (vm, b->next_buffer); + k = vlib_buffer_is_known (b->next_buffer); if (k != VLIB_BUFFER_KNOWN_ALLOCATED) return format (0, "next 0x%x: %U", b->next_buffer, format_vlib_buffer_known_state, k); @@ -286,7 +261,7 @@ vlib_validate_buffers (vlib_main_t * vm, goto done; } - k = vlib_buffer_is_known (vm, bi); + k = vlib_buffer_is_known (bi); if (k != known_state) { msg = format (0, "is %U; expected %U", @@ -311,14 +286,33 @@ done: hash_free (hash); return result; } -#endif -vlib_main_t **vlib_mains; +/* + * Hand-craft a static vector w/ length 1, so vec_len(vlib_mains) =1 + * and vlib_mains[0] = &vlib_global_main from the beginning of time. + * + * The only place which should ever expand vlib_mains is start_workers() + * in threads.c. It knows about the bootstrap vector. + */ +/* *INDENT-OFF* */ +static struct +{ + vec_header_t h; + vlib_main_t *vm; +} __attribute__ ((packed)) __bootstrap_vlib_main_vector + __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES))) = +{ + .h.len = 1, + .vm = &vlib_global_main, +}; +/* *INDENT-ON* */ + +vlib_main_t **vlib_mains = &__bootstrap_vlib_main_vector.vm; + -#if DPDK == 0 /* When dubugging validate that given buffers are either known allocated or known free. */ -static void +void vlib_buffer_validate_alloc_free (vlib_main_t * vm, u32 * buffers, uword n_buffers, @@ -330,11 +324,8 @@ vlib_buffer_validate_alloc_free (vlib_main_t * vm, if (CLIB_DEBUG == 0) return; - ASSERT (os_get_cpu_number () == 0); - - /* smp disaster check */ - if (vlib_mains) - ASSERT (vm == vlib_mains[0]); + if (vlib_buffer_callbacks) + return; is_free = expected_state == VLIB_BUFFER_KNOWN_ALLOCATED; b = buffers; @@ -344,7 +335,7 @@ vlib_buffer_validate_alloc_free (vlib_main_t * vm, bi = b[0]; b += 1; - known = vlib_buffer_is_known (vm, bi); + known = vlib_buffer_is_known (bi); if (known != expected_state) { ASSERT (0); @@ -355,122 +346,25 @@ vlib_buffer_validate_alloc_free (vlib_main_t * vm, } vlib_buffer_set_known_state - (vm, bi, - is_free ? VLIB_BUFFER_KNOWN_FREE : VLIB_BUFFER_KNOWN_ALLOCATED); + (bi, is_free ? VLIB_BUFFER_KNOWN_FREE : VLIB_BUFFER_KNOWN_ALLOCATED); } } -#endif - -#define BUFFERS_PER_COPY (sizeof (vlib_copy_unit_t) / sizeof (u32)) - -/* Make sure we have at least given number of unaligned buffers. */ -static void -fill_unaligned (vlib_main_t * vm, - vlib_buffer_free_list_t * free_list, - uword n_unaligned_buffers) -{ - word la = vec_len (free_list->aligned_buffers); - word lu = vec_len (free_list->unaligned_buffers); - - /* Aligned come in aligned copy-sized chunks. */ - ASSERT (la % BUFFERS_PER_COPY == 0); - - ASSERT (la >= n_unaligned_buffers); - - while (lu < n_unaligned_buffers) - { - /* Copy 4 buffers from end of aligned vector to unaligned vector. */ - vec_add (free_list->unaligned_buffers, - free_list->aligned_buffers + la - BUFFERS_PER_COPY, - BUFFERS_PER_COPY); - la -= BUFFERS_PER_COPY; - lu += BUFFERS_PER_COPY; - } - _vec_len (free_list->aligned_buffers) = la; -} - -/* After free aligned buffers may not contain even sized chunks. */ -static void -trim_aligned (vlib_buffer_free_list_t * f) -{ - uword l, n_trim; - - /* Add unaligned to aligned before trim. */ - l = vec_len (f->unaligned_buffers); - if (l > 0) - { - vec_add_aligned (f->aligned_buffers, f->unaligned_buffers, l, - /* align */ sizeof (vlib_copy_unit_t)); - - _vec_len (f->unaligned_buffers) = 0; - } - - /* Remove unaligned buffers from end of aligned vector and save for next trim. */ - l = vec_len (f->aligned_buffers); - n_trim = l % BUFFERS_PER_COPY; - if (n_trim) - { - /* Trim aligned -> unaligned. */ - vec_add (f->unaligned_buffers, f->aligned_buffers + l - n_trim, n_trim); - - /* Remove from aligned. */ - _vec_len (f->aligned_buffers) = l - n_trim; - } -} - -static void -merge_free_lists (vlib_buffer_free_list_t * dst, - vlib_buffer_free_list_t * src) -{ - uword l; - u32 *d; - - trim_aligned (src); - trim_aligned (dst); - - l = vec_len (src->aligned_buffers); - if (l > 0) - { - vec_add2_aligned (dst->aligned_buffers, d, l, - /* align */ sizeof (vlib_copy_unit_t)); - clib_memcpy (d, src->aligned_buffers, l * sizeof (d[0])); - vec_free (src->aligned_buffers); - } - - l = vec_len (src->unaligned_buffers); - if (l > 0) - { - vec_add (dst->unaligned_buffers, src->unaligned_buffers, l); - vec_free (src->unaligned_buffers); - } -} - -always_inline u32 -vlib_buffer_get_free_list_with_size (vlib_main_t * vm, u32 size) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - - size = vlib_buffer_round_size (size); - uword *p = hash_get (bm->free_list_by_size, size); - return p ? p[0] : ~0; -} /* Add buffer free list. */ -static u32 +static vlib_buffer_free_list_index_t vlib_buffer_create_free_list_helper (vlib_main_t * vm, u32 n_data_bytes, u32 is_public, u32 is_default, u8 * name) { - vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_main_t *bm = &buffer_main; vlib_buffer_free_list_t *f; -#if DPDK > 0 int i; - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); - if (!is_default && pool_elts (bm->buffer_free_list_pool) == 0) + if (!is_default && pool_elts (vm->buffer_free_list_pool) == 0) { - u32 default_free_free_list_index; + vlib_buffer_free_list_index_t default_free_free_list_index; /* *INDENT-OFF* */ default_free_free_list_index = @@ -488,16 +382,20 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm, return default_free_free_list_index; } - pool_get_aligned (bm->buffer_free_list_pool, f, CLIB_CACHE_LINE_BYTES); + pool_get_aligned (vm->buffer_free_list_pool, f, CLIB_CACHE_LINE_BYTES); - memset (f, 0, sizeof (f[0])); - f->index = f - bm->buffer_free_list_pool; + clib_memset (f, 0, sizeof (f[0])); + f->index = f - vm->buffer_free_list_pool; + vec_validate (f->buffers, 0); + vec_reset_length (f->buffers); f->n_data_bytes = vlib_buffer_round_size (n_data_bytes); - f->min_n_buffers_each_physmem_alloc = 16; - f->name = clib_mem_is_heap_object (name) ? name : format (0, "%s", name); + f->min_n_buffers_each_alloc = VLIB_FRAME_SIZE; + f->buffer_pool_index = 0; + f->name = clib_mem_is_vec (name) ? name : format (0, "%s", name); /* Setup free buffer template. */ - f->buffer_init_template.free_list_index = f->index; + vlib_buffer_set_free_list_index (&f->buffer_init_template, f->index); + f->buffer_init_template.n_add_refs = 0; if (is_public) { @@ -508,63 +406,23 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm, for (i = 1; i < vec_len (vlib_mains); i++) { - vlib_buffer_main_t *wbm = vlib_mains[i]->buffer_main; + vlib_main_t *wvm = vlib_mains[i]; vlib_buffer_free_list_t *wf; - pool_get_aligned (wbm->buffer_free_list_pool, + pool_get_aligned (wvm->buffer_free_list_pool, wf, CLIB_CACHE_LINE_BYTES); - ASSERT (f - bm->buffer_free_list_pool == - wf - wbm->buffer_free_list_pool); + ASSERT (f - vm->buffer_free_list_pool == + wf - wvm->buffer_free_list_pool); wf[0] = f[0]; - wf->aligned_buffers = 0; - wf->unaligned_buffers = 0; + wf->buffers = 0; + vec_validate (wf->buffers, 0); + vec_reset_length (wf->buffers); wf->n_alloc = 0; } -#else - - if (!is_default && pool_elts (bm->buffer_free_list_pool) == 0) - { - u32 default_free_free_list_index; - - default_free_free_list_index = vlib_buffer_create_free_list_helper (vm, - /* default buffer size */ - VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, - /* is_public */ - 1, - /* is_default */ - 1, - (u8 - *) - "default"); - ASSERT (default_free_free_list_index == - VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - - if (n_data_bytes == VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES && is_public) - return default_free_free_list_index; - } - - pool_get_aligned (bm->buffer_free_list_pool, f, CLIB_CACHE_LINE_BYTES); - - memset (f, 0, sizeof (f[0])); - f->index = f - bm->buffer_free_list_pool; - f->n_data_bytes = vlib_buffer_round_size (n_data_bytes); - f->min_n_buffers_each_physmem_alloc = 256; - f->name = clib_mem_is_heap_object (name) ? name : format (0, "%s", name); - - /* Setup free buffer template. */ - f->buffer_init_template.free_list_index = f->index; - - if (is_public) - { - uword *p = hash_get (bm->free_list_by_size, f->n_data_bytes); - if (!p) - hash_set (bm->free_list_by_size, f->n_data_bytes, f->index); - } -#endif return f->index; } -u32 +vlib_buffer_free_list_index_t vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes, char *fmt, ...) { @@ -581,531 +439,167 @@ vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes, name); } -u32 -vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes, - char *fmt, ...) -{ - u32 i = vlib_buffer_get_free_list_with_size (vm, n_data_bytes); - - if (i == ~0) - { - va_list va; - u8 *name; - - va_start (va, fmt); - name = va_format (0, fmt, &va); - va_end (va); - - i = vlib_buffer_create_free_list_helper (vm, n_data_bytes, - /* is_public */ 1, - /* is_default */ 0, - name); - } - - return i; -} - static void del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) { - u32 i; -#if DPDK > 0 - struct rte_mbuf *mb; - vlib_buffer_t *b; + vlib_buffer_pool_t *bp = vlib_buffer_pool_get (f->buffer_pool_index); - for (i = 0; i < vec_len (f->unaligned_buffers); i++) - { - b = vlib_get_buffer (vm, f->unaligned_buffers[i]); - mb = rte_mbuf_from_vlib_buffer (b); - ASSERT (rte_mbuf_refcnt_read (mb) == 1); - rte_pktmbuf_free (mb); - } - for (i = 0; i < vec_len (f->aligned_buffers); i++) - { - b = vlib_get_buffer (vm, f->aligned_buffers[i]); - mb = rte_mbuf_from_vlib_buffer (b); - ASSERT (rte_mbuf_refcnt_read (mb) == 1); - rte_pktmbuf_free (mb); - } + vec_add_aligned (bp->buffers, f->buffers, vec_len (f->buffers), + CLIB_CACHE_LINE_BYTES); vec_free (f->name); -#else + vec_free (f->buffers); - for (i = 0; i < vec_len (f->buffer_memory_allocated); i++) - vm->os_physmem_free (f->buffer_memory_allocated[i]); - vec_free (f->name); - vec_free (f->buffer_memory_allocated); -#endif - vec_free (f->unaligned_buffers); - vec_free (f->aligned_buffers); + /* Poison it. */ + clib_memset (f, 0xab, sizeof (f[0])); } /* Add buffer free list. */ void -vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) +vlib_buffer_delete_free_list_internal (vlib_main_t * vm, + vlib_buffer_free_list_index_t index) { - vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *f; - u32 merge_index; -#if DPDK > 0 int i; - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); - f = vlib_buffer_get_free_list (vm, free_list_index); + f = vlib_buffer_get_free_list (vm, index); - merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); - if (merge_index != ~0 && merge_index != free_list_index) - { - merge_free_lists (pool_elt_at_index (bm->buffer_free_list_pool, - merge_index), f); - } + ASSERT (vec_len (f->buffers) == f->n_alloc); del_free_list (vm, f); - /* Poison it. */ - memset (f, 0xab, sizeof (f[0])); - - pool_put (bm->buffer_free_list_pool, f); + pool_put (vm->buffer_free_list_pool, f); for (i = 1; i < vec_len (vlib_mains); i++) { - bm = vlib_mains[i]->buffer_main; - f = vlib_buffer_get_free_list (vlib_mains[i], free_list_index);; - memset (f, 0xab, sizeof (f[0])); - pool_put (bm->buffer_free_list_pool, f); - } -#else - - f = vlib_buffer_get_free_list (vm, free_list_index); - - ASSERT (vec_len (f->unaligned_buffers) + vec_len (f->aligned_buffers) == - f->n_alloc); - merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); - if (merge_index != ~0 && merge_index != free_list_index) - { - merge_free_lists (pool_elt_at_index (bm->buffer_free_list_pool, - merge_index), f); + vlib_main_t *wvm = vlib_mains[i]; + f = vlib_buffer_get_free_list (vlib_mains[i], index); + del_free_list (wvm, f); + pool_put (wvm->buffer_free_list_pool, f); } +} - del_free_list (vm, f); - - /* Poison it. */ - memset (f, 0xab, sizeof (f[0])); - - pool_put (bm->buffer_free_list_pool, f); -#endif +static_always_inline void * +vlib_buffer_pool_get_buffer (vlib_main_t * vm, vlib_buffer_pool_t * bp) +{ + return vlib_physmem_alloc_from_map (vm, bp->physmem_map_index, + bp->buffer_size, CLIB_CACHE_LINE_BYTES); } /* Make sure free list has at least given number of free buffers. */ static uword -fill_free_list (vlib_main_t * vm, - vlib_buffer_free_list_t * fl, uword min_free_buffers) +vlib_buffer_fill_free_list_internal (vlib_main_t * vm, + vlib_buffer_free_list_t * fl, + uword min_free_buffers) { -#if DPDK > 0 vlib_buffer_t *b; - int n, i; - u32 bi; - u32 n_remaining = 0, n_alloc = 0; - unsigned socket_id = rte_socket_id ? rte_socket_id () : 0; - struct rte_mempool *rmp = vm->buffer_main->pktmbuf_pools[socket_id]; - struct rte_mbuf *mb; - - /* Too early? */ - if (PREDICT_FALSE (rmp == 0)) - return 0; - - trim_aligned (fl); + vlib_buffer_pool_t *bp = vlib_buffer_pool_get (fl->buffer_pool_index); + int n; + u32 *bi; + u32 n_alloc = 0; /* Already have enough free buffers on free list? */ - n = min_free_buffers - vec_len (fl->aligned_buffers); + n = min_free_buffers - vec_len (fl->buffers); if (n <= 0) return min_free_buffers; - /* Always allocate round number of buffers. */ - n = round_pow2 (n, BUFFERS_PER_COPY); - - /* Always allocate new buffers in reasonably large sized chunks. */ - n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); - - vec_validate (vm->mbuf_alloc_list, n - 1); - - if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0) - return 0; - - _vec_len (vm->mbuf_alloc_list) = n; - - for (i = 0; i < n; i++) + if (vec_len (bp->buffers) > 0) { - mb = vm->mbuf_alloc_list[i]; - - ASSERT (rte_mbuf_refcnt_read (mb) == 0); - rte_mbuf_refcnt_set (mb, 1); - - b = vlib_buffer_from_rte_mbuf (mb); - bi = vlib_get_buffer_index (vm, b); - - vec_add1_aligned (fl->aligned_buffers, bi, sizeof (vlib_copy_unit_t)); - n_alloc++; - n_remaining--; - - vlib_buffer_init_for_free_list (b, fl); - - if (fl->buffer_init_function) - fl->buffer_init_function (vm, fl, &bi, 1); + int n_copy, n_left; + clib_spinlock_lock (&bp->lock); + n_copy = clib_min (vec_len (bp->buffers), n); + n_left = vec_len (bp->buffers) - n_copy; + vec_add_aligned (fl->buffers, bp->buffers + n_left, n_copy, + CLIB_CACHE_LINE_BYTES); + _vec_len (bp->buffers) = n_left; + clib_spinlock_unlock (&bp->lock); + n = min_free_buffers - vec_len (fl->buffers); + if (n <= 0) + return min_free_buffers; } - fl->n_alloc += n; - - return n; -#else - vlib_buffer_t *buffers, *b; - int n, n_bytes, i; - u32 *bi; - u32 n_remaining, n_alloc, n_this_chunk; - - trim_aligned (fl); - - /* Already have enough free buffers on free list? */ - n = min_free_buffers - vec_len (fl->aligned_buffers); - if (n <= 0) - return min_free_buffers; - /* Always allocate round number of buffers. */ - n = round_pow2 (n, BUFFERS_PER_COPY); + n = round_pow2 (n, CLIB_CACHE_LINE_BYTES / sizeof (u32)); /* Always allocate new buffers in reasonably large sized chunks. */ - n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); + n = clib_max (n, fl->min_n_buffers_each_alloc); - n_remaining = n; - n_alloc = 0; - while (n_remaining > 0) + clib_spinlock_lock (&bp->lock); + while (n_alloc < n) { - n_this_chunk = clib_min (n_remaining, 16); - - n_bytes = n_this_chunk * (sizeof (b[0]) + fl->n_data_bytes); - - /* drb: removed power-of-2 ASSERT */ - buffers = vm->os_physmem_alloc_aligned (&vm->physmem_main, - n_bytes, - sizeof (vlib_buffer_t)); - if (!buffers) - return n_alloc; + if ((b = vlib_buffer_pool_get_buffer (vm, bp)) == 0) + goto done; - /* Record chunk as being allocated so we can free it later. */ - vec_add1 (fl->buffer_memory_allocated, buffers); + n_alloc += 1; - fl->n_alloc += n_this_chunk; - n_alloc += n_this_chunk; - n_remaining -= n_this_chunk; + vec_add2_aligned (fl->buffers, bi, 1, CLIB_CACHE_LINE_BYTES); + bi[0] = vlib_get_buffer_index (vm, b); - b = buffers; - vec_add2_aligned (fl->aligned_buffers, bi, n_this_chunk, - sizeof (vlib_copy_unit_t)); - for (i = 0; i < n_this_chunk; i++) - { - bi[i] = vlib_get_buffer_index (vm, b); + if (CLIB_DEBUG > 0) + vlib_buffer_set_known_state (bi[0], VLIB_BUFFER_KNOWN_FREE); - if (CLIB_DEBUG > 0) - vlib_buffer_set_known_state (vm, bi[i], VLIB_BUFFER_KNOWN_FREE); - b = vlib_buffer_next_contiguous (b, fl->n_data_bytes); - } - - memset (buffers, 0, n_bytes); - - /* Initialize all new buffers. */ - b = buffers; - for (i = 0; i < n_this_chunk; i++) - { - vlib_buffer_init_for_free_list (b, fl); - b = vlib_buffer_next_contiguous (b, fl->n_data_bytes); - } + clib_memset (b, 0, sizeof (vlib_buffer_t)); + vlib_buffer_init_for_free_list (b, fl); if (fl->buffer_init_function) - fl->buffer_init_function (vm, fl, bi, n_this_chunk); - } - return n_alloc; -#endif -} - -always_inline uword -copy_alignment (u32 * x) -{ - return (pointer_to_uword (x) / sizeof (x[0])) % BUFFERS_PER_COPY; -} - -static u32 -alloc_from_free_list (vlib_main_t * vm, - vlib_buffer_free_list_t * free_list, - u32 * alloc_buffers, u32 n_alloc_buffers) -{ - u32 *dst, *u_src; - uword u_len, n_left; - uword n_unaligned_start, n_unaligned_end, n_filled; - -#if DPDK == 0 - ASSERT (os_get_cpu_number () == 0); - -#endif - n_left = n_alloc_buffers; - dst = alloc_buffers; - n_unaligned_start = ((BUFFERS_PER_COPY - copy_alignment (dst)) - & (BUFFERS_PER_COPY - 1)); - - n_filled = fill_free_list (vm, free_list, n_alloc_buffers); - if (n_filled == 0) - return 0; - - n_left = n_filled < n_left ? n_filled : n_left; - n_alloc_buffers = n_left; - - if (n_unaligned_start >= n_left) - { - n_unaligned_start = n_left; - n_unaligned_end = 0; - } - else - n_unaligned_end = copy_alignment (dst + n_alloc_buffers); - - fill_unaligned (vm, free_list, n_unaligned_start + n_unaligned_end); - - u_len = vec_len (free_list->unaligned_buffers); - u_src = free_list->unaligned_buffers + u_len - 1; - - if (n_unaligned_start) - { - uword n_copy = n_unaligned_start; - if (n_copy > n_left) - n_copy = n_left; - n_left -= n_copy; - - while (n_copy > 0) - { - *dst++ = *u_src--; - n_copy--; - u_len--; - } - - /* Now dst should be aligned. */ - if (n_left > 0) - ASSERT (pointer_to_uword (dst) % sizeof (vlib_copy_unit_t) == 0); - } - - /* Aligned copy. */ - { - vlib_copy_unit_t *d, *s; - uword n_copy; - - if (vec_len (free_list->aligned_buffers) < - ((n_left / BUFFERS_PER_COPY) * BUFFERS_PER_COPY)) - abort (); - - n_copy = n_left / BUFFERS_PER_COPY; - n_left = n_left % BUFFERS_PER_COPY; - - /* Remove buffers from aligned free list. */ - _vec_len (free_list->aligned_buffers) -= n_copy * BUFFERS_PER_COPY; - - s = (vlib_copy_unit_t *) vec_end (free_list->aligned_buffers); - d = (vlib_copy_unit_t *) dst; - - /* Fast path loop. */ - while (n_copy >= 4) - { - d[0] = s[0]; - d[1] = s[1]; - d[2] = s[2]; - d[3] = s[3]; - n_copy -= 4; - s += 4; - d += 4; - } - - while (n_copy >= 1) - { - d[0] = s[0]; - n_copy -= 1; - s += 1; - d += 1; - } - - dst = (void *) d; - } - - /* Unaligned copy. */ - ASSERT (n_unaligned_end == n_left); - while (n_left > 0) - { - *dst++ = *u_src--; - n_left--; - u_len--; + fl->buffer_init_function (vm, fl, bi, 1); } - if (!free_list->unaligned_buffers) - ASSERT (u_len == 0); - else - _vec_len (free_list->unaligned_buffers) = u_len; - -#if DPDK == 0 - /* Verify that buffers are known free. */ - vlib_buffer_validate_alloc_free (vm, alloc_buffers, - n_alloc_buffers, VLIB_BUFFER_KNOWN_FREE); -#endif - - return n_alloc_buffers; -} - -/* Allocate a given number of buffers into given array. - Returns number actually allocated which will be either zero or - number requested. */ -u32 -vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) -{ - vlib_buffer_main_t *bm = vm->buffer_main; -#if DPDK == 0 - ASSERT (os_get_cpu_number () == 0); -#endif - - return alloc_from_free_list - (vm, - pool_elt_at_index (bm->buffer_free_list_pool, - VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX), - buffers, n_buffers); -} - -u32 -vlib_buffer_alloc_from_free_list (vlib_main_t * vm, - u32 * buffers, - u32 n_buffers, u32 free_list_index) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *f; - f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index); - return alloc_from_free_list (vm, f, buffers, n_buffers); -} - -always_inline void -add_buffer_to_free_list (vlib_main_t * vm, - vlib_buffer_free_list_t * f, - u32 buffer_index, u8 do_init) -{ - vlib_buffer_t *b; - b = vlib_get_buffer (vm, buffer_index); - if (PREDICT_TRUE (do_init)) - vlib_buffer_init_for_free_list (b, f); - vec_add1_aligned (f->aligned_buffers, buffer_index, - sizeof (vlib_copy_unit_t)); -} - -always_inline vlib_buffer_free_list_t * -buffer_get_free_list (vlib_main_t * vm, vlib_buffer_t * b, u32 * index) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - u32 i; - - *index = i = b->free_list_index; - return pool_elt_at_index (bm->buffer_free_list_pool, i); +done: + clib_spinlock_unlock (&bp->lock); + fl->n_alloc += n_alloc; + return n_alloc; } void * vlib_set_buffer_free_callback (vlib_main_t * vm, void *fp) { - vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_main_t *bm = &buffer_main; void *rv = bm->buffer_free_callback; bm->buffer_free_callback = fp; return rv; } -#if DPDK == 0 -void vnet_buffer_free_dpdk_mb (vlib_buffer_t * b) __attribute__ ((weak)); -void -vnet_buffer_free_dpdk_mb (vlib_buffer_t * b) -{ -} - -#endif static_always_inline void -vlib_buffer_free_inline (vlib_main_t * vm, - u32 * buffers, u32 n_buffers, u32 follow_buffer_next) +recycle_or_free (vlib_main_t * vm, vlib_buffer_main_t * bm, u32 bi, + vlib_buffer_t * b, u32 follow_buffer_next) { -#if DPDK > 0 - vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *fl; - u32 fi; - int i; - u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers, - u32 follow_buffer_next); + vlib_buffer_free_list_index_t fi; + u32 flags, next; - cb = bm->buffer_free_callback; + fl = vlib_buffer_get_buffer_free_list (vm, b, &fi); - if (PREDICT_FALSE (cb != 0)) - n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next); - - if (!n_buffers) - return; - - for (i = 0; i < n_buffers; i++) + do { - vlib_buffer_t *b; - struct rte_mbuf *mb; - - b = vlib_get_buffer (vm, buffers[i]); - - fl = buffer_get_free_list (vm, b, &fi); - - /* The only current use of this callback: multicast recycle */ - if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0)) - { - int j; - - add_buffer_to_free_list - (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0); - - for (j = 0; j < vec_len (bm->announce_list); j++) - { - if (fl == bm->announce_list[j]) - goto already_announced; - } - vec_add1 (bm->announce_list, fl); - already_announced: - ; - } + vlib_buffer_t *nb = vlib_get_buffer (vm, bi); + flags = nb->flags; + next = nb->next_buffer; + if (nb->n_add_refs) + nb->n_add_refs--; else { - if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0)) - { - mb = rte_mbuf_from_vlib_buffer (b); - ASSERT (rte_mbuf_refcnt_read (mb) == 1); - rte_pktmbuf_free (mb); - } - } - } - if (vec_len (bm->announce_list)) - { - vlib_buffer_free_list_t *fl; - for (i = 0; i < vec_len (bm->announce_list); i++) - { - fl = bm->announce_list[i]; - fl->buffers_added_to_freelist_function (vm, fl); + vlib_buffer_validate_alloc_free (vm, &bi, 1, + VLIB_BUFFER_KNOWN_ALLOCATED); + vlib_buffer_add_to_free_list (vm, fl, bi, 1); } - _vec_len (bm->announce_list) = 0; + bi = next; } -#else - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *fl; - static u32 *next_to_free[2]; /* smp bad */ - u32 i_next_to_free, *b, *n, *f, fi; - uword n_left; - int i; - static vlib_buffer_free_list_t **announce_list; - vlib_buffer_free_list_t *fl0 = 0, *fl1 = 0; - u32 bi0 = (u32) ~ 0, bi1 = (u32) ~ 0, fi0, fi1 = (u32) ~ 0; - u8 free0, free1 = 0, free_next0, free_next1; + while (follow_buffer_next && (flags & VLIB_BUFFER_NEXT_PRESENT)); +} + +static_always_inline void +vlib_buffer_free_inline (vlib_main_t * vm, + u32 * buffers, u32 n_buffers, u32 follow_buffer_next) +{ + vlib_buffer_main_t *bm = &buffer_main; + vlib_buffer_t *p, *b0, *b1, *b2, *b3; + int i = 0; u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers, u32 follow_buffer_next); - ASSERT (os_get_cpu_number () == 0); - cb = bm->buffer_free_callback; if (PREDICT_FALSE (cb != 0)) @@ -1114,299 +608,80 @@ vlib_buffer_free_inline (vlib_main_t * vm, if (!n_buffers) return; - /* Use first buffer to get default free list. */ - { - u32 bi0 = buffers[0]; - vlib_buffer_t *b0; - - b0 = vlib_get_buffer (vm, bi0); - fl = buffer_get_free_list (vm, b0, &fi); - if (fl->buffers_added_to_freelist_function) - vec_add1 (announce_list, fl); - } - - vec_validate (next_to_free[0], n_buffers - 1); - vec_validate (next_to_free[1], n_buffers - 1); - - i_next_to_free = 0; - n_left = n_buffers; - b = buffers; - -again: - /* Verify that buffers are known allocated. */ - vlib_buffer_validate_alloc_free (vm, b, - n_left, VLIB_BUFFER_KNOWN_ALLOCATED); - - vec_add2_aligned (fl->aligned_buffers, f, n_left, - /* align */ sizeof (vlib_copy_unit_t)); - - n = next_to_free[i_next_to_free]; - while (n_left >= 4) + while (i + 11 < n_buffers) { - vlib_buffer_t *b0, *b1, *binit0, *binit1, dummy_buffers[2]; - - bi0 = b[0]; - bi1 = b[1]; - - f[0] = bi0; - f[1] = bi1; - f += 2; - b += 2; - n_left -= 2; - - /* Prefetch buffers for next iteration. */ - vlib_prefetch_buffer_with_index (vm, b[0], WRITE); - vlib_prefetch_buffer_with_index (vm, b[1], WRITE); + p = vlib_get_buffer (vm, buffers[i + 8]); + vlib_prefetch_buffer_header (p, LOAD); + p = vlib_get_buffer (vm, buffers[i + 9]); + vlib_prefetch_buffer_header (p, LOAD); + p = vlib_get_buffer (vm, buffers[i + 10]); + vlib_prefetch_buffer_header (p, LOAD); + p = vlib_get_buffer (vm, buffers[i + 11]); + vlib_prefetch_buffer_header (p, LOAD); - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); + b0 = vlib_get_buffer (vm, buffers[i]); + b1 = vlib_get_buffer (vm, buffers[i + 1]); + b2 = vlib_get_buffer (vm, buffers[i + 2]); + b3 = vlib_get_buffer (vm, buffers[i + 3]); - free0 = (b0->flags & VLIB_BUFFER_RECYCLE) == 0; - free1 = (b1->flags & VLIB_BUFFER_RECYCLE) == 0; + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3); - /* Must be before init which will over-write buffer flags. */ - if (follow_buffer_next) - { - n[0] = b0->next_buffer; - free_next0 = free0 && (b0->flags & VLIB_BUFFER_NEXT_PRESENT) != 0; - n += free_next0; - - n[0] = b1->next_buffer; - free_next1 = free1 && (b1->flags & VLIB_BUFFER_NEXT_PRESENT) != 0; - n += free_next1; - } - else - free_next0 = free_next1 = 0; - - /* Must be before init which will over-write buffer free list. */ - fi0 = b0->free_list_index; - fi1 = b1->free_list_index; - - if (PREDICT_FALSE (fi0 != fi || fi1 != fi)) - goto slow_path_x2; - - binit0 = free0 ? b0 : &dummy_buffers[0]; - binit1 = free1 ? b1 : &dummy_buffers[1]; - - vlib_buffer_init_two_for_free_list (binit0, binit1, fl); - continue; - - slow_path_x2: - /* Backup speculation. */ - f -= 2; - n -= free_next0 + free_next1; - - _vec_len (fl->aligned_buffers) = f - fl->aligned_buffers; - - fl0 = pool_elt_at_index (bm->buffer_free_list_pool, fi0); - fl1 = pool_elt_at_index (bm->buffer_free_list_pool, fi1); - - add_buffer_to_free_list (vm, fl0, bi0, free0); - if (PREDICT_FALSE (fl0->buffers_added_to_freelist_function != 0)) - { - int i; - for (i = 0; i < vec_len (announce_list); i++) - if (fl0 == announce_list[i]) - goto no_fl0; - vec_add1 (announce_list, fl0); - } - no_fl0: - if (PREDICT_FALSE (fl1->buffers_added_to_freelist_function != 0)) - { - int i; - for (i = 0; i < vec_len (announce_list); i++) - if (fl1 == announce_list[i]) - goto no_fl1; - vec_add1 (announce_list, fl1); - } + recycle_or_free (vm, bm, buffers[i], b0, follow_buffer_next); + recycle_or_free (vm, bm, buffers[i + 1], b1, follow_buffer_next); + recycle_or_free (vm, bm, buffers[i + 2], b2, follow_buffer_next); + recycle_or_free (vm, bm, buffers[i + 3], b3, follow_buffer_next); - no_fl1: - add_buffer_to_free_list (vm, fl1, bi1, free1); - - /* Possibly change current free list. */ - if (fi0 != fi && fi1 != fi) - { - fi = fi1; - fl = pool_elt_at_index (bm->buffer_free_list_pool, fi); - } - - vec_add2_aligned (fl->aligned_buffers, f, n_left, - /* align */ sizeof (vlib_copy_unit_t)); + i += 4; } - while (n_left >= 1) + while (i < n_buffers) { - vlib_buffer_t *b0, *binit0, dummy_buffers[1]; - - bi0 = b[0]; - f[0] = bi0; - f += 1; - b += 1; - n_left -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - free0 = (b0->flags & VLIB_BUFFER_RECYCLE) == 0; - - /* Must be before init which will over-write buffer flags. */ - if (follow_buffer_next) - { - n[0] = b0->next_buffer; - free_next0 = free0 && (b0->flags & VLIB_BUFFER_NEXT_PRESENT) != 0; - n += free_next0; - } - else - free_next0 = 0; - - /* Must be before init which will over-write buffer free list. */ - fi0 = b0->free_list_index; - - if (PREDICT_FALSE (fi0 != fi)) - goto slow_path_x1; - - binit0 = free0 ? b0 : &dummy_buffers[0]; - - vlib_buffer_init_for_free_list (binit0, fl); - continue; - - slow_path_x1: - /* Backup speculation. */ - f -= 1; - n -= free_next0; - - _vec_len (fl->aligned_buffers) = f - fl->aligned_buffers; - - fl0 = pool_elt_at_index (bm->buffer_free_list_pool, fi0); - - add_buffer_to_free_list (vm, fl0, bi0, free0); - if (PREDICT_FALSE (fl0->buffers_added_to_freelist_function != 0)) - { - int i; - for (i = 0; i < vec_len (announce_list); i++) - if (fl0 == announce_list[i]) - goto no_fl00; - vec_add1 (announce_list, fl0); - } - - no_fl00: - fi = fi0; - fl = pool_elt_at_index (bm->buffer_free_list_pool, fi); - - vec_add2_aligned (fl->aligned_buffers, f, n_left, - /* align */ sizeof (vlib_copy_unit_t)); - } - - if (follow_buffer_next && ((n_left = n - next_to_free[i_next_to_free]) > 0)) - { - b = next_to_free[i_next_to_free]; - i_next_to_free ^= 1; - goto again; - } - - _vec_len (fl->aligned_buffers) = f - fl->aligned_buffers; - - if (vec_len (announce_list)) - { - vlib_buffer_free_list_t *fl; - for (i = 0; i < vec_len (announce_list); i++) - { - fl = announce_list[i]; - fl->buffers_added_to_freelist_function (vm, fl); - } - _vec_len (announce_list) = 0; + b0 = vlib_get_buffer (vm, buffers[i]); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + recycle_or_free (vm, bm, buffers[i], b0, follow_buffer_next); + i++; } -#endif } -void -vlib_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +static void +vlib_buffer_free_internal (vlib_main_t * vm, u32 * buffers, u32 n_buffers) { vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ 1); } -void -vlib_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +static void +vlib_buffer_free_no_next_internal (vlib_main_t * vm, u32 * buffers, + u32 n_buffers) { vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ 0); } -#if DPDK == 0 -/* Copy template packet data into buffers as they are allocated. */ -static void -vlib_packet_template_buffer_init (vlib_main_t * vm, - vlib_buffer_free_list_t * fl, - u32 * buffers, u32 n_buffers) -{ - vlib_packet_template_t *t = - uword_to_pointer (fl->buffer_init_function_opaque, - vlib_packet_template_t *); - uword i; - - for (i = 0; i < n_buffers; i++) - { - vlib_buffer_t *b = vlib_get_buffer (vm, buffers[i]); - ASSERT (b->current_length == vec_len (t->packet_data)); - clib_memcpy (vlib_buffer_get_current (b), t->packet_data, - b->current_length); - } -} -#endif - void vlib_packet_template_init (vlib_main_t * vm, vlib_packet_template_t * t, void *packet_data, uword n_packet_data_bytes, - uword min_n_buffers_each_physmem_alloc, - char *fmt, ...) + uword min_n_buffers_each_alloc, char *fmt, ...) { -#if DPDK > 0 va_list va; - __attribute__ ((unused)) u8 *name; va_start (va, fmt); - name = va_format (0, fmt, &va); + t->name = va_format (0, fmt, &va); va_end (va); vlib_worker_thread_barrier_sync (vm); - memset (t, 0, sizeof (t[0])); - - vec_add (t->packet_data, packet_data, n_packet_data_bytes); - vlib_worker_thread_barrier_release (vm); -#else - vlib_buffer_free_list_t *fl; - va_list va; - u8 *name; - - va_start (va, fmt); - name = va_format (0, fmt, &va); - va_end (va); - - memset (t, 0, sizeof (t[0])); + clib_memset (t, 0, sizeof (t[0])); vec_add (t->packet_data, packet_data, n_packet_data_bytes); - t->min_n_buffers_each_physmem_alloc = min_n_buffers_each_physmem_alloc; - - t->free_list_index = vlib_buffer_create_free_list_helper - (vm, n_packet_data_bytes, - /* is_public */ 1, - /* is_default */ 0, - name); + t->min_n_buffers_each_alloc = min_n_buffers_each_alloc; - ASSERT (t->free_list_index != 0); - fl = vlib_buffer_get_free_list (vm, t->free_list_index); - fl->min_n_buffers_each_physmem_alloc = t->min_n_buffers_each_physmem_alloc; - - fl->buffer_init_function = vlib_packet_template_buffer_init; - fl->buffer_init_function_opaque = pointer_to_uword (t); - - fl->buffer_init_template.current_data = 0; - fl->buffer_init_template.current_length = n_packet_data_bytes; - fl->buffer_init_template.flags = 0; -#endif + vlib_worker_thread_barrier_release (vm); } void * @@ -1422,36 +697,17 @@ vlib_packet_template_get_packet (vlib_main_t * vm, *bi_result = bi; b = vlib_get_buffer (vm, bi); - clib_memcpy (vlib_buffer_get_current (b), - t->packet_data, vec_len (t->packet_data)); + clib_memcpy_fast (vlib_buffer_get_current (b), + t->packet_data, vec_len (t->packet_data)); b->current_length = vec_len (t->packet_data); return b->data; } -#if DPDK == 0 -void -vlib_packet_template_get_packet_helper (vlib_main_t * vm, - vlib_packet_template_t * t) -{ - word n = t->min_n_buffers_each_physmem_alloc; - word l = vec_len (t->packet_data); - word n_alloc; - - ASSERT (l > 0); - ASSERT (vec_len (t->free_buffers) == 0); - - vec_validate (t->free_buffers, n - 1); - n_alloc = vlib_buffer_alloc_from_free_list (vm, t->free_buffers, - n, t->free_list_index); - _vec_len (t->free_buffers) = n_alloc; -} - -#endif /* Append given data to end of buffer, possibly allocating new buffers. */ u32 vlib_buffer_add_data (vlib_main_t * vm, - u32 free_list_index, + vlib_buffer_free_list_index_t free_list_index, u32 buffer_index, void *data, u32 n_data_bytes) { u32 n_buffer_bytes, n_left, n_left_this_buffer, bi; @@ -1459,7 +715,7 @@ vlib_buffer_add_data (vlib_main_t * vm, void *d; bi = buffer_index; - if (bi == 0 + if (bi == ~0 && 1 != vlib_buffer_alloc_from_free_list (vm, &bi, 1, free_list_index)) goto out_of_buffers; @@ -1482,7 +738,8 @@ vlib_buffer_add_data (vlib_main_t * vm, n_left_this_buffer = n_buffer_bytes - (b->current_data + b->current_length); n = clib_min (n_left_this_buffer, n_left); - clib_memcpy (vlib_buffer_get_current (b) + b->current_length, d, n); + clib_memcpy_fast (vlib_buffer_get_current (b) + b->current_length, d, + n); b->current_length += n; n_left -= n; if (n_left == 0) @@ -1508,10 +765,11 @@ out_of_buffers: u16 vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm, - u32 free_list_index, + vlib_buffer_free_list_index_t + free_list_index, vlib_buffer_t * first, - vlib_buffer_t ** last, - void *data, u16 data_len) + vlib_buffer_t ** last, void *data, + u16 data_len) { vlib_buffer_t *l = *last; u32 n_buffer_bytes = @@ -1527,13 +785,13 @@ vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm, vlib_buffer_alloc_from_free_list (vm, &l->next_buffer, 1, free_list_index)) return copied; - *last = l = vlib_buffer_chain_buffer (vm, first, l, l->next_buffer); + *last = l = vlib_buffer_chain_buffer (vm, l, l->next_buffer); max = n_buffer_bytes - l->current_length - l->current_data; } u16 len = (data_len > max) ? max : data_len; - clib_memcpy (vlib_buffer_get_current (l) + l->current_length, - data + copied, len); + clib_memcpy_fast (vlib_buffer_get_current (l) + l->current_length, + data + copied, len); vlib_buffer_chain_increase_length (first, l, len); data_len -= len; copied += len; @@ -1541,328 +799,53 @@ vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm, return copied; } -#if DPDK > 0 -clib_error_t * -vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, - unsigned socket_id) +u8 +vlib_buffer_register_physmem_map (vlib_main_t * vm, u32 physmem_map_index) { - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_physmem_main_t *vpm = &vm->physmem_main; - struct rte_mempool *rmp; - int i; - - if (!rte_pktmbuf_pool_create) - return clib_error_return (0, "not linked with DPDK"); - - vec_validate_aligned (bm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES); - - /* pool already exists, nothing to do */ - if (bm->pktmbuf_pools[socket_id]) - return 0; - - u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0); + vlib_buffer_main_t *bm = &buffer_main; + vlib_buffer_pool_t *p; + vlib_physmem_map_t *m = vlib_physmem_get_map (vm, physmem_map_index); + uword start = pointer_to_uword (m->base); + uword size = (uword) m->n_pages << m->log2_page_size; - rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */ - num_mbufs, /* number of mbufs */ - 512, /* cache size */ - VLIB_BUFFER_HDR_SIZE, /* priv size */ - VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */ - socket_id); /* cpu socket */ - - if (rmp) + if (bm->buffer_mem_size == 0) { - { - uword this_pool_end; - uword this_pool_start; - uword this_pool_size; - uword save_vpm_start, save_vpm_end, save_vpm_size; - struct rte_mempool_memhdr *memhdr; - - this_pool_start = ~0ULL; - this_pool_end = 0LL; - - STAILQ_FOREACH (memhdr, &rmp->mem_list, next) - { - if (((uword) (memhdr->addr + memhdr->len)) > this_pool_end) - this_pool_end = (uword) (memhdr->addr + memhdr->len); - if (((uword) memhdr->addr) < this_pool_start) - this_pool_start = (uword) (memhdr->addr); - } - ASSERT (this_pool_start < ~0ULL && this_pool_end > 0); - this_pool_size = this_pool_end - this_pool_start; - - if (CLIB_DEBUG > 1) - { - clib_warning ("%s: pool start %llx pool end %llx pool size %lld", - pool_name, this_pool_start, this_pool_end, - this_pool_size); - clib_warning - ("before: virtual.start %llx virtual.end %llx virtual.size %lld", - vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); - } - - save_vpm_start = vpm->virtual.start; - save_vpm_end = vpm->virtual.end; - save_vpm_size = vpm->virtual.size; - - if ((this_pool_start < vpm->virtual.start) || vpm->virtual.start == 0) - vpm->virtual.start = this_pool_start; - if (this_pool_end > vpm->virtual.end) - vpm->virtual.end = this_pool_end; - - vpm->virtual.size = vpm->virtual.end - vpm->virtual.start; - - if (CLIB_DEBUG > 1) - { - clib_warning - ("after: virtual.start %llx virtual.end %llx virtual.size %lld", - vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); - } - - /* check if fits into buffer index range */ - if ((u64) vpm->virtual.size > - ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES))) - { - clib_warning ("physmem: virtual size out of range!"); - vpm->virtual.start = save_vpm_start; - vpm->virtual.end = save_vpm_end; - vpm->virtual.size = save_vpm_size; - rmp = 0; - } - } - if (rmp) - { - bm->pktmbuf_pools[socket_id] = rmp; - vec_free (pool_name); - return 0; - } + bm->buffer_mem_start = start; + bm->buffer_mem_size = size; } - - vec_free (pool_name); - - /* no usable pool for this socket, try to use pool from another one */ - for (i = 0; i < vec_len (bm->pktmbuf_pools); i++) + else if (start < bm->buffer_mem_start) { - if (bm->pktmbuf_pools[i]) - { - clib_warning - ("WARNING: Failed to allocate mempool for CPU socket %u. " - "Threads running on socket %u will use socket %u mempool.", - socket_id, socket_id, i); - bm->pktmbuf_pools[socket_id] = bm->pktmbuf_pools[i]; - return 0; - } + bm->buffer_mem_size += bm->buffer_mem_start - start; + bm->buffer_mem_start = start; + if (size > bm->buffer_mem_size) + bm->buffer_mem_size = size; } - - return clib_error_return (0, "failed to allocate mempool on socket %u", - socket_id); -} -#endif - -static void -vlib_serialize_tx (serialize_main_header_t * m, serialize_stream_t * s) -{ - vlib_main_t *vm; - vlib_serialize_buffer_main_t *sm; - uword n, n_bytes_to_write; - vlib_buffer_t *last; - - n_bytes_to_write = s->current_buffer_index; - sm = - uword_to_pointer (s->data_function_opaque, - vlib_serialize_buffer_main_t *); - vm = sm->vlib_main; - - ASSERT (sm->tx.max_n_data_bytes_per_chain > 0); - if (serialize_stream_is_end_of_stream (s) - || sm->tx.n_total_data_bytes + n_bytes_to_write > - sm->tx.max_n_data_bytes_per_chain) + else if (start > bm->buffer_mem_start) { - vlib_process_t *p = vlib_get_current_process (vm); - - last = vlib_get_buffer (vm, sm->last_buffer); - last->current_length = n_bytes_to_write; - - vlib_set_next_frame_buffer (vm, &p->node_runtime, sm->tx.next_index, - sm->first_buffer); - - sm->first_buffer = sm->last_buffer = ~0; - sm->tx.n_total_data_bytes = 0; - } - - else if (n_bytes_to_write == 0 && s->n_buffer_bytes == 0) - { - ASSERT (sm->first_buffer == ~0); - ASSERT (sm->last_buffer == ~0); - n = - vlib_buffer_alloc_from_free_list (vm, &sm->first_buffer, 1, - sm->tx.free_list_index); - if (n != 1) - serialize_error (m, - clib_error_create - ("vlib_buffer_alloc_from_free_list fails")); - sm->last_buffer = sm->first_buffer; - s->n_buffer_bytes = - vlib_buffer_free_list_buffer_size (vm, sm->tx.free_list_index); - } - - if (n_bytes_to_write > 0) - { - vlib_buffer_t *prev = vlib_get_buffer (vm, sm->last_buffer); - n = - vlib_buffer_alloc_from_free_list (vm, &sm->last_buffer, 1, - sm->tx.free_list_index); - if (n != 1) - serialize_error (m, - clib_error_create - ("vlib_buffer_alloc_from_free_list fails")); - sm->tx.n_total_data_bytes += n_bytes_to_write; - prev->current_length = n_bytes_to_write; - prev->next_buffer = sm->last_buffer; - prev->flags |= VLIB_BUFFER_NEXT_PRESENT; - } - - if (sm->last_buffer != ~0) - { - last = vlib_get_buffer (vm, sm->last_buffer); - s->buffer = vlib_buffer_get_current (last); - s->current_buffer_index = 0; - ASSERT (last->current_data == s->current_buffer_index); - } -} - -static void -vlib_serialize_rx (serialize_main_header_t * m, serialize_stream_t * s) -{ - vlib_main_t *vm; - vlib_serialize_buffer_main_t *sm; - vlib_buffer_t *last; - - sm = - uword_to_pointer (s->data_function_opaque, - vlib_serialize_buffer_main_t *); - vm = sm->vlib_main; - - if (serialize_stream_is_end_of_stream (s)) - return; - - if (sm->last_buffer != ~0) - { - last = vlib_get_buffer (vm, sm->last_buffer); - - if (last->flags & VLIB_BUFFER_NEXT_PRESENT) - sm->last_buffer = last->next_buffer; - else - { - vlib_buffer_free (vm, &sm->first_buffer, /* count */ 1); - sm->first_buffer = sm->last_buffer = ~0; - } - } - - if (sm->last_buffer == ~0) - { - while (clib_fifo_elts (sm->rx.buffer_fifo) == 0) - { - sm->rx.ready_one_time_event = - vlib_process_create_one_time_event (vm, vlib_current_process (vm), - ~0); - vlib_process_wait_for_one_time_event (vm, /* no event data */ 0, - sm->rx.ready_one_time_event); - } - - clib_fifo_sub1 (sm->rx.buffer_fifo, sm->first_buffer); - sm->last_buffer = sm->first_buffer; + uword new_size = start - bm->buffer_mem_start + size; + if (new_size > bm->buffer_mem_size) + bm->buffer_mem_size = new_size; } - ASSERT (sm->last_buffer != ~0); - - last = vlib_get_buffer (vm, sm->last_buffer); - s->current_buffer_index = 0; - s->buffer = vlib_buffer_get_current (last); - s->n_buffer_bytes = last->current_length; -} - -static void -serialize_open_vlib_helper (serialize_main_t * m, - vlib_main_t * vm, - vlib_serialize_buffer_main_t * sm, uword is_read) -{ - /* Initialize serialize main but save overflow buffer for re-use between calls. */ - { - u8 *save = m->stream.overflow_buffer; - memset (m, 0, sizeof (m[0])); - m->stream.overflow_buffer = save; - if (save) - _vec_len (save) = 0; - } - - sm->first_buffer = sm->last_buffer = ~0; - if (is_read) - clib_fifo_reset (sm->rx.buffer_fifo); - else - sm->tx.n_total_data_bytes = 0; - sm->vlib_main = vm; - m->header.data_function = is_read ? vlib_serialize_rx : vlib_serialize_tx; - m->stream.data_function_opaque = pointer_to_uword (sm); -} - -void -serialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, - vlib_serialize_buffer_main_t * sm) -{ - serialize_open_vlib_helper (m, vm, sm, /* is_read */ 0); -} - -void -unserialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, - vlib_serialize_buffer_main_t * sm) -{ - serialize_open_vlib_helper (m, vm, sm, /* is_read */ 1); -} - -u32 -serialize_close_vlib_buffer (serialize_main_t * m) -{ - vlib_serialize_buffer_main_t *sm - = uword_to_pointer (m->stream.data_function_opaque, - vlib_serialize_buffer_main_t *); - vlib_buffer_t *last; - serialize_stream_t *s = &m->stream; - - last = vlib_get_buffer (sm->vlib_main, sm->last_buffer); - last->current_length = s->current_buffer_index; - - if (vec_len (s->overflow_buffer) > 0) + if ((u64) bm->buffer_mem_size > + ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES))) { - sm->last_buffer - = vlib_buffer_add_data (sm->vlib_main, sm->tx.free_list_index, - sm->last_buffer == ~0 ? 0 : sm->last_buffer, - s->overflow_buffer, - vec_len (s->overflow_buffer)); - _vec_len (s->overflow_buffer) = 0; + clib_panic ("buffer memory size out of range!"); } - return sm->first_buffer; -} + vec_add2 (bm->buffer_pools, p, 1); + p->start = start; + p->size = size; + p->physmem_map_index = physmem_map_index; -void -unserialize_close_vlib_buffer (serialize_main_t * m) -{ - vlib_serialize_buffer_main_t *sm - = uword_to_pointer (m->stream.data_function_opaque, - vlib_serialize_buffer_main_t *); - if (sm->first_buffer != ~0) - vlib_buffer_free_one (sm->vlib_main, sm->first_buffer); - clib_fifo_reset (sm->rx.buffer_fifo); - if (m->stream.overflow_buffer) - _vec_len (m->stream.overflow_buffer) = 0; + ASSERT (p - bm->buffer_pools < 256); + return p - bm->buffer_pools; } static u8 * format_vlib_buffer_free_list (u8 * s, va_list * va) { vlib_buffer_free_list_t *f = va_arg (*va, vlib_buffer_free_list_t *); -#if DPDK > 0 u32 threadnum = va_arg (*va, u32); uword bytes_alloc, bytes_free, n_free, size; @@ -1872,26 +855,11 @@ format_vlib_buffer_free_list (u8 * s, va_list * va) "#Alloc", "#Free"); size = sizeof (vlib_buffer_t) + f->n_data_bytes; - n_free = vec_len (f->aligned_buffers) + vec_len (f->unaligned_buffers); + n_free = vec_len (f->buffers); bytes_alloc = size * f->n_alloc; bytes_free = size * n_free; - s = format (s, "%7d%30s%12d%12d%=12U%=12U%=12d%=12d", threadnum, -#else - uword bytes_alloc, bytes_free, n_free, size; - - if (!f) - return format (s, "%=30s%=12s%=12s%=12s%=12s%=12s%=12s", - "Name", "Index", "Size", "Alloc", "Free", "#Alloc", - "#Free"); - - size = sizeof (vlib_buffer_t) + f->n_data_bytes; - n_free = vec_len (f->aligned_buffers) + vec_len (f->unaligned_buffers); - bytes_alloc = size * f->n_alloc; - bytes_free = size * n_free; - - s = format (s, "%30s%12d%12d%=12U%=12U%=12d%=12d", -#endif + s = format (s, "%7d%30v%12d%12d%=12U%=12U%=12d%=12d", threadnum, f->name, f->index, f->n_data_bytes, format_memory_size, bytes_alloc, format_memory_size, bytes_free, f->n_alloc, n_free); @@ -1903,8 +871,6 @@ static clib_error_t * show_buffers (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { -#if DPDK > 0 - vlib_buffer_main_t *bm; vlib_buffer_free_list_t *f; vlib_main_t *curr_vm; u32 vm_index = 0; @@ -1913,11 +879,10 @@ show_buffers (vlib_main_t * vm, do { - curr_vm = vec_len (vlib_mains) ? vlib_mains[vm_index] : vm; - bm = curr_vm->buffer_main; + curr_vm = vlib_mains[vm_index]; /* *INDENT-OFF* */ - pool_foreach (f, bm->buffer_free_list_pool, ({ + pool_foreach (f, curr_vm->buffer_free_list_pool, ({ vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, f, vm_index); })); /* *INDENT-ON* */ @@ -1926,18 +891,6 @@ show_buffers (vlib_main_t * vm, } while (vm_index < vec_len (vlib_mains)); -#else - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *f; - - vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, 0); - /* *INDENT-OFF* */ - pool_foreach (f, bm->buffer_free_list_pool, ({ - vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, f); - })); -/* *INDENT-ON* */ - -#endif return 0; } @@ -1949,32 +902,81 @@ VLIB_CLI_COMMAND (show_buffers_command, static) = { }; /* *INDENT-ON* */ -#if DPDK > 0 -#if CLIB_DEBUG > 0 +clib_error_t * +vlib_buffer_main_init (struct vlib_main_t * vm) +{ + vlib_buffer_main_t *bm = &buffer_main; + clib_error_t *error; + u32 physmem_map_index; + u8 pool_index; + int log2_page_size = 0; + + buffer_log_default = vlib_log_register_class ("buffer", 0); + + if (vlib_buffer_callbacks) + { + /* external plugin has registered own buffer callbacks + so we just copy them and quit */ + clib_memcpy_fast (&bm->cb, vlib_buffer_callbacks, + sizeof (vlib_buffer_callbacks_t)); + bm->callbacks_registered = 1; + return 0; + } -u32 *vlib_buffer_state_validation_lock; -uword *vlib_buffer_state_validation_hash; -void *vlib_buffer_state_heap; + bm->cb.vlib_buffer_fill_free_list_cb = &vlib_buffer_fill_free_list_internal; + bm->cb.vlib_buffer_free_cb = &vlib_buffer_free_internal; + bm->cb.vlib_buffer_free_no_next_cb = &vlib_buffer_free_no_next_internal; + bm->cb.vlib_buffer_delete_free_list_cb = + &vlib_buffer_delete_free_list_internal; + clib_spinlock_init (&bm->buffer_known_hash_lockp); + +retry: + error = vlib_physmem_shared_map_create (vm, "buffers", + vlib_buffer_physmem_sz, + log2_page_size, + CLIB_PMALLOC_NUMA_LOCAL, + &physmem_map_index); + + if (error && log2_page_size == 0) + { + vlib_log_warn (buffer_log_default, "%U", format_clib_error, error); + clib_error_free (error); + vlib_log_warn (buffer_log_default, "falling back to non-hugepage " + "backed buffer pool"); + log2_page_size = min_log2 (clib_mem_get_page_size ()); + goto retry; + } + + if (error) + return error; + + pool_index = vlib_buffer_register_physmem_map (vm, physmem_map_index); + vlib_buffer_pool_t *bp = vlib_buffer_pool_get (pool_index); + clib_spinlock_init (&bp->lock); + bp->buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES + + sizeof (vlib_buffer_t); + + return 0; +} static clib_error_t * -buffer_state_validation_init (vlib_main_t * vm) +vlib_buffers_configure (vlib_main_t * vm, unformat_input_t * input) { - void *oldheap; - - vlib_buffer_state_heap = mheap_alloc (0, 10 << 20); + u32 size_in_mb; - oldheap = clib_mem_set_heap (vlib_buffer_state_heap); + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "memory-size-in-mb %d", &size_in_mb)) + vlib_buffer_physmem_sz = size_in_mb << 20; + else + return unformat_parse_error (input); + } - vlib_buffer_state_validation_hash = hash_create (0, sizeof (uword)); - vec_validate_aligned (vlib_buffer_state_validation_lock, 0, - CLIB_CACHE_LINE_BYTES); - clib_mem_set_heap (oldheap); + unformat_free (input); return 0; } -VLIB_INIT_FUNCTION (buffer_state_validation_init); -#endif -#endif +VLIB_EARLY_CONFIG_FUNCTION (vlib_buffers_configure, "buffers"); /** @endcond */