From: Damjan Marion Date: Wed, 4 Jan 2017 12:19:27 +0000 (+0100) Subject: vlib: add buffer and thread callbacks X-Git-Tag: v17.04-rc1~369 X-Git-Url: https://gerrit.fd.io/r/gitweb?a=commitdiff_plain;h=refs%2Fchanges%2F92%2F4592%2F9;p=vpp.git vlib: add buffer and thread callbacks Change-Id: I8e2e8f94a884ab2f9909d0c83ba00edd38cdab77 Signed-off-by: Damjan Marion --- diff --git a/src/plugins/flowperpkt/flowperpkt.c b/src/plugins/flowperpkt/flowperpkt.c index fb71d5b0ffc..cc35159906d 100644 --- a/src/plugins/flowperpkt/flowperpkt.c +++ b/src/plugins/flowperpkt/flowperpkt.c @@ -643,7 +643,7 @@ flowperpkt_init (vlib_main_t * vm) vec_free (name); /* Decide how many worker threads we have */ - num_threads = 1 /* main thread */ + tm->n_eal_threads; + num_threads = 1 /* main thread */ + tm->n_threads; /* Allocate per worker thread vectors */ vec_validate (fm->ipv4_buffers_per_worker, num_threads - 1); diff --git a/src/vlib.am b/src/vlib.am index 0154d841aa0..c21f88c4f7b 100644 --- a/src/vlib.am +++ b/src/vlib.am @@ -23,6 +23,7 @@ vlib/config.h: libvlib_la_SOURCES = \ vlib/buffer.c \ + vlib/buffer_serialize.c \ vlib/cli.c \ vlib/cli.h \ vlib/config.h \ diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 4bf6d125b21..0b0e6054ec2 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -38,50 +38,13 @@ */ /** - * @cond (!DPDK) * @file * * Allocate/free network buffers. 
*/ -#if DPDK > 0 -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#endif - #include -#if DPDK > 0 -#pragma weak rte_mem_virt2phy -#pragma weak rte_eal_has_hugepages -#pragma weak rte_socket_id -#pragma weak rte_pktmbuf_pool_create -#endif - uword vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm, vlib_buffer_t * b_first) @@ -103,7 +66,6 @@ u8 * format_vlib_buffer (u8 * s, va_list * args) { vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *); -#if DPDK > 0 uword indent = format_get_indent (s); s = format (s, "current data %d, length %d, free-list %d", @@ -126,18 +88,6 @@ format_vlib_buffer (u8 * s, va_list * args) format_white_space, indent, next_buffer, b->current_length); } -#else - - s = format (s, "current data %d, length %d, free-list %d", - b->current_data, b->current_length, b->free_list_index); - - if (b->flags & VLIB_BUFFER_IS_TRACED) - s = format (s, ", trace 0x%x", b->trace_index); - - if (b->flags & VLIB_BUFFER_NEXT_PRESENT) - s = format (s, ", next-buffer 0x%x", b->next_buffer); -#endif - return s; } @@ -153,7 +103,6 @@ format_vlib_buffer_and_data (u8 * s, va_list * args) return s; } -#if DPDK == 0 static u8 * format_vlib_buffer_known_state (u8 * s, va_list * args) { @@ -181,7 +130,6 @@ format_vlib_buffer_known_state (u8 * s, va_list * args) return format (s, "%s", t); } -#endif u8 * format_vlib_buffer_contents (u8 * s, va_list * va) @@ -200,7 +148,6 @@ format_vlib_buffer_contents (u8 * s, va_list * va) return s; } -#if DPDK == 0 static u8 * vlib_validate_buffer_helper (vlib_main_t * vm, u32 bi, @@ -217,11 +164,10 @@ vlib_validate_buffer_helper (vlib_main_t * vm, if ((signed) b->current_data < (signed) -VLIB_BUFFER_PRE_DATA_SIZE) return format (0, "current data %d before pre-data", b->current_data); -#if DPDK == 0 + if (b->current_data + 
b->current_length > fl->n_data_bytes) return format (0, "%d-%d beyond end of buffer %d", b->current_data, b->current_length, fl->n_data_bytes); -#endif if (follow_buffer_next && (b->flags & VLIB_BUFFER_NEXT_PRESENT)) { @@ -311,14 +257,12 @@ done: hash_free (hash); return result; } -#endif vlib_main_t **vlib_mains; -#if DPDK == 0 /* When dubugging validate that given buffers are either known allocated or known free. */ -static void +static void __attribute__ ((unused)) vlib_buffer_validate_alloc_free (vlib_main_t * vm, u32 * buffers, uword n_buffers, @@ -359,7 +303,6 @@ vlib_buffer_validate_alloc_free (vlib_main_t * vm, is_free ? VLIB_BUFFER_KNOWN_FREE : VLIB_BUFFER_KNOWN_ALLOCATED); } } -#endif #define BUFFERS_PER_COPY (sizeof (vlib_copy_unit_t) / sizeof (u32)) @@ -463,7 +406,6 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm, { vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *f; -#if DPDK > 0 int i; ASSERT (os_get_cpu_number () == 0); @@ -519,47 +461,6 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm, wf->unaligned_buffers = 0; wf->n_alloc = 0; } -#else - - if (!is_default && pool_elts (bm->buffer_free_list_pool) == 0) - { - u32 default_free_free_list_index; - - default_free_free_list_index = vlib_buffer_create_free_list_helper (vm, - /* default buffer size */ - VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, - /* is_public */ - 1, - /* is_default */ - 1, - (u8 - *) - "default"); - ASSERT (default_free_free_list_index == - VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - - if (n_data_bytes == VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES && is_public) - return default_free_free_list_index; - } - - pool_get_aligned (bm->buffer_free_list_pool, f, CLIB_CACHE_LINE_BYTES); - - memset (f, 0, sizeof (f[0])); - f->index = f - bm->buffer_free_list_pool; - f->n_data_bytes = vlib_buffer_round_size (n_data_bytes); - f->min_n_buffers_each_physmem_alloc = 256; - f->name = clib_mem_is_heap_object (name) ? name : format (0, "%s", name); - - /* Setup free buffer template. 
*/ - f->buffer_init_template.free_list_index = f->index; - - if (is_public) - { - uword *p = hash_get (bm->free_list_by_size, f->n_data_bytes); - if (!p) - hash_set (bm->free_list_by_size, f->n_data_bytes, f->index); - } -#endif return f->index; } @@ -609,50 +510,30 @@ static void del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) { u32 i; -#if DPDK > 0 - struct rte_mbuf *mb; - vlib_buffer_t *b; - - for (i = 0; i < vec_len (f->unaligned_buffers); i++) - { - b = vlib_get_buffer (vm, f->unaligned_buffers[i]); - mb = rte_mbuf_from_vlib_buffer (b); - ASSERT (rte_mbuf_refcnt_read (mb) == 1); - rte_pktmbuf_free (mb); - } - for (i = 0; i < vec_len (f->aligned_buffers); i++) - { - b = vlib_get_buffer (vm, f->aligned_buffers[i]); - mb = rte_mbuf_from_vlib_buffer (b); - ASSERT (rte_mbuf_refcnt_read (mb) == 1); - rte_pktmbuf_free (mb); - } - vec_free (f->name); -#else for (i = 0; i < vec_len (f->buffer_memory_allocated); i++) vm->os_physmem_free (f->buffer_memory_allocated[i]); vec_free (f->name); vec_free (f->buffer_memory_allocated); -#endif vec_free (f->unaligned_buffers); vec_free (f->aligned_buffers); } /* Add buffer free list. 
*/ void -vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) +vlib_buffer_delete_free_list_internal (vlib_main_t * vm, u32 free_list_index) { vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *f; u32 merge_index; -#if DPDK > 0 int i; ASSERT (os_get_cpu_number () == 0); f = vlib_buffer_get_free_list (vm, free_list_index); + ASSERT (vec_len (f->unaligned_buffers) + vec_len (f->aligned_buffers) == + f->n_alloc); merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); if (merge_index != ~0 && merge_index != free_list_index) { @@ -674,26 +555,6 @@ vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) memset (f, 0xab, sizeof (f[0])); pool_put (bm->buffer_free_list_pool, f); } -#else - - f = vlib_buffer_get_free_list (vm, free_list_index); - - ASSERT (vec_len (f->unaligned_buffers) + vec_len (f->aligned_buffers) == - f->n_alloc); - merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); - if (merge_index != ~0 && merge_index != free_list_index) - { - merge_free_lists (pool_elt_at_index (bm->buffer_free_list_pool, - merge_index), f); - } - - del_free_list (vm, f); - - /* Poison it. */ - memset (f, 0xab, sizeof (f[0])); - - pool_put (bm->buffer_free_list_pool, f); -#endif } /* Make sure free list has at least given number of free buffers. */ @@ -701,63 +562,6 @@ static uword fill_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * fl, uword min_free_buffers) { -#if DPDK > 0 - vlib_buffer_t *b; - int n, i; - u32 bi; - u32 n_remaining = 0, n_alloc = 0; - unsigned socket_id = rte_socket_id ? rte_socket_id () : 0; - struct rte_mempool *rmp = vm->buffer_main->pktmbuf_pools[socket_id]; - struct rte_mbuf *mb; - - /* Too early? */ - if (PREDICT_FALSE (rmp == 0)) - return 0; - - trim_aligned (fl); - - /* Already have enough free buffers on free list? */ - n = min_free_buffers - vec_len (fl->aligned_buffers); - if (n <= 0) - return min_free_buffers; - - /* Always allocate round number of buffers. 
*/ - n = round_pow2 (n, BUFFERS_PER_COPY); - - /* Always allocate new buffers in reasonably large sized chunks. */ - n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); - - vec_validate (vm->mbuf_alloc_list, n - 1); - - if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0) - return 0; - - _vec_len (vm->mbuf_alloc_list) = n; - - for (i = 0; i < n; i++) - { - mb = vm->mbuf_alloc_list[i]; - - ASSERT (rte_mbuf_refcnt_read (mb) == 0); - rte_mbuf_refcnt_set (mb, 1); - - b = vlib_buffer_from_rte_mbuf (mb); - bi = vlib_get_buffer_index (vm, b); - - vec_add1_aligned (fl->aligned_buffers, bi, sizeof (vlib_copy_unit_t)); - n_alloc++; - n_remaining--; - - vlib_buffer_init_for_free_list (b, fl); - - if (fl->buffer_init_function) - fl->buffer_init_function (vm, fl, &bi, 1); - } - - fl->n_alloc += n; - - return n; -#else vlib_buffer_t *buffers, *b; int n, n_bytes, i; u32 *bi; @@ -824,7 +628,6 @@ fill_free_list (vlib_main_t * vm, fl->buffer_init_function (vm, fl, bi, n_this_chunk); } return n_alloc; -#endif } always_inline uword @@ -833,6 +636,7 @@ copy_alignment (u32 * x) return (pointer_to_uword (x) / sizeof (x[0])) % BUFFERS_PER_COPY; } + static u32 alloc_from_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * free_list, @@ -842,10 +646,6 @@ alloc_from_free_list (vlib_main_t * vm, uword u_len, n_left; uword n_unaligned_start, n_unaligned_end, n_filled; -#if DPDK == 0 - ASSERT (os_get_cpu_number () == 0); - -#endif n_left = n_alloc_buffers; dst = alloc_buffers; n_unaligned_start = ((BUFFERS_PER_COPY - copy_alignment (dst)) @@ -945,25 +745,21 @@ alloc_from_free_list (vlib_main_t * vm, else _vec_len (free_list->unaligned_buffers) = u_len; -#if DPDK == 0 /* Verify that buffers are known free. */ vlib_buffer_validate_alloc_free (vm, alloc_buffers, n_alloc_buffers, VLIB_BUFFER_KNOWN_FREE); -#endif return n_alloc_buffers; } + /* Allocate a given number of buffers into given array. Returns number actually allocated which will be either zero or number requested. 
*/ -u32 -vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +static u32 +vlib_buffer_alloc_internal (vlib_main_t * vm, u32 * buffers, u32 n_buffers) { vlib_buffer_main_t *bm = vm->buffer_main; -#if DPDK == 0 - ASSERT (os_get_cpu_number () == 0); -#endif return alloc_from_free_list (vm, @@ -972,10 +768,10 @@ vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) buffers, n_buffers); } -u32 -vlib_buffer_alloc_from_free_list (vlib_main_t * vm, - u32 * buffers, - u32 n_buffers, u32 free_list_index) +static u32 +vlib_buffer_alloc_from_free_list_internal (vlib_main_t * vm, + u32 * buffers, + u32 n_buffers, u32 free_list_index) { vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *f; @@ -1016,81 +812,10 @@ vlib_set_buffer_free_callback (vlib_main_t * vm, void *fp) return rv; } -#if DPDK == 0 -void vnet_buffer_free_dpdk_mb (vlib_buffer_t * b) __attribute__ ((weak)); -void -vnet_buffer_free_dpdk_mb (vlib_buffer_t * b) -{ -} - -#endif static_always_inline void vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, u32 follow_buffer_next) { -#if DPDK > 0 - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *fl; - u32 fi; - int i; - u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers, - u32 follow_buffer_next); - - cb = bm->buffer_free_callback; - - if (PREDICT_FALSE (cb != 0)) - n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next); - - if (!n_buffers) - return; - - for (i = 0; i < n_buffers; i++) - { - vlib_buffer_t *b; - struct rte_mbuf *mb; - - b = vlib_get_buffer (vm, buffers[i]); - - fl = buffer_get_free_list (vm, b, &fi); - - /* The only current use of this callback: multicast recycle */ - if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0)) - { - int j; - - add_buffer_to_free_list - (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0); - - for (j = 0; j < vec_len (bm->announce_list); j++) - { - if (fl == bm->announce_list[j]) - goto already_announced; - } - 
vec_add1 (bm->announce_list, fl); - already_announced: - ; - } - else - { - if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0)) - { - mb = rte_mbuf_from_vlib_buffer (b); - ASSERT (rte_mbuf_refcnt_read (mb) == 1); - rte_pktmbuf_free (mb); - } - } - } - if (vec_len (bm->announce_list)) - { - vlib_buffer_free_list_t *fl; - for (i = 0; i < vec_len (bm->announce_list); i++) - { - fl = bm->announce_list[i]; - fl->buffers_added_to_freelist_function (vm, fl); - } - _vec_len (bm->announce_list) = 0; - } -#else vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *fl; static u32 *next_to_free[2]; /* smp bad */ @@ -1315,26 +1040,25 @@ again: } _vec_len (announce_list) = 0; } -#endif } -void -vlib_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +static void +vlib_buffer_free_internal (vlib_main_t * vm, u32 * buffers, u32 n_buffers) { vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ 1); } -void -vlib_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +static void +vlib_buffer_free_no_next_internal (vlib_main_t * vm, u32 * buffers, + u32 n_buffers) { vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ 0); } -#if DPDK == 0 /* Copy template packet data into buffers as they are allocated. */ -static void +static void __attribute__ ((unused)) vlib_packet_template_buffer_init (vlib_main_t * vm, vlib_buffer_free_list_t * fl, u32 * buffers, u32 n_buffers) @@ -1352,7 +1076,6 @@ vlib_packet_template_buffer_init (vlib_main_t * vm, b->current_length); } } -#endif void vlib_packet_template_init (vlib_main_t * vm, @@ -1362,28 +1085,22 @@ vlib_packet_template_init (vlib_main_t * vm, uword min_n_buffers_each_physmem_alloc, char *fmt, ...) 
{ -#if DPDK > 0 + vlib_buffer_main_t *bm = vm->buffer_main; va_list va; __attribute__ ((unused)) u8 *name; + vlib_buffer_free_list_t *fl; va_start (va, fmt); name = va_format (0, fmt, &va); va_end (va); - vlib_worker_thread_barrier_sync (vm); - memset (t, 0, sizeof (t[0])); - - vec_add (t->packet_data, packet_data, n_packet_data_bytes); + if (bm->cb.vlib_packet_template_init_cb) + bm->cb.vlib_packet_template_init_cb (vm, (void *) t, packet_data, + n_packet_data_bytes, + min_n_buffers_each_physmem_alloc, + name); - vlib_worker_thread_barrier_release (vm); -#else - vlib_buffer_free_list_t *fl; - va_list va; - u8 *name; - - va_start (va, fmt); - name = va_format (0, fmt, &va); - va_end (va); + vlib_worker_thread_barrier_sync (vm); memset (t, 0, sizeof (t[0])); @@ -1406,7 +1123,7 @@ vlib_packet_template_init (vlib_main_t * vm, fl->buffer_init_template.current_data = 0; fl->buffer_init_template.current_length = n_packet_data_bytes; fl->buffer_init_template.flags = 0; -#endif + vlib_worker_thread_barrier_release (vm); } void * @@ -1429,7 +1146,6 @@ vlib_packet_template_get_packet (vlib_main_t * vm, return b->data; } -#if DPDK == 0 void vlib_packet_template_get_packet_helper (vlib_main_t * vm, vlib_packet_template_t * t) @@ -1447,7 +1163,6 @@ vlib_packet_template_get_packet_helper (vlib_main_t * vm, _vec_len (t->free_buffers) = n_alloc; } -#endif /* Append given data to end of buffer, possibly allocating new buffers. 
*/ u32 vlib_buffer_add_data (vlib_main_t * vm, @@ -1541,328 +1256,11 @@ vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm, return copied; } -#if DPDK > 0 -clib_error_t * -vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, - unsigned socket_id) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_physmem_main_t *vpm = &vm->physmem_main; - struct rte_mempool *rmp; - int i; - - if (!rte_pktmbuf_pool_create) - return clib_error_return (0, "not linked with DPDK"); - - vec_validate_aligned (bm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES); - - /* pool already exists, nothing to do */ - if (bm->pktmbuf_pools[socket_id]) - return 0; - - u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0); - - rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */ - num_mbufs, /* number of mbufs */ - 512, /* cache size */ - VLIB_BUFFER_HDR_SIZE, /* priv size */ - VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */ - socket_id); /* cpu socket */ - - if (rmp) - { - { - uword this_pool_end; - uword this_pool_start; - uword this_pool_size; - uword save_vpm_start, save_vpm_end, save_vpm_size; - struct rte_mempool_memhdr *memhdr; - - this_pool_start = ~0ULL; - this_pool_end = 0LL; - - STAILQ_FOREACH (memhdr, &rmp->mem_list, next) - { - if (((uword) (memhdr->addr + memhdr->len)) > this_pool_end) - this_pool_end = (uword) (memhdr->addr + memhdr->len); - if (((uword) memhdr->addr) < this_pool_start) - this_pool_start = (uword) (memhdr->addr); - } - ASSERT (this_pool_start < ~0ULL && this_pool_end > 0); - this_pool_size = this_pool_end - this_pool_start; - - if (CLIB_DEBUG > 1) - { - clib_warning ("%s: pool start %llx pool end %llx pool size %lld", - pool_name, this_pool_start, this_pool_end, - this_pool_size); - clib_warning - ("before: virtual.start %llx virtual.end %llx virtual.size %lld", - vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); - } - - save_vpm_start = vpm->virtual.start; - save_vpm_end = vpm->virtual.end; - 
save_vpm_size = vpm->virtual.size; - - if ((this_pool_start < vpm->virtual.start) || vpm->virtual.start == 0) - vpm->virtual.start = this_pool_start; - if (this_pool_end > vpm->virtual.end) - vpm->virtual.end = this_pool_end; - - vpm->virtual.size = vpm->virtual.end - vpm->virtual.start; - - if (CLIB_DEBUG > 1) - { - clib_warning - ("after: virtual.start %llx virtual.end %llx virtual.size %lld", - vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); - } - - /* check if fits into buffer index range */ - if ((u64) vpm->virtual.size > - ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES))) - { - clib_warning ("physmem: virtual size out of range!"); - vpm->virtual.start = save_vpm_start; - vpm->virtual.end = save_vpm_end; - vpm->virtual.size = save_vpm_size; - rmp = 0; - } - } - if (rmp) - { - bm->pktmbuf_pools[socket_id] = rmp; - vec_free (pool_name); - return 0; - } - } - - vec_free (pool_name); - - /* no usable pool for this socket, try to use pool from another one */ - for (i = 0; i < vec_len (bm->pktmbuf_pools); i++) - { - if (bm->pktmbuf_pools[i]) - { - clib_warning - ("WARNING: Failed to allocate mempool for CPU socket %u. 
" - "Threads running on socket %u will use socket %u mempool.", - socket_id, socket_id, i); - bm->pktmbuf_pools[socket_id] = bm->pktmbuf_pools[i]; - return 0; - } - } - - return clib_error_return (0, "failed to allocate mempool on socket %u", - socket_id); -} -#endif - -static void -vlib_serialize_tx (serialize_main_header_t * m, serialize_stream_t * s) -{ - vlib_main_t *vm; - vlib_serialize_buffer_main_t *sm; - uword n, n_bytes_to_write; - vlib_buffer_t *last; - - n_bytes_to_write = s->current_buffer_index; - sm = - uword_to_pointer (s->data_function_opaque, - vlib_serialize_buffer_main_t *); - vm = sm->vlib_main; - - ASSERT (sm->tx.max_n_data_bytes_per_chain > 0); - if (serialize_stream_is_end_of_stream (s) - || sm->tx.n_total_data_bytes + n_bytes_to_write > - sm->tx.max_n_data_bytes_per_chain) - { - vlib_process_t *p = vlib_get_current_process (vm); - - last = vlib_get_buffer (vm, sm->last_buffer); - last->current_length = n_bytes_to_write; - - vlib_set_next_frame_buffer (vm, &p->node_runtime, sm->tx.next_index, - sm->first_buffer); - - sm->first_buffer = sm->last_buffer = ~0; - sm->tx.n_total_data_bytes = 0; - } - - else if (n_bytes_to_write == 0 && s->n_buffer_bytes == 0) - { - ASSERT (sm->first_buffer == ~0); - ASSERT (sm->last_buffer == ~0); - n = - vlib_buffer_alloc_from_free_list (vm, &sm->first_buffer, 1, - sm->tx.free_list_index); - if (n != 1) - serialize_error (m, - clib_error_create - ("vlib_buffer_alloc_from_free_list fails")); - sm->last_buffer = sm->first_buffer; - s->n_buffer_bytes = - vlib_buffer_free_list_buffer_size (vm, sm->tx.free_list_index); - } - - if (n_bytes_to_write > 0) - { - vlib_buffer_t *prev = vlib_get_buffer (vm, sm->last_buffer); - n = - vlib_buffer_alloc_from_free_list (vm, &sm->last_buffer, 1, - sm->tx.free_list_index); - if (n != 1) - serialize_error (m, - clib_error_create - ("vlib_buffer_alloc_from_free_list fails")); - sm->tx.n_total_data_bytes += n_bytes_to_write; - prev->current_length = n_bytes_to_write; - 
prev->next_buffer = sm->last_buffer; - prev->flags |= VLIB_BUFFER_NEXT_PRESENT; - } - - if (sm->last_buffer != ~0) - { - last = vlib_get_buffer (vm, sm->last_buffer); - s->buffer = vlib_buffer_get_current (last); - s->current_buffer_index = 0; - ASSERT (last->current_data == s->current_buffer_index); - } -} - -static void -vlib_serialize_rx (serialize_main_header_t * m, serialize_stream_t * s) -{ - vlib_main_t *vm; - vlib_serialize_buffer_main_t *sm; - vlib_buffer_t *last; - - sm = - uword_to_pointer (s->data_function_opaque, - vlib_serialize_buffer_main_t *); - vm = sm->vlib_main; - - if (serialize_stream_is_end_of_stream (s)) - return; - - if (sm->last_buffer != ~0) - { - last = vlib_get_buffer (vm, sm->last_buffer); - - if (last->flags & VLIB_BUFFER_NEXT_PRESENT) - sm->last_buffer = last->next_buffer; - else - { - vlib_buffer_free (vm, &sm->first_buffer, /* count */ 1); - sm->first_buffer = sm->last_buffer = ~0; - } - } - - if (sm->last_buffer == ~0) - { - while (clib_fifo_elts (sm->rx.buffer_fifo) == 0) - { - sm->rx.ready_one_time_event = - vlib_process_create_one_time_event (vm, vlib_current_process (vm), - ~0); - vlib_process_wait_for_one_time_event (vm, /* no event data */ 0, - sm->rx.ready_one_time_event); - } - - clib_fifo_sub1 (sm->rx.buffer_fifo, sm->first_buffer); - sm->last_buffer = sm->first_buffer; - } - - ASSERT (sm->last_buffer != ~0); - - last = vlib_get_buffer (vm, sm->last_buffer); - s->current_buffer_index = 0; - s->buffer = vlib_buffer_get_current (last); - s->n_buffer_bytes = last->current_length; -} - -static void -serialize_open_vlib_helper (serialize_main_t * m, - vlib_main_t * vm, - vlib_serialize_buffer_main_t * sm, uword is_read) -{ - /* Initialize serialize main but save overflow buffer for re-use between calls. 
*/ - { - u8 *save = m->stream.overflow_buffer; - memset (m, 0, sizeof (m[0])); - m->stream.overflow_buffer = save; - if (save) - _vec_len (save) = 0; - } - - sm->first_buffer = sm->last_buffer = ~0; - if (is_read) - clib_fifo_reset (sm->rx.buffer_fifo); - else - sm->tx.n_total_data_bytes = 0; - sm->vlib_main = vm; - m->header.data_function = is_read ? vlib_serialize_rx : vlib_serialize_tx; - m->stream.data_function_opaque = pointer_to_uword (sm); -} - -void -serialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, - vlib_serialize_buffer_main_t * sm) -{ - serialize_open_vlib_helper (m, vm, sm, /* is_read */ 0); -} - -void -unserialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, - vlib_serialize_buffer_main_t * sm) -{ - serialize_open_vlib_helper (m, vm, sm, /* is_read */ 1); -} - -u32 -serialize_close_vlib_buffer (serialize_main_t * m) -{ - vlib_serialize_buffer_main_t *sm - = uword_to_pointer (m->stream.data_function_opaque, - vlib_serialize_buffer_main_t *); - vlib_buffer_t *last; - serialize_stream_t *s = &m->stream; - - last = vlib_get_buffer (sm->vlib_main, sm->last_buffer); - last->current_length = s->current_buffer_index; - - if (vec_len (s->overflow_buffer) > 0) - { - sm->last_buffer - = vlib_buffer_add_data (sm->vlib_main, sm->tx.free_list_index, - sm->last_buffer == ~0 ? 
0 : sm->last_buffer, - s->overflow_buffer, - vec_len (s->overflow_buffer)); - _vec_len (s->overflow_buffer) = 0; - } - - return sm->first_buffer; -} - -void -unserialize_close_vlib_buffer (serialize_main_t * m) -{ - vlib_serialize_buffer_main_t *sm - = uword_to_pointer (m->stream.data_function_opaque, - vlib_serialize_buffer_main_t *); - if (sm->first_buffer != ~0) - vlib_buffer_free_one (sm->vlib_main, sm->first_buffer); - clib_fifo_reset (sm->rx.buffer_fifo); - if (m->stream.overflow_buffer) - _vec_len (m->stream.overflow_buffer) = 0; -} static u8 * format_vlib_buffer_free_list (u8 * s, va_list * va) { vlib_buffer_free_list_t *f = va_arg (*va, vlib_buffer_free_list_t *); -#if DPDK > 0 u32 threadnum = va_arg (*va, u32); uword bytes_alloc, bytes_free, n_free, size; @@ -1877,21 +1275,6 @@ format_vlib_buffer_free_list (u8 * s, va_list * va) bytes_free = size * n_free; s = format (s, "%7d%30s%12d%12d%=12U%=12U%=12d%=12d", threadnum, -#else - uword bytes_alloc, bytes_free, n_free, size; - - if (!f) - return format (s, "%=30s%=12s%=12s%=12s%=12s%=12s%=12s", - "Name", "Index", "Size", "Alloc", "Free", "#Alloc", - "#Free"); - - size = sizeof (vlib_buffer_t) + f->n_data_bytes; - n_free = vec_len (f->aligned_buffers) + vec_len (f->unaligned_buffers); - bytes_alloc = size * f->n_alloc; - bytes_free = size * n_free; - - s = format (s, "%30s%12d%12d%=12U%=12U%=12d%=12d", -#endif f->name, f->index, f->n_data_bytes, format_memory_size, bytes_alloc, format_memory_size, bytes_free, f->n_alloc, n_free); @@ -1903,7 +1286,6 @@ static clib_error_t * show_buffers (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { -#if DPDK > 0 vlib_buffer_main_t *bm; vlib_buffer_free_list_t *f; vlib_main_t *curr_vm; @@ -1926,18 +1308,6 @@ show_buffers (vlib_main_t * vm, } while (vm_index < vec_len (vlib_mains)); -#else - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *f; - - vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, 0); - /* *INDENT-OFF* */ - 
pool_foreach (f, bm->buffer_free_list_pool, ({ - vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, f); - })); -/* *INDENT-ON* */ - -#endif return 0; } @@ -1949,34 +1319,38 @@ VLIB_CLI_COMMAND (show_buffers_command, static) = { }; /* *INDENT-ON* */ -#if DPDK > 0 -#if CLIB_DEBUG > 0 - -u32 *vlib_buffer_state_validation_lock; -uword *vlib_buffer_state_validation_hash; -void *vlib_buffer_state_heap; - -static clib_error_t * -buffer_state_validation_init (vlib_main_t * vm) +void +vlib_buffer_cb_init (struct vlib_main_t *vm) { - void *oldheap; - - vlib_buffer_state_heap = mheap_alloc (0, 10 << 20); - - oldheap = clib_mem_set_heap (vlib_buffer_state_heap); + vlib_buffer_main_t *bm = vm->buffer_main; + bm->cb.vlib_buffer_alloc_cb = &vlib_buffer_alloc_internal; + bm->cb.vlib_buffer_alloc_from_free_list_cb = + &vlib_buffer_alloc_from_free_list_internal; + bm->cb.vlib_buffer_free_cb = &vlib_buffer_free_internal; + bm->cb.vlib_buffer_free_no_next_cb = &vlib_buffer_free_no_next_internal; + bm->cb.vlib_buffer_delete_free_list_cb = + &vlib_buffer_delete_free_list_internal; + bm->extern_buffer_mgmt = 0; +} - vlib_buffer_state_validation_hash = hash_create (0, sizeof (uword)); - vec_validate_aligned (vlib_buffer_state_validation_lock, 0, - CLIB_CACHE_LINE_BYTES); - clib_mem_set_heap (oldheap); +int +vlib_buffer_cb_register (struct vlib_main_t *vm, vlib_buffer_callbacks_t * cb) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + if (bm->extern_buffer_mgmt) + return -1; + +#define _(x) bm->cb.x = cb->x + _(vlib_buffer_alloc_cb); + _(vlib_buffer_alloc_from_free_list_cb); + _(vlib_buffer_free_cb); + _(vlib_buffer_free_no_next_cb); + _(vlib_buffer_delete_free_list_cb); +#undef _ + bm->extern_buffer_mgmt = 1; return 0; } -VLIB_INIT_FUNCTION (buffer_state_validation_init); -#endif -#endif - - /** @endcond */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index 5f1e62f08c9..d270c08a602 100644 --- a/src/vlib/buffer.h +++ 
b/src/vlib/buffer.h @@ -46,15 +46,9 @@ #include #include /* for vlib_error_t */ -#if DPDK > 0 -#include -#define VLIB_BUFFER_DATA_SIZE (2048) -#define VLIB_BUFFER_PRE_DATA_SIZE RTE_PKTMBUF_HEADROOM -#else #include /* for __PRE_DATA_SIZE */ -#define VLIB_BUFFER_DATA_SIZE (512) +#define VLIB_BUFFER_DATA_SIZE (2048) #define VLIB_BUFFER_PRE_DATA_SIZE __PRE_DATA_SIZE -#endif #if defined (CLIB_HAVE_VEC128) || defined (__aarch64__) typedef u8x16 vlib_copy_unit_t; @@ -296,6 +290,27 @@ typedef struct vlib_buffer_free_list_t uword buffer_init_function_opaque; } __attribute__ ((aligned (16))) vlib_buffer_free_list_t; +typedef struct +{ + u32 (*vlib_buffer_alloc_cb) (struct vlib_main_t * vm, u32 * buffers, + u32 n_buffers); + u32 (*vlib_buffer_alloc_from_free_list_cb) (struct vlib_main_t * vm, + u32 * buffers, u32 n_buffers, + u32 free_list_index); + void (*vlib_buffer_free_cb) (struct vlib_main_t * vm, u32 * buffers, + u32 n_buffers); + void (*vlib_buffer_free_no_next_cb) (struct vlib_main_t * vm, u32 * buffers, + u32 n_buffers); + void (*vlib_packet_template_init_cb) (struct vlib_main_t * vm, void *t, + void *packet_data, + uword n_packet_data_bytes, + uword + min_n_buffers_each_physmem_alloc, + u8 * name); + void (*vlib_buffer_delete_free_list_cb) (struct vlib_main_t * vm, + u32 free_list_index); +} vlib_buffer_callbacks_t; + typedef struct { /* Buffer free callback, for subversive activities */ @@ -323,12 +338,15 @@ typedef struct /* List of free-lists needing Blue Light Special announcements */ vlib_buffer_free_list_t **announce_list; - /* Vector of rte_mempools per socket */ -#if DPDK == 1 - struct rte_mempool **pktmbuf_pools; -#endif + /* Callbacks */ + vlib_buffer_callbacks_t cb; + int extern_buffer_mgmt; } vlib_buffer_main_t; +void vlib_buffer_cb_init (struct vlib_main_t *vm); +int vlib_buffer_cb_register (struct vlib_main_t *vm, + vlib_buffer_callbacks_t * cb); + typedef struct { struct vlib_main_t *vlib_main; @@ -385,11 +403,6 @@ serialize_vlib_buffer_n_bytes 
(serialize_main_t * m) vec_len (s->overflow_buffer); } -#if DPDK > 0 -#define rte_mbuf_from_vlib_buffer(x) (((struct rte_mbuf *)x) - 1) -#define vlib_buffer_from_rte_mbuf(x) ((vlib_buffer_t *)(x+1)) -#endif - /* */ diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h index 75716eca7f6..15d93c1657f 100644 --- a/src/vlib/buffer_funcs.h +++ b/src/vlib/buffer_funcs.h @@ -195,8 +195,6 @@ do { \ } while (0) #endif -#if DPDK == 0 - typedef enum { /* Index is unknown. */ @@ -232,8 +230,6 @@ vlib_buffer_set_known_state (vlib_main_t * vm, u8 *vlib_validate_buffer (vlib_main_t * vm, u32 buffer_index, uword follow_chain); -#endif /* DPDK == 0 */ - clib_error_t *vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, unsigned socket_id); @@ -245,7 +241,15 @@ clib_error_t *vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, @return - (u32) number of buffers actually allocated, may be less than the number requested or zero */ -u32 vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers); +always_inline u32 +vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + ASSERT (bm->cb.vlib_buffer_alloc_cb); + + return bm->cb.vlib_buffer_alloc_cb (vm, buffers, n_buffers); +} always_inline u32 vlib_buffer_round_size (u32 size) @@ -261,9 +265,18 @@ vlib_buffer_round_size (u32 size) @return - (u32) number of buffers actually allocated, may be less than the number requested or zero */ -u32 vlib_buffer_alloc_from_free_list (vlib_main_t * vm, - u32 * buffers, - u32 n_buffers, u32 free_list_index); +always_inline u32 +vlib_buffer_alloc_from_free_list (vlib_main_t * vm, + u32 * buffers, + u32 n_buffers, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + ASSERT (bm->cb.vlib_buffer_alloc_from_free_list_cb); + + return bm->cb.vlib_buffer_alloc_from_free_list_cb (vm, buffers, n_buffers, + free_list_index); +} /** \brief Free buffers Frees the entire buffer chain for each buffer @@ 
-273,11 +286,19 @@ u32 vlib_buffer_alloc_from_free_list (vlib_main_t * vm, @param n_buffers - (u32) number of buffers to free */ -void vlib_buffer_free (vlib_main_t * vm, - /* pointer to first buffer */ - u32 * buffers, - /* number of buffers to free */ - u32 n_buffers); +always_inline void +vlib_buffer_free (vlib_main_t * vm, + /* pointer to first buffer */ + u32 * buffers, + /* number of buffers to free */ + u32 n_buffers) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + ASSERT (bm->cb.vlib_buffer_free_cb); + + return bm->cb.vlib_buffer_free_cb (vm, buffers, n_buffers); +} /** \brief Free buffers, does not free the buffer chain for each buffer @@ -286,11 +307,19 @@ void vlib_buffer_free (vlib_main_t * vm, @param n_buffers - (u32) number of buffers to free */ -void vlib_buffer_free_no_next (vlib_main_t * vm, - /* pointer to first buffer */ - u32 * buffers, - /* number of buffers to free */ - u32 n_buffers); +always_inline void +vlib_buffer_free_no_next (vlib_main_t * vm, + /* pointer to first buffer */ + u32 * buffers, + /* number of buffers to free */ + u32 n_buffers) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + ASSERT (bm->cb.vlib_buffer_free_no_next_cb); + + return bm->cb.vlib_buffer_free_no_next_cb (vm, buffers, n_buffers); +} /** \brief Free one buffer Shorthand to free a single buffer chain. @@ -307,7 +336,15 @@ vlib_buffer_free_one (vlib_main_t * vm, u32 buffer_index) /* Add/delete buffer free lists. */ u32 vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes, char *fmt, ...); -void vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index); +always_inline void +vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + ASSERT (bm->cb.vlib_buffer_delete_free_list_cb); + + bm->cb.vlib_buffer_delete_free_list_cb (vm, free_list_index); +} /* Find already existing public free list with given size or create one. 
*/ u32 vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes, @@ -453,11 +490,6 @@ vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b) return fd; } -/* - * vlib_buffer_chain_* functions provide a way to create long buffers. - * When DPDK is enabled, the 'hidden' DPDK header is taken care of transparently. - */ - /* Initializes the buffer as an empty packet with no chained buffers. */ always_inline void vlib_buffer_chain_init (vlib_buffer_t * first) @@ -537,8 +569,6 @@ typedef struct /* Vector of packet data. */ u8 *packet_data; - /* Note: the next three fields are unused if DPDK == 1 */ - /* Number of buffers to allocate in each call to physmem allocator. */ u32 min_n_buffers_each_physmem_alloc; diff --git a/src/vlib/buffer_serialize.c b/src/vlib/buffer_serialize.c new file mode 100644 index 00000000000..96a5f0a0da8 --- /dev/null +++ b/src/vlib/buffer_serialize.c @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * buffer.c: allocate/free network buffers. 
+ * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include + +static void +vlib_serialize_tx (serialize_main_header_t * m, serialize_stream_t * s) +{ + vlib_main_t *vm; + vlib_serialize_buffer_main_t *sm; + uword n, n_bytes_to_write; + vlib_buffer_t *last; + + n_bytes_to_write = s->current_buffer_index; + sm = + uword_to_pointer (s->data_function_opaque, + vlib_serialize_buffer_main_t *); + vm = sm->vlib_main; + + ASSERT (sm->tx.max_n_data_bytes_per_chain > 0); + if (serialize_stream_is_end_of_stream (s) + || sm->tx.n_total_data_bytes + n_bytes_to_write > + sm->tx.max_n_data_bytes_per_chain) + { + vlib_process_t *p = vlib_get_current_process (vm); + + last = vlib_get_buffer (vm, sm->last_buffer); + last->current_length = n_bytes_to_write; + + vlib_set_next_frame_buffer (vm, &p->node_runtime, sm->tx.next_index, + sm->first_buffer); + + sm->first_buffer = sm->last_buffer = ~0; + sm->tx.n_total_data_bytes = 0; + } + + else if (n_bytes_to_write == 0 && s->n_buffer_bytes == 0) + { + ASSERT (sm->first_buffer == ~0); + ASSERT (sm->last_buffer == ~0); + n = + vlib_buffer_alloc_from_free_list (vm, &sm->first_buffer, 1, + sm->tx.free_list_index); + if (n != 1) + serialize_error (m, + clib_error_create + ("vlib_buffer_alloc_from_free_list fails")); + sm->last_buffer = sm->first_buffer; + s->n_buffer_bytes = + vlib_buffer_free_list_buffer_size (vm, sm->tx.free_list_index); + } + + if (n_bytes_to_write > 0) + { + vlib_buffer_t *prev = vlib_get_buffer (vm, sm->last_buffer); + n = + vlib_buffer_alloc_from_free_list (vm, &sm->last_buffer, 1, + sm->tx.free_list_index); + if (n != 1) + serialize_error (m, + clib_error_create + ("vlib_buffer_alloc_from_free_list fails")); + sm->tx.n_total_data_bytes += n_bytes_to_write; + prev->current_length = n_bytes_to_write; + prev->next_buffer = sm->last_buffer; + prev->flags |= VLIB_BUFFER_NEXT_PRESENT; + } + + if (sm->last_buffer != ~0) + { + last = vlib_get_buffer (vm, sm->last_buffer); + s->buffer = vlib_buffer_get_current (last); + s->current_buffer_index = 0; + ASSERT 
(last->current_data == s->current_buffer_index); + } +} + +static void +vlib_serialize_rx (serialize_main_header_t * m, serialize_stream_t * s) +{ + vlib_main_t *vm; + vlib_serialize_buffer_main_t *sm; + vlib_buffer_t *last; + + sm = + uword_to_pointer (s->data_function_opaque, + vlib_serialize_buffer_main_t *); + vm = sm->vlib_main; + + if (serialize_stream_is_end_of_stream (s)) + return; + + if (sm->last_buffer != ~0) + { + last = vlib_get_buffer (vm, sm->last_buffer); + + if (last->flags & VLIB_BUFFER_NEXT_PRESENT) + sm->last_buffer = last->next_buffer; + else + { + vlib_buffer_free (vm, &sm->first_buffer, /* count */ 1); + sm->first_buffer = sm->last_buffer = ~0; + } + } + + if (sm->last_buffer == ~0) + { + while (clib_fifo_elts (sm->rx.buffer_fifo) == 0) + { + sm->rx.ready_one_time_event = + vlib_process_create_one_time_event (vm, vlib_current_process (vm), + ~0); + vlib_process_wait_for_one_time_event (vm, /* no event data */ 0, + sm->rx.ready_one_time_event); + } + + clib_fifo_sub1 (sm->rx.buffer_fifo, sm->first_buffer); + sm->last_buffer = sm->first_buffer; + } + + ASSERT (sm->last_buffer != ~0); + + last = vlib_get_buffer (vm, sm->last_buffer); + s->current_buffer_index = 0; + s->buffer = vlib_buffer_get_current (last); + s->n_buffer_bytes = last->current_length; +} + +static void +serialize_open_vlib_helper (serialize_main_t * m, + vlib_main_t * vm, + vlib_serialize_buffer_main_t * sm, uword is_read) +{ + /* Initialize serialize main but save overflow buffer for re-use between calls. */ + { + u8 *save = m->stream.overflow_buffer; + memset (m, 0, sizeof (m[0])); + m->stream.overflow_buffer = save; + if (save) + _vec_len (save) = 0; + } + + sm->first_buffer = sm->last_buffer = ~0; + if (is_read) + clib_fifo_reset (sm->rx.buffer_fifo); + else + sm->tx.n_total_data_bytes = 0; + sm->vlib_main = vm; + m->header.data_function = is_read ? 
vlib_serialize_rx : vlib_serialize_tx; + m->stream.data_function_opaque = pointer_to_uword (sm); +} + +void +serialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, + vlib_serialize_buffer_main_t * sm) +{ + serialize_open_vlib_helper (m, vm, sm, /* is_read */ 0); +} + +void +unserialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, + vlib_serialize_buffer_main_t * sm) +{ + serialize_open_vlib_helper (m, vm, sm, /* is_read */ 1); +} + +u32 +serialize_close_vlib_buffer (serialize_main_t * m) +{ + vlib_serialize_buffer_main_t *sm + = uword_to_pointer (m->stream.data_function_opaque, + vlib_serialize_buffer_main_t *); + vlib_buffer_t *last; + serialize_stream_t *s = &m->stream; + + last = vlib_get_buffer (sm->vlib_main, sm->last_buffer); + last->current_length = s->current_buffer_index; + + if (vec_len (s->overflow_buffer) > 0) + { + sm->last_buffer + = vlib_buffer_add_data (sm->vlib_main, sm->tx.free_list_index, + sm->last_buffer == ~0 ? 0 : sm->last_buffer, + s->overflow_buffer, + vec_len (s->overflow_buffer)); + _vec_len (s->overflow_buffer) = 0; + } + + return sm->first_buffer; +} + +void +unserialize_close_vlib_buffer (serialize_main_t * m) +{ + vlib_serialize_buffer_main_t *sm + = uword_to_pointer (m->stream.data_function_opaque, + vlib_serialize_buffer_main_t *); + if (sm->first_buffer != ~0) + vlib_buffer_free_one (sm->vlib_main, sm->first_buffer); + clib_fifo_reset (sm->rx.buffer_fifo); + if (m->stream.overflow_buffer) + _vec_len (m->stream.overflow_buffer) = 0; +} + +/** @endcond */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/main.c b/src/vlib/main.c index 6c6cad98bba..09f34bbd04a 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -465,7 +465,7 @@ vlib_put_next_frame (vlib_main_t * vm, vlib_frame_t *f; u32 n_vectors_in_frame; - if (DPDK == 0 && CLIB_DEBUG > 0) + if (vm->buffer_main->extern_buffer_mgmt == 0 && CLIB_DEBUG > 0) 
vlib_put_next_frame_validate (vm, r, next_index, n_vectors_left); nf = vlib_node_runtime_get_next_frame (vm, r, next_index); @@ -1012,8 +1012,8 @@ dispatch_node (vlib_main_t * vm, /* When in interrupt mode and vector rate crosses threshold switch to polling mode. */ - if ((DPDK == 0 && dispatch_state == VLIB_NODE_STATE_INTERRUPT) - || (DPDK == 0 && dispatch_state == VLIB_NODE_STATE_POLLING + if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT) + || (dispatch_state == VLIB_NODE_STATE_POLLING && (node->flags & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))) { @@ -1615,6 +1615,7 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) vm->name = "VLIB"; vec_validate (vm->buffer_main, 0); + vlib_buffer_cb_init (vm); if ((error = vlib_thread_init (vm))) { diff --git a/src/vlib/threads.c b/src/vlib/threads.c index c5e58bc001a..b3bbd30ee10 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -22,29 +22,10 @@ #include #include - -#if DPDK==1 -#include -#include -#include -#include -#include -#endif DECLARE_CJ_GLOBAL_LOG; #define FRAME_QUEUE_NELTS 32 - -#if DPDK==1 -/* - * Weak definitions of DPDK symbols used in this file. - * Needed for linking test programs without DPDK libs. 
- */ -unsigned __thread __attribute__ ((weak)) RTE_PER_LCORE (_lcore_id); -struct lcore_config __attribute__ ((weak)) lcore_config[]; -unsigned __attribute__ ((weak)) rte_socket_id (); -int __attribute__ ((weak)) rte_eal_remote_launch (); -#endif u32 vl (void *p) { @@ -194,14 +175,17 @@ vlib_thread_init (vlib_main_t * vm) tm->cpu_socket_bitmap = clib_bitmap_set (0, 0, 1); /* pin main thread to main_lcore */ -#if DPDK==0 - { - cpu_set_t cpuset; - CPU_ZERO (&cpuset); - CPU_SET (tm->main_lcore, &cpuset); - pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset); - } -#endif + if (tm->cb.vlib_thread_set_lcore_cb) + { + tm->cb.vlib_thread_set_lcore_cb (0, tm->main_lcore); + } + else + { + cpu_set_t cpuset; + CPU_ZERO (&cpuset); + CPU_SET (tm->main_lcore, &cpuset); + pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset); + } /* as many threads as stacks... */ vec_validate_aligned (vlib_worker_threads, vec_len (vlib_thread_stacks) - 1, @@ -520,32 +504,29 @@ vlib_worker_thread_bootstrap_fn (void *arg) return rv; } -static int -vlib_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id) +static clib_error_t * +vlib_launch_thread_int (void *fp, vlib_worker_thread_t * w, unsigned lcore_id) { + vlib_thread_main_t *tm = &vlib_thread_main; void *(*fp_arg) (void *) = fp; w->lcore_id = lcore_id; -#if DPDK==1 - if (!w->registration->use_pthreads) - if (rte_eal_remote_launch) /* do we have dpdk linked */ - return rte_eal_remote_launch (fp, (void *) w, lcore_id); - else - return -1; + if (tm->cb.vlib_launch_thread_cb && !w->registration->use_pthreads) + return tm->cb.vlib_launch_thread_cb (fp, (void *) w, lcore_id); else -#endif { - int ret; pthread_t worker; cpu_set_t cpuset; CPU_ZERO (&cpuset); CPU_SET (lcore_id, &cpuset); - ret = pthread_create (&worker, NULL /* attr */ , fp_arg, (void *) w); - if (ret == 0) - return pthread_setaffinity_np (worker, sizeof (cpu_set_t), &cpuset); - else - return ret; + if (pthread_create (&worker, NULL /* 
attr */ , fp_arg, (void *) w)) + return clib_error_return_unix (0, "pthread_create"); + + if (pthread_setaffinity_np (worker, sizeof (cpu_set_t), &cpuset)) + return clib_error_return_unix (0, "pthread_setaffinity_np"); + + return 0; } } @@ -769,6 +750,7 @@ start_workers (vlib_main_t * vm) for (i = 0; i < vec_len (tm->registrations); i++) { + clib_error_t *err; int j; tr = tm->registrations[i]; @@ -778,22 +760,24 @@ start_workers (vlib_main_t * vm) for (j = 0; j < tr->count; j++) { w = vlib_worker_threads + worker_thread_index++; - if (vlib_launch_thread (vlib_worker_thread_bootstrap_fn, w, 0) < - 0) - clib_warning ("Couldn't start '%s' pthread ", tr->name); + err = vlib_launch_thread_int (vlib_worker_thread_bootstrap_fn, + w, 0); + if (err) + clib_error_report (err); } } else { uword c; - /* *INDENT-OFF* */ - clib_bitmap_foreach (c, tr->coremask, ({ - w = vlib_worker_threads + worker_thread_index++; - if (vlib_launch_thread (vlib_worker_thread_bootstrap_fn, w, c) < 0) - clib_warning ("Couldn't start DPDK lcore %d", c); - - })); -/* *INDENT-ON* */ + /* *INDENT-OFF* */ + clib_bitmap_foreach (c, tr->coremask, ({ + w = vlib_worker_threads + worker_thread_index++; + err = vlib_launch_thread_int (vlib_worker_thread_bootstrap_fn, + w, c); + if (err) + clib_error_report (err); + })); + /* *INDENT-ON* */ } } vlib_worker_thread_barrier_sync (vm); @@ -1105,7 +1089,7 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) { tm->n_thread_stacks += tr->count; tm->n_pthreads += tr->count * tr->use_pthreads; - tm->n_eal_threads += tr->count * (tr->use_pthreads == 0); + tm->n_threads += tr->count * (tr->use_pthreads == 0); tr = tr->next; } @@ -1423,6 +1407,7 @@ void vlib_worker_thread_fn (void *arg) { vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; + vlib_thread_main_t *tm = vlib_get_thread_main (); vlib_main_t *vm = vlib_get_main (); ASSERT (vm->cpu_index == os_get_cpu_number ()); @@ -1431,12 +1416,9 @@ vlib_worker_thread_fn (void *arg) clib_time_init (&vm->clib_time); 
clib_mem_set_heap (w->thread_mheap); -#if DPDK > 0 /* Wait until the dpdk init sequence is complete */ - vlib_thread_main_t *tm = vlib_get_thread_main (); - while (tm->worker_thread_release == 0) + while (tm->extern_thread_mgmt && tm->worker_thread_release == 0) vlib_worker_thread_barrier_check (); -#endif vlib_worker_thread_internal (vm); } @@ -1475,6 +1457,20 @@ vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts) return (fqm - tm->frame_queue_mains); } + +int +vlib_thread_cb_register (struct vlib_main_t *vm, vlib_thread_callbacks_t * cb) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + + if (tm->extern_thread_mgmt) + return -1; + + tm->cb.vlib_launch_thread_cb = cb->vlib_launch_thread_cb; + tm->extern_thread_mgmt = 1; + return 0; +} + clib_error_t * threads_init (vlib_main_t * vm) { diff --git a/src/vlib/threads.h b/src/vlib/threads.h index 34ab5be8650..75a5a2818fb 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -263,6 +263,13 @@ typedef enum SCHED_POLICY_N, } sched_policy_t; +typedef struct +{ + clib_error_t *(*vlib_launch_thread_cb) (void *fp, vlib_worker_thread_t * w, + unsigned lcore_id); + clib_error_t *(*vlib_thread_set_lcore_cb) (u32 thread, u16 lcore); +} vlib_thread_callbacks_t; + typedef struct { /* Link list of registrations, built by constructors */ @@ -290,8 +297,8 @@ typedef struct /* Number of pthreads */ u32 n_pthreads; - /* Number of DPDK eal threads */ - u32 n_eal_threads; + /* Number of threads */ + u32 n_threads; /* Number of cores to skip, must match the core mask */ u32 skip_cores; @@ -320,6 +327,9 @@ typedef struct /* scheduling policy priority */ u32 sched_priority; + /* callbacks */ + vlib_thread_callbacks_t cb; + int extern_thread_mgmt; } vlib_thread_main_t; extern vlib_thread_main_t vlib_thread_main; @@ -459,6 +469,9 @@ vlib_get_worker_handoff_queue_elt (u32 frame_queue_index, return elt; } +int vlib_thread_cb_register (struct vlib_main_t *vm, + vlib_thread_callbacks_t * cb); + #endif /* 
included_vlib_threads_h */ /* diff --git a/src/vlib/threads_cli.c b/src/vlib/threads_cli.c index ee632279db5..b64028c4cf0 100644 --- a/src/vlib/threads_cli.c +++ b/src/vlib/threads_cli.c @@ -20,14 +20,6 @@ #include #include -#if DPDK==1 -#include -#include -#include -#include -#include -#endif - static u8 * format_sched_policy_and_priority (u8 * s, va_list * args) { @@ -116,23 +108,6 @@ show_threads_fn (vlib_main_t * vm, vec_free (p); line = format (line, "%-7u%-7u%-7u%", lcore, core_id, socket_id); -#if DPDK==1 - ASSERT (lcore <= RTE_MAX_LCORE); - switch (lcore_config[lcore].state) - { - case WAIT: - line = format (line, "wait"); - break; - case RUNNING: - line = format (line, "running"); - break; - case FINISHED: - line = format (line, "finished"); - break; - default: - line = format (line, "unknown"); - } -#endif } else { diff --git a/src/vlib/unix/physmem.c b/src/vlib/unix/physmem.c index 80ab7b9d6f8..8d10ad2e88d 100644 --- a/src/vlib/unix/physmem.c +++ b/src/vlib/unix/physmem.c @@ -45,13 +45,13 @@ static void * unix_physmem_alloc_aligned (vlib_physmem_main_t * vpm, uword n_bytes, uword alignment) { + vlib_main_t *vm = vlib_get_main (); physmem_main_t *pm = &physmem_main; uword lo_offset, hi_offset; uword *to_free = 0; -#if DPDK > 0 - clib_warning ("unsafe alloc!"); -#endif + if (vm->buffer_main->extern_buffer_mgmt) + clib_warning ("unsafe alloc!"); /* IO memory is always at least cache aligned. 
*/ alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES); @@ -269,16 +269,17 @@ static clib_error_t * show_physmem (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { -#if DPDK > 0 - vlib_cli_output (vm, "Not supported with DPDK drivers."); -#else physmem_main_t *pm = &physmem_main; + if (vm->buffer_main->extern_buffer_mgmt) + { + vlib_cli_output (vm, "Not supported with external buffer management."); + return 0; + } if (pm->heap) vlib_cli_output (vm, "%U", format_mheap, pm->heap, /* verbose */ 1); else vlib_cli_output (vm, "No physmem allocated."); -#endif return 0; } diff --git a/src/vnet.am b/src/vnet.am index 665a16ea0dc..47c5eda78d5 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -761,11 +761,13 @@ nobase_include_HEADERS += \ ######################################## if WITH_DPDK libvnet_la_SOURCES += \ + vnet/devices/dpdk/buffer.c \ vnet/devices/dpdk/dpdk_priv.h \ vnet/devices/dpdk/device.c \ vnet/devices/dpdk/format.c \ vnet/devices/dpdk/init.c \ vnet/devices/dpdk/node.c \ + vnet/devices/dpdk/thread.c \ vnet/devices/dpdk/hqos.c \ vnet/devices/dpdk/cli.c \ vnet/devices/dpdk/dpdk_api.c diff --git a/src/vnet/devices/dpdk/buffer.c b/src/vnet/devices/dpdk/buffer.c new file mode 100644 index 00000000000..214a916263c --- /dev/null +++ b/src/vnet/devices/dpdk/buffer.c @@ -0,0 +1,729 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * buffer.c: allocate/free network buffers. 
+ * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @file + * + * Allocate/free network buffers. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE == RTE_PKTMBUF_HEADROOM, + "VLIB_BUFFER_PRE_DATA_SIZE must be equal to RTE_PKTMBUF_HEADROOM"); + +#define BUFFERS_PER_COPY (sizeof (vlib_copy_unit_t) / sizeof (u32)) + +/* Make sure we have at least given number of unaligned buffers. 
*/ +static void +fill_unaligned (vlib_main_t * vm, + vlib_buffer_free_list_t * free_list, + uword n_unaligned_buffers) +{ + word la = vec_len (free_list->aligned_buffers); + word lu = vec_len (free_list->unaligned_buffers); + + /* Aligned come in aligned copy-sized chunks. */ + ASSERT (la % BUFFERS_PER_COPY == 0); + + ASSERT (la >= n_unaligned_buffers); + + while (lu < n_unaligned_buffers) + { + /* Copy 4 buffers from end of aligned vector to unaligned vector. */ + vec_add (free_list->unaligned_buffers, + free_list->aligned_buffers + la - BUFFERS_PER_COPY, + BUFFERS_PER_COPY); + la -= BUFFERS_PER_COPY; + lu += BUFFERS_PER_COPY; + } + _vec_len (free_list->aligned_buffers) = la; +} + +/* After free aligned buffers may not contain even sized chunks. */ +static void +trim_aligned (vlib_buffer_free_list_t * f) +{ + uword l, n_trim; + + /* Add unaligned to aligned before trim. */ + l = vec_len (f->unaligned_buffers); + if (l > 0) + { + vec_add_aligned (f->aligned_buffers, f->unaligned_buffers, l, + /* align */ sizeof (vlib_copy_unit_t)); + + _vec_len (f->unaligned_buffers) = 0; + } + + /* Remove unaligned buffers from end of aligned vector and save for next trim. */ + l = vec_len (f->aligned_buffers); + n_trim = l % BUFFERS_PER_COPY; + if (n_trim) + { + /* Trim aligned -> unaligned. */ + vec_add (f->unaligned_buffers, f->aligned_buffers + l - n_trim, n_trim); + + /* Remove from aligned. 
*/ + _vec_len (f->aligned_buffers) = l - n_trim; + } +} + +static void +merge_free_lists (vlib_buffer_free_list_t * dst, + vlib_buffer_free_list_t * src) +{ + uword l; + u32 *d; + + trim_aligned (src); + trim_aligned (dst); + + l = vec_len (src->aligned_buffers); + if (l > 0) + { + vec_add2_aligned (dst->aligned_buffers, d, l, + /* align */ sizeof (vlib_copy_unit_t)); + clib_memcpy (d, src->aligned_buffers, l * sizeof (d[0])); + vec_free (src->aligned_buffers); + } + + l = vec_len (src->unaligned_buffers); + if (l > 0) + { + vec_add (dst->unaligned_buffers, src->unaligned_buffers, l); + vec_free (src->unaligned_buffers); + } +} + +always_inline u32 +dpdk_buffer_get_free_list_with_size (vlib_main_t * vm, u32 size) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + size = vlib_buffer_round_size (size); + uword *p = hash_get (bm->free_list_by_size, size); + return p ? p[0] : ~0; +} + +static void +del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) +{ + u32 i; + struct rte_mbuf *mb; + vlib_buffer_t *b; + + for (i = 0; i < vec_len (f->unaligned_buffers); i++) + { + b = vlib_get_buffer (vm, f->unaligned_buffers[i]); + mb = rte_mbuf_from_vlib_buffer (b); + ASSERT (rte_mbuf_refcnt_read (mb) == 1); + rte_pktmbuf_free (mb); + } + for (i = 0; i < vec_len (f->aligned_buffers); i++) + { + b = vlib_get_buffer (vm, f->aligned_buffers[i]); + mb = rte_mbuf_from_vlib_buffer (b); + ASSERT (rte_mbuf_refcnt_read (mb) == 1); + rte_pktmbuf_free (mb); + } + vec_free (f->name); + vec_free (f->unaligned_buffers); + vec_free (f->aligned_buffers); +} + +/* Delete buffer free list.
*/ +static void +dpdk_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *f; + u32 merge_index; + int i; + + ASSERT (os_get_cpu_number () == 0); + + f = vlib_buffer_get_free_list (vm, free_list_index); + + merge_index = dpdk_buffer_get_free_list_with_size (vm, f->n_data_bytes); + if (merge_index != ~0 && merge_index != free_list_index) + { + merge_free_lists (pool_elt_at_index (bm->buffer_free_list_pool, + merge_index), f); + } + + del_free_list (vm, f); + + /* Poison it. */ + memset (f, 0xab, sizeof (f[0])); + + pool_put (bm->buffer_free_list_pool, f); + + for (i = 1; i < vec_len (vlib_mains); i++) + { + bm = vlib_mains[i]->buffer_main; + f = vlib_buffer_get_free_list (vlib_mains[i], free_list_index);; + memset (f, 0xab, sizeof (f[0])); + pool_put (bm->buffer_free_list_pool, f); + } +} + +/* Make sure free list has at least given number of free buffers. */ +static uword +fill_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * fl, uword min_free_buffers) +{ + dpdk_main_t *dm = &dpdk_main; + vlib_buffer_t *b; + int n, i; + u32 bi; + u32 n_remaining = 0, n_alloc = 0; + unsigned socket_id = rte_socket_id (); + struct rte_mempool *rmp = dm->pktmbuf_pools[socket_id]; + struct rte_mbuf *mb; + + /* Too early? */ + if (PREDICT_FALSE (rmp == 0)) + return 0; + + trim_aligned (fl); + + /* Already have enough free buffers on free list? */ + n = min_free_buffers - vec_len (fl->aligned_buffers); + if (n <= 0) + return min_free_buffers; + + /* Always allocate round number of buffers. */ + n = round_pow2 (n, BUFFERS_PER_COPY); + + /* Always allocate new buffers in reasonably large sized chunks. 
*/ + n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); + + vec_validate (vm->mbuf_alloc_list, n - 1); + + if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0) + return 0; + + _vec_len (vm->mbuf_alloc_list) = n; + + for (i = 0; i < n; i++) + { + mb = vm->mbuf_alloc_list[i]; + + ASSERT (rte_mbuf_refcnt_read (mb) == 0); + rte_mbuf_refcnt_set (mb, 1); + + b = vlib_buffer_from_rte_mbuf (mb); + bi = vlib_get_buffer_index (vm, b); + + vec_add1_aligned (fl->aligned_buffers, bi, sizeof (vlib_copy_unit_t)); + n_alloc++; + n_remaining--; + + vlib_buffer_init_for_free_list (b, fl); + + if (fl->buffer_init_function) + fl->buffer_init_function (vm, fl, &bi, 1); + } + + fl->n_alloc += n; + + return n; +} + +always_inline uword +copy_alignment (u32 * x) +{ + return (pointer_to_uword (x) / sizeof (x[0])) % BUFFERS_PER_COPY; +} + +static u32 +alloc_from_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * free_list, + u32 * alloc_buffers, u32 n_alloc_buffers) +{ + u32 *dst, *u_src; + uword u_len, n_left; + uword n_unaligned_start, n_unaligned_end, n_filled; + + n_left = n_alloc_buffers; + dst = alloc_buffers; + n_unaligned_start = ((BUFFERS_PER_COPY - copy_alignment (dst)) + & (BUFFERS_PER_COPY - 1)); + + n_filled = fill_free_list (vm, free_list, n_alloc_buffers); + if (n_filled == 0) + return 0; + + n_left = n_filled < n_left ? n_filled : n_left; + n_alloc_buffers = n_left; + + if (n_unaligned_start >= n_left) + { + n_unaligned_start = n_left; + n_unaligned_end = 0; + } + else + n_unaligned_end = copy_alignment (dst + n_alloc_buffers); + + fill_unaligned (vm, free_list, n_unaligned_start + n_unaligned_end); + + u_len = vec_len (free_list->unaligned_buffers); + u_src = free_list->unaligned_buffers + u_len - 1; + + if (n_unaligned_start) + { + uword n_copy = n_unaligned_start; + if (n_copy > n_left) + n_copy = n_left; + n_left -= n_copy; + + while (n_copy > 0) + { + *dst++ = *u_src--; + n_copy--; + u_len--; + } + + /* Now dst should be aligned. 
*/ + if (n_left > 0) + ASSERT (pointer_to_uword (dst) % sizeof (vlib_copy_unit_t) == 0); + } + + /* Aligned copy. */ + { + vlib_copy_unit_t *d, *s; + uword n_copy; + + if (vec_len (free_list->aligned_buffers) < + ((n_left / BUFFERS_PER_COPY) * BUFFERS_PER_COPY)) + abort (); + + n_copy = n_left / BUFFERS_PER_COPY; + n_left = n_left % BUFFERS_PER_COPY; + + /* Remove buffers from aligned free list. */ + _vec_len (free_list->aligned_buffers) -= n_copy * BUFFERS_PER_COPY; + + s = (vlib_copy_unit_t *) vec_end (free_list->aligned_buffers); + d = (vlib_copy_unit_t *) dst; + + /* Fast path loop. */ + while (n_copy >= 4) + { + d[0] = s[0]; + d[1] = s[1]; + d[2] = s[2]; + d[3] = s[3]; + n_copy -= 4; + s += 4; + d += 4; + } + + while (n_copy >= 1) + { + d[0] = s[0]; + n_copy -= 1; + s += 1; + d += 1; + } + + dst = (void *) d; + } + + /* Unaligned copy. */ + ASSERT (n_unaligned_end == n_left); + while (n_left > 0) + { + *dst++ = *u_src--; + n_left--; + u_len--; + } + + if (!free_list->unaligned_buffers) + ASSERT (u_len == 0); + else + _vec_len (free_list->unaligned_buffers) = u_len; + + return n_alloc_buffers; +} + +/* Allocate a given number of buffers into given array. + Returns number actually allocated which will be either zero or + number requested. 
*/ +u32 +dpdk_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + return alloc_from_free_list + (vm, + pool_elt_at_index (bm->buffer_free_list_pool, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX), + buffers, n_buffers); +} + + +u32 +dpdk_buffer_alloc_from_free_list (vlib_main_t * vm, + u32 * buffers, + u32 n_buffers, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *f; + f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index); + return alloc_from_free_list (vm, f, buffers, n_buffers); +} + +always_inline void +add_buffer_to_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * f, + u32 buffer_index, u8 do_init) +{ + vlib_buffer_t *b; + b = vlib_get_buffer (vm, buffer_index); + if (PREDICT_TRUE (do_init)) + vlib_buffer_init_for_free_list (b, f); + vec_add1_aligned (f->aligned_buffers, buffer_index, + sizeof (vlib_copy_unit_t)); +} + +always_inline vlib_buffer_free_list_t * +buffer_get_free_list (vlib_main_t * vm, vlib_buffer_t * b, u32 * index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + u32 i; + + *index = i = b->free_list_index; + return pool_elt_at_index (bm->buffer_free_list_pool, i); +} + +static_always_inline void +vlib_buffer_free_inline (vlib_main_t * vm, + u32 * buffers, u32 n_buffers, u32 follow_buffer_next) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *fl; + u32 fi; + int i; + u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers, + u32 follow_buffer_next); + + cb = bm->buffer_free_callback; + + if (PREDICT_FALSE (cb != 0)) + n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next); + + if (!n_buffers) + return; + + for (i = 0; i < n_buffers; i++) + { + vlib_buffer_t *b; + struct rte_mbuf *mb; + + b = vlib_get_buffer (vm, buffers[i]); + + fl = buffer_get_free_list (vm, b, &fi); + + /* The only current use of this callback: multicast recycle */ + if (PREDICT_FALSE (fl->buffers_added_to_freelist_function 
!= 0)) + { + int j; + + add_buffer_to_free_list + (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0); + + for (j = 0; j < vec_len (bm->announce_list); j++) + { + if (fl == bm->announce_list[j]) + goto already_announced; + } + vec_add1 (bm->announce_list, fl); + already_announced: + ; + } + else + { + if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0)) + { + mb = rte_mbuf_from_vlib_buffer (b); + ASSERT (rte_mbuf_refcnt_read (mb) == 1); + rte_pktmbuf_free (mb); + } + } + } + if (vec_len (bm->announce_list)) + { + vlib_buffer_free_list_t *fl; + for (i = 0; i < vec_len (bm->announce_list); i++) + { + fl = bm->announce_list[i]; + fl->buffers_added_to_freelist_function (vm, fl); + } + _vec_len (bm->announce_list) = 0; + } +} + +static void +dpdk_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ + 1); +} + +static void +dpdk_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ + 0); +} + +static void +dpdk_packet_template_init (vlib_main_t * vm, + void *vt, + void *packet_data, + uword n_packet_data_bytes, + uword min_n_buffers_each_physmem_alloc, u8 * name) +{ + vlib_packet_template_t *t = (vlib_packet_template_t *) vt; + + vlib_worker_thread_barrier_sync (vm); + memset (t, 0, sizeof (t[0])); + + vec_add (t->packet_data, packet_data, n_packet_data_bytes); + + vlib_worker_thread_barrier_release (vm); +} + +clib_error_t * +vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, + unsigned socket_id) +{ + dpdk_main_t *dm = &dpdk_main; + vlib_physmem_main_t *vpm = &vm->physmem_main; + struct rte_mempool *rmp; + int i; + + vec_validate_aligned (dm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES); + + /* pool already exists, nothing to do */ + if (dm->pktmbuf_pools[socket_id]) + return 0; + + u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0); + + rmp = 
rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */ + num_mbufs, /* number of mbufs */ + 512, /* cache size */ + VLIB_BUFFER_HDR_SIZE, /* priv size */ + VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */ + socket_id); /* cpu socket */ + + if (rmp) + { + { + uword this_pool_end; + uword this_pool_start; + uword this_pool_size; + uword save_vpm_start, save_vpm_end, save_vpm_size; + struct rte_mempool_memhdr *memhdr; + + this_pool_start = ~0ULL; + this_pool_end = 0LL; + + STAILQ_FOREACH (memhdr, &rmp->mem_list, next) + { + if (((uword) (memhdr->addr + memhdr->len)) > this_pool_end) + this_pool_end = (uword) (memhdr->addr + memhdr->len); + if (((uword) memhdr->addr) < this_pool_start) + this_pool_start = (uword) (memhdr->addr); + } + ASSERT (this_pool_start < ~0ULL && this_pool_end > 0); + this_pool_size = this_pool_end - this_pool_start; + + if (CLIB_DEBUG > 1) + { + clib_warning ("%s: pool start %llx pool end %llx pool size %lld", + pool_name, this_pool_start, this_pool_end, + this_pool_size); + clib_warning + ("before: virtual.start %llx virtual.end %llx virtual.size %lld", + vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); + } + + save_vpm_start = vpm->virtual.start; + save_vpm_end = vpm->virtual.end; + save_vpm_size = vpm->virtual.size; + + if ((this_pool_start < vpm->virtual.start) || vpm->virtual.start == 0) + vpm->virtual.start = this_pool_start; + if (this_pool_end > vpm->virtual.end) + vpm->virtual.end = this_pool_end; + + vpm->virtual.size = vpm->virtual.end - vpm->virtual.start; + + if (CLIB_DEBUG > 1) + { + clib_warning + ("after: virtual.start %llx virtual.end %llx virtual.size %lld", + vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); + } + + /* check if fits into buffer index range */ + if ((u64) vpm->virtual.size > + ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES))) + { + clib_warning ("physmem: virtual size out of range!"); + vpm->virtual.start = save_vpm_start; + vpm->virtual.end = save_vpm_end; + 
vpm->virtual.size = save_vpm_size; + rmp = 0; + } + } + if (rmp) + { + dm->pktmbuf_pools[socket_id] = rmp; + vec_free (pool_name); + return 0; + } + } + + vec_free (pool_name); + + /* no usable pool for this socket, try to use pool from another one */ + for (i = 0; i < vec_len (dm->pktmbuf_pools); i++) + { + if (dm->pktmbuf_pools[i]) + { + clib_warning + ("WARNING: Failed to allocate mempool for CPU socket %u. " + "Threads running on socket %u will use socket %u mempool.", + socket_id, socket_id, i); + dm->pktmbuf_pools[socket_id] = dm->pktmbuf_pools[i]; + return 0; + } + } + + return clib_error_return (0, "failed to allocate mempool on socket %u", + socket_id); +} + +#if CLIB_DEBUG > 0 + +u32 *vlib_buffer_state_validation_lock; +uword *vlib_buffer_state_validation_hash; +void *vlib_buffer_state_heap; + +static clib_error_t * +buffer_state_validation_init (vlib_main_t * vm) +{ + void *oldheap; + + vlib_buffer_state_heap = mheap_alloc (0, 10 << 20); + + oldheap = clib_mem_set_heap (vlib_buffer_state_heap); + + vlib_buffer_state_validation_hash = hash_create (0, sizeof (uword)); + vec_validate_aligned (vlib_buffer_state_validation_lock, 0, + CLIB_CACHE_LINE_BYTES); + clib_mem_set_heap (oldheap); + return 0; +} + +VLIB_INIT_FUNCTION (buffer_state_validation_init); +#endif + +static vlib_buffer_callbacks_t callbacks = { + .vlib_buffer_alloc_cb = &dpdk_buffer_alloc, + .vlib_buffer_alloc_from_free_list_cb = &dpdk_buffer_alloc_from_free_list, + .vlib_buffer_free_cb = &dpdk_buffer_free, + .vlib_buffer_free_no_next_cb = &dpdk_buffer_free_no_next, + .vlib_packet_template_init_cb = &dpdk_packet_template_init, + .vlib_buffer_delete_free_list_cb = &dpdk_buffer_delete_free_list, +}; + +static clib_error_t * +dpdk_buffer_init (vlib_main_t * vm) +{ + vlib_buffer_cb_register (vm, &callbacks); + return 0; +} + +VLIB_INIT_FUNCTION (dpdk_buffer_init); + +/** @endcond */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + 
*/ diff --git a/src/vnet/devices/dpdk/cli.c b/src/vnet/devices/dpdk/cli.c index 538a00fd975..22bd4b4fa23 100644 --- a/src/vnet/devices/dpdk/cli.c +++ b/src/vnet/devices/dpdk/cli.c @@ -164,9 +164,9 @@ show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, struct rte_mempool *rmp; int i; - for (i = 0; i < vec_len (vm->buffer_main->pktmbuf_pools); i++) + for (i = 0; i < vec_len (dpdk_main.pktmbuf_pools); i++) { - rmp = vm->buffer_main->pktmbuf_pools[i]; + rmp = dpdk_main.pktmbuf_pools[i]; if (rmp) { unsigned count = rte_mempool_avail_count (rmp); diff --git a/src/vnet/devices/dpdk/device.c b/src/vnet/devices/dpdk/device.c index b22fbf2e69e..0deab6aa2a9 100644 --- a/src/vnet/devices/dpdk/device.c +++ b/src/vnet/devices/dpdk/device.c @@ -87,19 +87,18 @@ dpdk_set_mc_filter (vnet_hw_interface_t * hi, struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b) { - vlib_main_t *vm = vlib_get_main (); - vlib_buffer_main_t *bm = vm->buffer_main; + dpdk_main_t *dm = &dpdk_main; struct rte_mbuf **mbufs = 0, *s, *d; u8 nb_segs; unsigned socket_id = rte_socket_id (); int i; - ASSERT (bm->pktmbuf_pools[socket_id]); + ASSERT (dm->pktmbuf_pools[socket_id]); s = rte_mbuf_from_vlib_buffer (b); nb_segs = s->nb_segs; vec_validate (mbufs, nb_segs - 1); - if (rte_pktmbuf_alloc_bulk (bm->pktmbuf_pools[socket_id], mbufs, nb_segs)) + if (rte_pktmbuf_alloc_bulk (dm->pktmbuf_pools[socket_id], mbufs, nb_segs)) { vec_free (mbufs); return 0; diff --git a/src/vnet/devices/dpdk/dpdk.h b/src/vnet/devices/dpdk/dpdk.h index e0436031df7..066ec6fadbf 100644 --- a/src/vnet/devices/dpdk/dpdk.h +++ b/src/vnet/devices/dpdk/dpdk.h @@ -425,6 +425,9 @@ typedef struct vlib_main_t *vlib_main; vnet_main_t *vnet_main; dpdk_config_main_t *conf; + + /* mempool */ + struct rte_mempool **pktmbuf_pools; } dpdk_main_t; dpdk_main_t dpdk_main; diff --git a/src/vnet/devices/dpdk/dpdk_priv.h b/src/vnet/devices/dpdk/dpdk_priv.h index 0c81dbc3beb..dd40ff48510 100644 --- a/src/vnet/devices/dpdk/dpdk_priv.h +++ 
b/src/vnet/devices/dpdk/dpdk_priv.h @@ -13,6 +13,9 @@ * limitations under the License. */ +#define rte_mbuf_from_vlib_buffer(x) (((struct rte_mbuf *)(x)) - 1) /* vlib_buffer_t pointer -> the struct rte_mbuf located directly in front of it; argument parenthesised so expressions convert correctly */ +#define vlib_buffer_from_rte_mbuf(x) ((vlib_buffer_t *)((x) + 1)) /* struct rte_mbuf pointer -> the vlib_buffer_t located directly behind it; argument parenthesised so expressions convert correctly */ + #define DPDK_NB_RX_DESC_DEFAULT 1024 #define DPDK_NB_TX_DESC_DEFAULT 1024 #define DPDK_NB_RX_DESC_VIRTIO 256 diff --git a/src/vnet/devices/dpdk/init.c b/src/vnet/devices/dpdk/init.c index 60689463492..4c040d20af7 100755 --- a/src/vnet/devices/dpdk/init.c +++ b/src/vnet/devices/dpdk/init.c @@ -64,8 +64,6 @@ static struct rte_eth_conf port_conf_template = { clib_error_t * dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) { - vlib_main_t *vm = vlib_get_main (); - vlib_buffer_main_t *bm = vm->buffer_main; int rv; int j; @@ -107,7 +105,7 @@ dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, xd->cpu_socket, 0, - bm-> + dm-> pktmbuf_pools[xd->cpu_socket_id_by_queue [j]]); @@ -115,7 +113,7 @@ dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) if (rv < 0) rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, SOCKET_ID_ANY, 0, - bm-> + dm-> pktmbuf_pools[xd->cpu_socket_id_by_queue [j]]); if (rv < 0) diff --git a/src/vnet/devices/dpdk/thread.c b/src/vnet/devices/dpdk/thread.c new file mode 100644 index 00000000000..475dd142157 --- /dev/null +++ b/src/vnet/devices/dpdk/thread.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static clib_error_t * +dpdk_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id) +{ + int r; + r = rte_eal_remote_launch (fp, (void *) w, lcore_id); + if (r) + return clib_error_return (0, "Failed to launch thread %u", lcore_id); + return 0; +} + +static clib_error_t * +dpdk_thread_set_lcore (u32 thread, u16 lcore) +{ + return 0; +} + +static vlib_thread_callbacks_t callbacks = { + .vlib_launch_thread_cb = &dpdk_launch_thread, + .vlib_thread_set_lcore_cb = &dpdk_thread_set_lcore, +}; + +static clib_error_t * +dpdk_thread_init (vlib_main_t * vm) +{ + vlib_thread_cb_register (vm, &callbacks); + return 0; +} + +VLIB_INIT_FUNCTION (dpdk_thread_init); + +/** @endcond */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sr/sr_replicate.c b/src/vnet/sr/sr_replicate.c index 5f9de5042af..fa5a68c31c2 100644 --- a/src/vnet/sr/sr_replicate.c +++ b/src/vnet/sr/sr_replicate.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -142,6 +143,7 @@ static uword sr_replicate_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { + dpdk_main_t *dm = &dpdk_main; u32 n_left_from, *from, *to_next; sr_replicate_next_t next_index; int pkts_replicated = 0; @@ -149,7 +151,6 @@ sr_replicate_node_fn (vlib_main_t * vm, int no_buffer_drops = 0; vlib_buffer_free_list_t *fl; unsigned socket_id = rte_socket_id (); - vlib_buffer_main_t *bm = vm->buffer_main; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -246,13 
+247,13 @@ sr_replicate_node_fn (vlib_main_t * vm, vlib_buffer_t *clone0_c, *clone_b0; t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]); - hdr_mb0 = rte_pktmbuf_alloc (bm->pktmbuf_pools[socket_id]); + hdr_mb0 = rte_pktmbuf_alloc (dm->pktmbuf_pools[socket_id]); if (i < (num_replicas - 1)) { /* Not the last tunnel to process */ clone0 = rte_pktmbuf_clone - (orig_mb0, bm->pktmbuf_pools[socket_id]); + (orig_mb0, dm->pktmbuf_pools[socket_id]); if (clone0 == 0) goto clone_fail; nb_seg = 0;