From b32bd70c1e83fff90d060ea1bbb41eb55e3f62b1 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 23 Dec 2021 17:05:02 +0100 Subject: [PATCH] vlib: introduce vlib frame aux data Type: improvement Change-Id: I53890a13210cfb0d2b2d9d8cfd9b15118d3bb273 Signed-off-by: Damjan Marion --- src/vlib/main.c | 111 ++++++++++++++------------------------------------ src/vlib/node.c | 62 +++++++++++++++++++++++++--- src/vlib/node.h | 25 +++++++----- src/vlib/node_funcs.h | 27 ++++++------ src/vlib/threads.c | 10 ++--- src/vlib/unix/main.c | 4 +- 6 files changed, 122 insertions(+), 117 deletions(-) diff --git a/src/vlib/main.c b/src/vlib/main.c index 2f219955e70..189884a97f9 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -45,71 +45,12 @@ #include -/* Actually allocate a few extra slots of vector data to support - speculative vector enqueues which overflow vector data in next frame. */ -#define VLIB_FRAME_SIZE_ALLOC (VLIB_FRAME_SIZE + 4) - -always_inline u32 -vlib_frame_bytes (u32 n_scalar_bytes, u32 n_vector_bytes) -{ - u32 n_bytes; - - /* Make room for vlib_frame_t plus scalar arguments. */ - n_bytes = vlib_frame_vector_byte_offset (n_scalar_bytes); - - /* Make room for vector arguments. - Allocate a few extra slots of vector data to support - speculative vector enqueues which overflow vector data in next frame. */ -#define VLIB_FRAME_SIZE_EXTRA 4 - n_bytes += (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * n_vector_bytes; - - /* Magic number is first 32bit number after vector data. - Used to make sure that vector data is never overrun. */ #define VLIB_FRAME_MAGIC (0xabadc0ed) - n_bytes += sizeof (u32); - - /* Pad to cache line. 
*/ - n_bytes = round_pow2 (n_bytes, CLIB_CACHE_LINE_BYTES); - - return n_bytes; -} always_inline u32 * vlib_frame_find_magic (vlib_frame_t * f, vlib_node_t * node) { - void *p = f; - - p += vlib_frame_vector_byte_offset (node->scalar_size); - - p += (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * node->vector_size; - - return p; -} - -static inline vlib_frame_size_t * -get_frame_size_info (vlib_node_main_t * nm, - u32 n_scalar_bytes, u32 n_vector_bytes) -{ -#ifdef VLIB_SUPPORTS_ARBITRARY_SCALAR_SIZES - uword key = (n_scalar_bytes << 16) | n_vector_bytes; - uword *p, i; - - p = hash_get (nm->frame_size_hash, key); - if (p) - i = p[0]; - else - { - i = vec_len (nm->frame_sizes); - vec_validate (nm->frame_sizes, i); - hash_set (nm->frame_size_hash, key, i); - } - - return vec_elt_at_index (nm->frame_sizes, i); -#else - ASSERT (vlib_frame_bytes (n_scalar_bytes, n_vector_bytes) - == (vlib_frame_bytes (0, 4))); - return vec_elt_at_index (nm->frame_sizes, 0); -#endif + return (void *) f + node->magic_offset; } static vlib_frame_t * @@ -120,17 +61,21 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index, vlib_frame_size_t *fs; vlib_node_t *to_node; vlib_frame_t *f; - u32 l, n, scalar_size, vector_size; + u32 l, n; ASSERT (vm == vlib_get_main ()); to_node = vlib_get_node (vm, to_node_index); - scalar_size = to_node->scalar_size; - vector_size = to_node->vector_size; + vec_validate (nm->frame_sizes, to_node->frame_size_index); + fs = vec_elt_at_index (nm->frame_sizes, to_node->frame_size_index); + + if (fs->frame_size == 0) + fs->frame_size = to_node->frame_size; + else + ASSERT (fs->frame_size == to_node->frame_size); - fs = get_frame_size_info (nm, scalar_size, vector_size); - n = vlib_frame_bytes (scalar_size, vector_size); + n = fs->frame_size; if ((l = vec_len (fs->free_frames)) > 0) { /* Allocate from end of free list. 
*/ @@ -139,12 +84,12 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index, } else { - f = clib_mem_alloc_aligned_no_fail (n, VLIB_FRAME_ALIGN); + f = clib_mem_alloc_aligned_no_fail (n, CLIB_CACHE_LINE_BYTES); } /* Poison frame when debugging. */ if (CLIB_DEBUG > 0) - clib_memset (f, 0xfe, n); + clib_memset_u8 (f, 0xfe, n); /* Insert magic number. */ { @@ -156,8 +101,9 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index, f->frame_flags = VLIB_FRAME_IS_ALLOCATED | frame_flags; f->n_vectors = 0; - f->scalar_size = scalar_size; - f->vector_size = vector_size; + f->scalar_offset = to_node->scalar_offset; + f->vector_offset = to_node->vector_offset; + f->aux_offset = to_node->aux_offset; f->flags = 0; fs->n_alloc_frames += 1; @@ -249,7 +195,7 @@ vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f) ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED); node = vlib_get_node (vm, r->node_index); - fs = get_frame_size_info (nm, node->scalar_size, node->vector_size); + fs = vec_elt_at_index (nm->frame_sizes, node->frame_size_index); ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED); @@ -271,19 +217,24 @@ static clib_error_t * show_frame_stats (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - vlib_node_main_t *nm = &vm->node_main; vlib_frame_size_t *fs; - vlib_cli_output (vm, "%=6s%=12s%=12s", "Size", "# Alloc", "# Free"); - vec_foreach (fs, nm->frame_sizes) - { - u32 n_alloc = fs->n_alloc_frames; - u32 n_free = vec_len (fs->free_frames); + vlib_cli_output (vm, "%=8s%=6s%=12s%=12s", "Thread", "Size", "# Alloc", + "# Free"); + foreach_vlib_main () + { + vlib_node_main_t *nm = &this_vlib_main->node_main; + vec_foreach (fs, nm->frame_sizes) + { + u32 n_alloc = fs->n_alloc_frames; + u32 n_free = vec_len (fs->free_frames); - if (n_alloc + n_free > 0) - vlib_cli_output (vm, "%=6d%=12d%=12d", - fs - nm->frame_sizes, n_alloc, n_free); - } + if (n_alloc + n_free > 0) + vlib_cli_output (vm, "%=8d%=6d%=12d%=12d", + 
this_vlib_main->thread_index, fs->frame_size, + n_alloc, n_free); + } + } return 0; } diff --git a/src/vlib/node.c b/src/vlib/node.c index f4329e7c503..41b9ee73be6 100644 --- a/src/vlib/node.c +++ b/src/vlib/node.c @@ -333,6 +333,7 @@ register_node (vlib_main_t * vm, vlib_node_registration_t * r) { vlib_node_main_t *nm = &vm->node_main; vlib_node_t *n; + u32 size; int i; if (CLIB_DEBUG > 0) @@ -400,13 +401,66 @@ register_node (vlib_main_t * vm, vlib_node_registration_t * r) _(type); _(flags); _(state); - _(scalar_size); - _(vector_size); _(format_buffer); _(unformat_buffer); _(format_trace); _(validate_frame); + size = round_pow2 (sizeof (vlib_frame_t), VLIB_FRAME_DATA_ALIGN); + + /* scalar data size */ + if (r->scalar_size) + { + n->scalar_offset = size; + size += round_pow2 (r->scalar_size, VLIB_FRAME_DATA_ALIGN); + } + else + n->scalar_offset = 0; + + /* Vector data size */ + n->vector_offset = size; + size += r->vector_size * VLIB_FRAME_SIZE; + + /* Allocate a few extra slots of vector data to support + speculative vector enqueues which overflow vector data in next frame. */ + size += r->vector_size * VLIB_FRAME_SIZE_EXTRA; + + /* space for VLIB_FRAME_MAGIC */ + n->magic_offset = size; + size += sizeof (u32); + + /* round size to VLIB_FRAME_DATA_ALIGN */ + size = round_pow2 (size, VLIB_FRAME_DATA_ALIGN); + + if (r->aux_size) + { + n->aux_offset = size; + size += r->aux_size * VLIB_FRAME_SIZE; + } + else + n->aux_offset = 0; + + /* final size */ + n->frame_size = size = round_pow2 (size, CLIB_CACHE_LINE_BYTES); + ASSERT (size <= __UINT16_MAX__); + + vlib_frame_size_t *fs = 0; + + n->frame_size_index = (u16) ~0; + vec_foreach (fs, nm->frame_sizes) + if (fs->frame_size == size) + { + n->frame_size_index = fs - nm->frame_sizes; + break; + } + + if (n->frame_size_index == (u16) ~0) + { + vec_add2 (nm->frame_sizes, fs, 1); + fs->frame_size = size; + n->frame_size_index = fs - nm->frame_sizes; + } + /* Register error counters.
*/ vlib_register_errors (vm, n->index, r->n_errors, r->error_strings, r->error_counters); @@ -669,10 +723,6 @@ vlib_node_main_init (vlib_main_t * vm) vlib_node_t *n; uword ni; - nm->frame_sizes = vec_new (vlib_frame_size_t, 1); -#ifdef VLIB_SUPPORTS_ARBITRARY_SCALAR_SIZES - nm->frame_size_hash = hash_create (0, sizeof (uword)); -#endif nm->flags |= VLIB_NODE_MAIN_RUNTIME_STARTED; /* Generate sibling relationships */ diff --git a/src/vlib/node.h b/src/vlib/node.h index 75a0adba8d1..66a99925846 100644 --- a/src/vlib/node.h +++ b/src/vlib/node.h @@ -149,7 +149,7 @@ typedef struct _vlib_node_registration u8 protocol_hint; /* Size of scalar and vector arguments in bytes. */ - u16 scalar_size, vector_size; + u8 scalar_size, vector_size, aux_size; /* Number of error codes used by this node. */ u16 n_errors; @@ -309,7 +309,8 @@ typedef struct vlib_node_t u16 n_errors; /* Size of scalar and vector arguments in bytes. */ - u16 scalar_size, vector_size; + u16 frame_size, scalar_offset, vector_offset, magic_offset, aux_offset; + u16 frame_size_index; /* Handle/index in error heap for this node. */ u32 error_heap_handle; @@ -367,7 +368,10 @@ typedef struct vlib_node_t /* Max number of vector elements to process at once per node. */ #define VLIB_FRAME_SIZE 256 -#define VLIB_FRAME_ALIGN CLIB_CACHE_LINE_BYTES +/* Number of extra elements allocated at the end of the vector. */ +#define VLIB_FRAME_SIZE_EXTRA 4 +/* Frame data alignment */ +#define VLIB_FRAME_DATA_ALIGN 16 /* Calling frame (think stack frame) for a node. */ typedef struct vlib_frame_t @@ -378,11 +382,8 @@ typedef struct vlib_frame_t /* User flags. Used for sending hints to the next node. */ u16 flags; - /* Number of scalar bytes in arguments. */ - u8 scalar_size; - - /* Number of bytes per vector argument. */ - u8 vector_size; + /* Scalar, vector and aux offsets in this frame. */ + u16 scalar_offset, vector_offset, aux_offset; /* Number of vector elements currently in frame.
*/ u16 n_vectors; @@ -521,10 +522,15 @@ typedef struct /* Number of allocated frames for this scalar/vector size. */ u32 n_alloc_frames; + /* Frame size */ + u16 frame_size; + /* Vector of free frames for this scalar/vector size. */ vlib_frame_t **free_frames; } vlib_frame_size_t; +STATIC_ASSERT_SIZEOF (vlib_frame_size_t, 16); + typedef struct { /* Users opaque value for event type. */ @@ -721,9 +727,6 @@ typedef struct /* Current counts of nodes in each state. */ u32 input_node_counts_by_state[VLIB_N_NODE_STATE]; - /* Hash of (scalar_size,vector_size) to frame_sizes index. */ - uword *frame_size_hash; - /* Per-size frame allocation information. */ vlib_frame_size_t *frame_sizes; diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h index b1d5c7bcacb..46db46dd1e0 100644 --- a/src/vlib/node_funcs.h +++ b/src/vlib/node_funcs.h @@ -283,16 +283,6 @@ vlib_frame_no_append (vlib_frame_t * f) f->frame_flags |= VLIB_FRAME_NO_APPEND; } -/* Byte alignment for vector arguments. */ -#define VLIB_FRAME_VECTOR_ALIGN (1 << 4) - -always_inline u32 -vlib_frame_vector_byte_offset (u32 scalar_size) -{ - return round_pow2 (sizeof (vlib_frame_t) + scalar_size, - VLIB_FRAME_VECTOR_ALIGN); -} - /** \brief Get pointer to frame vector data. @param f vlib_frame_t pointer @return pointer to first vector element in frame @@ -300,7 +290,19 @@ vlib_frame_vector_byte_offset (u32 scalar_size) always_inline void * vlib_frame_vector_args (vlib_frame_t * f) { - return (void *) f + vlib_frame_vector_byte_offset (f->scalar_size); + ASSERT (f->vector_offset); + return (void *) f + f->vector_offset; +} + +/** \brief Get pointer to frame vector aux data. + @param f vlib_frame_t pointer + @return pointer to first vector aux data element in frame +*/ +always_inline void * +vlib_frame_aux_args (vlib_frame_t *f) +{ + ASSERT (f->aux_offset); + return (void *) f + f->aux_offset; } /** \brief Get pointer to frame scalar data. 
@@ -314,7 +316,8 @@ vlib_frame_vector_args (vlib_frame_t * f) always_inline void * vlib_frame_scalar_args (vlib_frame_t * f) { - return vlib_frame_vector_args (f) - f->scalar_size; + ASSERT (f->scalar_offset); + return (void *) f + f->scalar_offset; } always_inline vlib_next_frame_t * diff --git a/src/vlib/threads.c b/src/vlib/threads.c index f45e9358a89..760aa8b5776 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -626,8 +626,9 @@ start_workers (vlib_main_t * vm) vm_clone->thread_index = worker_thread_index; vm_clone->heap_base = w->thread_mheap; - vm_clone->heap_aligned_base = (void *) - (((uword) w->thread_mheap) & ~(VLIB_FRAME_ALIGN - 1)); + vm_clone->heap_aligned_base = + (void *) (((uword) w->thread_mheap) & + ~(CLIB_CACHE_LINE_BYTES - 1)); vm_clone->pending_rpc_requests = 0; vec_validate (vm_clone->pending_rpc_requests, 0); _vec_len (vm_clone->pending_rpc_requests) = 0; @@ -730,10 +731,7 @@ start_workers (vlib_main_t * vm) CLIB_CACHE_LINE_BYTES); /* Create per-thread frame freelist */ - nm_clone->frame_sizes = vec_new (vlib_frame_size_t, 1); -#ifdef VLIB_SUPPORTS_ARBITRARY_SCALAR_SIZES - nm_clone->frame_size_hash = hash_create (0, sizeof (uword)); -#endif + nm_clone->frame_sizes = 0; nm_clone->node_by_error = nm->node_by_error; /* Packet trace buffers are guaranteed to be empty, nothing to do here */ diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c index 4ef96652470..69959fd9137 100644 --- a/src/vlib/unix/main.c +++ b/src/vlib/unix/main.c @@ -704,8 +704,8 @@ vlib_unix_main (int argc, char *argv[]) vm->argv = (u8 **) argv; vgm->name = argv[0]; vm->heap_base = clib_mem_get_heap (); - vm->heap_aligned_base = (void *) - (((uword) vm->heap_base) & ~(VLIB_FRAME_ALIGN - 1)); + vm->heap_aligned_base = + (void *) (((uword) vm->heap_base) & ~(CLIB_CACHE_LINE_BYTES - 1)); ASSERT (vm->heap_base); clib_time_init (&vm->clib_time); -- 2.16.6