X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlib%2Fbuffer_funcs.h;h=667063cd693ec88f46ec610bf0a63f158103be6f;hb=1732e476cde7cd74aa107c91eb93754a274612d3;hp=d1aecac7e80c0d37c53739c212c033fbd77355af;hpb=c22fcba177bad2c755fdb6d4d52f2a799eceaf34;p=vpp.git diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h index d1aecac7e80..667063cd693 100644 --- a/src/vlib/buffer_funcs.h +++ b/src/vlib/buffer_funcs.h @@ -56,13 +56,88 @@ always_inline vlib_buffer_t * vlib_get_buffer (vlib_main_t * vm, u32 buffer_index) { - vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_main_t *bm = &buffer_main; uword offset = ((uword) buffer_index) << CLIB_LOG2_CACHE_LINE_BYTES; ASSERT (offset < bm->buffer_mem_size); return uword_to_pointer (bm->buffer_mem_start + offset, void *); } +/** \brief Translate array of buffer indices into buffer pointers with offset + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param bi - (u32 *) array of buffer indices + @param b - (void **) array to store buffer pointers + @param count - (uword) number of elements + @param offset - (i32) offset applied to each pointer +*/ +static_always_inline void +vlib_get_buffers_with_offset (vlib_main_t * vm, u32 * bi, void **b, int count, + i32 offset) +{ +#ifdef CLIB_HAVE_VEC256 + u64x4 off = u64x4_splat (buffer_main.buffer_mem_start + offset); + /* if count is not const, compiler will not unroll while loop + se we maintain two-in-parallel variant */ + while (count >= 8) + { + u64x4 b0 = u32x4_extend_to_u64x4 (u32x4_load_unaligned (bi)); + u64x4 b1 = u32x4_extend_to_u64x4 (u32x4_load_unaligned (bi + 4)); + /* shift and add to get vlib_buffer_t pointer */ + u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b); + u64x4_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 4); + b += 8; + bi += 8; + count -= 8; + } +#endif + while (count >= 4) + { +#ifdef CLIB_HAVE_VEC256 + u64x4 b0 = u32x4_extend_to_u64x4 (u32x4_load_unaligned (bi)); + /* shift and add to get vlib_buffer_t pointer */ + u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b); +#elif defined (CLIB_HAVE_VEC128) && defined (__x86_64__) + u64x2 off = u64x2_splat (buffer_main.buffer_mem_start + offset); + u32x4 bi4 = u32x4_load_unaligned (bi); + u64x2 b0 = u32x4_extend_to_u64x2 ((u32x4) bi4); + bi4 = u32x4_shuffle (bi4, 2, 3, 0, 1); + u64x2 b1 = u32x4_extend_to_u64x2 ((u32x4) bi4); + u64x2_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b); + u64x2_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 2); +#else + b[0] = ((u8 *) vlib_get_buffer (vm, bi[0])) + offset; + b[1] = ((u8 *) vlib_get_buffer (vm, bi[1])) + offset; + b[2] = ((u8 *) vlib_get_buffer (vm, bi[2])) + offset; + b[3] = ((u8 *) vlib_get_buffer (vm, bi[3])) + offset; +#endif + b += 4; + bi += 4; + count -= 4; + } + while (count) + { + b[0] = ((u8 *) vlib_get_buffer (vm, bi[0])) + offset; + b += 1; + bi += 1; + count -= 1; + } +} + +/** \brief Translate array of buffer indices into buffer pointers + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param bi - (u32 *) array of buffer indices + @param b - (vlib_buffer_t **) array to store buffer pointers + @param count - (uword) number of elements +*/ + +static_always_inline void +vlib_get_buffers (vlib_main_t * vm, u32 * bi, vlib_buffer_t ** b, int count) +{ + vlib_get_buffers_with_offset (vm, bi, (void **) b, count, 0); +} + /** \brief Translate buffer pointer into buffer index @param vm - (vlib_main_t *) vlib main data structure pointer @@ -73,7 +148,7 @@ vlib_get_buffer (vlib_main_t * vm, u32 buffer_index) always_inline u32 vlib_get_buffer_index (vlib_main_t * vm, void *p) { - vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_main_t *bm = &buffer_main; uword offset = pointer_to_uword (p) - bm->buffer_mem_start; ASSERT (pointer_to_uword (p) >= bm->buffer_mem_start); ASSERT (offset < bm->buffer_mem_size); @@ -81,6 +156,82 @@ vlib_get_buffer_index (vlib_main_t * vm, void *p) return offset >> CLIB_LOG2_CACHE_LINE_BYTES; } +/** \brief Translate array of buffer pointers into buffer indices with offset + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param b - (void **) array of buffer pointers + @param bi - (u32 *) array to store buffer indices + @param count - (uword) number of elements + @param offset - (i32) offset applied to each pointer +*/ +static_always_inline void +vlib_get_buffer_indices_with_offset (vlib_main_t * vm, void **b, u32 * bi, + uword count, i32 offset) +{ +#ifdef CLIB_HAVE_VEC256 + u32x8 mask = { 0, 2, 4, 6, 1, 3, 5, 7 }; + u64x4 off4 = u64x4_splat (buffer_main.buffer_mem_start - offset); + + while (count >= 8) + { + /* load 4 pointers into 256-bit register */ + u64x4 v0 = u64x4_load_unaligned (b); + u64x4 v1 = u64x4_load_unaligned (b + 4); + u32x8 v2, v3; + + v0 -= off4; + v1 -= off4; + + v0 >>= CLIB_LOG2_CACHE_LINE_BYTES; + v1 >>= CLIB_LOG2_CACHE_LINE_BYTES; + + /* permute 256-bit register so lower u32s of each buffer index are + * placed into lower 128-bits */ + v2 = u32x8_permute ((u32x8) v0, mask); + v3 = u32x8_permute ((u32x8) v1, mask); + + /* extract lower 128-bits and save them to the array of buffer indices */ + u32x4_store_unaligned (u32x8_extract_lo (v2), bi); + u32x4_store_unaligned (u32x8_extract_lo (v3), bi + 4); + bi += 8; + b += 8; + count -= 8; + } +#endif + while (count >= 4) + { + /* equivalent non-nector implementation */ + bi[0] = vlib_get_buffer_index (vm, ((u8 *) b[0]) + offset); + bi[1] = vlib_get_buffer_index (vm, ((u8 *) b[1]) + offset); + bi[2] = vlib_get_buffer_index (vm, ((u8 *) b[2]) + offset); + bi[3] = vlib_get_buffer_index (vm, ((u8 *) b[3]) + offset); + bi += 4; + b += 4; + count -= 4; + } + while (count) + { + bi[0] = vlib_get_buffer_index (vm, ((u8 *) b[0]) + offset); + bi += 1; + b += 1; + count -= 1; + } +} + +/** \brief Translate array of buffer pointers into buffer indices + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param b - (vlib_buffer_t **) array of buffer pointers + @param bi - (u32 *) array to store buffer indices + @param count - (uword) number of elements +*/ +static_always_inline void +vlib_get_buffer_indices (vlib_main_t * vm, vlib_buffer_t ** b, u32 * bi, + uword count) +{ + vlib_get_buffer_indices_with_offset (vm, (void **) b, bi, count, 0); +} + /** \brief Get next buffer in buffer linklist, or zero for end of list. @param vm - (vlib_main_t *) vlib main data structure pointer @@ -162,7 +313,7 @@ vlib_buffer_contents (vlib_main_t * vm, u32 buffer_index, u8 * contents) always_inline u64 vlib_get_buffer_data_physical_address (vlib_main_t * vm, u32 buffer_index) { - vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_main_t *bm = &buffer_main; vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index); vlib_buffer_pool_t *pool = vec_elt_at_index (bm->buffer_pools, b->buffer_pool_index); @@ -224,7 +375,7 @@ void vlib_buffer_validate_alloc_free (vlib_main_t * vm, u32 * buffers, always_inline vlib_buffer_known_state_t vlib_buffer_is_known (u32 buffer_index) { - vlib_buffer_main_t *bm = vlib_global_main.buffer_main; + vlib_buffer_main_t *bm = &buffer_main; clib_spinlock_lock (&bm->buffer_known_hash_lockp); uword *p = hash_get (bm->buffer_known_hash, buffer_index); @@ -236,7 +387,7 @@ always_inline void vlib_buffer_set_known_state (u32 buffer_index, vlib_buffer_known_state_t state) { - vlib_buffer_main_t *bm = vlib_global_main.buffer_main; + vlib_buffer_main_t *bm = &buffer_main; clib_spinlock_lock (&bm->buffer_known_hash_lockp); hash_set (bm->buffer_known_hash, buffer_index, state); @@ -290,14 +441,14 @@ vlib_buffer_alloc_from_free_list (vlib_main_t * vm, u32 n_buffers, vlib_buffer_free_list_index_t index) { - vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_main_t *bm = &buffer_main; vlib_buffer_free_list_t *fl; u32 *src; uword len; ASSERT (bm->cb.vlib_buffer_fill_free_list_cb); - fl = pool_elt_at_index (bm->buffer_free_list_pool, index); + fl = pool_elt_at_index (vm->buffer_free_list_pool, index); len = vec_len (fl->buffers); @@ -394,7 +545,7 @@ vlib_buffer_free (vlib_main_t * vm, /* number of buffers to free */ u32 n_buffers) { - vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_main_t *bm = &buffer_main; ASSERT (bm->cb.vlib_buffer_free_cb); @@ -415,7 +566,7 @@ vlib_buffer_free_no_next (vlib_main_t * vm, /* number of buffers to free */ u32 n_buffers) { - vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_main_t *bm = &buffer_main; ASSERT (bm->cb.vlib_buffer_free_no_next_cb); @@ -459,6 +610,30 @@ vlib_buffer_free_from_ring (vlib_main_t * vm, u32 * ring, u32 start, } } +/** \brief Free buffers from ring without freeing tail buffers + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param buffers - (u32 * ) buffer index ring + @param start - (u32) first slot in the ring + @param ring_size - (u32) ring size + @param n_buffers - (u32) number of buffers +*/ +always_inline void +vlib_buffer_free_from_ring_no_next (vlib_main_t * vm, u32 * ring, u32 start, + u32 ring_size, u32 n_buffers) +{ + ASSERT (n_buffers <= ring_size); + + if (PREDICT_TRUE (start + n_buffers <= ring_size)) + { + vlib_buffer_free_no_next (vm, ring + start, n_buffers); + } + else + { + vlib_buffer_free_no_next (vm, ring + start, ring_size - start); + vlib_buffer_free_no_next (vm, ring, n_buffers - (ring_size - start)); + } +} /* Add/delete buffer free lists. */ vlib_buffer_free_list_index_t vlib_buffer_create_free_list (vlib_main_t * vm, @@ -468,60 +643,36 @@ always_inline void vlib_buffer_delete_free_list (vlib_main_t * vm, vlib_buffer_free_list_index_t free_list_index) { - vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_main_t *bm = &buffer_main; ASSERT (bm->cb.vlib_buffer_delete_free_list_cb); bm->cb.vlib_buffer_delete_free_list_cb (vm, free_list_index); } -/* Find already existing public free list with given size or create one. */ -vlib_buffer_free_list_index_t vlib_buffer_get_or_create_free_list (vlib_main_t - * vm, - u32 - n_data_bytes, - char *fmt, - ...); - -/* Merge two free lists */ -void vlib_buffer_merge_free_lists (vlib_buffer_free_list_t * dst, - vlib_buffer_free_list_t * src); - /* Make sure we have at least given number of unaligned buffers. */ void vlib_buffer_free_list_fill_unaligned (vlib_main_t * vm, vlib_buffer_free_list_t * free_list, uword n_unaligned_buffers); -always_inline vlib_buffer_free_list_index_t -vlib_buffer_get_free_list_with_size (vlib_main_t * vm, u32 size) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - - size = vlib_buffer_round_size (size); - uword *p = hash_get (bm->free_list_by_size, size); - return p ? p[0] : ~0; -} - always_inline vlib_buffer_free_list_t * vlib_buffer_get_buffer_free_list (vlib_main_t * vm, vlib_buffer_t * b, vlib_buffer_free_list_index_t * index) { - vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_index_t i; *index = i = vlib_buffer_get_free_list_index (b); - return pool_elt_at_index (bm->buffer_free_list_pool, i); + return pool_elt_at_index (vm->buffer_free_list_pool, i); } always_inline vlib_buffer_free_list_t * vlib_buffer_get_free_list (vlib_main_t * vm, vlib_buffer_free_list_index_t free_list_index) { - vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *f; - f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index); + f = pool_elt_at_index (vm->buffer_free_list_pool, free_list_index); /* Sanity: indices must match. */ ASSERT (f->index == free_list_index); @@ -603,6 +754,7 @@ vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b) d->total_length_not_including_first_buffer = s->total_length_not_including_first_buffer; clib_memcpy (d->opaque, s->opaque, sizeof (s->opaque)); + clib_memcpy (d->opaque2, s->opaque2, sizeof (s->opaque2)); clib_memcpy (vlib_buffer_get_current (d), vlib_buffer_get_current (s), s->current_length); @@ -679,12 +831,18 @@ vlib_buffer_clone_256 (vlib_main_t * vm, u32 src_buffer, u32 * buffers, d->current_length = head_end_offset; vlib_buffer_set_free_list_index (d, vlib_buffer_get_free_list_index (s)); - d->total_length_not_including_first_buffer = - s->total_length_not_including_first_buffer + s->current_length - + + d->total_length_not_including_first_buffer = s->current_length - head_end_offset; + if (PREDICT_FALSE (s->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + d->total_length_not_including_first_buffer += + s->total_length_not_including_first_buffer; + } d->flags = s->flags | VLIB_BUFFER_NEXT_PRESENT; d->flags &= ~VLIB_BUFFER_EXT_HDR_VALID; clib_memcpy (d->opaque, s->opaque, sizeof (s->opaque)); + clib_memcpy (d->opaque2, s->opaque2, sizeof (s->opaque2)); clib_memcpy (vlib_buffer_get_current (d), vlib_buffer_get_current (s), head_end_offset); d->next_buffer = src_buffer; @@ -849,9 +1007,8 @@ typedef struct /* Vector of packet data. */ u8 *packet_data; - /* Number of buffers to allocate in each call to physmem - allocator. */ - u32 min_n_buffers_each_physmem_alloc; + /* Number of buffers to allocate in each call to allocator. */ + u32 min_n_buffers_each_alloc; /* Buffer free list for this template. */ vlib_buffer_free_list_index_t free_list_index; @@ -866,7 +1023,7 @@ void vlib_packet_template_init (vlib_main_t * vm, vlib_packet_template_t * t, void *packet_data, uword n_packet_data_bytes, - uword min_n_buffers_each_physmem_alloc, + uword min_n_buffers_each_alloc, char *fmt, ...); void *vlib_packet_template_get_packet (vlib_main_t * vm, @@ -957,6 +1114,7 @@ vlib_buffer_add_to_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f, u32 buffer_index, u8 do_init) { + vlib_buffer_pool_t *bp = vlib_buffer_pool_get (f->buffer_pool_index); vlib_buffer_t *b; b = vlib_get_buffer (vm, buffer_index); if (PREDICT_TRUE (do_init)) @@ -965,15 +1123,13 @@ vlib_buffer_add_to_free_list (vlib_main_t * vm, if (vec_len (f->buffers) > 4 * VLIB_FRAME_SIZE) { - vlib_buffer_free_list_t *mf; - mf = vlib_buffer_get_free_list (vlib_mains[0], f->index); - clib_spinlock_lock (&mf->global_buffers_lock); + clib_spinlock_lock (&bp->lock); /* keep last stored buffers, as they are more likely hot in the cache */ - vec_add_aligned (mf->global_buffers, f->buffers, VLIB_FRAME_SIZE, + vec_add_aligned (bp->buffers, f->buffers, VLIB_FRAME_SIZE, CLIB_CACHE_LINE_BYTES); vec_delete (f->buffers, VLIB_FRAME_SIZE, 0); f->n_alloc -= VLIB_FRAME_SIZE; - clib_spinlock_unlock (&mf->global_buffers_lock); + clib_spinlock_unlock (&bp->lock); } }