X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlib%2Fbuffer_funcs.h;h=e8ccc86f1a9eac7c2aedfbe7e71f4d5b8497a413;hb=e61ad8d6c49d6c2fb30d29b446f6e6603c8b1de6;hp=ba87d9566c7a722a2491c83e35d01c99118f9ad7;hpb=cef1db9c13f57a1fc49c9e500adffafa0b9ca728;p=vpp.git

diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index ba87d9566c7..e8ccc86f1a9 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -41,6 +41,7 @@
 #define included_vlib_buffer_funcs_h
 
 #include
+#include
 
 /** \file
     vlib buffer access methods.
@@ -63,6 +64,85 @@ vlib_get_buffer (vlib_main_t * vm, u32 buffer_index)
   return uword_to_pointer (bm->buffer_mem_start + offset, void *);
 }
 
+/** \brief Translate array of buffer indices into buffer pointers with offset
+
+    @param vm - (vlib_main_t *) vlib main data structure pointer
+    @param bi - (u32 *) array of buffer indices
+    @param b - (void **) array to store buffer pointers
+    @param count - (int) number of elements
+    @param offset - (i32) offset applied to each pointer
+*/
+static_always_inline void
+vlib_get_buffers_with_offset (vlib_main_t * vm, u32 * bi, void **b, int count,
+                              i32 offset)
+{
+#ifdef CLIB_HAVE_VEC256
+  u64x4 off = u64x4_splat (buffer_main.buffer_mem_start + offset);
+  /* if count is not const, compiler will not unroll while loop
+     so we maintain two-in-parallel variant */
+  while (count >= 8)
+    {
+      u64x4 b0 = u32x4_extend_to_u64x4 (u32x4_load_unaligned (bi));
+      u64x4 b1 = u32x4_extend_to_u64x4 (u32x4_load_unaligned (bi + 4));
+      /* shift and add to get vlib_buffer_t pointer */
+      u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
+      u64x4_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 4);
+      b += 8;
+      bi += 8;
+      count -= 8;
+    }
+#endif
+  while (count >= 4)
+    {
+#ifdef CLIB_HAVE_VEC256
+      u64x4 b0 = u32x4_extend_to_u64x4 (u32x4_load_unaligned (bi));
+      /* shift and add to get vlib_buffer_t pointer */
+      u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
+#elif defined (CLIB_HAVE_VEC128)
+      u64x2 off = u64x2_splat (buffer_main.buffer_mem_start + offset);
+      u32x4 bi4 = u32x4_load_unaligned (bi);
+      u64x2 b0 = u32x4_extend_to_u64x2 ((u32x4) bi4);
+#if defined (__aarch64__)
+      u64x2 b1 = u32x4_extend_to_u64x2_high ((u32x4) bi4);
+#else
+      bi4 = u32x4_shuffle (bi4, 2, 3, 0, 1);
+      u64x2 b1 = u32x4_extend_to_u64x2 ((u32x4) bi4);
+#endif
+      u64x2_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
+      u64x2_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 2);
+#else
+      b[0] = ((u8 *) vlib_get_buffer (vm, bi[0])) + offset;
+      b[1] = ((u8 *) vlib_get_buffer (vm, bi[1])) + offset;
+      b[2] = ((u8 *) vlib_get_buffer (vm, bi[2])) + offset;
+      b[3] = ((u8 *) vlib_get_buffer (vm, bi[3])) + offset;
+#endif
+      b += 4;
+      bi += 4;
+      count -= 4;
+    }
+  while (count)
+    {
+      b[0] = ((u8 *) vlib_get_buffer (vm, bi[0])) + offset;
+      b += 1;
+      bi += 1;
+      count -= 1;
+    }
+}
+
+/** \brief Translate array of buffer indices into buffer pointers
+
+    @param vm - (vlib_main_t *) vlib main data structure pointer
+    @param bi - (u32 *) array of buffer indices
+    @param b - (vlib_buffer_t **) array to store buffer pointers
+    @param count - (int) number of elements
+*/
+
+static_always_inline void
+vlib_get_buffers (vlib_main_t * vm, u32 * bi, vlib_buffer_t ** b, int count)
+{
+  vlib_get_buffers_with_offset (vm, bi, (void **) b, count, 0);
+}
+
 /** \brief Translate buffer pointer into buffer index
 
     @param vm - (vlib_main_t *) vlib main data structure pointer
@@ -81,6 +161,82 @@ vlib_get_buffer_index (vlib_main_t * vm, void *p)
   return offset >> CLIB_LOG2_CACHE_LINE_BYTES;
 }
 
+/** \brief Translate array of buffer pointers into buffer indices with offset
+
+    @param vm - (vlib_main_t *) vlib main data structure pointer
+    @param b - (void **) array of buffer pointers
+    @param bi - (u32 *) array to store buffer indices
+    @param count - (uword) number of elements
+    @param offset - (i32) offset applied to each pointer
+*/
+static_always_inline void
+vlib_get_buffer_indices_with_offset (vlib_main_t * vm, void **b, u32 * bi,
+                                     uword count, i32 offset)
+{
+#ifdef CLIB_HAVE_VEC256
+  u32x8 mask = { 0, 2, 4, 6, 1, 3, 5, 7 };
+  u64x4 off4 = u64x4_splat (buffer_main.buffer_mem_start - offset);
+
+  while (count >= 8)
+    {
+      /* load 4 pointers into 256-bit register */
+      u64x4 v0 = u64x4_load_unaligned (b);
+      u64x4 v1 = u64x4_load_unaligned (b + 4);
+      u32x8 v2, v3;
+
+      v0 -= off4;
+      v1 -= off4;
+
+      v0 >>= CLIB_LOG2_CACHE_LINE_BYTES;
+      v1 >>= CLIB_LOG2_CACHE_LINE_BYTES;
+
+      /* permute 256-bit register so lower u32s of each buffer index are
+       * placed into lower 128-bits */
+      v2 = u32x8_permute ((u32x8) v0, mask);
+      v3 = u32x8_permute ((u32x8) v1, mask);
+
+      /* extract lower 128-bits and save them to the array of buffer indices */
+      u32x4_store_unaligned (u32x8_extract_lo (v2), bi);
+      u32x4_store_unaligned (u32x8_extract_lo (v3), bi + 4);
+      bi += 8;
+      b += 8;
+      count -= 8;
+    }
+#endif
+  while (count >= 4)
+    {
+      /* equivalent non-vector implementation */
+      bi[0] = vlib_get_buffer_index (vm, ((u8 *) b[0]) + offset);
+      bi[1] = vlib_get_buffer_index (vm, ((u8 *) b[1]) + offset);
+      bi[2] = vlib_get_buffer_index (vm, ((u8 *) b[2]) + offset);
+      bi[3] = vlib_get_buffer_index (vm, ((u8 *) b[3]) + offset);
+      bi += 4;
+      b += 4;
+      count -= 4;
+    }
+  while (count)
+    {
+      bi[0] = vlib_get_buffer_index (vm, ((u8 *) b[0]) + offset);
+      bi += 1;
+      b += 1;
+      count -= 1;
+    }
+}
+
+/** \brief Translate array of buffer pointers into buffer indices
+
+    @param vm - (vlib_main_t *) vlib main data structure pointer
+    @param b - (vlib_buffer_t **) array of buffer pointers
+    @param bi - (u32 *) array to store buffer indices
+    @param count - (uword) number of elements
+*/
+static_always_inline void
+vlib_get_buffer_indices (vlib_main_t * vm, vlib_buffer_t ** b, u32 * bi,
+                         uword count)
+{
+  vlib_get_buffer_indices_with_offset (vm, (void **) b, bi, count, 0);
+}
+
 /** \brief Get next buffer in buffer linklist, or zero for end of list.
 
     @param vm - (vlib_main_t *) vlib main data structure pointer
@@ -148,7 +304,7 @@ vlib_buffer_contents (vlib_main_t * vm, u32 buffer_index, u8 * contents)
     {
       b = vlib_get_buffer (vm, buffer_index);
       l = b->current_length;
-      clib_memcpy (contents + content_len, b->data + b->current_data, l);
+      clib_memcpy_fast (contents + content_len, b->data + b->current_data, l);
      content_len += l;
       if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))
        break;
@@ -158,16 +314,16 @@ vlib_buffer_contents (vlib_main_t * vm, u32 buffer_index, u8 * contents)
   return content_len;
 }
 
-/* Return physical address of buffer->data start. */
-always_inline u64
-vlib_get_buffer_data_physical_address (vlib_main_t * vm, u32 buffer_index)
+always_inline uword
+vlib_buffer_get_pa (vlib_main_t * vm, vlib_buffer_t * b)
 {
-  vlib_buffer_main_t *bm = &buffer_main;
-  vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
-  vlib_buffer_pool_t *pool = vec_elt_at_index (bm->buffer_pools,
-                                               b->buffer_pool_index);
+  return vlib_physmem_get_pa (vm, b->data);
+}
 
-  return vlib_physmem_virtual_to_physical (vm, pool->physmem_region, b->data);
+always_inline uword
+vlib_buffer_get_current_pa (vlib_main_t * vm, vlib_buffer_t * b)
+{
+  return vlib_buffer_get_pa (vm, b) + b->current_data;
 }
 
 /** \brief Prefetch buffer metadata by buffer index
@@ -314,7 +470,7 @@ vlib_buffer_alloc_from_free_list (vlib_main_t * vm,
       /* following code is intentionaly duplicated to allow compiler
          to optimize fast path when n_buffers is constant value */
       src = fl->buffers + len - n_buffers;
-      clib_memcpy (buffers, src, n_buffers * sizeof (u32));
+      clib_memcpy_fast (buffers, src, n_buffers * sizeof (u32));
       _vec_len (fl->buffers) -= n_buffers;
 
       /* Verify that buffers are known free. */
@@ -325,7 +481,7 @@ vlib_buffer_alloc_from_free_list (vlib_main_t * vm,
     }
 
   src = fl->buffers + len - n_buffers;
-  clib_memcpy (buffers, src, n_buffers * sizeof (u32));
+  clib_memcpy_fast (buffers, src, n_buffers * sizeof (u32));
   _vec_len (fl->buffers) -= n_buffers;
 
   /* Verify that buffers are known free. */
@@ -475,7 +631,7 @@ vlib_buffer_free_from_ring_no_next (vlib_main_t * vm, u32 * ring, u32 start,
 
   if (PREDICT_TRUE (start + n_buffers <= ring_size))
     {
-      vlib_buffer_free (vm, ring + start, n_buffers);
+      vlib_buffer_free_no_next (vm, ring + start, n_buffers);
     }
   else
     {
@@ -602,9 +758,10 @@ vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b)
   d->flags = s->flags & flag_mask;
   d->total_length_not_including_first_buffer =
     s->total_length_not_including_first_buffer;
-  clib_memcpy (d->opaque, s->opaque, sizeof (s->opaque));
-  clib_memcpy (vlib_buffer_get_current (d),
-               vlib_buffer_get_current (s), s->current_length);
+  clib_memcpy_fast (d->opaque, s->opaque, sizeof (s->opaque));
+  clib_memcpy_fast (d->opaque2, s->opaque2, sizeof (s->opaque2));
+  clib_memcpy_fast (vlib_buffer_get_current (d),
+                    vlib_buffer_get_current (s), s->current_length);
 
   /* next segments */
   for (i = 1; i < n_buffers; i++)
@@ -616,8 +773,8 @@ vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b)
       d = vlib_get_buffer (vm, new_buffers[i]);
       d->current_data = s->current_data;
       d->current_length = s->current_length;
-      clib_memcpy (vlib_buffer_get_current (d),
-                   vlib_buffer_get_current (s), s->current_length);
+      clib_memcpy_fast (vlib_buffer_get_current (d),
+                        vlib_buffer_get_current (s), s->current_length);
       d->flags = s->flags & flag_mask;
     }
 
@@ -679,14 +836,20 @@ vlib_buffer_clone_256 (vlib_main_t * vm, u32 src_buffer, u32 * buffers,
       d->current_length = head_end_offset;
       vlib_buffer_set_free_list_index (d,
                                        vlib_buffer_get_free_list_index (s));
-      d->total_length_not_including_first_buffer =
-        s->total_length_not_including_first_buffer + s->current_length -
+
+      d->total_length_not_including_first_buffer = s->current_length -
         head_end_offset;
+      if (PREDICT_FALSE (s->flags & VLIB_BUFFER_NEXT_PRESENT))
+        {
+          d->total_length_not_including_first_buffer +=
+            s->total_length_not_including_first_buffer;
+        }
       d->flags = s->flags | VLIB_BUFFER_NEXT_PRESENT;
       d->flags &= ~VLIB_BUFFER_EXT_HDR_VALID;
-      clib_memcpy (d->opaque, s->opaque, sizeof (s->opaque));
-      clib_memcpy (vlib_buffer_get_current (d), vlib_buffer_get_current (s),
-                   head_end_offset);
+      clib_memcpy_fast (d->opaque, s->opaque, sizeof (s->opaque));
+      clib_memcpy_fast (d->opaque2, s->opaque2, sizeof (s->opaque2));
+      clib_memcpy_fast (vlib_buffer_get_current (d),
+                        vlib_buffer_get_current (s), head_end_offset);
       d->next_buffer = src_buffer;
     }
   vlib_buffer_advance (s, head_end_offset);
@@ -760,7 +923,7 @@ vlib_buffer_attach_clone (vlib_main_t * vm, vlib_buffer_t * head,
     tail->total_length_not_including_first_buffer;
 
 next_segment:
-  __sync_add_and_fetch (&tail->n_add_refs, 1);
+  clib_atomic_add_fetch (&tail->n_add_refs, 1);
 
   if (tail->flags & VLIB_BUFFER_NEXT_PRESENT)
     {
@@ -781,9 +944,7 @@ vlib_buffer_chain_init (vlib_buffer_t * first)
 
 /* The provided next_bi buffer index is appended to the end of the packet. */
 always_inline vlib_buffer_t *
-vlib_buffer_chain_buffer (vlib_main_t * vm,
-                          vlib_buffer_t * first,
-                          vlib_buffer_t * last, u32 next_bi)
+vlib_buffer_chain_buffer (vlib_main_t * vm, vlib_buffer_t * last, u32 next_bi)
 {
   vlib_buffer_t *next_buffer = vlib_get_buffer (vm, next_bi);
   last->next_buffer = next_bi;
@@ -821,8 +982,8 @@ vlib_buffer_chain_append_data (vlib_main_t * vm,
   u16 len = clib_min (data_len,
                       n_buffer_bytes - last->current_length -
                       last->current_data);
-  clib_memcpy (vlib_buffer_get_current (last) + last->current_length, data,
-               len);
+  clib_memcpy_fast (vlib_buffer_get_current (last) + last->current_length,
+                    data, len);
   vlib_buffer_chain_increase_length (first, last, len);
   return len;
 }
@@ -927,10 +1088,10 @@ vlib_buffer_init_for_free_list (vlib_buffer_t * dst,
   /* Make sure buffer template is sane. */
   ASSERT (fl->index == vlib_buffer_get_free_list_index (src));
 
-  clib_memcpy (STRUCT_MARK_PTR (dst, template_start),
-               STRUCT_MARK_PTR (src, template_start),
-               STRUCT_OFFSET_OF (vlib_buffer_t, template_end) -
-               STRUCT_OFFSET_OF (vlib_buffer_t, template_start));
+  clib_memcpy_fast (STRUCT_MARK_PTR (dst, template_start),
+                    STRUCT_MARK_PTR (src, template_start),
+                    STRUCT_OFFSET_OF (vlib_buffer_t, template_end) -
+                    STRUCT_OFFSET_OF (vlib_buffer_t, template_start));
 
   /* Not in the first 16 octets. */
   dst->n_add_refs = src->n_add_refs;
@@ -990,7 +1151,7 @@ vlib_validate_buffer_in_use (vlib_buffer_t * b, u32 expected)
   oldheap = clib_mem_set_heap (vlib_buffer_state_heap);
 
-  while (__sync_lock_test_and_set (vlib_buffer_state_validation_lock, 1))
+  while (clib_atomic_test_and_set (vlib_buffer_state_validation_lock))
     ;
 
   p = hash_get (vlib_buffer_state_validation_hash, b);
 
@@ -1033,7 +1194,7 @@ vlib_validate_buffer_set_in_use (vlib_buffer_t * b, u32 expected)
   oldheap = clib_mem_set_heap (vlib_buffer_state_heap);
 
-  while (__sync_lock_test_and_set (vlib_buffer_state_validation_lock, 1))
+  while (clib_atomic_test_and_set (vlib_buffer_state_validation_lock))
     ;
 
   hash_set (vlib_buffer_state_validation_hash, b, expected);
 
@@ -1079,9 +1240,9 @@ vlib_buffer_chain_compress (vlib_main_t * vm,
       vlib_buffer_t *second = vlib_get_buffer (vm, first->next_buffer);
       u32 need = want_first_size - first->current_length;
       u32 amount_to_copy = clib_min (need, second->current_length);
-      clib_memcpy (((u8 *) vlib_buffer_get_current (first)) +
-                   first->current_length,
-                   vlib_buffer_get_current (second), amount_to_copy);
+      clib_memcpy_fast (((u8 *) vlib_buffer_get_current (first)) +
+                        first->current_length,
+                        vlib_buffer_get_current (second), amount_to_copy);
       first->current_length += amount_to_copy;
       vlib_buffer_advance (second, amount_to_copy);
       if (first->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID)
@@ -1106,6 +1267,70 @@ vlib_buffer_chain_compress (vlib_main_t * vm,
           (first->flags & VLIB_BUFFER_NEXT_PRESENT));
 }
 
+/**
+ * @brief linearize buffer chain - the first buffer is filled, if needed,
+ * buffers are allocated and filled, returns free space in last buffer or
+ * negative on failure
+ *
+ * @param[in] vm - vlib_main
+ * @param[in,out] first - first buffer in chain
+ */
+always_inline int
+vlib_buffer_chain_linearize (vlib_main_t * vm, vlib_buffer_t * first)
+{
+  vlib_buffer_t *b = first;
+  vlib_buffer_free_list_t *fl =
+    vlib_buffer_get_free_list (vm, vlib_buffer_get_free_list_index (b));
+  u32 buf_len = fl->n_data_bytes;
+  // free buffer chain starting from the second buffer
+  int free_count = (b->flags & VLIB_BUFFER_NEXT_PRESENT) != 0;
+  u32 chain_to_free = b->next_buffer;
+
+  u32 len = vlib_buffer_length_in_chain (vm, b);
+  u32 free_len = buf_len - b->current_data - b->current_length;
+  int alloc_len = clib_max (len - free_len, 0);  //use the free len in the first buffer
+  int n_buffers = (alloc_len + buf_len - 1) / buf_len;
+  u32 new_buffers[n_buffers];
+
+  u32 n_alloc = vlib_buffer_alloc (vm, new_buffers, n_buffers);
+  if (n_alloc != n_buffers)
+    {
+      vlib_buffer_free_no_next (vm, new_buffers, n_alloc);
+      return -1;
+    }
+
+  vlib_buffer_t *s = b;
+  while (s->flags & VLIB_BUFFER_NEXT_PRESENT)
+    {
+      s = vlib_get_buffer (vm, s->next_buffer);
+      int d_free_len = buf_len - b->current_data - b->current_length;
+      ASSERT (d_free_len >= 0);
+      // chain buf and split write
+      u32 copy_len = clib_min (d_free_len, s->current_length);
+      u8 *d = vlib_buffer_put_uninit (b, copy_len);
+      clib_memcpy (d, vlib_buffer_get_current (s), copy_len);
+      int rest = s->current_length - copy_len;
+      if (rest > 0)
+        {
+          //prev buf is full
+          ASSERT (vlib_buffer_get_tail (b) == b->data + buf_len);
+          ASSERT (n_buffers > 0);
+          b = vlib_buffer_chain_buffer (vm, b, new_buffers[--n_buffers]);
+          //make full use of the new buffers
+          b->current_data = 0;
+          d = vlib_buffer_put_uninit (b, rest);
+          clib_memcpy (d, vlib_buffer_get_current (s) + copy_len, rest);
+        }
+    }
+  vlib_buffer_free (vm, &chain_to_free, free_count);
+  b->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+  if (b == first)              /* no buffers added */
+    b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+  ASSERT (len == vlib_buffer_length_in_chain (vm, first));
+  ASSERT (n_buffers == 0);
+  return buf_len - b->current_data - b->current_length;
+}
+
 #endif /* included_vlib_buffer_funcs_h */
 
 /*
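
Usage note (illustrative, not part of the patch): a minimal sketch of how a node loop might use the new vlib_get_buffers() helper to translate a whole frame of buffer indices into vlib_buffer_t pointers in one call. The node function name and the processing body are hypothetical; the other vlib APIs used are standard VPP.

static uword
example_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
                 vlib_frame_t * frame)
{
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left = frame->n_vectors;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;

  /* resolve all indices at once; takes the AVX2/SSE paths when available */
  vlib_get_buffers (vm, from, bufs, n_left);

  while (n_left > 0)
    {
      /* ... inspect or rewrite b[0] here ... */
      b += 1;
      n_left -= 1;
    }
  return frame->n_vectors;
}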
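Similarly, the renamed physical-address helpers added above (which replace vlib_get_buffer_data_physical_address) can be exercised as follows; "b0" is assumed to be a valid vlib_buffer_t pointer.

uword pa_data = vlib_buffer_get_pa (vm, b0);          /* PA of b0->data */
uword pa_cur = vlib_buffer_get_current_pa (vm, b0);   /* PA of b0->data + current_data */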
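Finally, a hedged example of calling the new vlib_buffer_chain_linearize(): per the patch it returns the free space left in the last buffer, or a negative value when the extra buffers cannot be allocated, in which case the chain is left untouched. Variable names are illustrative only.

vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);

if (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
  {
    int tail_space = vlib_buffer_chain_linearize (vm, b0);
    if (tail_space < 0)
      {
        /* allocation failed; drop or otherwise handle the packet */
      }
  }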