X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlib%2Fbuffer_funcs.h;h=bb090f2c195f8aa76209a96ad4ca3ddfc58062b9;hb=8ed2d524aa401a89547a1e059528f8a34bb8895f;hp=cc16fa71a76f52b701e2ce6e64030e3be4fac28b;hpb=0e209242d09f470240d0014c4c63a612a1b284ac;p=vpp.git diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h index cc16fa71a76..bb090f2c195 100644 --- a/src/vlib/buffer_funcs.h +++ b/src/vlib/buffer_funcs.h @@ -101,7 +101,43 @@ vlib_buffer_get_default_data_size (vlib_main_t * vm) static_always_inline void vlib_buffer_copy_indices (u32 * dst, u32 * src, u32 n_indices) { - clib_memcpy_fast (dst, src, n_indices * sizeof (u32)); +#if defined(CLIB_HAVE_VEC512) + while (n_indices >= 16) + { + u32x16_store_unaligned (u32x16_load_unaligned (src), dst); + dst += 16; + src += 16; + n_indices -= 16; + } +#endif + +#if defined(CLIB_HAVE_VEC256) + while (n_indices >= 8) + { + u32x8_store_unaligned (u32x8_load_unaligned (src), dst); + dst += 8; + src += 8; + n_indices -= 8; + } +#endif + +#if defined(CLIB_HAVE_VEC128) + while (n_indices >= 4) + { + u32x4_store_unaligned (u32x4_load_unaligned (src), dst); + dst += 4; + src += 4; + n_indices -= 4; + } +#endif + + while (n_indices) + { + dst[0] = src[0]; + dst += 1; + src += 1; + n_indices -= 1; + } } STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, template_end, 64); @@ -676,7 +712,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, u8 buffer_pool_index = ~0; u32 n_queue = 0, queue[queue_size + 4]; vlib_buffer_t bt = { }; -#if defined(CLIB_HAVE_VEC128) && !__aarch64__ +#if defined(CLIB_HAVE_VEC128) vlib_buffer_t bpi_mask = {.buffer_pool_index = ~0 }; vlib_buffer_t bpi_vec = {.buffer_pool_index = ~0 }; vlib_buffer_t flags_refs_mask = { @@ -701,7 +737,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, vlib_prefetch_buffer_header (b[6], LOAD); vlib_prefetch_buffer_header (b[7], LOAD); -#if defined(CLIB_HAVE_VEC128) && !__aarch64__ +#if defined(CLIB_HAVE_VEC128) u8x16 p0, p1, p2, p3, r; p0 = u8x16_load_unaligned (b[0]); p1 = u8x16_load_unaligned (b[1]); @@ -779,7 +815,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, } buffer_pool_index = b[0]->buffer_pool_index; -#if defined(CLIB_HAVE_VEC128) && !__aarch64__ +#if defined(CLIB_HAVE_VEC128) bpi_vec.buffer_pool_index = buffer_pool_index; #endif bp = vlib_get_buffer_pool (vm, buffer_pool_index); @@ -802,7 +838,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, n_queue = 0; } - if (flags & VLIB_BUFFER_NEXT_PRESENT) + if (maybe_next && (flags & VLIB_BUFFER_NEXT_PRESENT)) { bi = next; goto next_in_chain; @@ -918,13 +954,18 @@ vlib_buffer_free_from_ring_no_next (vlib_main_t * vm, u32 * ring, u32 start, int vlib_buffer_add_data (vlib_main_t * vm, u32 * buffer_index, void *data, u32 n_data_bytes); +/* Define vlib_buffer and vnet_buffer flags bits preserved for copy/clone */ +#define VLIB_BUFFER_COPY_CLONE_FLAGS_MASK \ + (VLIB_BUFFER_NEXT_PRESENT | VLIB_BUFFER_TOTAL_LENGTH_VALID | \ + VLIB_BUFFER_IS_TRACED | ~VLIB_BUFFER_FLAGS_ALL) + /* duplicate all buffers in chain */ always_inline vlib_buffer_t * vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b) { vlib_buffer_t *s, *d, *fd; uword n_alloc, n_buffers = 1; - u32 flag_mask = VLIB_BUFFER_NEXT_PRESENT | VLIB_BUFFER_TOTAL_LENGTH_VALID; + u32 flag_mask = VLIB_BUFFER_COPY_CLONE_FLAGS_MASK; int i; s = b; @@ -997,6 +1038,31 @@ vlib_buffer_copy_no_chain (vlib_main_t * vm, vlib_buffer_t * b, u32 * di) return d; } +/* \brief Move packet from current position to offset position in buffer. + Only work for small packet using one buffer with room to fit the move + @param vm - (vlib_main_t *) vlib main data structure pointer + @param b - (vlib_buffer_t *) pointer to buffer + @param offset - (i16) position to move the packet in buffer + */ +always_inline void +vlib_buffer_move (vlib_main_t * vm, vlib_buffer_t * b, i16 offset) +{ + ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); + ASSERT (offset + VLIB_BUFFER_PRE_DATA_SIZE >= 0); + ASSERT (offset + b->current_length < + vlib_buffer_get_default_data_size (vm)); + + u8 *source = vlib_buffer_get_current (b); + b->current_data = offset; + u8 *destination = vlib_buffer_get_current (b); + u16 length = b->current_length; + + if (source + length <= destination) /* no overlap */ + clib_memcpy_fast (destination, source, length); + else + memmove (destination, source, length); +} + /** \brief Create a maximum of 256 clones of buffer and store them in the supplied array @@ -1006,12 +1072,14 @@ vlib_buffer_copy_no_chain (vlib_main_t * vm, vlib_buffer_t * b, u32 * di) @param n_buffers - (u16) number of buffer clones requested (<=256) @param head_end_offset - (u16) offset relative to current position where packet head ends + @param offset - (i16) copy packet head at current position if 0, + else at offset position to change headroom space as specified @return - (u16) number of buffers actually cloned, may be less than the number requested or zero */ always_inline u16 vlib_buffer_clone_256 (vlib_main_t * vm, u32 src_buffer, u32 * buffers, - u16 n_buffers, u16 head_end_offset) + u16 n_buffers, u16 head_end_offset, i16 offset) { u16 i; vlib_buffer_t *s = vlib_get_buffer (vm, src_buffer); @@ -1019,10 +1087,16 @@ vlib_buffer_clone_256 (vlib_main_t * vm, u32 src_buffer, u32 * buffers, ASSERT (s->ref_count == 1); ASSERT (n_buffers); ASSERT (n_buffers <= 256); + ASSERT (offset + VLIB_BUFFER_PRE_DATA_SIZE >= 0); + ASSERT ((offset + head_end_offset) < + vlib_buffer_get_default_data_size (vm)); if (s->current_length <= head_end_offset + CLIB_CACHE_LINE_BYTES * 2) { buffers[0] = src_buffer; + if (offset) + vlib_buffer_move (vm, s, offset); + for (i = 1; i < n_buffers; i++) { vlib_buffer_t *d; @@ -1035,7 +1109,7 @@ vlib_buffer_clone_256 (vlib_main_t * vm, u32 src_buffer, u32 * buffers, return n_buffers; } - if (PREDICT_FALSE (n_buffers == 1)) + if (PREDICT_FALSE ((n_buffers == 1) && (offset == 0))) { buffers[0] = src_buffer; return 1; @@ -1047,7 +1121,11 @@ vlib_buffer_clone_256 (vlib_main_t * vm, u32 src_buffer, u32 * buffers, for (i = 0; i < n_buffers; i++) { vlib_buffer_t *d = vlib_get_buffer (vm, buffers[i]); - d->current_data = s->current_data; + if (offset) + d->current_data = offset; + else + d->current_data = s->current_data; + d->current_length = head_end_offset; ASSERT (d->buffer_pool_index == s->buffer_pool_index); @@ -1058,8 +1136,8 @@ vlib_buffer_clone_256 (vlib_main_t * vm, u32 src_buffer, u32 * buffers, d->total_length_not_including_first_buffer += s->total_length_not_including_first_buffer; } - d->flags = s->flags | VLIB_BUFFER_NEXT_PRESENT; - d->flags &= ~VLIB_BUFFER_EXT_HDR_VALID; + d->flags = (s->flags & VLIB_BUFFER_COPY_CLONE_FLAGS_MASK) | + VLIB_BUFFER_NEXT_PRESENT; clib_memcpy_fast (d->opaque, s->opaque, sizeof (s->opaque)); clib_memcpy_fast (d->opaque2, s->opaque2, sizeof (s->opaque2)); clib_memcpy_fast (vlib_buffer_get_current (d), @@ -1086,12 +1164,14 @@ vlib_buffer_clone_256 (vlib_main_t * vm, u32 src_buffer, u32 * buffers, @param n_buffers - (u16) number of buffer clones requested (<=256) @param head_end_offset - (u16) offset relative to current position where packet head ends + @param offset - (i16) copy packet head at current position if 0, + else at offset position to change headroom space as specified @return - (u16) number of buffers actually cloned, may be less than the number requested or zero */ always_inline u16 -vlib_buffer_clone (vlib_main_t * vm, u32 src_buffer, u32 * buffers, - u16 n_buffers, u16 head_end_offset) +vlib_buffer_clone_at_offset (vlib_main_t * vm, u32 src_buffer, u32 * buffers, + u16 n_buffers, u16 head_end_offset, i16 offset) { vlib_buffer_t *s = vlib_get_buffer (vm, src_buffer); u16 n_cloned = 0; @@ -1103,16 +1183,36 @@ vlib_buffer_clone (vlib_main_t * vm, u32 src_buffer, u32 * buffers, n_cloned += vlib_buffer_clone_256 (vm, vlib_get_buffer_index (vm, copy), (buffers + n_cloned), - 256, head_end_offset); + 256, head_end_offset, offset); n_buffers -= 256; } n_cloned += vlib_buffer_clone_256 (vm, src_buffer, buffers + n_cloned, - n_buffers, head_end_offset); + n_buffers, head_end_offset, offset); return n_cloned; } +/** \brief Create multiple clones of buffer and store them + in the supplied array + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param src_buffer - (u32) source buffer index + @param buffers - (u32 * ) buffer index array + @param n_buffers - (u16) number of buffer clones requested (<=256) + @param head_end_offset - (u16) offset relative to current position + where packet head ends + @return - (u16) number of buffers actually cloned, may be + less than the number requested or zero +*/ +always_inline u16 +vlib_buffer_clone (vlib_main_t * vm, u32 src_buffer, u32 * buffers, + u16 n_buffers, u16 head_end_offset) +{ + return vlib_buffer_clone_at_offset (vm, src_buffer, buffers, n_buffers, + head_end_offset, 0); +} + /** \brief Attach cloned tail to the buffer @param vm - (vlib_main_t *) vlib main data structure pointer @@ -1321,13 +1421,19 @@ vlib_buffer_chain_linearize (vlib_main_t * vm, vlib_buffer_t * b) if (dst_left == 0) { - if (db != first) - db->current_data = 0; db->current_length = dp - (u8 *) vlib_buffer_get_current (db); ASSERT (db->flags & VLIB_BUFFER_NEXT_PRESENT); db = vlib_get_buffer (vm, db->next_buffer); dst_left = data_size; - dp = db->data; + if (db->current_data > 0) + { + db->current_data = 0; + } + else + { + dst_left += -db->current_data; + } + dp = vlib_buffer_get_current (db); } while (src_left == 0)