/**
 * Copy @c n_indices 32-bit buffer indices from @c src to @c dst.
 *
 * Uses the widest SIMD loads/stores the build provides (512-, 256-,
 * then 128-bit, each gated on its CLIB_HAVE_VEC* macro), finishing
 * with a scalar tail loop for any remaining indices.  Handles
 * n_indices == 0.  NOTE(review): regions are copied front-to-back,
 * so overlapping src/dst is presumably not supported — confirm with
 * callers.
 */
static_always_inline void
vlib_buffer_copy_indices (u32 * dst, u32 * src, u32 n_indices)
{
#if defined(CLIB_HAVE_VEC512)
  /* 16 indices (64 bytes) per iteration. */
  for (; n_indices >= 16; dst += 16, src += 16, n_indices -= 16)
    u32x16_store_unaligned (u32x16_load_unaligned (src), dst);
#endif

#if defined(CLIB_HAVE_VEC256)
  /* 8 indices (32 bytes) per iteration. */
  for (; n_indices >= 8; dst += 8, src += 8, n_indices -= 8)
    u32x8_store_unaligned (u32x8_load_unaligned (src), dst);
#endif

#if defined(CLIB_HAVE_VEC128)
  /* 4 indices (16 bytes) per iteration. */
  for (; n_indices >= 4; dst += 4, src += 4, n_indices -= 4)
    u32x4_store_unaligned (u32x4_load_unaligned (src), dst);
#endif

  /* Scalar tail: whatever the vector passes left behind. */
  for (; n_indices; dst++, src++, n_indices--)
    dst[0] = src[0];
}
STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, template_end, 64);
u8 buffer_pool_index = ~0;
u32 n_queue = 0, queue[queue_size + 4];
vlib_buffer_t bt = { };
-#if defined(CLIB_HAVE_VEC128) && !__aarch64__
+#if defined(CLIB_HAVE_VEC128)
vlib_buffer_t bpi_mask = {.buffer_pool_index = ~0 };
vlib_buffer_t bpi_vec = {.buffer_pool_index = ~0 };
vlib_buffer_t flags_refs_mask = {
.flags = VLIB_BUFFER_NEXT_PRESENT,
- .ref_count = ~0
+ .ref_count = ~1
};
#endif
vlib_prefetch_buffer_header (b[6], LOAD);
vlib_prefetch_buffer_header (b[7], LOAD);
-#if defined(CLIB_HAVE_VEC128) && !__aarch64__
+#if defined(CLIB_HAVE_VEC128)
u8x16 p0, p1, p2, p3, r;
p0 = u8x16_load_unaligned (b[0]);
p1 = u8x16_load_unaligned (b[1]);
}
buffer_pool_index = b[0]->buffer_pool_index;
-#if defined(CLIB_HAVE_VEC128) && !__aarch64__
+#if defined(CLIB_HAVE_VEC128)
bpi_vec.buffer_pool_index = buffer_pool_index;
#endif
bp = vlib_get_buffer_pool (vm, buffer_pool_index);