vlib buffer access methods.
*/
+/* Function-pointer types for buffer enqueue / frame-queue operations.
+   Concrete implementations are installed in vlib_buffer_func_main below
+   -- presumably selected at init time; confirm against the defining .c. */
+
+/* Enqueue buffers to next nodes, one next index per buffer. */
+typedef void (vlib_buffer_enqueue_to_next_fn_t) (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ u32 *buffers, u16 *nexts,
+ uword count);
+
+/* Enqueue all buffers to a single next index.
+   NOTE(review): parameter restored to "buffers" -- the original patch line
+   read "u32 *ers", a truncated identifier inconsistent with the sibling
+   typedef above. */
+typedef void (vlib_buffer_enqueue_to_single_next_fn_t) (
+ vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 next_index,
+ u32 count);
+
+/* Hand buffers off to other threads through frame queue
+   frame_queue_index; returns a u32 count -- presumably the number of
+   packets enqueued; drop_on_congestion selects drop-vs-block behaviour.
+   TODO(review): confirm return semantics against the implementation. */
+typedef u32 (vlib_buffer_enqueue_to_thread_fn_t) (
+ vlib_main_t *vm, u32 frame_queue_index, u32 *buffer_indices,
+ u16 *thread_indices, u32 n_packets, int drop_on_congestion);
+
+/* Drain pending frames from a frame queue main. */
+typedef u32 (vlib_frame_queue_dequeue_fn_t) (vlib_main_t *vm,
+ vlib_frame_queue_main_t *fqm);
+
+/* Dispatch table holding the active implementation of each operation. */
+typedef struct
+{
+ vlib_buffer_enqueue_to_next_fn_t *buffer_enqueue_to_next_fn;
+ vlib_buffer_enqueue_to_single_next_fn_t *buffer_enqueue_to_single_next_fn;
+ vlib_buffer_enqueue_to_thread_fn_t *buffer_enqueue_to_thread_fn;
+ vlib_frame_queue_dequeue_fn_t *frame_queue_dequeue_fn;
+} vlib_buffer_func_main_t;
+
+/* Single global dispatch-table instance; defined in a .c file elsewhere. */
+extern vlib_buffer_func_main_t vlib_buffer_func_main;
+
always_inline void
vlib_buffer_validate (vlib_main_t * vm, vlib_buffer_t * b)
{
+/* Copy n_indices 32-bit buffer indices from src to dst. */
static_always_inline void
vlib_buffer_copy_indices (u32 * dst, u32 * src, u32 n_indices)
{
-#if defined(CLIB_HAVE_VEC512)
- while (n_indices >= 16)
- {
- u32x16_store_unaligned (u32x16_load_unaligned (src), dst);
- dst += 16;
- src += 16;
- n_indices -= 16;
- }
-#endif
-
-#if defined(CLIB_HAVE_VEC256)
- while (n_indices >= 8)
- {
- u32x8_store_unaligned (u32x8_load_unaligned (src), dst);
- dst += 8;
- src += 8;
- n_indices -= 8;
- }
-#endif
-
-#if defined(CLIB_HAVE_VEC128)
- while (n_indices >= 4)
- {
- u32x4_store_unaligned (u32x4_load_unaligned (src), dst);
- dst += 4;
- src += 4;
- n_indices -= 4;
- }
-#endif
-
- while (n_indices)
- {
- dst[0] = src[0];
- dst += 1;
- src += 1;
- n_indices -= 1;
- }
+ /* single call replacing the per-width SIMD loops removed above --
+ presumably clib_memcpy_u32 () provides the equivalent vectorized
+ copy centrally; verify it covers the VEC512/256/128 paths */
+ clib_memcpy_u32 (dst, src, n_indices);
}
always_inline void
i32 offset)
{
uword buffer_mem_start = vm->buffer_main->buffer_mem_start;
-#ifdef CLIB_HAVE_VEC256
- u64x4 off = u64x4_splat (buffer_mem_start + offset);
+#ifdef CLIB_HAVE_VEC512
+ u64x8 of8 = u64x8_splat (buffer_mem_start + offset);
+ u64x4 off = u64x8_extract_lo (of8);
/* if count is not const, compiler will not unroll while loop
se we maintain two-in-parallel variant */
+ while (count >= 32)
+ {
+ u64x8 b0 = u64x8_from_u32x8 (u32x8_load_unaligned (bi));
+ u64x8 b1 = u64x8_from_u32x8 (u32x8_load_unaligned (bi + 8));
+ u64x8 b2 = u64x8_from_u32x8 (u32x8_load_unaligned (bi + 16));
+ u64x8 b3 = u64x8_from_u32x8 (u32x8_load_unaligned (bi + 24));
+ /* shift and add to get vlib_buffer_t pointer */
+ u64x8_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b);
+ u64x8_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b + 8);
+ u64x8_store_unaligned ((b2 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b + 16);
+ u64x8_store_unaligned ((b3 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b + 24);
+ b += 32;
+ bi += 32;
+ count -= 32;
+ }
while (count >= 8)
+ {
+ u64x8 b0 = u64x8_from_u32x8 (u32x8_load_unaligned (bi));
+ /* shift and add to get vlib_buffer_t pointer */
+ u64x8_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b);
+ b += 8;
+ bi += 8;
+ count -= 8;
+ }
+#elif defined CLIB_HAVE_VEC256
+ u64x4 off = u64x4_splat (buffer_mem_start + offset);
+ /* if count is not const, compiler will not unroll while loop
+ so we maintain two-in-parallel variant */
+ while (count >= 32)
{
u64x4 b0 = u64x4_from_u32x4 (u32x4_load_unaligned (bi));
u64x4 b1 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 4));
+ u64x4 b2 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 8));
+ u64x4 b3 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 12));
+ u64x4 b4 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 16));
+ u64x4 b5 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 20));
+ u64x4 b6 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 24));
+ u64x4 b7 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 28));
/* shift and add to get vlib_buffer_t pointer */
u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
u64x4_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 4);
- b += 8;
- bi += 8;
- count -= 8;
+ u64x4_store_unaligned ((b2 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 8);
+ u64x4_store_unaligned ((b3 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 12);
+ u64x4_store_unaligned ((b4 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 16);
+ u64x4_store_unaligned ((b5 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 20);
+ u64x4_store_unaligned ((b6 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 24);
+ u64x4_store_unaligned ((b7 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 28);
+ b += 32;
+ bi += 32;
+ count -= 32;
}
#endif
while (count >= 4)