From 8c3f8a29374deed5a67a5fd084f186413f6183d7 Mon Sep 17 00:00:00 2001
From: Damjan Marion
Date: Thu, 17 May 2018 21:12:13 +0200
Subject: [PATCH] Add vlib_buffer_enqueue_to_next inline function

Change-Id: I1042c0fe179b57a00ce99c8d62cb1bdbe24d9184
Signed-off-by: Damjan Marion
---
 src/plugins/dpdk/device/node.c |  78 ++--------------------------
 src/vlib/buffer_node.h         | 115 +++++++++++++++++++++++++++++++++++++++++
 src/vppinfra/vector_avx2.h     |   6 +++
 src/vppinfra/vector_avx512.h   |  11 +++-
 src/vppinfra/vector_sse42.h    |   6 +++
 5 files changed, 140 insertions(+), 76 deletions(-)

diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c
index 3311ac4830d..a1acc1f0d1e 100644
--- a/src/plugins/dpdk/device/node.c
+++ b/src/plugins/dpdk/device/node.c
@@ -548,81 +548,9 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
   vlib_get_buffer_indices_with_offset (vm, (void **) ptd->mbufs, ptd->buffers,
				       n_rx_packets,
				       sizeof (struct rte_mbuf));
-  n_left = n_rx_packets;
-  next = ptd->next;
-  buffers = ptd->buffers;
-  mb = ptd->mbufs;
-  while (n_left)
-    {
-      u32 n_left_to_next;
-      u32 *to_next;
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-#ifdef CLIB_HAVE_VEC256
-      while (n_left >= 16 && n_left_to_next >= 16)
-	{
-	  u16x16 next16 = u16x16_load_unaligned (next);
-	  if (u16x16_is_all_equal (next16, next_index))
-	    {
-	      clib_memcpy (to_next, buffers, 16 * sizeof (u32));
-	      to_next += 16;
-	      n_left_to_next -= 16;
-	      buffers += 16;
-	      n_left -= 16;
-	      next += 16;
-	      mb += 16;
-	    }
-	  else
-	    {
-	      clib_memcpy (to_next, buffers, 4 * sizeof (u32));
-	      to_next += 4;
-	      n_left_to_next -= 4;
-
-	      vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
-					       n_left_to_next, buffers[0],
-					       buffers[1], buffers[2],
-					       buffers[3], next[0], next[1],
-					       next[2], next[3]);
-	      /* next */
-	      buffers += 4;
-	      n_left -= 4;
-	      next += 4;
-	      mb += 4;
-	    }
-	}
-#endif
-      while (n_left >= 4 && n_left_to_next >= 4)
-	{
-	  clib_memcpy (to_next, buffers, 4 * sizeof (u32));
-	  to_next += 4;
-	  n_left_to_next -= 4;
-
-	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
-					   n_left_to_next, buffers[0],
-					   buffers[1], buffers[2], buffers[3],
-					   next[0], next[1], next[2],
-					   next[3]);
-	  /* next */
-	  buffers += 4;
-	  n_left -= 4;
-	  next += 4;
-	  mb += 4;
-	}
-      while (n_left && n_left_to_next)
-	{
-	  clib_memcpy (to_next, buffers, 1 * sizeof (u32));
-	  to_next += 1;
-	  n_left_to_next -= 1;
-	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-					   n_left_to_next, buffers[0],
-					   next[0]);
-	  /* next */
-	  buffers += 1;
-	  n_left -= 1;
-	  next += 1;
-	  mb += 1;
-	}
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
+
+  vlib_buffer_enqueue_to_next (vm, node, ptd->buffers, ptd->next,
+			       n_rx_packets);
 
   /* packet trace if enabled */
   if ((n_trace = vlib_get_trace_count (vm, node)))
diff --git a/src/vlib/buffer_node.h b/src/vlib/buffer_node.h
index f9e8b3fa4fe..1c4f4e7eea7 100644
--- a/src/vlib/buffer_node.h
+++ b/src/vlib/buffer_node.h
@@ -328,6 +328,121 @@ generic_buffer_node_inline (vlib_main_t * vm,
   return frame->n_vectors;
 }
 
+static_always_inline void
+vlib_buffer_enqueue_to_next (vlib_main_t * vm, vlib_node_runtime_t * node,
+			     u32 * buffers, u16 * nexts, uword count)
+{
+  u32 *to_next, n_left_to_next, max;
+  u16 next_index;
+
+  next_index = nexts[0];
+  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+  max = clib_min (n_left_to_next, count);
+
+  while (count)
+    {
+      u32 n_enqueued;
+      if ((nexts[0] != next_index) || n_left_to_next == 0)
+	{
+	  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+	  next_index = nexts[0];
+	  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+	  max = clib_min (n_left_to_next, count);
+	}
+#if defined(CLIB_HAVE_VEC512)
+      u16x32 next32 = u16x32_load_unaligned (nexts);
+      next32 = (next32 == u16x32_splat (next32[0]));
+      u64 bitmap = u16x32_msb_mask (next32);
+      n_enqueued = count_trailing_zeros (~bitmap);
+#elif defined(CLIB_HAVE_VEC256)
+      u16x16 next16 = u16x16_load_unaligned (nexts);
+      next16 = (next16 == u16x16_splat (next16[0]));
+      u64 bitmap = u8x32_msb_mask ((u8x32) next16);
+      n_enqueued = count_trailing_zeros (~bitmap) / 2;
+#elif defined(CLIB_HAVE_VEC128)
+      u16x8 next8 = u16x8_load_unaligned (nexts);
+      next8 = (next8 == u16x8_splat (next8[0]));
+      u64 bitmap = u8x16_msb_mask ((u8x16) next8);
+      n_enqueued = count_trailing_zeros (~bitmap) / 2;
+#else
+      u16 x = 0;
+      x |= next_index ^ nexts[1];
+      x |= next_index ^ nexts[2];
+      x |= next_index ^ nexts[3];
+      n_enqueued = (x == 0) ? 4 : 1;
+#endif
+
+      if (PREDICT_FALSE (n_enqueued > max))
+	n_enqueued = max;
+
+#ifdef CLIB_HAVE_VEC512
+      if (n_enqueued >= 32)
+	{
+	  clib_memcpy (to_next, buffers, 32 * sizeof (u32));
+	  nexts += 32;
+	  to_next += 32;
+	  buffers += 32;
+	  n_left_to_next -= 32;
+	  count -= 32;
+	  max -= 32;
+	  continue;
+	}
+#endif
+
+#ifdef CLIB_HAVE_VEC256
+      if (n_enqueued >= 16)
+	{
+	  clib_memcpy (to_next, buffers, 16 * sizeof (u32));
+	  nexts += 16;
+	  to_next += 16;
+	  buffers += 16;
+	  n_left_to_next -= 16;
+	  count -= 16;
+	  max -= 16;
+	  continue;
+	}
+#endif
+
+#ifdef CLIB_HAVE_VEC128
+      if (n_enqueued >= 8)
+	{
+	  clib_memcpy (to_next, buffers, 8 * sizeof (u32));
+	  nexts += 8;
+	  to_next += 8;
+	  buffers += 8;
+	  n_left_to_next -= 8;
+	  count -= 8;
+	  max -= 8;
+	  continue;
+	}
+#endif
+
+      if (n_enqueued >= 4)
+	{
+	  clib_memcpy (to_next, buffers, 4 * sizeof (u32));
+	  nexts += 4;
+	  to_next += 4;
+	  buffers += 4;
+	  n_left_to_next -= 4;
+	  count -= 4;
+	  max -= 4;
+	  continue;
+	}
+
+      /* copy */
+      to_next[0] = buffers[0];
+
+      /* next */
+      nexts += 1;
+      to_next += 1;
+      buffers += 1;
+      n_left_to_next -= 1;
+      count -= 1;
+      max -= 1;
+    }
+  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+}
+
 #endif /* included_vlib_buffer_node_h */
 
 /*
diff --git a/src/vppinfra/vector_avx2.h b/src/vppinfra/vector_avx2.h
index f651392559b..3f0b397b828 100644
--- a/src/vppinfra/vector_avx2.h
+++ b/src/vppinfra/vector_avx2.h
@@ -81,6 +81,12 @@ u32x8_insert_hi (u32x8 v1, u32x4 v2)
   return (u32x8) _mm256_inserti128_si256 ((__m256i) v1, (__m128i) v2, 1);
 }
 
+static_always_inline u32
+u8x32_msb_mask (u8x32 v)
+{
+  return _mm256_movemask_epi8 ((__m256i) v);
+}
+
 /* _extend_to_ */
 /* *INDENT-OFF* */
 #define _(f,t,i) \
diff --git a/src/vppinfra/vector_avx512.h b/src/vppinfra/vector_avx512.h
index ac4c09b8375..c1b7c42a260 100644
--- a/src/vppinfra/vector_avx512.h
+++ b/src/vppinfra/vector_avx512.h
@@ -27,6 +27,7 @@ _(f,32,8,ps) _(f,64,4,pd)
 
 
 /* splat, load_unaligned, store_unaligned */
+/* *INDENT-OFF* */
 #define _(t, s, c, i) \
 static_always_inline t##s##x##c \
 t##s##x##c##_splat (t##s x) \
@@ -43,7 +44,15 @@ t##s##x##c##_store_unaligned (t##s##x##c v, void *p) \
 
 foreach_avx512_vec512i foreach_avx512_vec512u
 #undef _
-#endif /* included_vector_avx512_h */
+/* *INDENT-ON* */
+
+static_always_inline u32
+u16x32_msb_mask (u16x32 v)
+{
+  return (u32) _mm512_movepi16_mask ((__m512i) v);
+}
+
+#endif /* included_vector_avx512_h */
 /*
  * fd.io coding-style-patch-verification: ON
  *
diff --git a/src/vppinfra/vector_sse42.h b/src/vppinfra/vector_sse42.h
index cf7f158b873..50aa662a925 100644
--- a/src/vppinfra/vector_sse42.h
+++ b/src/vppinfra/vector_sse42.h
@@ -574,6 +574,12 @@ i16x8_min_scalar (i16x8 x)
   return _mm_extract_epi16 ((__m128i) x, 0);
 }
 
+static_always_inline u16
+u8x16_msb_mask (u8x16 v)
+{
+  return _mm_movemask_epi8 ((__m128i) v);
+}
+
 #undef _signed_binop
 
 #endif /* included_vector_sse2_h */
-- 
2.16.6
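
Note (not part of the patch): a minimal sketch of how a graph node's dispatch
function could call the new helper. example_node_fn, the EXAMPLE_NEXT_* next
indices and the length-based classification are hypothetical and exist only
for illustration; the real pattern is the one the patch applies to
dpdk_device_input above, where the open-coded vlib_get_next_frame /
vlib_validate_buffer_enqueue_x4 / vlib_put_next_frame loop is replaced by a
single vlib_buffer_enqueue_to_next call over a per-packet next-index vector.

#include <vlib/vlib.h>
#include <vlib/buffer_node.h>

enum
{
  EXAMPLE_NEXT_DROP,		/* hypothetical next indices for this sketch */
  EXAMPLE_NEXT_OUTPUT,
};

static uword
example_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
		 vlib_frame_t * frame)
{
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_packets = frame->n_vectors;
  u16 nexts[VLIB_FRAME_SIZE];
  u32 i;

  /* classify: compute one next index per buffer */
  for (i = 0; i < n_packets; i++)
    {
      vlib_buffer_t *b = vlib_get_buffer (vm, from[i]);
      nexts[i] = b->current_length ? EXAMPLE_NEXT_OUTPUT : EXAMPLE_NEXT_DROP;
    }

  /* one call enqueues the whole vector; runs of equal next indices are
     detected with SIMD where available and copied in bulk */
  vlib_buffer_enqueue_to_next (vm, node, from, nexts, n_packets);
  return n_packets;
}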