u8 buffer_pool_index = ~0;
u32 n_queue = 0, queue[queue_size + 4];
vlib_buffer_t bt = { };
-#if defined(CLIB_HAVE_VEC128) && !__aarch64__
+#if defined(CLIB_HAVE_VEC128)
vlib_buffer_t bpi_mask = {.buffer_pool_index = ~0 };
vlib_buffer_t bpi_vec = {.buffer_pool_index = ~0 };
vlib_buffer_t flags_refs_mask = {
vlib_prefetch_buffer_header (b[6], LOAD);
vlib_prefetch_buffer_header (b[7], LOAD);
-#if defined(CLIB_HAVE_VEC128) && !__aarch64__
+#if defined(CLIB_HAVE_VEC128)
u8x16 p0, p1, p2, p3, r;
p0 = u8x16_load_unaligned (b[0]);
p1 = u8x16_load_unaligned (b[1]);
}
buffer_pool_index = b[0]->buffer_pool_index;
-#if defined(CLIB_HAVE_VEC128) && !__aarch64__
+#if defined(CLIB_HAVE_VEC128)
bpi_vec.buffer_pool_index = buffer_pool_index;
#endif
bp = vlib_get_buffer_pool (vm, buffer_pool_index);
#define i16x8_sub_saturate(a,b) vsubq_s16(a,b)
/* Dummy identity reinterpret; lets type-generic macros be written uniformly. */
#define vreinterpretq_u8_u8(a) a
+/* Implement the missing intrinsic so the type-generic macros can be written
+ * uniformly: minimum across the two 64-bit lanes of a vector.
+ * The argument is evaluated exactly once, and the temporaries carry a
+ * macro-specific prefix so they cannot capture identifiers that appear in
+ * the caller's expression. Yields a u64. */
+#define vminvq_u64(x) \
+({ \
+ __typeof__ (x) _vminv_in = (x); \
+ u64 _vminv_lo = vgetq_lane_u64 (_vminv_in, 0); \
+ u64 _vminv_hi = vgetq_lane_u64 (_vminv_in, 1); \
+ _vminv_lo < _vminv_hi ? _vminv_lo : _vminv_hi; \
+})
/* Converts all ones/zeros compare mask to bitmap. */
always_inline u32
\
static_always_inline int \
t##s##x##c##_is_all_zero (t##s##x##c x) \
-{ return !(vaddvq_##i (x)); } \
+{ return !!(vminvq_u##s (vceqq_##i (vdupq_n_##i(0), x))); } \
\
static_always_inline int \
t##s##x##c##_is_equal (t##s##x##c a, t##s##x##c b) \
-{ return t##s##x##c##_is_all_zero (a ^ b); } \
+{ return !!(vminvq_u##s (vceqq_##i (a, b))); } \
\
static_always_inline int \
t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \