return (u32) _mm512_movepi16_mask ((__m512i) v);
}
+/* 512-bit packs */
+#define _(f, t, fn) \
+ always_inline t t##_pack (f lo, f hi) \
+ { \
+ return (t) fn ((__m512i) lo, (__m512i) hi); \
+ }
+
+_ (i16x32, i8x64, _mm512_packs_epi16)
+_ (i16x32, u8x64, _mm512_packus_epi16)
+_ (i32x16, i16x32, _mm512_packs_epi32)
+_ (i32x16, u16x32, _mm512_packus_epi32)
+#undef _
+
static_always_inline u32x16
u32x16_byte_swap (u32x16 v)
{
#define u8x64_align_right(a, b, imm) \
(u8x64) _mm512_alignr_epi8 ((__m512i) a, (__m512i) b, imm)
+#define u64x8_align_right(a, b, imm) \
+ (u64x8) _mm512_alignr_epi64 ((__m512i) a, (__m512i) b, imm)
+
static_always_inline u32
u32x16_sum_elts (u32x16 sum16)
{
#endif
#undef _
+#ifdef CLIB_HAVE_VEC256
#define CLIB_HAVE_VEC256_COMPRESS
+#ifdef __AVX512VBMI2__
+#define CLIB_HAVE_VEC256_COMPRESS_U8_U16
+#endif
+
+#endif
+#ifdef CLIB_HAVE_VEC512
#define CLIB_HAVE_VEC512_COMPRESS
+#ifdef __AVX512VBMI2__
+#define CLIB_HAVE_VEC512_COMPRESS_U8_U16
+#endif
+
+#endif
#ifndef __AVX512VBMI2__
static_always_inline u16x16