vppinfra: add compress functions for u64, u16 and u8

[vpp.git] / src / vppinfra / vector_avx512.h
diff --git a/src/vppinfra/vector_avx512.h b/src/vppinfra/vector_avx512.h

index a688bae..5da4901 100644 (file)
--- a/src/vppinfra/vector_avx512.h
+++ b/src/vppinfra/vector_avx512.h
@@ -85,6 +85,19 @@ u16x32_msb_mask (u16x32 v)
    return (u32) _mm512_movepi16_mask ((__m512i) v);
  }
  
+/* 512-bit packs: _ (f, t, fn) emits t##_pack (lo, hi) = (t) fn (lo, hi) */
+#define _(f, t, fn)                                                           \
+  always_inline t t##_pack (f lo, f hi)                                       \
+  {                                                                           \
+    return (t) fn ((__m512i) lo, (__m512i) hi);                               \
+  }
+/* NOTE(review): packs/packus saturate, packing per 128-bit lane - confirm */
+_ (i16x32, i8x64, _mm512_packs_epi16)
+_ (i16x32, u8x64, _mm512_packus_epi16)
+_ (i32x16, i16x32, _mm512_packs_epi32)
+_ (i32x16, u16x32, _mm512_packus_epi32)
+#undef _
+
  static_always_inline u32x16
  u32x16_byte_swap (u32x16 v)
  {
@@ -323,8 +336,20 @@ _ (u8x16, u16, _mm, __m128i, epi8)
  #endif
  #undef _
  
+#ifdef CLIB_HAVE_VEC256 /* 256-bit compress feature flags */
  #define CLIB_HAVE_VEC256_COMPRESS
+#ifdef __AVX512VBMI2__ /* u8/u16 flag only when VBMI2 is enabled */
+#define CLIB_HAVE_VEC256_COMPRESS_U8_U16
+#endif /* __AVX512VBMI2__ */
+
+#endif /* CLIB_HAVE_VEC256 */
+#ifdef CLIB_HAVE_VEC512 /* 512-bit compress feature flags */
  #define CLIB_HAVE_VEC512_COMPRESS
+#ifdef __AVX512VBMI2__ /* u8/u16 flag only when VBMI2 is enabled */
+#define CLIB_HAVE_VEC512_COMPRESS_U8_U16
+#endif /* __AVX512VBMI2__ */
+
+#endif /* CLIB_HAVE_VEC512 */
  
  #ifndef __AVX512VBMI2__
  static_always_inline u16x16