X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvppinfra%2Fvector_avx512.h;h=a82231ac0251f912f51c16e236b7c3b9c4d62fe5;hb=6eb0f84e37d84caeb9db773976df5ab3a7e794d0;hp=2f5763e3c926a9e4aaceb854b4660f0f3ffcc3bf;hpb=ef0bac70995bb225fea8955009b34d5b823285ba;p=vpp.git

diff --git a/src/vppinfra/vector_avx512.h b/src/vppinfra/vector_avx512.h
index 2f5763e3c92..a82231ac025 100644
--- a/src/vppinfra/vector_avx512.h
+++ b/src/vppinfra/vector_avx512.h
@@ -85,6 +85,19 @@ u16x32_msb_mask (u16x32 v)
   return (u32) _mm512_movepi16_mask ((__m512i) v);
 }
 
+/* 512-bit packs */
+#define _(f, t, fn)                                                           \
+  always_inline t t##_pack (f lo, f hi)                                       \
+  {                                                                           \
+    return (t) fn ((__m512i) lo, (__m512i) hi);                               \
+  }
+
+_ (i16x32, i8x64, _mm512_packs_epi16)
+_ (i16x32, u8x64, _mm512_packus_epi16)
+_ (i32x16, i16x32, _mm512_packs_epi32)
+_ (i32x16, u16x32, _mm512_packus_epi32)
+#undef _
+
 static_always_inline u32x16
 u32x16_byte_swap (u32x16 v)
 {
@@ -192,6 +205,9 @@ u8x64_shuffle (u8x64 v, u8x64 m)
 #define u8x64_align_right(a, b, imm)                                          \
   (u8x64) _mm512_alignr_epi8 ((__m512i) a, (__m512i) b, imm)
 
+#define u64x8_align_right(a, b, imm)                                          \
+  (u64x8) _mm512_alignr_epi64 ((__m512i) a, (__m512i) b, imm)
+
 static_always_inline u32
 u32x16_sum_elts (u32x16 sum16)
 {
@@ -325,9 +341,17 @@ _ (u8x16, u16, _mm, __m128i, epi8)
 
 #ifdef CLIB_HAVE_VEC256
 #define CLIB_HAVE_VEC256_COMPRESS
+#ifdef __AVX512VBMI2__
+#define CLIB_HAVE_VEC256_COMPRESS_U8_U16
+#endif
+
 #endif
 #ifdef CLIB_HAVE_VEC512
 #define CLIB_HAVE_VEC512_COMPRESS
+#ifdef __AVX512VBMI2__
+#define CLIB_HAVE_VEC512_COMPRESS_U8_U16
+#endif
+
 #endif
 
 #ifndef __AVX512VBMI2__