X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvppinfra%2Fvector_avx512.h;h=3505961dca581d07d3504f94136f9f7fd8a9500a;hb=2e5921b08;hp=8acac2a3a9f8587e4b52177c307ea1d291f75a3a;hpb=9ab2e5d8d711e678e914e4fb8d3d181f868acf3d;p=vpp.git diff --git a/src/vppinfra/vector_avx512.h b/src/vppinfra/vector_avx512.h index 8acac2a3a9f..3505961dca5 100644 --- a/src/vppinfra/vector_avx512.h +++ b/src/vppinfra/vector_avx512.h @@ -98,6 +98,18 @@ _ (i32x16, i16x32, _mm512_packs_epi32) _ (i32x16, u16x32, _mm512_packus_epi32) #undef _ +static_always_inline u64x8 +u64x8_byte_swap (u64x8 v) +{ + u8x64 swap = { + 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, + }; + return (u64x8) _mm512_shuffle_epi8 ((__m512i) v, (__m512i) swap); +} + static_always_inline u32x16 u32x16_byte_swap (u32x16 v) { @@ -184,6 +196,13 @@ u8x64_xor3 (u8x64 a, u8x64 b, u8x64 c) (__m512i) c, 0x96); } +static_always_inline u64x8 +u64x8_xor3 (u64x8 a, u64x8 b, u64x8 c) +{ + return (u64x8) _mm512_ternarylogic_epi32 ((__m512i) a, (__m512i) b, + (__m512i) c, 0x96); +} + static_always_inline u8x64 u8x64_reflect_u8x16 (u8x64 x) { @@ -196,12 +215,6 @@ u8x64_reflect_u8x16 (u8x64 x) return (u8x64) _mm512_shuffle_epi8 ((__m512i) x, (__m512i) mask); } -static_always_inline u8x64 -u8x64_shuffle (u8x64 v, u8x64 m) -{ - return (u8x64) _mm512_shuffle_epi8 ((__m512i) v, (__m512i) m); -} - #define u8x64_align_right(a, b, imm) \ (u8x64) _mm512_alignr_epi8 ((__m512i) a, (__m512i) b, imm) @@ -308,6 +321,12 @@ u8x64_mask_blend (u8x64 a, u8x64 b, u64 mask) return (u8x64) _mm512_mask_blend_epi8 (mask, (__m512i) a, (__m512i) b); } +static_always_inline u8x64 +u8x64_permute (u8x64 v, u8x64 idx) +{ + return (u8x64) _mm512_permutexvar_epi8 ((__m512i) v, (__m512i) idx); +} + #define _(t, m, e, p, it) \ static_always_inline m t##_is_equal_mask (t a, t b) \ { \