X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvppinfra%2Fvector%2Fcompress.h;h=adb6503f7115c0524d8a7bf7f2975077e587e9da;hb=7da9b5be41395cc6355f9cf278106aae7fd9f991;hp=1d5d84e77ea0cf98706948aa30403984afd636fa;hpb=17a67218587d40541ff522c6a86f354720481fbb;p=vpp.git diff --git a/src/vppinfra/vector/compress.h b/src/vppinfra/vector/compress.h index 1d5d84e77ea..adb6503f711 100644 --- a/src/vppinfra/vector/compress.h +++ b/src/vppinfra/vector/compress.h @@ -7,6 +7,71 @@ #include #include +static_always_inline u64 * +clib_compress_u64_x64 (u64 *dst, u64 *src, u64 mask) +{ +#if defined(CLIB_HAVE_VEC512_COMPRESS) + u64x8u *sv = (u64x8u *) src; + for (int i = 0; i < 8; i++) + { + u64x8_compress_store (sv[i], mask, dst); + dst += _popcnt32 ((u8) mask); + mask >>= 8; + } +#elif defined(CLIB_HAVE_VEC256_COMPRESS) + u64x4u *sv = (u64x4u *) src; + for (int i = 0; i < 16; i++) + { + u64x4_compress_store (sv[i], mask, dst); + dst += _popcnt32 (((u8) mask) & 0x0f); + mask >>= 4; + } +#else + while (mask) + { + u16 bit = count_trailing_zeros (mask); + mask = clear_lowest_set_bit (mask); + dst++[0] = src[bit]; + } +#endif + return dst; +} + +/** \brief Compress array of 64-bit elements into destination array based on + * mask + + @param dst destination array of u64 elements + @param src source array of u64 elements + @param mask array of u64 values representing compress mask + @param n_elts number of elements in the source array + @return number of elements stored in destination array +*/ + +static_always_inline u32 +clib_compress_u64 (u64 *dst, u64 *src, u64 *mask, u32 n_elts) +{ + u64 *dst0 = dst; + while (n_elts >= 64) + { + if (mask[0] == ~0ULL) + { + clib_memcpy_fast (dst, src, 64 * sizeof (u64)); + dst += 64; + } + else + dst = clib_compress_u64_x64 (dst, src, mask[0]); + + mask++; + src += 64; + n_elts -= 64; + } + + if (PREDICT_TRUE (n_elts == 0)) + return dst - dst0; + + return clib_compress_u64_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0; +} +
static_always_inline u32 * clib_compress_u32_x64 (u32 *dst, u32 *src, u64 mask) { @@ -14,9 +79,8 @@ clib_compress_u32_x64 (u32 *dst, u32 *src, u64 mask) u32x16u *sv = (u32x16u *) src; for (int i = 0; i < 4; i++) { - int cnt = _popcnt32 ((u16) mask); u32x16_compress_store (sv[i], mask, dst); - dst += cnt; + dst += _popcnt32 ((u16) mask); mask >>= 16; } @@ -24,9 +88,8 @@ clib_compress_u32_x64 (u32 *dst, u32 *src, u64 mask) u32x8u *sv = (u32x8u *) src; for (int i = 0; i < 8; i++) { - int cnt = _popcnt32 ((u8) mask); u32x8_compress_store (sv[i], mask, dst); - dst += cnt; + dst += _popcnt32 ((u8) mask); mask >>= 8; } #else @@ -75,4 +138,114 @@ clib_compress_u32 (u32 *dst, u32 *src, u64 *mask, u32 n_elts) return clib_compress_u32_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0; } +static_always_inline u16 * +clib_compress_u16_x64 (u16 *dst, u16 *src, u64 mask) +{ +#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16) + u16x32u *sv = (u16x32u *) src; + for (int i = 0; i < 2; i++) + { + u16x32_compress_store (sv[i], mask, dst); + dst += _popcnt32 ((u32) mask); + mask >>= 32; + } +#else + while (mask) + { + u16 bit = count_trailing_zeros (mask); + mask = clear_lowest_set_bit (mask); + dst++[0] = src[bit]; + } +#endif + return dst; +} + +/** \brief Compress array of 16-bit elements into destination array based on + * mask + + @param dst destination array of u16 elements + @param src source array of u16 elements + @param mask array of u64 values representing compress mask + @param n_elts number of elements in the source array + @return number of elements stored in destination array +*/ + +static_always_inline u32 +clib_compress_u16 (u16 *dst, u16 *src, u64 *mask, u32 n_elts) +{ + u16 *dst0 = dst; + while (n_elts >= 64) + { + if (mask[0] == ~0ULL) + { + clib_memcpy_fast (dst, src, 64 * sizeof (u16)); + dst += 64; + } + else + dst = clib_compress_u16_x64 (dst, src, mask[0]); + + mask++; + src += 64; + n_elts -= 64; + } + + if (PREDICT_TRUE (n_elts == 0)) + return dst - dst0; + +
return clib_compress_u16_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0; +} + +static_always_inline u8 * +clib_compress_u8_x64 (u8 *dst, u8 *src, u64 mask) +{ +#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16) + u8x64u *sv = (u8x64u *) src; + u8x64_compress_store (sv[0], mask, dst); + dst += _popcnt64 (mask); +#else + while (mask) + { + u16 bit = count_trailing_zeros (mask); + mask = clear_lowest_set_bit (mask); + dst++[0] = src[bit]; + } +#endif + return dst; +} + +/** \brief Compress array of 8-bit elements into destination array based on + * mask + + @param dst destination array of u8 elements + @param src source array of u8 elements + @param mask array of u64 values representing compress mask + @param n_elts number of elements in the source array + @return number of elements stored in destination array +*/ + +static_always_inline u32 +clib_compress_u8 (u8 *dst, u8 *src, u64 *mask, u32 n_elts) +{ + u8 *dst0 = dst; + while (n_elts >= 64) + { + if (mask[0] == ~0ULL) + { + clib_memcpy_fast (dst, src, 64); + dst += 64; + } + else + dst = clib_compress_u8_x64 (dst, src, mask[0]); + + mask++; + src += 64; + n_elts -= 64; + } + + if (PREDICT_TRUE (n_elts == 0)) + return dst - dst0; + + return clib_compress_u8_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0; +} + #endif