1 /* SPDX-License-Identifier: Apache-2.0
2 * Copyright(c) 2021 Cisco Systems, Inc.
5 #ifndef included_vector_compress_h
6 #define included_vector_compress_h
7 #include <vppinfra/clib.h>
8 #include <vppinfra/memcpy.h>
/* Compress one group of up to 64 u64 elements from src into dst, driven by
 * a single 64-bit mask word. Callers in this file assign the returned
 * pointer back to their dst cursor, i.e. they treat it as the next write
 * position.
 * NOTE(review): braces, any update of mask between iterations, the scalar
 * copy and the return statement are not visible in this excerpt. */
10 static_always_inline u64 *
11 clib_compress_u64_x64 (u64 *dst, u64 *src, u64 mask)
13 #if defined(CLIB_HAVE_VEC512_COMPRESS)
/* 512-bit path: view src as unaligned u64x8 vectors and walk 8 of them. */
14 u64x8u *sv = (u64x8u *) src;
15 for (int i = 0; i < 8; i++)
/* presumably writes the lanes selected by mask contiguously at dst
 * (compress-store semantics) - confirm against the helper definition */
17 u64x8_compress_store (sv[i], mask, dst);
/* advance dst by the number of set bits in the low 8 bits of mask */
18 dst += _popcnt32 ((u8) mask);
21 #elif defined(CLIB_HAVE_VEC256_COMPRESS)
/* 256-bit path: view src as unaligned u64x4 vectors and walk 16 of them. */
22 u64x4u *sv = (u64x4u *) src;
23 for (int i = 0; i < 16; i++)
25 u64x4_compress_store (sv[i], mask, dst);
/* only the low 4 bits of mask are counted for this step */
26 dst += _popcnt32 (((u8) mask) & 0x0f);
/* NOTE(review): presumably the scalar fallback; its #else and loop header
 * are not visible here. Takes the position of the lowest set bit of mask,
 * then clears that bit. */
32 u16 bit = count_trailing_zeros (mask);
33 mask = clear_lowest_set_bit (mask);
40 /** \brief Compress array of 64-bit elements into destination array based on
43 @param dst destination array of u64 elements
44 @param src source array of u64 elements
45 @param mask array of u64 values representing compress mask
46 @param n_elts number of elements in the source array
47 @return number of elements stored in destination array
50 static_always_inline u32
51 clib_compress_u64 (u64 *dst, u64 *src, u64 *mask, u32 n_elts)
/* Bulk copy of 64 u64 elements from src to dst.
 * NOTE(review): the condition guarding this copy is not visible in this
 * excerpt; the u32/u16/u8 variants in this file guard their copy with
 * mask[0] == ~0ULL - confirm the same holds here. */
58 clib_memcpy_fast (dst, src, 64 * sizeof (u64));
/* Compress one 64-element group under mask[0]; the returned pointer
 * becomes the new dst. */
62 dst = clib_compress_u64_x64 (dst, src, mask[0]);
/* No leftover elements; PREDICT_TRUE presumably marks this as the expected
 * case. The value returned on this path is not visible in this excerpt. */
69 if (PREDICT_TRUE (n_elts == 0))
/* Tail: mask[0] is ANDed with pow2_mask (n_elts), presumably keeping only
 * the low n_elts bits - confirm. Subtracting dst0 turns the end pointer
 * into an element count; dst0 is declared on a line not visible here,
 * presumably the initial dst. */
72 return clib_compress_u64_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0;
/* Compress one group of up to 64 u32 elements from src into dst, driven by
 * a single 64-bit mask word. Callers in this file assign the returned
 * pointer back to their dst cursor, i.e. they treat it as the next write
 * position.
 * NOTE(review): braces, any update of mask between iterations, the scalar
 * copy and the return statement are not visible in this excerpt. */
75 static_always_inline u32 *
76 clib_compress_u32_x64 (u32 *dst, u32 *src, u64 mask)
78 #if defined(CLIB_HAVE_VEC512_COMPRESS)
/* 512-bit path: view src as unaligned u32x16 vectors and walk 4 of them. */
79 u32x16u *sv = (u32x16u *) src;
80 for (int i = 0; i < 4; i++)
/* presumably writes the lanes selected by mask contiguously at dst
 * (compress-store semantics) - confirm against the helper definition */
82 u32x16_compress_store (sv[i], mask, dst);
/* advance dst by the number of set bits in the low 16 bits of mask */
83 dst += _popcnt32 ((u16) mask);
87 #elif defined(CLIB_HAVE_VEC256_COMPRESS)
/* 256-bit path: view src as unaligned u32x8 vectors and walk 8 of them. */
88 u32x8u *sv = (u32x8u *) src;
89 for (int i = 0; i < 8; i++)
91 u32x8_compress_store (sv[i], mask, dst);
/* advance dst by the number of set bits in the low 8 bits of mask */
92 dst += _popcnt32 ((u8) mask);
/* NOTE(review): presumably the scalar fallback; its #else and loop header
 * are not visible here. Takes the position of the lowest set bit of mask,
 * then clears that bit. */
98 u16 bit = count_trailing_zeros (mask);
99 mask = clear_lowest_set_bit (mask);
106 /** \brief Compress array of 32-bit elements into destination array based on
109 @param dst destination array of u32 elements
110 @param src source array of u32 elements
111 @param mask array of u64 values representing compress mask
112 @param n_elts number of elements in the source array
113 @return number of elements stored in destination array
116 static_always_inline u32
117 clib_compress_u32 (u32 *dst, u32 *src, u64 *mask, u32 n_elts)
/* All 64 mask bits set: every element of the group is kept, so the group
 * is copied with clib_memcpy_u32 instead of being compressed.
 * NOTE(review): the enclosing loop and the dst/src/mask/n_elts updates are
 * not visible in this excerpt. */
122 if (mask[0] == ~0ULL)
124 clib_memcpy_u32 (dst, src, 64);
/* Compress one 64-element group under mask[0]; the returned pointer
 * becomes the new dst. */
128 dst = clib_compress_u32_x64 (dst, src, mask[0]);
/* No leftover elements; PREDICT_TRUE presumably marks this as the expected
 * case. The value returned on this path is not visible in this excerpt. */
135 if (PREDICT_TRUE (n_elts == 0))
/* Tail: mask[0] is ANDed with pow2_mask (n_elts), presumably keeping only
 * the low n_elts bits - confirm. Subtracting dst0 turns the end pointer
 * into an element count; dst0 is declared on a line not visible here,
 * presumably the initial dst. */
138 return clib_compress_u32_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0;
/* Compress one group of up to 64 u16 elements from src into dst, driven by
 * a single 64-bit mask word. Callers in this file assign the returned
 * pointer back to their dst cursor, i.e. they treat it as the next write
 * position.
 * NOTE(review): braces, any update of mask between iterations, the scalar
 * copy and the return statement are not visible in this excerpt. */
141 static_always_inline u16 *
142 clib_compress_u16_x64 (u16 *dst, u16 *src, u64 mask)
144 #if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16)
/* 512-bit path: view src as unaligned u16x32 vectors and walk 2 of them. */
145 u16x32u *sv = (u16x32u *) src;
146 for (int i = 0; i < 2; i++)
/* presumably writes the lanes selected by mask contiguously at dst
 * (compress-store semantics) - confirm against the helper definition */
148 u16x32_compress_store (sv[i], mask, dst);
/* advance dst by the number of set bits in the low 32 bits of mask */
149 dst += _popcnt32 ((u32) mask);
/* NOTE(review): presumably the scalar fallback; its #else and loop header
 * are not visible here. Takes the position of the lowest set bit of mask,
 * then clears that bit. */
155 u16 bit = count_trailing_zeros (mask);
156 mask = clear_lowest_set_bit (mask);
163 /** \brief Compress array of 16-bit elements into destination array based on
166 @param dst destination array of u16 elements
167 @param src source array of u16 elements
168 @param mask array of u64 values representing compress mask
169 @param n_elts number of elements in the source array
170 @return number of elements stored in destination array
173 static_always_inline u32
174 clib_compress_u16 (u16 *dst, u16 *src, u64 *mask, u32 n_elts)
/* All 64 mask bits set: every element of the group is kept, so the group
 * is copied as 64 * sizeof (u16) bytes instead of being compressed.
 * NOTE(review): the enclosing loop and the dst/src/mask/n_elts updates are
 * not visible in this excerpt. */
179 if (mask[0] == ~0ULL)
181 clib_memcpy_fast (dst, src, 64 * sizeof (u16));
/* Compress one 64-element group under mask[0]; the returned pointer
 * becomes the new dst. */
185 dst = clib_compress_u16_x64 (dst, src, mask[0]);
/* No leftover elements; PREDICT_TRUE presumably marks this as the expected
 * case. The value returned on this path is not visible in this excerpt. */
192 if (PREDICT_TRUE (n_elts == 0))
/* Tail: mask[0] is ANDed with pow2_mask (n_elts), presumably keeping only
 * the low n_elts bits - confirm. Subtracting dst0 turns the end pointer
 * into an element count; dst0 is declared on a line not visible here,
 * presumably the initial dst. */
195 return clib_compress_u16_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0;
/* Compress one group of up to 64 u8 elements from src into dst, driven by
 * a single 64-bit mask word. Callers in this file assign the returned
 * pointer back to their dst cursor, i.e. they treat it as the next write
 * position.
 * NOTE(review): braces, the scalar copy and the return statement are not
 * visible in this excerpt. */
198 static_always_inline u8 *
199 clib_compress_u8_x64 (u8 *dst, u8 *src, u64 mask)
201 #if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16)
/* 512-bit path: the whole group is one u8x64 vector, so a single
 * compress-store call with the full 64-bit mask is issued (no loop). */
202 u8x64u *sv = (u8x64u *) src;
203 u8x64_compress_store (sv[0], mask, dst);
/* advance dst by the number of set bits in the full mask */
204 dst += _popcnt64 (mask);
/* NOTE(review): presumably the scalar fallback; its #else and loop header
 * are not visible here. Takes the position of the lowest set bit of mask,
 * then clears that bit. */
208 u16 bit = count_trailing_zeros (mask);
209 mask = clear_lowest_set_bit (mask);
216 /** \brief Compress array of 8-bit elements into destination array based on
219 @param dst destination array of u8 elements
220 @param src source array of u8 elements
221 @param mask array of u64 values representing compress mask
222 @param n_elts number of elements in the source array
223 @return number of elements stored in destination array
226 static_always_inline u32
227 clib_compress_u8 (u8 *dst, u8 *src, u64 *mask, u32 n_elts)
/* All 64 mask bits set: every element of the group is kept, so the group
 * is copied as 64 bytes instead of being compressed.
 * NOTE(review): the enclosing loop and the dst/src/mask/n_elts updates are
 * not visible in this excerpt. */
232 if (mask[0] == ~0ULL)
234 clib_memcpy_fast (dst, src, 64);
/* Compress one 64-element group under mask[0]; the returned pointer
 * becomes the new dst. */
238 dst = clib_compress_u8_x64 (dst, src, mask[0]);
/* No leftover elements; PREDICT_TRUE presumably marks this as the expected
 * case. The value returned on this path is not visible in this excerpt. */
245 if (PREDICT_TRUE (n_elts == 0))
/* Tail: mask[0] is ANDed with pow2_mask (n_elts), presumably keeping only
 * the low n_elts bits - confirm. Subtracting dst0 turns the end pointer
 * into an element count; dst0 is declared on a line not visible here,
 * presumably the initial dst. */
248 return clib_compress_u8_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0;