1 /* SPDX-License-Identifier: Apache-2.0
2 * Copyright(c) 2021 Cisco Systems, Inc.
5 #ifndef included_vector_compress_h
6 #define included_vector_compress_h
7 #include <vppinfra/clib.h>
8 #include <vppinfra/memcpy.h>
/* Compress one 64-element chunk of u64 values: for every set bit i in mask,
 * copy src[i] to the next dst slot, preserving order. Returns dst advanced
 * past the last element stored.
 * NOTE(review): this chunk view elides braces, the per-iteration mask shift
 * and the final return — confirm against the full file. */
static_always_inline u64 *
clib_compress_u64_x64 (u64 *dst, u64 *src, u64 mask)
#if defined(CLIB_HAVE_VEC512_COMPRESS)
  /* AVX-512 path: 8 iterations x 8 u64 lanes via vector compress-store. */
  u64x8u *sv = (u64x8u *) src;
  for (int i = 0; i < 8; i++)
      u64x8_compress_store (sv[i], mask, dst);
      /* advance dst by the number of set bits consumed (low 8 mask bits) */
      dst += _popcnt32 ((u8) mask);
#elif defined(CLIB_HAVE_VEC256_COMPRESS)
  /* AVX2-class path: 16 iterations x 4 u64 lanes. */
  u64x4u *sv = (u64x4u *) src;
  for (int i = 0; i < 16; i++)
      u64x4_compress_store (sv[i], mask, dst);
      /* only the low 4 mask bits apply to each 4-lane store */
      dst += _popcnt32 (((u8) mask) & 0x0f);
  /* scalar fallback: walk the set bits of mask one at a time */
  foreach_set_bit_index (i, mask)
/* Same as clib_compress_u64_x64, but safe when src holds fewer than 64 valid
 * elements: vector loads are masked (unselected lanes read as zero) so no
 * out-of-bounds bytes are touched. Returns dst advanced past the last
 * element stored. */
static_always_inline u64 *
clib_compress_u64_x64_masked (u64 *dst, u64 *src, u64 mask)
#if defined(CLIB_HAVE_VEC512_COMPRESS) && \
  defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
  u64x8u *sv = (u64x8u *) src;
  for (int i = 0; i < 8; i++)
      /* masked load: lanes not selected by mask are zero-filled, never read */
      u64x8u s = u64x8_mask_load_zero (&sv[i], mask);
      u64x8_compress_store (s, mask, dst);
      dst += _popcnt32 ((u8) mask);
#elif defined(CLIB_HAVE_VEC256_COMPRESS) && \
  defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
  u64x4u *sv = (u64x4u *) src;
  for (int i = 0; i < 16; i++)
      u64x4u s = u64x4_mask_load_zero (&sv[i], mask);
      u64x4_compress_store (s, mask, dst);
      dst += _popcnt32 (((u8) mask) & 0x0f);
  /* scalar fallback: only set-bit indices are dereferenced, so it is
   * inherently bounds-safe for a partial chunk */
  foreach_set_bit_index (i, mask)
/** \brief Compress array of 64-bit elements into destination array based on
    @param dst destination array of u64 elements
    @param src source array of u64 elements
    @param mask array of u64 values representing compress mask
    @param n_elts number of elements in the source array
    @return number of elements stored in destination array
static_always_inline u32
clib_compress_u64 (u64 *dst, u64 *src, u64 *mask, u32 n_elts)
      /* all 64 mask bits set: plain copy is faster than compressing */
      clib_memcpy_fast (dst, src, 64 * sizeof (u64));
    /* full 64-element chunk */
    dst = clib_compress_u64_x64 (dst, src, mask[0]);
  if (PREDICT_TRUE (n_elts == 0))
  /* partial tail chunk: clamp mask to n_elts bits and use the masked
   * (bounds-safe) variant; pointer difference yields the element count */
  return clib_compress_u64_x64_masked (dst, src,
				       mask[0] & pow2_mask (n_elts)) -
/* Compress one 64-element chunk of u32 values: for every set bit i in mask,
 * copy src[i] to the next dst slot, preserving order. Returns dst advanced
 * past the last element stored. */
static_always_inline u32 *
clib_compress_u32_x64 (u32 *dst, u32 *src, u64 mask)
#if defined(CLIB_HAVE_VEC512_COMPRESS)
  /* AVX-512 path: 4 iterations x 16 u32 lanes. */
  u32x16u *sv = (u32x16u *) src;
  for (int i = 0; i < 4; i++)
      u32x16_compress_store (sv[i], mask, dst);
      /* 16 lanes per store, so count the low 16 mask bits */
      dst += _popcnt32 ((u16) mask);
#elif defined(CLIB_HAVE_VEC256_COMPRESS)
  /* AVX2-class path: 8 iterations x 8 u32 lanes. */
  u32x8u *sv = (u32x8u *) src;
  for (int i = 0; i < 8; i++)
      u32x8_compress_store (sv[i], mask, dst);
      dst += _popcnt32 ((u8) mask);
  /* scalar fallback: walk the set bits of mask one at a time */
  foreach_set_bit_index (i, mask)
/* Same as clib_compress_u32_x64, but safe for a partial chunk: vector loads
 * are masked (unselected lanes zero-filled) so bytes past the valid elements
 * are never read. Returns dst advanced past the last element stored. */
static_always_inline u32 *
clib_compress_u32_x64_masked (u32 *dst, u32 *src, u64 mask)
#if defined(CLIB_HAVE_VEC512_COMPRESS) && \
  defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
  u32x16u *sv = (u32x16u *) src;
  for (int i = 0; i < 4; i++)
      /* masked load: only lanes selected by mask are read */
      u32x16u s = u32x16_mask_load_zero (&sv[i], mask);
      u32x16_compress_store (s, mask, dst);
      dst += _popcnt32 ((u16) mask);
#elif defined(CLIB_HAVE_VEC256_COMPRESS) && \
  defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
  u32x8u *sv = (u32x8u *) src;
  for (int i = 0; i < 8; i++)
      u32x8u s = u32x8_mask_load_zero (&sv[i], mask);
      u32x8_compress_store (s, mask, dst);
      dst += _popcnt32 ((u8) mask);
  /* scalar fallback: only set-bit indices are dereferenced */
  foreach_set_bit_index (i, mask)
/** \brief Compress array of 32-bit elements into destination array based on
    @param dst destination array of u32 elements
    @param src source array of u32 elements
    @param mask array of u64 values representing compress mask
    @param n_elts number of elements in the source array
    @return number of elements stored in destination array
static_always_inline u32
clib_compress_u32 (u32 *dst, u32 *src, u64 *mask, u32 n_elts)
    /* all 64 mask bits set: plain 64-element copy beats compressing */
    if (mask[0] == ~0ULL)
	clib_memcpy_u32 (dst, src, 64);
    /* full 64-element chunk */
    dst = clib_compress_u32_x64 (dst, src, mask[0]);
  if (PREDICT_TRUE (n_elts == 0))
  /* partial tail chunk: clamp mask to n_elts bits and use the masked
   * (bounds-safe) variant; pointer difference yields the element count */
  return clib_compress_u32_x64_masked (dst, src,
				       mask[0] & pow2_mask (n_elts)) -
/* Compress one 64-element chunk of u16 values: for every set bit i in mask,
 * copy src[i] to the next dst slot, preserving order. Returns dst advanced
 * past the last element stored. */
static_always_inline u16 *
clib_compress_u16_x64 (u16 *dst, u16 *src, u64 mask)
#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16)
  /* AVX-512 (VBMI2-class) path: 2 iterations x 32 u16 lanes. */
  u16x32u *sv = (u16x32u *) src;
  for (int i = 0; i < 2; i++)
      u16x32_compress_store (sv[i], mask, dst);
      /* 32 lanes per store, so count the low 32 mask bits */
      dst += _popcnt32 ((u32) mask);
  /* scalar fallback: walk the set bits of mask one at a time */
  foreach_set_bit_index (i, mask)
/* Same as clib_compress_u16_x64, but safe for a partial chunk: vector loads
 * are masked (unselected lanes zero-filled) so bytes past the valid elements
 * are never read. Returns dst advanced past the last element stored. */
static_always_inline u16 *
clib_compress_u16_x64_masked (u16 *dst, u16 *src, u64 mask)
#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16) && \
  defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
  u16x32u *sv = (u16x32u *) src;
  for (int i = 0; i < 2; i++)
      /* masked load: only lanes selected by mask are read */
      u16x32u s = u16x32_mask_load_zero (&sv[i], mask);
      u16x32_compress_store (s, mask, dst);
      dst += _popcnt32 ((u32) mask);
  /* scalar fallback: only set-bit indices are dereferenced */
  foreach_set_bit_index (i, mask)
/** \brief Compress array of 16-bit elements into destination array based on
    @param dst destination array of u16 elements
    @param src source array of u16 elements
    @param mask array of u64 values representing compress mask
    @param n_elts number of elements in the source array
    @return number of elements stored in destination array
static_always_inline u32
clib_compress_u16 (u16 *dst, u16 *src, u64 *mask, u32 n_elts)
    /* all 64 mask bits set: plain copy beats compressing */
    if (mask[0] == ~0ULL)
	clib_memcpy_fast (dst, src, 64 * sizeof (u16));
    /* full 64-element chunk */
    dst = clib_compress_u16_x64 (dst, src, mask[0]);
  if (PREDICT_TRUE (n_elts == 0))
  /* partial tail chunk: clamp mask to n_elts bits and use the masked
   * (bounds-safe) variant; pointer difference yields the element count */
  return clib_compress_u16_x64_masked (dst, src,
				       mask[0] & pow2_mask (n_elts)) -
/* Compress one 64-element chunk of u8 values: for every set bit i in mask,
 * copy src[i] to the next dst slot, preserving order. A single 512-bit
 * vector covers all 64 bytes, so no loop is needed on the SIMD path.
 * Returns dst advanced past the last element stored. */
static_always_inline u8 *
clib_compress_u8_x64 (u8 *dst, u8 *src, u64 mask)
#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16)
  u8x64u *sv = (u8x64u *) src;
  u8x64_compress_store (sv[0], mask, dst);
  /* all 64 mask bits apply to the single 64-lane store */
  dst += _popcnt64 (mask);
  /* scalar fallback: walk the set bits of mask one at a time */
  foreach_set_bit_index (i, mask)
/* Same as clib_compress_u8_x64, but safe for a partial chunk: the vector
 * load is masked (unselected lanes zero-filled) so bytes past the valid
 * elements are never read. Returns dst advanced past the last element
 * stored. */
static_always_inline u8 *
clib_compress_u8_x64_masked (u8 *dst, u8 *src, u64 mask)
#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16) && \
  defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
  u8x64u *sv = (u8x64u *) src;
  /* masked load: only lanes selected by mask are read */
  u8x64u s = u8x64_mask_load_zero (sv, mask);
  u8x64_compress_store (s, mask, dst);
  dst += _popcnt64 (mask);
  /* scalar fallback: only set-bit indices are dereferenced */
  foreach_set_bit_index (i, mask)
/** \brief Compress array of 8-bit elements into destination array based on
    @param dst destination array of u8 elements
    @param src source array of u8 elements
    @param mask array of u64 values representing compress mask
    @param n_elts number of elements in the source array
    @return number of elements stored in destination array
static_always_inline u32
clib_compress_u8 (u8 *dst, u8 *src, u64 *mask, u32 n_elts)
    /* all 64 mask bits set: plain 64-byte copy beats compressing */
    if (mask[0] == ~0ULL)
	clib_memcpy_fast (dst, src, 64);
    /* full 64-element chunk */
    dst = clib_compress_u8_x64 (dst, src, mask[0]);
  if (PREDICT_TRUE (n_elts == 0))
  /* partial tail chunk: clamp mask to n_elts bits and use the masked
   * (bounds-safe) variant; pointer difference yields the element count */
  return clib_compress_u8_x64_masked (dst, src, mask[0] & pow2_mask (n_elts)) -