/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2021 Cisco Systems, Inc.
 */

#ifndef included_vector_count_equal_h
#define included_vector_count_equal_h
#include <vppinfra/clib.h>
9 static_always_inline uword
10 clib_count_equal_u64 (u64 *data, uword max_count)
17 if (data[0] != data[1])
23 #if defined(CLIB_HAVE_VEC256)
24 u64x4 splat = u64x4_splat (first);
25 while (count + 3 < max_count)
28 bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
29 if (bmp != 0xffffffff)
31 count += count_trailing_zeros (~bmp) / 8;
41 while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
42 (data[2] ^ first) | (data[3] ^ first)) == 0)
48 while (count < max_count && (data[0] == first))
56 static_always_inline uword
57 clib_count_equal_u32 (u32 *data, uword max_count)
64 if (data[0] != data[1])
70 #if defined(CLIB_HAVE_VEC512)
71 u32x16 splat = u32x16_splat (first);
72 while (count + 15 < max_count)
75 bmp = u32x16_is_equal_mask (u32x16_load_unaligned (data), splat);
76 if (bmp != pow2_mask (16))
77 return count + count_trailing_zeros (~bmp);
82 if (count == max_count)
86 u32 mask = pow2_mask (max_count - count);
88 u32x16_is_equal_mask (u32x16_mask_load_zero (data, mask), splat) &
90 return count + count_trailing_zeros (~bmp);
92 #elif defined(CLIB_HAVE_VEC256)
93 u32x8 splat = u32x8_splat (first);
94 while (count + 7 < max_count)
98 bmp = u32x8_is_equal_mask (u32x8_load_unaligned (data), splat);
99 if (bmp != pow2_mask (8))
100 return count + count_trailing_zeros (~bmp);
102 bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
103 if (bmp != 0xffffffff)
104 return count + count_trailing_zeros (~bmp) / 4;
110 if (count == max_count)
112 #if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
115 u32 mask = pow2_mask (max_count - count);
117 u32x8_is_equal_mask (u32x8_mask_load_zero (data, mask), splat) & mask;
118 return count + count_trailing_zeros (~bmp);
121 #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
122 u32x4 splat = u32x4_splat (first);
123 while (count + 3 < max_count)
126 bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
127 if (bmp != pow2_mask (4 * 4))
129 count += count_trailing_zeros (~bmp) / 4;
139 while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
140 (data[2] ^ first) | (data[3] ^ first)) == 0)
146 while (count < max_count && (data[0] == first))
154 static_always_inline uword
155 clib_count_equal_u16 (u16 *data, uword max_count)
162 if (data[0] != data[1])
168 #if defined(CLIB_HAVE_VEC256)
169 u16x16 splat = u16x16_splat (first);
170 while (count + 15 < max_count)
173 bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
174 if (bmp != 0xffffffff)
176 count += count_trailing_zeros (~bmp) / 2;
183 #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
184 u16x8 splat = u16x8_splat (first);
185 while (count + 7 < max_count)
188 bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
191 count += count_trailing_zeros (~bmp) / 2;
201 while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
202 (data[2] ^ first) | (data[3] ^ first)) == 0)
208 while (count < max_count && (data[0] == first))
216 static_always_inline uword
217 clib_count_equal_u8 (u8 *data, uword max_count)
224 if (data[0] != data[1])
230 #if defined(CLIB_HAVE_VEC512)
231 u8x64 splat = u8x64_splat (first);
232 while (count + 63 < max_count)
235 bmp = u8x64_is_equal_mask (u8x64_load_unaligned (data), splat);
237 return count + count_trailing_zeros (~bmp);
242 if (count == max_count)
244 #if defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
247 u64 mask = pow2_mask (max_count - count);
249 u8x64_is_equal_mask (u8x64_mask_load_zero (data, mask), splat) & mask;
250 return count + count_trailing_zeros (~bmp);
253 #elif defined(CLIB_HAVE_VEC256)
254 u8x32 splat = u8x32_splat (first);
255 while (count + 31 < max_count)
258 bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
259 if (bmp != 0xffffffff)
260 return count + count_trailing_zeros (~bmp);
265 if (count == max_count)
267 #if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
270 u32 mask = pow2_mask (max_count - count);
272 u8x32_msb_mask (u8x32_mask_load_zero (data, mask) == splat) & mask;
273 return count + count_trailing_zeros (~bmp);
276 #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
277 u8x16 splat = u8x16_splat (first);
278 while (count + 15 < max_count)
281 bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
283 return count + count_trailing_zeros (~bmp);
291 while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
292 (data[2] ^ first) | (data[3] ^ first)) == 0)
298 while (count < max_count && (data[0] == first))