1 /* SPDX-License-Identifier: Apache-2.0
2 * Copyright(c) 2021 Cisco Systems, Inc.
5 #ifndef included_vector_count_equal_h
6 #define included_vector_count_equal_h
7 #include <vppinfra/clib.h>
/**
 * clib_count_equal_u64 - count how many consecutive u64 elements at the
 * start of @data compare equal to data[0], scanning at most @max_count
 * elements, and return that run length.
 *
 * NOTE(review): this chunk is a fragmentary numbered paste; several source
 * lines (declarations, braces, loop bodies) are elided between the visible
 * statements below.
 */
9 static_always_inline uword
10 clib_count_equal_u64 (u64 *data, uword max_count)
/* Early out: if the second element already differs, the run length is 1. */
17 if (data[0] != data[1])
/* AVX2 path: compare 4 u64 lanes (32 bytes) per iteration. */
23 #if defined(CLIB_HAVE_VEC256)
24 u64x4 splat = u64x4_splat (first);
25 while (count + 3 < max_count)
/* Per-byte MSB mask: 0xffffffff means all 32 bytes (all 4 lanes) matched. */
28 bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
29 if (bmp != 0xffffffff)
/* Index of first mismatching byte / 8 = number of fully-matching u64 lanes. */
31 count += count_trailing_zeros (~bmp) / 8;
/* Scalar path, unrolled by 4: XOR against first is zero iff all 4 match. */
41 while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
42 (data[2] ^ first) | (data[3] ^ first)) == 0)
/* Tail loop: handle the final (< 4) elements one at a time. */
48 while (count < max_count && (data[0] == first))
/**
 * clib_count_equal_u32 - count how many consecutive u32 elements at the
 * start of @data compare equal to data[0], scanning at most @max_count
 * elements, and return that run length.
 *
 * Fix: the AVX2 masked-load tail was guarded by the misspelled macro
 * CxLIB_HAVE_VEC256_MASK_LOAD_STORE, which is never defined, so that
 * optimized tail path was dead code.  Renamed to
 * CLIB_HAVE_VEC256_MASK_LOAD_STORE, matching the correctly spelled guard
 * used by the u8 variant and the VEC512 MASK_LOAD_STORE guard above.
 *
 * NOTE(review): this chunk is a fragmentary numbered paste; several source
 * lines (declarations, braces, #else/#endif pairs) are elided between the
 * visible statements below.
 */
56 static_always_inline uword
57 clib_count_equal_u32 (u32 *data, uword max_count)
/* Early out: if the second element already differs, the run length is 1. */
64 if (data[0] != data[1])
/* AVX-512 path: compare 16 u32 lanes per iteration via a lane mask. */
70 #if defined(CLIB_HAVE_VEC512)
71 u32x16 splat = u32x16_splat (first);
72 while (count + 15 < max_count)
75 bmp = u32x16_is_equal_mask (u32x16_load_unaligned (data), splat);
/* All 16 lanes equal -> all 16 mask bits set; otherwise locate first miss. */
76 if (bmp != pow2_mask (16))
77 return count + count_trailing_zeros (~bmp);
82 if (count == max_count)
/* Masked tail: load only the remaining elements, zero-filling the rest. */
86 u32 mask = pow2_mask (max_count - count);
88 u32x16_is_equal_mask (u32x16_mask_load_zero (data, mask), splat);
89 return count + count_trailing_zeros (~bmp);
/* AVX2 path: compare 8 u32 lanes per iteration. */
91 #elif defined(CLIB_HAVE_VEC256)
92 u32x8 splat = u32x8_splat (first);
93 while (count + 7 < max_count)
97 bmp = u32x8_is_equal_mask (u32x8_load_unaligned (data), splat);
98 if (bmp != pow2_mask (8))
99 return count + count_trailing_zeros (~bmp);
/* Byte-granular fallback: per-byte MSB mask, 4 bytes per u32 lane. */
101 bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
102 if (bmp != 0xffffffff)
103 return count + count_trailing_zeros (~bmp) / 4;
109 if (count == max_count)
/* Was CxLIB_... (typo): guard could never be defined, disabling this tail. */
111 #if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
114 u32 mask = pow2_mask (max_count - count);
115 u32 bmp = u32x8_is_equal_mask (u32x8_mask_load_zero (data, mask), splat);
116 return count + count_trailing_zeros (~bmp);
/* SSE/NEON path: 4 u32 lanes, byte-granular movemask. */
119 #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
120 u32x4 splat = u32x4_splat (first);
121 while (count + 3 < max_count)
124 bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
125 if (bmp != pow2_mask (4 * 4))
/* First mismatching byte index / 4 = number of fully-matching u32 lanes. */
127 count += count_trailing_zeros (~bmp) / 4;
/* Scalar path, unrolled by 4: XOR against first is zero iff all 4 match. */
137 while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
138 (data[2] ^ first) | (data[3] ^ first)) == 0)
/* Tail loop: handle the final (< 4) elements one at a time. */
144 while (count < max_count && (data[0] == first))
/**
 * clib_count_equal_u16 - count how many consecutive u16 elements at the
 * start of @data compare equal to data[0], scanning at most @max_count
 * elements, and return that run length.
 *
 * NOTE(review): this chunk is a fragmentary numbered paste; several source
 * lines (declarations, braces, loop bodies) are elided between the visible
 * statements below.
 */
152 static_always_inline uword
153 clib_count_equal_u16 (u16 *data, uword max_count)
/* Early out: if the second element already differs, the run length is 1. */
160 if (data[0] != data[1])
/* AVX2 path: compare 16 u16 lanes (32 bytes) per iteration. */
166 #if defined(CLIB_HAVE_VEC256)
167 u16x16 splat = u16x16_splat (first);
168 while (count + 15 < max_count)
/* Per-byte MSB mask: 0xffffffff means all 32 bytes matched. */
171 bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
172 if (bmp != 0xffffffff)
/* First mismatching byte index / 2 = number of fully-matching u16 lanes. */
174 count += count_trailing_zeros (~bmp) / 2;
/* SSE/NEON path: 8 u16 lanes, byte-granular movemask. */
181 #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
182 u16x8 splat = u16x8_splat (first);
183 while (count + 7 < max_count)
186 bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
189 count += count_trailing_zeros (~bmp) / 2;
/* Scalar path, unrolled by 4: XOR against first is zero iff all 4 match. */
199 while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
200 (data[2] ^ first) | (data[3] ^ first)) == 0)
/* Tail loop: handle the final (< 4) elements one at a time. */
206 while (count < max_count && (data[0] == first))
/**
 * clib_count_equal_u8 - count how many consecutive u8 elements at the
 * start of @data compare equal to data[0], scanning at most @max_count
 * elements, and return that run length.
 *
 * NOTE(review): this chunk is a fragmentary numbered paste and the
 * definition continues past the last visible line; several source lines
 * are elided between the visible statements below.
 */
214 static_always_inline uword
215 clib_count_equal_u8 (u8 *data, uword max_count)
/* Early out: if the second element already differs, the run length is 1. */
222 if (data[0] != data[1])
/* AVX-512 path: compare 64 byte lanes per iteration via a lane mask. */
228 #if defined(CLIB_HAVE_VEC512)
229 u8x64 splat = u8x64_splat (first);
230 while (count + 63 < max_count)
233 bmp = u8x64_is_equal_mask (u8x64_load_unaligned (data), splat);
235 return count + count_trailing_zeros (~bmp);
240 if (count == max_count)
/* Masked tail: load only the remaining bytes, zero-filling the rest. */
242 #if defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
245 u64 mask = pow2_mask (max_count - count);
246 u64 bmp = u8x64_is_equal_mask (u8x64_mask_load_zero (data, mask), splat);
247 return count + count_trailing_zeros (~bmp);
/* AVX2 path: compare 32 byte lanes per iteration. */
250 #elif defined(CLIB_HAVE_VEC256)
251 u8x32 splat = u8x32_splat (first);
252 while (count + 31 < max_count)
255 bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
256 if (bmp != 0xffffffff)
257 return count + count_trailing_zeros (~bmp);
262 if (count == max_count)
264 #if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
267 u32 mask = pow2_mask (max_count - count);
/* NOTE(review): lanes beyond @mask are zero-filled by mask_load_zero and
 * would compare equal when first == 0; clamping of the returned count to
 * max_count is presumably handled in the elided lines — verify against
 * the full source. */
268 u64 bmp = u8x32_msb_mask (u8x32_mask_load_zero (data, mask) == splat);
269 return count + count_trailing_zeros (~bmp);
/* SSE/NEON path: 16 byte lanes per iteration. */
272 #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
273 u8x16 splat = u8x16_splat (first);
274 while (count + 15 < max_count)
277 bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
279 return count + count_trailing_zeros (~bmp);
/* Scalar path, unrolled by 4: XOR against first is zero iff all 4 match. */
287 while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
288 (data[2] ^ first) | (data[3] ^ first)) == 0)
/* Tail loop: handle the final (< 4) elements one at a time. */
294 while (count < max_count && (data[0] == first))