X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;ds=sidebyside;f=src%2Fvppinfra%2Fstring.h;h=4f96450ce9eb6d7870235e55b90be0e2e8dabc10;hb=7aa76d4319436fe57995b15cfb6e9beb84017160;hp=4755a9868d652cd80e9450fb0f1ad079b7957445;hpb=c59b9a26ed9a6bc083db2868b6993add6fd2ba5b;p=vpp.git diff --git a/src/vppinfra/string.h b/src/vppinfra/string.h index 4755a9868d6..4f96450ce9e 100644 --- a/src/vppinfra/string.h +++ b/src/vppinfra/string.h @@ -71,7 +71,7 @@ void clib_memswap (void *_a, void *_b, uword bytes); * so don't let it anywhere near them. */ #ifndef __COVERITY__ -#if __AVX512F__ +#if __AVX512BITALG__ #include #elif __AVX2__ #include @@ -216,64 +216,66 @@ memset_s_inline (void *s, rsize_t smax, int c, rsize_t n) static_always_inline void clib_memcpy_le (u8 * dst, u8 * src, u8 len, u8 max_len) { -#if defined (CLIB_HxAVE_VEC256) - u8x32 s, d; +#if defined (CLIB_HAVE_VEC256) + u8x32 s0, s1, d0, d1; u8x32 mask = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 }; u8x32 lv = u8x32_splat (len); u8x32 add = u8x32_splat (32); - s = u8x32_load_unaligned (src); - d = u8x32_load_unaligned (dst); - d = u8x32_blend (d, s, u8x32_is_greater (lv, mask)); - u8x32_store_unaligned (d, dst); + s0 = u8x32_load_unaligned (src); + s1 = u8x32_load_unaligned (src + 32); + d0 = u8x32_load_unaligned (dst); + d1 = u8x32_load_unaligned (dst + 32); + + d0 = u8x32_blend (d0, s0, u8x32_is_greater (lv, mask)); + u8x32_store_unaligned (d0, dst); if (max_len <= 32) return; mask += add; - s = u8x32_load_unaligned (src + 32); - d = u8x32_load_unaligned (dst + 32); - d = u8x32_blend (d, s, u8x32_is_greater (lv, mask)); - u8x32_store_unaligned (d, dst + 32); + d1 = u8x32_blend (d1, s1, u8x32_is_greater (lv, mask)); + u8x32_store_unaligned (d1, dst + 32); -#elif defined (CLIB_HAVE_VEC128) && !defined (__aarch64__) - u8x16 s, d; +#elif defined (CLIB_HAVE_VEC128) + u8x16 s0, s1, s2, s3, d0, d1, d2, d3; u8x16 mask = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; u8x16 lv = u8x16_splat (len); u8x16 add = u8x16_splat (16); - s = u8x16_load_unaligned (src); - d = u8x16_load_unaligned (dst); - d = u8x16_blend (d, s, u8x16_is_greater (lv, mask)); - u8x16_store_unaligned (d, dst); + s0 = u8x16_load_unaligned (src); + s1 = u8x16_load_unaligned (src + 16); + s2 = u8x16_load_unaligned (src + 32); + s3 = u8x16_load_unaligned (src + 48); + d0 = u8x16_load_unaligned (dst); + d1 = u8x16_load_unaligned (dst + 16); + d2 = u8x16_load_unaligned (dst + 32); + d3 = u8x16_load_unaligned (dst + 48); + + d0 = u8x16_blend (d0, s0, u8x16_is_greater (lv, mask)); + u8x16_store_unaligned (d0, dst); if (max_len <= 16) return; mask += add; - s = u8x16_load_unaligned (src + 16); - d = u8x16_load_unaligned (dst + 16); - d = u8x16_blend (d, s, u8x16_is_greater (lv, mask)); - u8x16_store_unaligned (d, dst + 16); + d1 = u8x16_blend (d1, s1, u8x16_is_greater (lv, mask)); + u8x16_store_unaligned (d1, dst + 16); if (max_len <= 32) return; mask += add; - s = u8x16_load_unaligned (src + 32); - d = u8x16_load_unaligned (dst + 32); - d = u8x16_blend (d, s, u8x16_is_greater (lv, mask)); - u8x16_store_unaligned (d, dst + 32); + d2 = u8x16_blend (d2, s2, u8x16_is_greater (lv, mask)); + u8x16_store_unaligned (d2, dst + 32); mask += add; - s = u8x16_load_unaligned (src + 48); - d = u8x16_load_unaligned (dst + 48); - d = u8x16_blend (d, s, u8x16_is_greater (lv, mask)); - u8x16_store_unaligned (d, dst + 48); + d3 = u8x16_blend (d3, s3, u8x16_is_greater (lv, mask)); + u8x16_store_unaligned (d3, dst + 48); #else - clib_memcpy_fast (dst, src, len); + memmove (dst, src, len); #endif } @@ -470,8 +472,8 @@ clib_count_equal_u64 (u64 * data, uword max_count) uword count; u64 first; - if (max_count == 1) - return 1; + if (max_count <= 1) + return max_count; if (data[0] != data[1]) return 1; @@ -480,23 +482,20 @@ clib_count_equal_u64 (u64 * data, uword max_count) #if defined(CLIB_HAVE_VEC256) u64x4 splat = u64x4_splat (first); - while (1) + while (count + 3 < max_count) { u64 bmp; bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat)); if (bmp != 0xffffffff) { count += count_trailing_zeros (~bmp) / 8; - return clib_min (count, max_count); + return count; } data += 4; count += 4; - - if (count >= max_count) - return max_count; } -#endif +#else count += 2; data += 2; while (count + 3 < max_count && @@ -506,6 +505,7 @@ clib_count_equal_u64 (u64 * data, uword max_count) data += 4; count += 4; } +#endif while (count < max_count && (data[0] == first)) { data += 1; @@ -520,8 +520,8 @@ clib_count_equal_u32 (u32 * data, uword max_count) uword count; u32 first; - if (max_count == 1) - return 1; + if (max_count <= 1) + return max_count; if (data[0] != data[1]) return 1; @@ -530,41 +530,35 @@ clib_count_equal_u32 (u32 * data, uword max_count) #if defined(CLIB_HAVE_VEC256) u32x8 splat = u32x8_splat (first); - while (1) + while (count + 7 < max_count) { u64 bmp; bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat)); if (bmp != 0xffffffff) { count += count_trailing_zeros (~bmp) / 4; - return clib_min (count, max_count); + return count; } data += 8; count += 8; - - if (count >= max_count) - return max_count; } #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK) u32x4 splat = u32x4_splat (first); - while (1) + while (count + 3 < max_count) { u64 bmp; bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat)); if (bmp != 0xffff) { count += count_trailing_zeros (~bmp) / 4; - return clib_min (count, max_count); + return count; } data += 4; count += 4; - - if (count >= max_count) - return max_count; } -#endif +#else count += 2; data += 2; while (count + 3 < max_count && @@ -574,6 +568,7 @@ clib_count_equal_u32 (u32 * data, uword max_count) data += 4; count += 4; } +#endif while (count < max_count && (data[0] == first)) { data += 1; @@ -588,8 +583,8 @@ clib_count_equal_u16 (u16 * data, uword max_count) uword count; u16 first; - if (max_count == 1) - return 1; + if (max_count <= 1) + return max_count; if (data[0] != data[1]) return 1; @@ -598,41 +593,35 @@ clib_count_equal_u16 (u16 * data, uword max_count) #if defined(CLIB_HAVE_VEC256) u16x16 splat = u16x16_splat (first); - while (1) + while (count + 15 < max_count) { u64 bmp; bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat)); if (bmp != 0xffffffff) { count += count_trailing_zeros (~bmp) / 2; - return clib_min (count, max_count); + return count; } data += 16; count += 16; - - if (count >= max_count) - return max_count; } #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK) u16x8 splat = u16x8_splat (first); - while (1) + while (count + 7 < max_count) { u64 bmp; bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat)); if (bmp != 0xffff) { count += count_trailing_zeros (~bmp) / 2; - return clib_min (count, max_count); + return count; } data += 8; count += 8; - - if (count >= max_count) - return max_count; } -#endif +#else count += 2; data += 2; while (count + 3 < max_count && @@ -642,6 +631,7 @@ clib_count_equal_u16 (u16 * data, uword max_count) data += 4; count += 4; } +#endif while (count < max_count && (data[0] == first)) { data += 1; @@ -656,8 +646,8 @@ clib_count_equal_u8 (u8 * data, uword max_count) uword count; u8 first; - if (max_count == 1) - return 1; + if (max_count <= 1) + return max_count; if (data[0] != data[1]) return 1; @@ -666,41 +656,35 @@ clib_count_equal_u8 (u8 * data, uword max_count) #if defined(CLIB_HAVE_VEC256) u8x32 splat = u8x32_splat (first); - while (1) + while (count + 31 < max_count) { u64 bmp; bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat)); if (bmp != 0xffffffff) { count += count_trailing_zeros (~bmp); - return clib_min (count, max_count); + return max_count; } data += 32; count += 32; - - if (count >= max_count) - return max_count; } #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK) u8x16 splat = u8x16_splat (first); - while (1) + while (count + 15 < max_count) { u64 bmp; bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat)); if (bmp != 0xffff) { count += count_trailing_zeros (~bmp); - return clib_min (count, max_count); + return count; } data += 16; count += 16; - - if (count >= max_count) - return max_count; } -#endif +#else count += 2; data += 2; while (count + 3 < max_count && @@ -710,6 +694,7 @@ clib_count_equal_u8 (u8 * data, uword max_count) data += 4; count += 4; } +#endif while (count < max_count && (data[0] == first)) { data += 1;