#include <vppinfra/clib.h> /* for CLIB_LINUX_KERNEL */
#include <vppinfra/vector.h>
#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/memcpy_x86_64.h>
#ifdef CLIB_LINUX_KERNEL
#include <linux/string.h>
/* Exchanges source and destination. */
void clib_memswap (void *_a, void *_b, uword bytes);
-/*
- * the vector unit memcpy variants confuse coverity
- * so don't let it anywhere near them.
- */
-#ifndef __COVERITY__
-#if __AVX512BITALG__
-#include <vppinfra/memcpy_avx512.h>
-#define clib_memcpy_fast_arch(a, b, c) clib_memcpy_fast_avx512 (a, b, c)
-#elif __AVX2__
-#include <vppinfra/memcpy_avx2.h>
-#define clib_memcpy_fast_arch(a, b, c) clib_memcpy_fast_avx2 (a, b, c)
-#elif __SSSE3__
-#include <vppinfra/memcpy_sse3.h>
-#define clib_memcpy_fast_arch(a, b, c) clib_memcpy_fast_sse3 (a, b, c)
-#endif /* __AVX512BITALG__ */
-#endif /* __COVERITY__ */
-
-#ifndef clib_memcpy_fast_arch
-#define clib_memcpy_fast_arch(a, b, c) memcpy (a, b, c)
-#endif /* clib_memcpy_fast_arch */
static_always_inline void *
clib_memcpy_fast (void *restrict dst, const void *restrict src, size_t n)
ASSERT (dst && src &&
"memcpy(src, dst, n) with src == NULL or dst == NULL is undefined "
"behaviour");
- return clib_memcpy_fast_arch (dst, src, n);
+#if defined(__COVERITY__)
+ return memcpy (dst, src, n);
+#elif defined(__x86_64__)
+ clib_memcpy_x86_64 (dst, src, n);
+ return dst;
+#else
+ return memcpy (dst, src, n);
+#endif
}
-#undef clib_memcpy_fast_arch
-
#include <vppinfra/memcpy.h>
/* c-11 string manipulation variants */
ptr++[0] = val;
}
-static_always_inline uword /* Length of the leading run of u64 values equal to data[0].
- * data:      array to scan; data[0] is the reference value.
- * max_count: upper bound on the result; the loops below never advance
- *            count past it.
- * Returns max_count when max_count <= 1, 1 when data[1] differs from
- * data[0], otherwise the number of leading elements equal to data[0]. */
-clib_count_equal_u64 (u64 * data, uword max_count)
-{
- uword count;
- u64 first;
-
- if (max_count <= 1)
- return max_count;
- if (data[0] != data[1])
- return 1;
-
- count = 0;
- first = data[0];
-
-#if defined(CLIB_HAVE_VEC256)
- u64x4 splat = u64x4_splat (first);
- while (count + 3 < max_count) /* loop only while 4 whole elements remain */
- {
- u64 bmp;
- bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
- if (bmp != 0xffffffff) /* not all 32 mask bits set: some lane compared unequal */
- {
- count += count_trailing_zeros (~bmp) / 8; /* presumably one mask bit per byte, so / 8 yields the u64 lane index - confirm against u8x32_msb_mask */
- return count;
- }
-
- data += 4;
- count += 4;
- }
-#else
- count += 2; /* data[0] == data[1] was established above */
- data += 2;
- while (count + 3 < max_count &&
- ((data[0] ^ first) | (data[1] ^ first) |
- (data[2] ^ first) | (data[3] ^ first)) == 0) /* OR of XORs is zero only if all four equal first */
- {
- data += 4;
- count += 4;
- }
-#endif
- while (count < max_count && (data[0] == first)) /* finish the remainder one element at a time */
- {
- data += 1;
- count += 1;
- }
- return count;
-}
-
-static_always_inline uword /* Length of the leading run of u32 values equal to data[0].
- * data:      array to scan; data[0] is the reference value.
- * max_count: upper bound on the result; the loops below never advance
- *            count past it.
- * Returns max_count when max_count <= 1, 1 when data[1] differs from
- * data[0], otherwise the number of leading elements equal to data[0]. */
-clib_count_equal_u32 (u32 * data, uword max_count)
-{
- uword count;
- u32 first;
-
- if (max_count <= 1)
- return max_count;
- if (data[0] != data[1])
- return 1;
-
- count = 0;
- first = data[0];
-
-#if defined(CLIB_HAVE_VEC256)
- u32x8 splat = u32x8_splat (first);
- while (count + 7 < max_count) /* loop only while 8 whole elements remain */
- {
- u64 bmp;
- bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
- if (bmp != 0xffffffff) /* not all 32 mask bits set: some lane compared unequal */
- {
- count += count_trailing_zeros (~bmp) / 4; /* presumably one mask bit per byte, so / 4 yields the u32 lane index - confirm against u8x32_msb_mask */
- return count;
- }
-
- data += 8;
- count += 8;
- }
-#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
- u32x4 splat = u32x4_splat (first);
- while (count + 3 < max_count) /* loop only while 4 whole elements remain */
- {
- u64 bmp;
- bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
- if (bmp != 0xffff) /* not all 16 mask bits set: some lane compared unequal */
- {
- count += count_trailing_zeros (~bmp) / 4; /* same byte-to-lane conversion as the 256-bit branch */
- return count;
- }
-
- data += 4;
- count += 4;
- }
-#else
- count += 2; /* data[0] == data[1] was established above */
- data += 2;
- while (count + 3 < max_count &&
- ((data[0] ^ first) | (data[1] ^ first) |
- (data[2] ^ first) | (data[3] ^ first)) == 0) /* OR of XORs is zero only if all four equal first */
- {
- data += 4;
- count += 4;
- }
-#endif
- while (count < max_count && (data[0] == first)) /* finish the remainder one element at a time */
- {
- data += 1;
- count += 1;
- }
- return count;
-}
-
-static_always_inline uword /* Length of the leading run of u16 values equal to data[0].
- * data:      array to scan; data[0] is the reference value.
- * max_count: upper bound on the result; the loops below never advance
- *            count past it.
- * Returns max_count when max_count <= 1, 1 when data[1] differs from
- * data[0], otherwise the number of leading elements equal to data[0]. */
-clib_count_equal_u16 (u16 * data, uword max_count)
-{
- uword count;
- u16 first;
-
- if (max_count <= 1)
- return max_count;
- if (data[0] != data[1])
- return 1;
-
- count = 0;
- first = data[0];
-
-#if defined(CLIB_HAVE_VEC256)
- u16x16 splat = u16x16_splat (first);
- while (count + 15 < max_count) /* loop only while 16 whole elements remain */
- {
- u64 bmp;
- bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
- if (bmp != 0xffffffff) /* not all 32 mask bits set: some lane compared unequal */
- {
- count += count_trailing_zeros (~bmp) / 2; /* presumably one mask bit per byte, so / 2 yields the u16 lane index - confirm against u8x32_msb_mask */
- return count;
- }
-
- data += 16;
- count += 16;
- }
-#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
- u16x8 splat = u16x8_splat (first);
- while (count + 7 < max_count) /* loop only while 8 whole elements remain */
- {
- u64 bmp;
- bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
- if (bmp != 0xffff) /* not all 16 mask bits set: some lane compared unequal */
- {
- count += count_trailing_zeros (~bmp) / 2; /* same byte-to-lane conversion as the 256-bit branch */
- return count;
- }
-
- data += 8;
- count += 8;
- }
-#else
- count += 2; /* data[0] == data[1] was established above */
- data += 2;
- while (count + 3 < max_count &&
- ((data[0] ^ first) | (data[1] ^ first) |
- (data[2] ^ first) | (data[3] ^ first)) == 0) /* OR of XORs is zero only if all four equal first */
- {
- data += 4;
- count += 4;
- }
-#endif
- while (count < max_count && (data[0] == first)) /* finish the remainder one element at a time */
- {
- data += 1;
- count += 1;
- }
- return count;
-}
-
-/*
- * Count how many leading bytes of data[] are equal to data[0].
- *
- * data:      bytes to scan; data[0] is the reference value.
- * max_count: upper bound on the result; the loops below never advance
- *            count past it.
- *
- * Returns max_count when max_count <= 1, 1 when data[1] differs from
- * data[0], otherwise the length of the leading run of bytes equal to
- * data[0].
- */
-static_always_inline uword
-clib_count_equal_u8 (u8 * data, uword max_count)
-{
-  uword count;
-  u8 first;
-
-  if (max_count <= 1)
-    return max_count;
-  if (data[0] != data[1])
-    return 1;
-
-  count = 0;
-  first = data[0];
-
-#if defined(CLIB_HAVE_VEC256)
-  u8x32 splat = u8x32_splat (first);
-  /* loop only while 32 whole bytes remain */
-  while (count + 31 < max_count)
-    {
-      u64 bmp;
-      bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
-      if (bmp != 0xffffffff)
-        {
-          /*
-           * A byte in this chunk differs. As in the 128-bit branch
-           * below, the lowest clear mask bit is taken as the index of
-           * the first differing byte. The run ends there, so report
-           * that position rather than max_count.
-           */
-          count += count_trailing_zeros (~bmp);
-          return count;
-        }
-
-      data += 32;
-      count += 32;
-    }
-#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
-  u8x16 splat = u8x16_splat (first);
-  /* loop only while 16 whole bytes remain */
-  while (count + 15 < max_count)
-    {
-      u64 bmp;
-      bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
-      if (bmp != 0xffff)
-        {
-          count += count_trailing_zeros (~bmp);
-          return count;
-        }
-
-      data += 16;
-      count += 16;
-    }
-#else
-  /* data[0] == data[1] was established above */
-  count += 2;
-  data += 2;
-  /* OR of XORs is zero only if all four bytes equal first */
-  while (count + 3 < max_count &&
-         ((data[0] ^ first) | (data[1] ^ first) |
-          (data[2] ^ first) | (data[3] ^ first)) == 0)
-    {
-      data += 4;
-      count += 4;
-    }
-#endif
-  /* finish the remainder one byte at a time */
-  while (count < max_count && (data[0] == first))
-    {
-      data += 1;
-      count += 1;
-    }
-  return count;
-}
/*
* This macro is to provide smooth mapping from memcmp to memcmp_s.