X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvppinfra%2Fstring.h;h=38d3baba9dae20d5765d1539d6dc9069653f207c;hb=993c86f339fca3e382e65fc82627381255aaacec;hp=758a541814d7669d7af5ec25ca38f69572012529;hpb=56f54af21d18f9fdd471b81db77a3942b0aa4d9c;p=vpp.git diff --git a/src/vppinfra/string.h b/src/vppinfra/string.h index 758a541814d..38d3baba9da 100644 --- a/src/vppinfra/string.h +++ b/src/vppinfra/string.h @@ -47,7 +47,9 @@ #include /* for CLIB_LINUX_KERNEL */ #include #include +#ifdef __SSE4_2__ #include +#endif #ifdef CLIB_LINUX_KERNEL #include @@ -77,7 +79,7 @@ clib_memcpy_fast (void *restrict dst, const void *restrict src, size_t n) "behaviour"); #if defined(__COVERITY__) return memcpy (dst, src, n); -#elif defined(__x86_64__) +#elif defined(__SSE4_2__) clib_memcpy_x86_64 (dst, src, n); return dst; #else @@ -85,6 +87,25 @@ clib_memcpy_fast (void *restrict dst, const void *restrict src, size_t n) #endif } +static_always_inline void * +clib_memmove (void *dst, const void *src, size_t n) +{ + u8 *d = (u8 *) dst; + u8 *s = (u8 *) src; + + if (s == d) + return d; + + if (d > s) + for (uword i = n - 1; (i + 1) > 0; i--) + d[i] = s[i]; + else + for (uword i = 0; i < n; i++) + d[i] = s[i]; + + return d; +} + #include /* c-11 string manipulation variants */ @@ -232,14 +253,14 @@ clib_memcpy_le (u8 * dst, u8 * src, u8 len, u8 max_len) d0 = u8x32_load_unaligned (dst); d1 = u8x32_load_unaligned (dst + 32); - d0 = u8x32_blend (d0, s0, u8x32_is_greater (lv, mask)); + d0 = u8x32_blend (d0, s0, lv > mask); u8x32_store_unaligned (d0, dst); if (max_len <= 32) return; mask += add; - d1 = u8x32_blend (d1, s1, u8x32_is_greater (lv, mask)); + d1 = u8x32_blend (d1, s1, lv > mask); u8x32_store_unaligned (d1, dst + 32); #elif defined (CLIB_HAVE_VEC128) @@ -257,25 +278,25 @@ clib_memcpy_le (u8 * dst, u8 * src, u8 len, u8 max_len) d2 = u8x16_load_unaligned (dst + 32); d3 = u8x16_load_unaligned (dst + 48); - d0 = u8x16_blend (d0, s0, u8x16_is_greater (lv, mask)); + d0 = u8x16_blend (d0, s0, lv > mask); u8x16_store_unaligned (d0, dst); if (max_len <= 16) return; mask += add; - d1 = u8x16_blend (d1, s1, u8x16_is_greater (lv, mask)); + d1 = u8x16_blend (d1, s1, lv > mask); u8x16_store_unaligned (d1, dst + 16); if (max_len <= 32) return; mask += add; - d2 = u8x16_blend (d2, s2, u8x16_is_greater (lv, mask)); + d2 = u8x16_blend (d2, s2, lv > mask); u8x16_store_unaligned (d2, dst + 32); mask += add; - d3 = u8x16_blend (d3, s3, u8x16_is_greater (lv, mask)); + d3 = u8x16_blend (d3, s3, lv > mask); u8x16_store_unaligned (d3, dst + 48); #else memmove (dst, src, len); @@ -320,9 +341,17 @@ clib_memset_u64 (void *p, u64 val, uword count) if (count == 0) return; #else +#if defined(CLIB_HAVE_VEC128) + u64x2 v = u64x2_splat (val); +#endif while (count >= 4) { +#if defined(CLIB_HAVE_VEC128) + u64x2_store_unaligned (v, ptr); + u64x2_store_unaligned (v, ptr + 2); +#else ptr[0] = ptr[1] = ptr[2] = ptr[3] = val; +#endif ptr += 4; count -= 4; }