X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvppinfra%2Fstring.h;h=38d3baba9dae20d5765d1539d6dc9069653f207c;hb=993c86f339fca3e382e65fc82627381255aaacec;hp=758a541814d7669d7af5ec25ca38f69572012529;hpb=56f54af21d18f9fdd471b81db77a3942b0aa4d9c;p=vpp.git

diff --git a/src/vppinfra/string.h b/src/vppinfra/string.h
index 758a541814d..38d3baba9da 100644
--- a/src/vppinfra/string.h
+++ b/src/vppinfra/string.h
@@ -47,7 +47,9 @@
 #include <vppinfra/clib.h>	/* for CLIB_LINUX_KERNEL */
 #include <vppinfra/vector.h>
 #include <vppinfra/error_bootstrap.h>
+#ifdef __SSE4_2__
 #include <vppinfra/memcpy_x86_64.h>
+#endif
 
 #ifdef CLIB_LINUX_KERNEL
 #include <linux/string.h>
@@ -77,7 +79,7 @@ clib_memcpy_fast (void *restrict dst, const void *restrict src, size_t n)
 	  "behaviour");
 #if defined(__COVERITY__)
   return memcpy (dst, src, n);
-#elif defined(__x86_64__)
+#elif defined(__SSE4_2__)
   clib_memcpy_x86_64 (dst, src, n);
   return dst;
 #else
@@ -85,6 +87,25 @@ clib_memcpy_fast (void *restrict dst, const void *restrict src, size_t n)
 #endif
 }
 
+/* Copy n bytes from src to dst; the two regions may overlap. Returns dst. */
+static_always_inline void *
+clib_memmove (void *dst, const void *src, size_t n)
+{
+  u8 *to = (u8 *) dst;
+  const u8 *from = (const u8 *) src;
+
+  if (to > from)
+    /* dst above src: go backwards so overlapped src bytes are read first */
+    while (n--)
+      to[n] = from[n];
+  else if (to < from)
+    /* dst below src: a forward walk never clobbers unread src bytes */
+    for (uword i = 0; i < n; i++)
+      to[i] = from[i];
+
+  return dst;
+}
+
 #include <vppinfra/memcpy.h>
 
 /* c-11 string manipulation variants */
@@ -232,14 +253,14 @@ clib_memcpy_le (u8 * dst, u8 * src, u8 len, u8 max_len)
   d0 = u8x32_load_unaligned (dst);
   d1 = u8x32_load_unaligned (dst + 32);
 
-  d0 = u8x32_blend (d0, s0, u8x32_is_greater (lv, mask));
+  d0 = u8x32_blend (d0, s0, lv > mask);
   u8x32_store_unaligned (d0, dst);
 
   if (max_len <= 32)
     return;
 
   mask += add;
-  d1 = u8x32_blend (d1, s1, u8x32_is_greater (lv, mask));
+  d1 = u8x32_blend (d1, s1, lv > mask);
   u8x32_store_unaligned (d1, dst + 32);
 
 #elif defined (CLIB_HAVE_VEC128)
@@ -257,25 +278,25 @@ clib_memcpy_le (u8 * dst, u8 * src, u8 len, u8 max_len)
   d2 = u8x16_load_unaligned (dst + 32);
   d3 = u8x16_load_unaligned (dst + 48);
 
-  d0 = u8x16_blend (d0, s0, u8x16_is_greater (lv, mask));
+  d0 = u8x16_blend (d0, s0, lv > mask);
   u8x16_store_unaligned (d0, dst);
 
   if (max_len <= 16)
     return;
 
   mask += add;
-  d1 = u8x16_blend (d1, s1, u8x16_is_greater (lv, mask));
+  d1 = u8x16_blend (d1, s1, lv > mask);
   u8x16_store_unaligned (d1, dst + 16);
 
   if (max_len <= 32)
     return;
 
   mask += add;
-  d2 = u8x16_blend (d2, s2, u8x16_is_greater (lv, mask));
+  d2 = u8x16_blend (d2, s2, lv > mask);
   u8x16_store_unaligned (d2, dst + 32);
 
   mask += add;
-  d3 = u8x16_blend (d3, s3, u8x16_is_greater (lv, mask));
+  d3 = u8x16_blend (d3, s3, lv > mask);
   u8x16_store_unaligned (d3, dst + 48);
 #else
   memmove (dst, src, len);
@@ -320,9 +341,17 @@ clib_memset_u64 (void *p, u64 val, uword count)
   if (count == 0)
     return;
 #else
+#if defined(CLIB_HAVE_VEC128)
+  u64x2 v = u64x2_splat (val);
+#endif
   while (count >= 4)
     {
+#if defined(CLIB_HAVE_VEC128)
+      u64x2_store_unaligned (v, ptr);
+      u64x2_store_unaligned (v, ptr + 2);
+#else
       ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
+#endif
       ptr += 4;
       count -= 4;
     }