WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+/** \file
+
+ Optimized string handling code, including C11-compliant
+ "safe C library" variants.
+*/
+
#ifndef included_clib_string_h
#define included_clib_string_h
#elif __SSSE3__
#include <vppinfra/memcpy_sse3.h>
#else
-#define clib_memcpy(a,b,c) memcpy(a,b,c)
+#define clib_memcpy_fast(a,b,c) memcpy(a,b,c)
#endif
#else /* __COVERITY__ */
-#define clib_memcpy(a,b,c) memcpy(a,b,c)
+#define clib_memcpy_fast(a,b,c) memcpy(a,b,c)
+#endif
+
+/* C11 string manipulation variants */
+
+#ifndef EOK
+#define EOK 0
#endif
+#ifndef EINVAL
+#define EINVAL 22
+#endif
+
+typedef int errno_t;
+typedef uword rsize_t;
+
+void clib_c11_violation (const char *s);
+errno_t memcpy_s (void *__restrict__ dest, rsize_t dmax,
+ const void *__restrict__ src, rsize_t n);
+
+/** Copy n bytes from src to dest, with C11-style runtime checks.
+
+ Calls clib_c11_violation() and returns EINVAL if dest or src is
+ NULL, if n exceeds dmax, or if src and dest are equal or the
+ regions overlap. n == 0 is accepted and copies nothing.
+
+ NOTE(review): unlike C11 Annex K memcpy_s, dest is not zeroed on a
+ constraint violation, and the constant-n fast path below performs
+ no checks at all -- presumably intentional; confirm.
+
+ @param dest destination buffer
+ @param dmax size of the destination buffer, in bytes
+ @param src source buffer
+ @param n number of bytes to copy
+ @return EOK on success, EINVAL on any constraint violation
+*/
+always_inline errno_t
+memcpy_s_inline (void *__restrict__ dest, rsize_t dmax,
+ const void *__restrict__ src, rsize_t n)
+{
+ uword low, hi;
+ u8 bad;
+
+ /*
+ * Optimize constant-number-of-bytes calls without asking
+ * "too many questions for someone from New Jersey"
+ */
+ if (__builtin_constant_p (n))
+ {
+ /* compile-time-constant size: straight copy, no validation */
+ clib_memcpy_fast (dest, src, n);
+ return EOK;
+ }
+
+ /*
+ * call bogus if: src or dst NULL, trying to copy
+ * more data than we have space in dst, or src == dst.
+ * n == 0 isn't really "bad", so check first in the
+ * "wall-of-shame" department...
+ */
+ bad = (dest == 0) + (src == 0) + (n > dmax) + (dest == src) + (n == 0);
+ if (PREDICT_FALSE (bad != 0))
+ {
+ /* Not actually trying to copy anything is OK */
+ if (n == 0)
+ return EOK;
+ if (dest == NULL)
+ clib_c11_violation ("dest NULL");
+ if (src == NULL)
+ clib_c11_violation ("src NULL");
+ if (n > dmax)
+ clib_c11_violation ("n > dmax");
+ if (dest == src)
+ clib_c11_violation ("dest == src");
+ return EINVAL;
+ }
+
+ /* Check for src/dst overlap, which is not allowed */
+ low = (uword) (src < dest ? src : dest);
+ hi = (uword) (src < dest ? dest : src);
+
+ /* regions [low, low+n-1] and [hi, ...] intersect iff low+n-1 >= hi */
+ if (PREDICT_FALSE (low + (n - 1) >= hi))
+ {
+ clib_c11_violation ("src/dest overlap");
+ return EINVAL;
+ }
+
+ clib_memcpy_fast (dest, src, n);
+ return EOK;
+}
+
+/*
+ * Note: $$$ This macro is a crutch. Folks need to manually
+ * inspect every extant clib_memcpy(...) call and
+ * attempt to provide a real destination buffer size
+ * argument...
+ */
+#define clib_memcpy(d,s,n) memcpy_s_inline(d,n,s,n)
+
+errno_t memset_s (void *s, rsize_t smax, int c, rsize_t n);
+
+/** Fill the first n bytes of s with byte c, with runtime checks.
+
+ Calls clib_c11_violation() and returns EINVAL if s is NULL or if
+ n exceeds smax; otherwise behaves as memset (s, c, n).
+
+ NOTE(review): C11 Annex K memset_s also fills the buffer on some
+ constraint violations; this variant returns without writing --
+ confirm the deviation is intentional.
+
+ @param s buffer to fill
+ @param smax size of the buffer, in bytes
+ @param c fill byte
+ @param n number of bytes to set
+ @return EOK on success, EINVAL on constraint violation
+*/
+always_inline errno_t
+memset_s_inline (void *s, rsize_t smax, int c, rsize_t n)
+{
+ u8 bad;
+
+ bad = (s == 0) + (n > smax);
+
+ if (PREDICT_FALSE (bad != 0))
+ {
+ if (s == 0)
+ clib_c11_violation ("s NULL");
+ if (n > smax)
+ clib_c11_violation ("n > smax");
+ return (EINVAL);
+ }
+ memset (s, c, n);
+ return (EOK);
+}
+
+/*
+ * This macro is not [so much of] a crutch.
+ * It's super-typical to write:
+ *
+ * ep = pool_get (<pool>);
+ * clib_memset(ep, 0, sizeof (*ep));
+ *
+ * The compiler should delete the not-so useful
+ * (n > smax) test. TBH the NULL pointer check isn't
+ * so useful in this case, but so be it.
+ */
+#define clib_memset(s,c,n) memset_s_inline(s,n,c,n)
/*
* Copy 64 bytes of data to 4 destinations
_mm_storeu_si128 ((__m128i *) (d3 + 3 * 16), r3);
#else
- clib_memcpy (d0, s, 64);
- clib_memcpy (d1, s, 64);
- clib_memcpy (d2, s, 64);
- clib_memcpy (d3, s, 64);
+ clib_memcpy_fast (d0, s, 64);
+ clib_memcpy_fast (d1, s, 64);
+ clib_memcpy_fast (d2, s, 64);
+ clib_memcpy_fast (d3, s, 64);
#endif
}
static_always_inline uword
clib_count_equal_u64 (u64 * data, uword max_count)
{
- uword count = 0;
- u64 first = data[0];
+ uword count;
+ u64 first;
+
+ if (max_count == 1)
+ return 1;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
-#if defined(CLIB_HAVE_VEC512)
- while (u64x8_is_all_equal (u64x8_load_unaligned (data), first))
- {
- data += 8;
- count += 8;
- if (count >= max_count)
- return max_count;
- }
-#endif
#if defined(CLIB_HAVE_VEC256)
- while (u64x4_is_all_equal (u64x4_load_unaligned (data), first))
+ u64x4 splat = u64x4_splat (first);
+ while (1)
{
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ {
+ count += count_trailing_zeros (~bmp) / 8;
+ return clib_min (count, max_count);
+ }
+
data += 4;
count += 4;
+
if (count >= max_count)
return max_count;
}
#endif
-#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
- while (u64x2_is_all_equal (u64x2_load_unaligned (data), first))
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count &&
+ ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
{
- data += 2;
- count += 2;
- if (count >= max_count)
- return max_count;
+ data += 4;
+ count += 4;
}
-#endif
while (count < max_count && (data[0] == first))
{
data += 1;
static_always_inline uword
clib_count_equal_u32 (u32 * data, uword max_count)
{
- uword count = 0;
- u32 first = data[0];
+ uword count;
+ u32 first;
+
+ if (max_count == 1)
+ return 1;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
-#if defined(CLIB_HAVE_VEC512)
- while (u32x16_is_all_equal (u32x16_load_unaligned (data), first))
- {
- data += 16;
- count += 16;
- if (count >= max_count)
- return max_count;
- }
-#endif
#if defined(CLIB_HAVE_VEC256)
- while (u32x8_is_all_equal (u32x8_load_unaligned (data), first))
+ u32x8 splat = u32x8_splat (first);
+ while (1)
{
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ {
+ count += count_trailing_zeros (~bmp) / 4;
+ return clib_min (count, max_count);
+ }
+
data += 8;
count += 8;
+
if (count >= max_count)
return max_count;
}
-#endif
-#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
- while (u32x4_is_all_equal (u32x4_load_unaligned (data), first))
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+ u32x4 splat = u32x4_splat (first);
+ while (1)
{
+ u64 bmp;
+ bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
+ if (bmp != 0xffff)
+ {
+ count += count_trailing_zeros (~bmp) / 4;
+ return clib_min (count, max_count);
+ }
+
data += 4;
count += 4;
+
if (count >= max_count)
return max_count;
}
#endif
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count &&
+ ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
while (count < max_count && (data[0] == first))
{
data += 1;
static_always_inline uword
clib_count_equal_u16 (u16 * data, uword max_count)
{
- uword count = 0;
- u16 first = data[0];
+ uword count;
+ u16 first;
+
+ if (max_count == 1)
+ return 1;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
-#if defined(CLIB_HAVE_VEC512)
- while (count + 32 <= max_count &&
- u16x32_is_all_equal (u16x32_load_unaligned (data), first))
- {
- data += 32;
- count += 32;
- }
-#endif
#if defined(CLIB_HAVE_VEC256)
- while (count + 16 <= max_count &&
- u16x16_is_all_equal (u16x16_load_unaligned (data), first))
+ u16x16 splat = u16x16_splat (first);
+ while (1)
{
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ {
+ count += count_trailing_zeros (~bmp) / 2;
+ return clib_min (count, max_count);
+ }
+
data += 16;
count += 16;
+
+ if (count >= max_count)
+ return max_count;
}
-#endif
-#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
- while (count + 8 <= max_count &&
- u16x8_is_all_equal (u16x8_load_unaligned (data), first))
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+ u16x8 splat = u16x8_splat (first);
+ while (1)
{
+ u64 bmp;
+ bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
+ if (bmp != 0xffff)
+ {
+ count += count_trailing_zeros (~bmp) / 2;
+ return clib_min (count, max_count);
+ }
+
data += 8;
count += 8;
+
+ if (count >= max_count)
+ return max_count;
}
#endif
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count &&
+ ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
while (count < max_count && (data[0] == first))
{
data += 1;
return count;
}
-static_always_inline u32
-clib_count_equal_u8 (u32 * data, uword max_count)
+static_always_inline uword
+clib_count_equal_u8 (u8 * data, uword max_count)
{
- uword count = 0;
- u8 first = data[0];
+ uword count;
+ u8 first;
+
+ if (max_count == 1)
+ return 1;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
-#if defined(CLIB_HAVE_VEC512)
- while (count + 64 <= max_count &&
- u8x64_is_all_equal (u8x64_load_unaligned (data), first))
- {
- data += 64;
- count += 64;
- }
-#endif
#if defined(CLIB_HAVE_VEC256)
- while (count + 32 <= max_count &&
- u8x32_is_all_equal (u8x32_load_unaligned (data), first))
+ u8x32 splat = u8x32_splat (first);
+ while (1)
{
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ {
+ count += count_trailing_zeros (~bmp);
+ return clib_min (count, max_count);
+ }
+
data += 32;
count += 32;
+
+ if (count >= max_count)
+ return max_count;
+ }
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+ u8x16 splat = u8x16_splat (first);
+ while (1)
+ {
+ u64 bmp;
+ bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
+ if (bmp != 0xffff)
+ {
+ count += count_trailing_zeros (~bmp);
+ return clib_min (count, max_count);
+ }
+
+ data += 16;
+ count += 16;
+
+ if (count >= max_count)
+ return max_count;
}
#endif
-#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
- while (count + 16 <= max_count &&
- u8x16_is_all_equal (u8x16_load_unaligned (data), first))
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count &&
+ ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
{
data += 4;
count += 4;
}
-#endif
while (count < max_count && (data[0] == first))
{
data += 1;
return count;
}
-
#endif /* included_clib_string_h */
/*