/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
  Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus

  Permission is hereby granted, free of charge, to any person obtaining
  a copy of this software and associated documentation files (the
  "Software"), to deal in the Software without restriction, including
  without limitation the rights to use, copy, modify, merge, publish,
  distribute, sublicense, and/or sell copies of the Software, and to
  permit persons to whom the Software is furnished to do so, subject to
  the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/** \file

    Optimized string handling code, including c11-compliant
    "safe C library" variants.
*/
#ifndef included_clib_string_h
#define included_clib_string_h

#include <vppinfra/clib.h>	/* for CLIB_LINUX_KERNEL */
#include <vppinfra/vector.h>
#ifdef CLIB_LINUX_KERNEL
#include <linux/string.h>
#endif

#ifdef CLIB_UNIX
#include <string.h>		/* memcpy(), memset() */
#endif

#ifdef CLIB_STANDALONE
#include <vppinfra/standalone_string.h>
#endif

#ifdef __x86_64__
#include <x86intrin.h>
#endif
/* Exchanges source and destination. */
void clib_memswap (void *_a, void *_b, uword bytes);
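
/*
 * Illustrative sketch (not part of the original header): clib_memswap
 * exchanges the full contents of two equal-sized buffers, e.g. two
 * hypothetical 6-byte MAC addresses:
 *
 *    u8 src_mac[6], dst_mac[6];
 *    clib_memswap (src_mac, dst_mac, sizeof (src_mac));
 */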
/*
 * the vector unit memcpy variants confuse coverity
 * so don't let it anywhere near them.
 */
#ifndef __COVERITY__
#if __AVX512F__
#include <vppinfra/memcpy_avx512.h>
#elif __AVX2__
#include <vppinfra/memcpy_avx2.h>
#elif __SSSE3__
#include <vppinfra/memcpy_sse3.h>
#else
#define _clib_memcpy(a,b,c) memcpy(a,b,c)
#endif
#else /* __COVERITY__ */
#define _clib_memcpy(a,b,c) memcpy(a,b,c)
#endif /* __COVERITY__ */
/* c-11 string manipulation variants */

#ifndef EOK
#define EOK 0
#endif
#ifndef EINVAL
#define EINVAL 22
#endif

typedef int errno_t;
typedef uword rsize_t;

void clib_c11_violation (const char *s);
errno_t memcpy_s (void *__restrict__ dest, rsize_t dmax,
		  const void *__restrict__ src, rsize_t n);
always_inline errno_t
memcpy_s_inline (void *__restrict__ dest, rsize_t dmax,
		 const void *__restrict__ src, rsize_t n)
{
  uword low, hi;
  u8 bad;

  /*
   * call bogus if: src or dst NULL, trying to copy
   * more data than we have space in dst, or src == dst.
   * n == 0 isn't really "bad", so check first in the
   * "wall-of-shame" department...
   */
  bad = (dest == 0) + (src == 0) + (n > dmax) + (dest == src) + (n == 0);
  if (PREDICT_FALSE (bad != 0))
    {
      /* Not actually trying to copy anything is OK */
      if (n == 0)
	return EOK;
      if (dest == 0)
	clib_c11_violation ("dest NULL");
      if (src == 0)
	clib_c11_violation ("src NULL");
      if (n > dmax)
	clib_c11_violation ("n > dmax");
      if (dest == src)
	clib_c11_violation ("dest == src");
      return EINVAL;
    }

  /* Check for src/dst overlap, which is not allowed */
  low = (uword) (src < dest ? src : dest);
  hi = (uword) (src < dest ? dest : src);

  if (PREDICT_FALSE (low + (n - 1) >= hi))
    {
      clib_c11_violation ("src/dest overlap");
      return EINVAL;
    }

  _clib_memcpy (dest, src, n);
  return EOK;
}
/*
 * Note: $$$ This macro is a crutch. Folks need to manually
 * inspect every extant clib_memcpy(...) call and
 * attempt to provide a real destination buffer size
 * argument...
 */
#define clib_memcpy(d,s,n) memcpy_s_inline(d,n,s,n)
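
/*
 * Illustrative sketch (not from the original header): callers that do
 * know the real destination size should pass it as dmax, so overruns
 * are caught. E.g., with a hypothetical fixed-size buffer:
 *
 *    u8 dst[64];
 *    if (memcpy_s_inline (dst, sizeof (dst), src, n) != EOK)
 *      return -1;	// n > 64, NULL pointer, or src/dst overlap
 *
 * The clib_memcpy() crutch above passes n as both dmax and n, so its
 * (n > dmax) check can never fire.
 */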
errno_t memset_s (void *s, rsize_t smax, int c, rsize_t n);

always_inline errno_t
memset_s_inline (void *s, rsize_t smax, int c, rsize_t n)
{
  u8 bad;

  bad = (s == 0) + (n > smax);

  if (PREDICT_FALSE (bad != 0))
    {
      if (s == 0)
	clib_c11_violation ("s NULL");
      if (n > smax)
	clib_c11_violation ("n > smax");
      return EINVAL;
    }

  memset (s, c, n);
  return EOK;
}
/*
 * This macro is not [so much of] a crutch.
 * It's super-typical to write:
 *
 *   ep = pool_get (<pool>);
 *   clib_memset (ep, 0, sizeof (*ep));
 *
 * The compiler should delete the not-so-useful
 * (n > smax) test. TBH the NULL pointer check isn't
 * so useful in this case, but so be it.
 */
#define clib_memset(s,c,n) memset_s_inline(s,n,c,n)
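
/*
 * Illustrative sketch (hypothetical pool/element names): when n is a
 * compile-time constant, the macro expansion makes the size check
 * constant-foldable:
 *
 *    my_elt_t *ep;
 *    pool_get (my_pool, ep);
 *    clib_memset (ep, 0, sizeof (*ep));
 *    // expands to memset_s_inline (ep, sizeof (*ep), 0, sizeof (*ep)),
 *    // so (n > smax) is sizeof (*ep) > sizeof (*ep): always false.
 */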
/*
 * Copy 64 bytes of data to 4 destinations.
 * This function is typically used in the quad-loop case, when a whole
 * cacheline needs to be copied to 4 different places. First it reads the
 * whole cacheline into 1/2/4 SIMD registers, then it writes the data to
 * all 4 destinations.
 */

static_always_inline void
clib_memcpy64_x4 (void *d0, void *d1, void *d2, void *d3, void *s)
{
#if defined (__AVX512F__)
  __m512i r0 = _mm512_loadu_si512 (s);

  _mm512_storeu_si512 (d0, r0);
  _mm512_storeu_si512 (d1, r0);
  _mm512_storeu_si512 (d2, r0);
  _mm512_storeu_si512 (d3, r0);

#elif defined (__AVX2__)
  __m256i r0 = _mm256_loadu_si256 ((__m256i *) (s + 0 * 32));
  __m256i r1 = _mm256_loadu_si256 ((__m256i *) (s + 1 * 32));

  _mm256_storeu_si256 ((__m256i *) (d0 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d0 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d1 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d1 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d2 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d2 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d3 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d3 + 1 * 32), r1);

#elif defined (__SSSE3__)
  __m128i r0 = _mm_loadu_si128 ((__m128i *) (s + 0 * 16));
  __m128i r1 = _mm_loadu_si128 ((__m128i *) (s + 1 * 16));
  __m128i r2 = _mm_loadu_si128 ((__m128i *) (s + 2 * 16));
  __m128i r3 = _mm_loadu_si128 ((__m128i *) (s + 3 * 16));

  _mm_storeu_si128 ((__m128i *) (d0 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d0 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d0 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d0 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d1 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d1 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d1 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d1 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d2 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d2 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d2 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d2 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d3 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d3 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d3 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d3 + 3 * 16), r3);

#else
  clib_memcpy (d0, s, 64);
  clib_memcpy (d1, s, 64);
  clib_memcpy (d2, s, 64);
  clib_memcpy (d3, s, 64);
#endif
}
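
/*
 * Illustrative quad-loop sketch (hypothetical buffer and rewrite
 * variables, in the style of a VPP node's packet loop): broadcast one
 * cacheline of rewrite data in front of 4 buffers at once:
 *
 *    clib_memcpy64_x4 (vlib_buffer_get_current (b0),
 *                      vlib_buffer_get_current (b1),
 *                      vlib_buffer_get_current (b2),
 *                      vlib_buffer_get_current (b3), rewrite);
 */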
/* Fill memory with a repeated u64 value; count is in elements, not bytes. */
static_always_inline void
clib_memset_u64 (void *p, u64 val, uword count)
{
  u64 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u64x8 v512 = u64x8_splat (val);
  for (; count >= 8; ptr += 8, count -= 8)
    u64x8_store_unaligned (v512, ptr);
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u64x4 v256 = u64x4_splat (val);
  for (; count >= 4; ptr += 4, count -= 4)
    u64x4_store_unaligned (v256, ptr);
  if (count == 0)
    return;
#else
  for (; count >= 4; ptr += 4, count -= 4)
    ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
#endif
  while (count--)
    ptr++[0] = val;
}
/* Fill memory with a repeated u32 value; count is in elements, not bytes. */
static_always_inline void
clib_memset_u32 (void *p, u32 val, uword count)
{
  u32 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u32x16 v512 = u32x16_splat (val);
  for (; count >= 16; ptr += 16, count -= 16)
    u32x16_store_unaligned (v512, ptr);
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u32x8 v256 = u32x8_splat (val);
  for (; count >= 8; ptr += 8, count -= 8)
    u32x8_store_unaligned (v256, ptr);
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u32x4 v128 = u32x4_splat (val);
  for (; count >= 4; ptr += 4, count -= 4)
    u32x4_store_unaligned (v128, ptr);
#else
  for (; count >= 4; ptr += 4, count -= 4)
    ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
#endif
  while (count--)
    ptr++[0] = val;
}
/* Fill memory with a repeated u16 value; count is in elements, not bytes. */
static_always_inline void
clib_memset_u16 (void *p, u16 val, uword count)
{
  u16 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u16x32 v512 = u16x32_splat (val);
  for (; count >= 32; ptr += 32, count -= 32)
    u16x32_store_unaligned (v512, ptr);
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u16x16 v256 = u16x16_splat (val);
  for (; count >= 16; ptr += 16, count -= 16)
    u16x16_store_unaligned (v256, ptr);
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u16x8 v128 = u16x8_splat (val);
  for (; count >= 8; ptr += 8, count -= 8)
    u16x8_store_unaligned (v128, ptr);
#else
  for (; count >= 4; ptr += 4, count -= 4)
    ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
#endif
  while (count--)
    ptr++[0] = val;
}
/* Fill memory with a repeated u8 value; count is in elements (bytes). */
static_always_inline void
clib_memset_u8 (void *p, u8 val, uword count)
{
  u8 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u8x64 v512 = u8x64_splat (val);
  for (; count >= 64; ptr += 64, count -= 64)
    u8x64_store_unaligned (v512, ptr);
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u8x32 v256 = u8x32_splat (val);
  for (; count >= 32; ptr += 32, count -= 32)
    u8x32_store_unaligned (v256, ptr);
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u8x16 v128 = u8x16_splat (val);
  for (; count >= 16; ptr += 16, count -= 16)
    u8x16_store_unaligned (v128, ptr);
#else
  for (; count >= 4; ptr += 4, count -= 4)
    ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
#endif
  while (count--)
    ptr++[0] = val;
}
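
/*
 * Illustrative sketch (hypothetical array): unlike memset(), the typed
 * variants above take an element count rather than a byte count:
 *
 *    u32 table[1024];
 *    clib_memset_u32 (table, ~0, ARRAY_LEN (table));	// 1024 u32s
 */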
/* Count leading elements equal to data[0], capped at max_count. */
static_always_inline uword
clib_count_equal_u64 (u64 * data, uword max_count)
{
  uword count;
  u64 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u64x4 splat = u64x4_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
	{
	  count += count_trailing_zeros (~bmp) / 8;
	  return clib_min (count, max_count);
	}
      data += 4;
      count += 4;
      if (count >= max_count)
	return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
	 ((data[0] ^ first) | (data[1] ^ first) |
	  (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}
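
/*
 * Worked example of the mask arithmetic above (illustrative): suppose
 * lanes 0, 1 and 3 of the u64x4 compare equal but lane 2 differs. The
 * per-byte msb mask is then bmp = 0xff00ffff, so ~bmp = 0x00ff0000 has
 * 16 trailing zeros, and 16 / 8 = 2 more equal u64s are counted from
 * this vector before the mismatch.
 */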
/* Count leading elements equal to data[0], capped at max_count. */
static_always_inline uword
clib_count_equal_u32 (u32 * data, uword max_count)
{
  uword count;
  u32 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u32x8 splat = u32x8_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
	{
	  count += count_trailing_zeros (~bmp) / 4;
	  return clib_min (count, max_count);
	}
      data += 8;
      count += 8;
      if (count >= max_count)
	return max_count;
    }
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
  u32x4 splat = u32x4_splat (first);
  while (1)
    {
      u16 bmp;
      bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
      if (bmp != 0xffff)
	{
	  count += count_trailing_zeros (~bmp) / 4;
	  return clib_min (count, max_count);
	}
      data += 4;
      count += 4;
      if (count >= max_count)
	return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
	 ((data[0] ^ first) | (data[1] ^ first) |
	  (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}
/* Count leading elements equal to data[0], capped at max_count. */
static_always_inline uword
clib_count_equal_u16 (u16 * data, uword max_count)
{
  uword count;
  u16 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u16x16 splat = u16x16_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
	{
	  count += count_trailing_zeros (~bmp) / 2;
	  return clib_min (count, max_count);
	}
      data += 16;
      count += 16;
      if (count >= max_count)
	return max_count;
    }
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
  u16x8 splat = u16x8_splat (first);
  while (1)
    {
      u16 bmp;
      bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
      if (bmp != 0xffff)
	{
	  count += count_trailing_zeros (~bmp) / 2;
	  return clib_min (count, max_count);
	}
      data += 8;
      count += 8;
      if (count >= max_count)
	return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
	 ((data[0] ^ first) | (data[1] ^ first) |
	  (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}
/* Count leading elements equal to data[0], capped at max_count. */
static_always_inline uword
clib_count_equal_u8 (u8 * data, uword max_count)
{
  uword count;
  u8 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u8x32 splat = u8x32_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
	{
	  count += count_trailing_zeros (~bmp);
	  return clib_min (count, max_count);
	}
      data += 32;
      count += 32;
      if (count >= max_count)
	return max_count;
    }
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
  u8x16 splat = u8x16_splat (first);
  while (1)
    {
      u16 bmp;
      bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
      if (bmp != 0xffff)
	{
	  count += count_trailing_zeros (~bmp);
	  return clib_min (count, max_count);
	}
      data += 16;
      count += 16;
      if (count >= max_count)
	return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
	 ((data[0] ^ first) | (data[1] ^ first) |
	  (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}
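
/*
 * Illustrative sketch: the clib_count_equal_* family returns the
 * length of the leading run of elements equal to data[0], e.g.
 *
 *    u32 v[8] = { 5, 5, 5, 9, 0, 0, 0, 0 };
 *    clib_count_equal_u32 (v, 4);	// returns 3
 *
 * The array is padded to a full vector here because the SIMD paths may
 * load a whole vector at a time; VPP vectors provide that headroom.
 */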
#endif /* included_clib_string_h */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */