1 /* SPDX-License-Identifier: Apache-2.0
2 * Copyright(c) 2021 Cisco Systems, Inc.
5 #include <vppinfra/clib.h>
6 #ifndef included_memcpy_h
7 #define included_memcpy_h
9 static_always_inline void
10 clib_memcpy_u32_x4 (u32 *dst, u32 *src)
12 #if defined(CLIB_HAVE_VEC128)
13 u32x4_store_unaligned (u32x4_load_unaligned (src), dst);
15 clib_memcpy_fast (dst, src, 4 * sizeof (u32));
18 static_always_inline void
19 clib_memcpy_u32_x8 (u32 *dst, u32 *src)
21 #if defined(CLIB_HAVE_VEC256)
22 u32x8_store_unaligned (u32x8_load_unaligned (src), dst);
24 clib_memcpy_u32_x4 (dst, src);
25 clib_memcpy_u32_x4 (dst + 4, src + 4);
29 static_always_inline void
30 clib_memcpy_u32_x16 (u32 *dst, u32 *src)
32 #if defined(CLIB_HAVE_VEC512)
33 u32x16_store_unaligned (u32x16_load_unaligned (src), dst);
35 clib_memcpy_u32_x8 (dst, src);
36 clib_memcpy_u32_x8 (dst + 8, src + 8);
40 static_always_inline void
41 clib_memcpy_u32 (u32 *dst, u32 *src, u32 n_left)
43 #if defined(CLIB_HAVE_VEC128)
44 if (COMPILE_TIME_CONST (n_left))
46 /* for n_left defined as compile-time constant we should prevent compiler
47 * to use more expensive mask load/store for common cases where smaller
48 * register load/store exists */
52 clib_memcpy_u32_x4 (dst, src);
55 clib_memcpy_u32_x8 (dst, src);
58 clib_memcpy_u32_x8 (dst, src);
59 clib_memcpy_u32_x4 (dst + 8, src + 8);
62 clib_memcpy_u32_x16 (dst, src);
65 clib_memcpy_u32_x16 (dst, src);
66 clib_memcpy_u32_x16 (dst + 16, src + 16);
69 clib_memcpy_u32_x16 (dst, src);
70 clib_memcpy_u32_x16 (dst + 16, src + 16);
71 clib_memcpy_u32_x16 (dst + 32, src + 32);
72 clib_memcpy_u32_x16 (dst + 48, src + 48);
79 #if defined(CLIB_HAVE_VEC512)
82 clib_memcpy_u32_x16 (dst, src);
83 clib_memcpy_u32_x16 (dst + 16, src + 16);
84 clib_memcpy_u32_x16 (dst + 32, src + 32);
85 clib_memcpy_u32_x16 (dst + 48, src + 48);
92 #if defined(CLIB_HAVE_VEC256)
95 clib_memcpy_u32_x16 (dst, src);
96 clib_memcpy_u32_x16 (dst + 16, src + 16);
105 clib_memcpy_u32_x16 (dst, src);
111 #if defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
114 u16 mask = pow2_mask (n_left);
115 u32x16_mask_store (u32x16_mask_load_zero (src, mask), dst, mask);
122 clib_memcpy_u32_x8 (dst, src);
128 #if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
131 u8 mask = pow2_mask (n_left);
132 u32x8_mask_store (u32x8_mask_load_zero (src, mask), dst, mask);
139 clib_memcpy_u32_x4 (dst, src);