/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2021 Cisco Systems, Inc.
 */

#ifndef included_vector_index_to_ptr_h
#define included_vector_index_to_ptr_h
#include <vppinfra/clib.h>
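
/* Helper for the 128-bit SIMD paths below: expands four consecutive u32
   indices starting at element i into four 64-bit pointers, using ov as the
   splatted base address and scaling each index by 2^shift. */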
#ifdef CLIB_HAVE_VEC128
static_always_inline void
clib_index_to_ptr_u32x4 (u32 *indices, void **ptrs, i32 i, u64x2 ov, u8 shift)
{
  u32x4 iv4 = u32x4_load_unaligned (indices + i);
  u64x2 pv2;

  /* lower two indices */
  pv2 = u64x2_from_u32x4 (iv4);
  u64x2_store_unaligned ((pv2 << shift) + ov, ptrs + i);

  /* upper two indices */
#ifdef __aarch64__
  pv2 = u64x2_from_u32x4_high (iv4);
#else
  pv2 = u64x2_from_u32x4 ((u32x4) u8x16_word_shift_right (iv4, 8));
#endif
  u64x2_store_unaligned ((pv2 << shift) + ov, ptrs + i + 2);
}
#endif
/** \brief Convert array of indices to pointers with base and shift

    @param indices source array of u32 indices
    @param base base pointer
    @param shift number of bits by which each index is left-shifted
    @param ptrs destination array of pointers
    @param n_elts number of elements in the source array
*/
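
/* Usage sketch (illustrative): materialize pointers to 64-byte pool
 * elements, i.e. ptr = base + (index << 6). The names elt_indices,
 * elt_ptrs and pool_base are placeholders assumed for this example:
 *
 *   u32 elt_indices[256];
 *   void *elt_ptrs[256];
 *   void *pool_base;
 *   ...
 *   clib_index_to_ptr_u32 (elt_indices, pool_base, 6, elt_ptrs, 256);
 */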
static_always_inline void
clib_index_to_ptr_u32 (u32 *indices, void *base, u8 shift, void **ptrs,
                       u32 n_elts)
{
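  /* Dispatch on the widest SIMD support available at compile time; each
     path drains the array in progressively smaller blocks and finishes
     with an overlapping or masked tail, falling back to a scalar loop
     when no vector unit is available. */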
#if defined CLIB_HAVE_VEC512
  /* 512-bit path: convert 8 indices per vector, in blocks of 64/32/16/8 */
  if (n_elts >= 8)
    {
      u64x8 off = u64x8_splat ((u64) base);
      u64x8 b0, b1, b2, b3, b4, b5, b6, b7;

      while (n_elts >= 64)
        {
          b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
          b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
          b2 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 16));
          b3 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 24));
          b4 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 32));
          b5 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 40));
          b6 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 48));
          b7 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 56));
          u64x8_store_unaligned ((b0 << shift) + off, ptrs);
          u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
          u64x8_store_unaligned ((b2 << shift) + off, ptrs + 16);
          u64x8_store_unaligned ((b3 << shift) + off, ptrs + 24);
          u64x8_store_unaligned ((b4 << shift) + off, ptrs + 32);
          u64x8_store_unaligned ((b5 << shift) + off, ptrs + 40);
          u64x8_store_unaligned ((b6 << shift) + off, ptrs + 48);
          u64x8_store_unaligned ((b7 << shift) + off, ptrs + 56);
          ptrs += 64;
          indices += 64;
          n_elts -= 64;
        }

      if (n_elts == 0)
        return;

      if (n_elts >= 32)
        {
          b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
          b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
          b2 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 16));
          b3 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 24));
          u64x8_store_unaligned ((b0 << shift) + off, ptrs);
          u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
          u64x8_store_unaligned ((b2 << shift) + off, ptrs + 16);
          u64x8_store_unaligned ((b3 << shift) + off, ptrs + 24);
          ptrs += 32;
          indices += 32;
          n_elts -= 32;
        }
      if (n_elts >= 16)
        {
          b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
          b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
          u64x8_store_unaligned ((b0 << shift) + off, ptrs);
          u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
          ptrs += 16;
          indices += 16;
          n_elts -= 16;
        }
      if (n_elts >= 8)
        {
          b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
          u64x8_store_unaligned ((b0 << shift) + off, ptrs);
          ptrs += 8;
          indices += 8;
          n_elts -= 8;
        }

      if (n_elts == 0)
        return;

      /* 1..7 leftover elements: overlapping load/store covering the tail */
      b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + n_elts - 8));
      u64x8_store_unaligned ((b0 << shift) + off, ptrs + n_elts - 8);
    }
  else
    {
      /* fewer than 8 elements: masked load and store */
      u32 mask = pow2_mask (n_elts);
      u64x8 r = u64x8_from_u32x8 (u32x8_mask_load_zero (indices, mask));
      u64x8_mask_store ((r << shift) + u64x8_splat ((u64) base), ptrs, mask);
    }
  return;
#elif defined CLIB_HAVE_VEC256
  /* 256-bit path: convert 4 indices per vector, in blocks of 32/16/8/4 */
  if (n_elts >= 4)
    {
      u64x4 off = u64x4_splat ((u64) base);
      u64x4 b0, b1, b2, b3, b4, b5, b6, b7;

      while (n_elts >= 32)
        {
          b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
          b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
          b2 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 8));
          b3 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 12));
          b4 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 16));
          b5 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 20));
          b6 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 24));
          b7 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 28));
          u64x4_store_unaligned ((b0 << shift) + off, ptrs);
          u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
          u64x4_store_unaligned ((b2 << shift) + off, ptrs + 8);
          u64x4_store_unaligned ((b3 << shift) + off, ptrs + 12);
          u64x4_store_unaligned ((b4 << shift) + off, ptrs + 16);
          u64x4_store_unaligned ((b5 << shift) + off, ptrs + 20);
          u64x4_store_unaligned ((b6 << shift) + off, ptrs + 24);
          u64x4_store_unaligned ((b7 << shift) + off, ptrs + 28);
          ptrs += 32;
          indices += 32;
          n_elts -= 32;
        }

      if (n_elts == 0)
        return;

      if (n_elts >= 16)
        {
          b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
          b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
          b2 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 8));
          b3 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 12));
          u64x4_store_unaligned ((b0 << shift) + off, ptrs);
          u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
          u64x4_store_unaligned ((b2 << shift) + off, ptrs + 8);
          u64x4_store_unaligned ((b3 << shift) + off, ptrs + 12);
          ptrs += 16;
          indices += 16;
          n_elts -= 16;
        }
      if (n_elts >= 8)
        {
          b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
          b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
          u64x4_store_unaligned ((b0 << shift) + off, ptrs);
          u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
          ptrs += 8;
          indices += 8;
          n_elts -= 8;
        }
      if (n_elts > 4)
        {
          b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
          u64x4_store_unaligned ((b0 << shift) + off, ptrs);
          ptrs += 4;
          indices += 4;
          n_elts -= 4;
        }

      /* 1..4 leftover elements: overlapping load/store covering the tail */
      b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + n_elts - 4));
      u64x4_store_unaligned ((b0 << shift) + off, ptrs + n_elts - 4);
      return;
    }
#ifdef CLIB_HAVE_VEC256_MASK_LOAD_STORE
  else
    {
      /* fewer than 4 elements: masked load and store */
      u32 mask = pow2_mask (n_elts);
      u64x4 r = u64x4_from_u32x4 (u32x4_mask_load_zero (indices, mask));
      u64x4_mask_store ((r << shift) + u64x4_splat ((u64) base), ptrs, mask);
      return;
    }
#endif
#elif defined(CLIB_HAVE_VEC128)
  /* 128-bit path: process 4 indices per helper call */
  if (n_elts >= 4)
    {
      u64x2 ov = u64x2_splat ((u64) base);
      u32 *i = (u32 *) indices;
      void **p = (void **) ptrs;
      u32 n = n_elts;

      while (n >= 32)
        {
          clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
          clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
          clib_index_to_ptr_u32x4 (indices, ptrs, 8, ov, shift);
          clib_index_to_ptr_u32x4 (indices, ptrs, 12, ov, shift);
          clib_index_to_ptr_u32x4 (indices, ptrs, 16, ov, shift);
          clib_index_to_ptr_u32x4 (indices, ptrs, 20, ov, shift);
          clib_index_to_ptr_u32x4 (indices, ptrs, 24, ov, shift);
          clib_index_to_ptr_u32x4 (indices, ptrs, 28, ov, shift);
          indices += 32;
          ptrs += 32;
          n -= 32;
        }

      if (n == 0)
        return;

      if (n >= 16)
        {
          clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
          clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
          clib_index_to_ptr_u32x4 (indices, ptrs, 8, ov, shift);
          clib_index_to_ptr_u32x4 (indices, ptrs, 12, ov, shift);
          indices += 16;
          ptrs += 16;
          n -= 16;
        }

      if (n >= 8)
        {
          clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
          clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
          indices += 8;
          ptrs += 8;
          n -= 8;
        }

      if (n > 4)
        clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);

      /* 1..4 leftover elements: overlapping call off the saved original
         pointers i and p */
      clib_index_to_ptr_u32x4 (i, p, n_elts - 4, ov, shift);
      return;
    }
#endif
  /* scalar fallback */
  while (n_elts)
    {
      ptrs[0] = base + ((u64) indices[0] << shift);
      ptrs += 1;
      indices += 1;
      n_elts -= 1;
    }
}

#endif /* included_vector_index_to_ptr_h */