/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2021 Cisco Systems, Inc.
 */
5 #ifndef included_vector_index_to_ptr_h
6 #define included_vector_index_to_ptr_h
7 #include <vppinfra/clib.h>
#ifdef CLIB_HAVE_VEC128
/* Expand 4 u32 indices starting at indices[i] into 4 pointers stored at
 * ptrs[i]: each pointer is (index << shift) + base, where ov holds the base
 * address splatted into both 64-bit lanes. */
static_always_inline void
clib_index_to_ptr_u32x4 (u32 *indices, void **ptrs, i32 i, u64x2 ov, u8 shift)
{
  u32x4 iv4 = u32x4_load_unaligned (indices + i);
  u64x2 pv2;

  /* low two lanes: zero-extend u32 -> u64, scale and add base */
  pv2 = u64x2_from_u32x4 (iv4);
  u64x2_store_unaligned ((pv2 << shift) + ov, ptrs + i);

  /* high two lanes; aarch64 has a direct high-half widening conversion,
     elsewhere shift the high 8 bytes down first */
#ifdef __aarch64__
  pv2 = u64x2_from_u32x4_high (iv4);
#else
  pv2 = u64x2_from_u32x4 ((u32x4) u8x16_word_shift_right (iv4, 8));
#endif
  u64x2_store_unaligned ((pv2 << shift) + ov, ptrs + i + 2);
}
#endif
/** \brief Convert array of indices to pointers with base and shift

    @param indices source array of u32 indices
    @param base base pointer
    @param shift number of bits each index is shifted left before adding base
    @param ptrs destination array of pointers
    @param n_elts number of elements in the source array
*/
35 static_always_inline void
36 clib_index_to_ptr_u32 (u32 *indices, void *base, u8 shift, void **ptrs,
39 #if defined CLIB_HAVE_VEC512
42 u64x8 off = u64x8_splat ((u64) base);
43 u64x8 b0, b1, b2, b3, b4, b5, b6, b7;
47 b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
48 b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
49 b2 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 16));
50 b3 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 24));
51 b4 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 32));
52 b5 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 40));
53 b6 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 48));
54 b7 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 56));
55 u64x8_store_unaligned ((b0 << shift) + off, ptrs);
56 u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
57 u64x8_store_unaligned ((b2 << shift) + off, ptrs + 16);
58 u64x8_store_unaligned ((b3 << shift) + off, ptrs + 24);
59 u64x8_store_unaligned ((b4 << shift) + off, ptrs + 32);
60 u64x8_store_unaligned ((b5 << shift) + off, ptrs + 40);
61 u64x8_store_unaligned ((b6 << shift) + off, ptrs + 48);
62 u64x8_store_unaligned ((b7 << shift) + off, ptrs + 56);
73 b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
74 b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
75 b2 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 16));
76 b3 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 24));
77 u64x8_store_unaligned ((b0 << shift) + off, ptrs);
78 u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
79 u64x8_store_unaligned ((b2 << shift) + off, ptrs + 16);
80 u64x8_store_unaligned ((b3 << shift) + off, ptrs + 24);
87 b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
88 b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
89 u64x8_store_unaligned ((b0 << shift) + off, ptrs);
90 u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
97 b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
98 u64x8_store_unaligned ((b0 << shift) + off, ptrs);
104 b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + n_elts - 8));
105 u64x8_store_unaligned ((b0 << shift) + off, ptrs + n_elts - 8);
109 u32 mask = pow2_mask (n_elts);
110 u64x8 r = u64x8_from_u32x8 (u32x8_mask_load_zero (indices, mask));
111 u64x8_mask_store ((r << shift) + u64x8_splat ((u64) base), ptrs, mask);
114 #elif defined CLIB_HAVE_VEC256
117 u64x4 off = u64x4_splat ((u64) base);
118 u64x4 b0, b1, b2, b3, b4, b5, b6, b7;
122 b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
123 b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
124 b2 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 8));
125 b3 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 12));
126 b4 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 16));
127 b5 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 20));
128 b6 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 24));
129 b7 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 28));
130 u64x4_store_unaligned ((b0 << shift) + off, ptrs);
131 u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
132 u64x4_store_unaligned ((b2 << shift) + off, ptrs + 8);
133 u64x4_store_unaligned ((b3 << shift) + off, ptrs + 12);
134 u64x4_store_unaligned ((b4 << shift) + off, ptrs + 16);
135 u64x4_store_unaligned ((b5 << shift) + off, ptrs + 20);
136 u64x4_store_unaligned ((b6 << shift) + off, ptrs + 24);
137 u64x4_store_unaligned ((b7 << shift) + off, ptrs + 28);
148 b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
149 b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
150 b2 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 8));
151 b3 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 12));
152 u64x4_store_unaligned ((b0 << shift) + off, ptrs);
153 u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
154 u64x4_store_unaligned ((b2 << shift) + off, ptrs + 8);
155 u64x4_store_unaligned ((b3 << shift) + off, ptrs + 12);
162 b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
163 b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
164 u64x4_store_unaligned ((b0 << shift) + off, ptrs);
165 u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
172 b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
173 u64x4_store_unaligned ((b0 << shift) + off, ptrs);
179 b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + n_elts - 4));
180 u64x4_store_unaligned ((b0 << shift) + off, ptrs + n_elts - 4);
183 #ifdef CLIB_HAVE_VEC256_MASK_LOAD_STORE
186 u32 mask = pow2_mask (n_elts);
187 u64x4 r = u64x4_from_u32x4 (u32x4_mask_load_zero (indices, mask));
188 u64x4_mask_store ((r << shift) + u64x4_splat ((u64) base), ptrs, mask);
192 #elif defined(CLIB_HAVE_VEC128)
195 u64x2 ov = u64x2_splat ((u64) base);
196 u32 *i = (u32 *) indices;
197 void **p = (void **) ptrs;
202 clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
203 clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
204 clib_index_to_ptr_u32x4 (indices, ptrs, 8, ov, shift);
205 clib_index_to_ptr_u32x4 (indices, ptrs, 12, ov, shift);
206 clib_index_to_ptr_u32x4 (indices, ptrs, 16, ov, shift);
207 clib_index_to_ptr_u32x4 (indices, ptrs, 20, ov, shift);
208 clib_index_to_ptr_u32x4 (indices, ptrs, 24, ov, shift);
209 clib_index_to_ptr_u32x4 (indices, ptrs, 28, ov, shift);
220 clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
221 clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
222 clib_index_to_ptr_u32x4 (indices, ptrs, 8, ov, shift);
223 clib_index_to_ptr_u32x4 (indices, ptrs, 12, ov, shift);
231 clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
232 clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
239 clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
241 clib_index_to_ptr_u32x4 (i, p, n_elts - 4, ov, shift);
247 ptrs[0] = base + ((u64) indices[0] << shift);