1 /* SPDX-License-Identifier: Apache-2.0
2 * Copyright(c) 2021 Cisco Systems, Inc.
5 #include <vppinfra/clib.h>
7 #include <vppinfra/vector_funcs.h>
11 uword used_elts[VLIB_FRAME_SIZE / 64];
/* Extract the buffer indices (from `elts`) whose 16-bit next-index entry
 * (in `indices`) equals `index`, for a group of up to 64 elements.
 * Matching indices are appended at `dst`; the advanced `dst` pointer is
 * returned so the caller can chain calls across chunks.
 * Selects the widest SIMD compare/compress path the target supports,
 * falling back to a scalar scan.
 * NOTE(review): this view of the file is fragmentary (interior lines are
 * elided); comments describe only the visible code — confirm against the
 * full source. */
15 static_always_inline u32 *
16 extract_unused_elts_x64 (u32 *elts, u16 *indices, u16 index, int n_left,
20 #if defined(CLIB_HAVE_VEC128)
/* build a 64-bit match mask, one bit per compared 16-bit index */
21 mask = clib_compare_u16_x64 (index, indices);
/* bulk copy of all 64 elements — presumably the all-match fast path;
 * the guarding condition is elided here, TODO confirm */
26 clib_memcpy_u32 (dst, elts, 64);
/* partial group: keep only the first n_left mask bits */
32 mask &= pow2_mask (n_left);
36 #if defined(CLIB_HAVE_VEC512_COMPRESS)
/* AVX-512 path: compress-store 16 u32 elements per iteration, 4 iterations */
37 u32x16u *ev = (u32x16u *) elts;
38 for (int i = 0; i < 4; i++)
/* number of matches in the current 16-bit slice of the mask */
40 int cnt = _popcnt32 ((u16) mask);
41 u32x16_compress_store (ev[i], mask, dst);
46 #elif defined(CLIB_HAVE_VEC256_COMPRESS)
/* AVX2 compress path: 8 u32 elements per iteration, 8 iterations */
47 u32x8u *ev = (u32x8u *) elts;
48 for (int i = 0; i < 8; i++)
/* number of matches in the current 8-bit slice of the mask */
50 int cnt = _popcnt32 ((u8) mask);
51 u32x8_compress_store (ev[i], mask, dst);
55 #elif defined(CLIB_HAVE_VEC256)
/* bit-scan loop: peel one set bit (one matching element) at a time */
58 u16 bit = count_trailing_zeros (mask);
59 mask = clear_lowest_set_bit (mask);
65 u16 bit = count_trailing_zeros (mask);
/* scalar fallback: linear scan of the next-index array */
71 for (int i = 0; i < n_left; i++)
73 if (indices[i] == index)
/* Walk the whole element array in 64-element chunks, extracting into
 * `dst` every element whose next-index equals `index`.  The per-chunk
 * words of d->used_elts record which slots have been consumed so later
 * passes skip them.  Returns the number of elements extracted
 * (presumably dst-pointer difference — computation elided in this view,
 * TODO confirm). */
84 static_always_inline u32
85 extract_unused_elts_by_index (extract_data_t *d, u32 *elts, u16 *indices,
86 u16 index, int n_left, u32 *dst)
/* used-slot bitmap, one u64 per 64-element chunk */
89 u64 *bmp = d->used_elts;
/* full 64-element chunks */
92 dst = extract_unused_elts_x64 (elts, indices, index, 64, bmp, dst);
/* trailing partial chunk (n_left < 64) */
102 dst = extract_unused_elts_x64 (elts, indices, index, n_left, bmp, dst);
/* Return the position (element number within the frame) of the first
 * element not yet marked used in d->used_elts.  d->uword_offset caches
 * how many leading fully-used 64-bit words have already been skipped. */
107 static_always_inline u32
108 find_first_unused_elt (extract_data_t *d)
110 u64 *ue = d->used_elts + d->uword_offset;
/* skip words whose 64 elements are all consumed (all bits set) */
112 while (PREDICT_FALSE (ue[0] == ~0))
/* first clear bit in the current word gives the element offset */
118 return d->uword_offset * 64 + count_trailing_zeros (~ue[0]);
/* Enqueue to next node `next_index` every buffer in `buffers` whose
 * corresponding entry in `nexts` equals `next_index`.  Matching indices
 * are extracted either directly into the next frame (fast path) or via
 * the caller-supplied `tmp` scratch array, spilling into a second frame
 * when the first lacks room.  Returns how many of the `n_left` buffers
 * remain unplaced.
 * NOTE(review): fragmentary view — the extraction call for the tmp-based
 * paths and several braces are elided; confirm against full source. */
121 static_always_inline u32
122 enqueue_one (vlib_main_t *vm, vlib_node_runtime_t *node, extract_data_t *d,
123 u16 next_index, u32 *buffers, u16 *nexts, u32 n_buffers,
124 u32 n_left, u32 *tmp)
127 u32 n_extracted, n_free;
130 f = vlib_get_next_frame_internal (vm, node, next_index, 0);
/* remaining capacity of the frame currently being filled */
132 n_free = VLIB_FRAME_SIZE - f->n_vectors;
134 /* if frame contains enough space for worst case scenario, we can avoid
/* fast path: extract straight into the frame, skipping tmp entirely */
136 if (n_free >= n_left)
137 to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
141 n_extracted = extract_unused_elts_by_index (d, buffers, nexts, next_index,
146 /* indices already written to frame, just close it */
147 vlib_put_next_frame (vm, node, next_index, n_free - n_extracted);
149 else if (n_free >= n_extracted)
151 /* enough space in the existing frame */
152 to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
153 vlib_buffer_copy_indices (to, tmp, n_extracted);
154 vlib_put_next_frame (vm, node, next_index, n_free - n_extracted);
/* split path: top up and close the current frame ... */
159 to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
160 vlib_buffer_copy_indices (to, tmp, n_free);
161 vlib_put_next_frame (vm, node, next_index, 0);
/* ... then open a fresh frame for the overflow */
164 u32 n_2nd_frame = n_extracted - n_free;
165 f = vlib_get_next_frame_internal (vm, node, next_index, 1);
166 to = vlib_frame_vector_args (f);
167 vlib_buffer_copy_indices (to, tmp + n_free, n_2nd_frame);
168 vlib_put_next_frame (vm, node, next_index,
169 VLIB_FRAME_SIZE - n_2nd_frame);
172 return n_left - n_extracted;
/* Multiarch entry point: distribute `buffers` to per-packet next nodes
 * given the parallel `nexts` array.  Processes full VLIB_FRAME_SIZE
 * batches first, then the remainder.  Within a batch it repeatedly keys
 * on the next-index of the first not-yet-consumed element and enqueues
 * all matching buffers via enqueue_one().
 * NOTE(review): fragmentary view — the inner while(n_left) loops and
 * some braces are elided; comments describe visible structure only. */
175 void __clib_section (".vlib_buffer_enqueue_to_next_fn")
176 CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_next_fn)
177 (vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 *nexts,
/* scratch array for extracted buffer indices (worst case: whole frame) */
180 u32 tmp[VLIB_FRAME_SIZE];
184 while (count >= VLIB_FRAME_SIZE)
/* fresh used-elements state for each batch */
186 extract_data_t d = {};
187 n_left = VLIB_FRAME_SIZE;
/* first pass keyed on the very first element's next index */
189 next_index = nexts[0];
190 n_left = enqueue_one (vm, node, &d, next_index, buffers, nexts,
191 VLIB_FRAME_SIZE, n_left, tmp);
/* subsequent passes: key on the first still-unused element; presumably
 * repeated while n_left > 0 (loop header elided — TODO confirm) */
195 next_index = nexts[find_first_unused_elt (&d)];
196 n_left = enqueue_one (vm, node, &d, next_index, buffers, nexts,
197 VLIB_FRAME_SIZE, n_left, tmp);
/* advance to the next full batch */
200 buffers += VLIB_FRAME_SIZE;
201 nexts += VLIB_FRAME_SIZE;
202 count -= VLIB_FRAME_SIZE;
/* trailing partial batch (count < VLIB_FRAME_SIZE), same scheme */
207 extract_data_t d = {};
208 next_index = nexts[0];
211 n_left = enqueue_one (vm, node, &d, next_index, buffers, nexts, count,
216 next_index = nexts[find_first_unused_elt (&d)];
217 n_left = enqueue_one (vm, node, &d, next_index, buffers, nexts,
/* register this march variant with the multiarch dispatcher */
223 CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_next_fn);
/* Multiarch entry point: enqueue all `count` buffers to one next node.
 * Common case copies everything into the current frame in a single
 * vlib_buffer_copy_indices(); otherwise it fills the current frame and
 * loops over fresh frames for the rest (loop structure partially elided
 * in this view). */
225 void __clib_section (".vlib_buffer_enqueue_to_single_next_fn")
226 CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_single_next_fn)
227 (vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 next_index,
230 u32 *to_next, n_left_to_next, n_enq;
232 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* fast path: everything fits in the frame currently in flight */
234 if (PREDICT_TRUE (n_left_to_next >= count))
236 vlib_buffer_copy_indices (to_next, buffers, count);
237 n_left_to_next -= count;
238 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* slow path: consume whatever room the current frame still has */
242 n_enq = n_left_to_next;
244 vlib_buffer_copy_indices (to_next, buffers, n_enq);
245 n_left_to_next -= n_enq;
/* remaining buffers: close the full frame and open new ones as needed */
247 if (PREDICT_FALSE (count > n_enq))
252 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
253 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
254 n_enq = clib_min (n_left_to_next, count);
/* close the last (possibly partial) frame */
257 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* register this march variant with the multiarch dispatcher */
259 CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_single_next_fn);
/* Multiarch entry point: hand off buffers to other threads' frame queues
 * according to per-packet `thread_indices`.  Batches consecutive packets
 * destined to the same thread into one frame-queue element; when
 * `drop_on_congestion` is set, packets aimed at a congested queue are
 * collected into drop_list and freed at the end.  Returns the number of
 * packets actually handed off (n_packets - n_drop).
 * NOTE(review): fragmentary view — the main per-packet loop header,
 * pointer advances, and several braces are elided; comments describe
 * visible code only. */
261 u32 __clib_section (".vlib_buffer_enqueue_to_thread_fn")
262 CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_fn)
263 (vlib_main_t *vm, u32 frame_queue_index, u32 *buffer_indices,
264 u16 *thread_indices, u32 n_packets, int drop_on_congestion)
266 vlib_thread_main_t *tm = vlib_get_thread_main ();
267 vlib_frame_queue_main_t *fqm;
268 vlib_frame_queue_per_thread_data_t *ptd;
269 u32 n_left = n_packets;
/* indices of packets dropped due to congestion, freed in bulk below */
270 u32 drop_list[VLIB_FRAME_SIZE], *dbi = drop_list, n_drop = 0;
/* current handoff frame-queue element being filled (0 = none) */
271 vlib_frame_queue_elt_t *hf = 0;
272 u32 n_left_to_next_thread = 0, *to_next_thread = 0;
273 u32 next_thread_index, current_thread_index = ~0;
276 fqm = vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
277 ptd = vec_elt_at_index (fqm->per_thread_data, vm->thread_index);
/* per-packet loop (header elided): destination thread of this packet */
281 next_thread_index = thread_indices[0];
/* destination changed: finish the previous element, start a new one */
283 if (next_thread_index != current_thread_index)
285 if (drop_on_congestion &&
286 is_vlib_frame_queue_congested (
287 frame_queue_index, next_thread_index, fqm->queue_hi_thresh,
288 ptd->congested_handoff_queue_by_thread_index))
/* congested: divert this buffer index to the drop list */
290 dbi[0] = buffer_indices[0];
/* record how full the previous element got before switching */
297 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;
299 hf = vlib_get_worker_handoff_queue_elt (
300 frame_queue_index, next_thread_index,
301 ptd->handoff_queue_elt_by_thread_index);
303 n_left_to_next_thread = VLIB_FRAME_SIZE - hf->n_vectors;
304 to_next_thread = &hf->buffer_index[hf->n_vectors];
305 current_thread_index = next_thread_index;
/* append this packet's buffer index to the current element */
308 to_next_thread[0] = buffer_indices[0];
310 n_left_to_next_thread--;
/* element is full: ship it and force a new one next iteration */
312 if (n_left_to_next_thread == 0)
314 hf->n_vectors = VLIB_FRAME_SIZE;
315 vlib_put_frame_queue_elt (hf);
316 vlib_get_main_by_index (current_thread_index)->check_frame_queues =
318 current_thread_index = ~0;
319 ptd->handoff_queue_elt_by_thread_index[next_thread_index] = 0;
/* loop done: record fill level of the last partially-filled element */
331 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;
333 /* Ship frames to the thread nodes */
334 for (i = 0; i < vec_len (ptd->handoff_queue_elt_by_thread_index); i++)
336 if (ptd->handoff_queue_elt_by_thread_index[i])
338 hf = ptd->handoff_queue_elt_by_thread_index[i];
340 * It works better to let the handoff node
341 * rate-adapt, always ship the handoff queue element.
/* `1 ||` deliberately forces the always-ship policy described above */
343 if (1 || hf->n_vectors == hf->last_n_vectors)
345 vlib_put_frame_queue_elt (hf);
346 vlib_get_main_by_index (i)->check_frame_queues = 1;
347 ptd->handoff_queue_elt_by_thread_index[i] = 0;
350 hf->last_n_vectors = hf->n_vectors;
/* reset cached congestion state for the next invocation */
352 ptd->congested_handoff_queue_by_thread_index[i] =
353 (vlib_frame_queue_t *) (~0);
/* free everything diverted to the drop list */
356 if (drop_on_congestion && n_drop)
357 vlib_buffer_free (vm, drop_list, n_drop);
359 return n_packets - n_drop;
/* register this march variant with the multiarch dispatcher */
362 CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_thread_fn);
/* Base-architecture-only section: the function-pointer table and its
 * init function are compiled once, not per march variant.
 * (Matching #endif is outside this fragmentary view.) */
364 #ifndef CLIB_MARCH_VARIANT
/* global dispatch table consumed by callers of the enqueue helpers */
365 vlib_buffer_func_main_t vlib_buffer_func_main;
367 static clib_error_t *
368 vlib_buffer_funcs_init (vlib_main_t *vm)
370 vlib_buffer_func_main_t *bfm = &vlib_buffer_func_main;
/* resolve the best march variant of each function for this CPU */
371 bfm->buffer_enqueue_to_next_fn =
372 CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_next_fn);
373 bfm->buffer_enqueue_to_single_next_fn =
374 CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_single_next_fn);
375 bfm->buffer_enqueue_to_thread_fn =
376 CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_fn);
/* run at vlib init time to populate the table before first use */
380 VLIB_INIT_FUNCTION (vlib_buffer_funcs_init);