1 /* SPDX-License-Identifier: Apache-2.0
2 * Copyright(c) 2021 Cisco Systems, Inc.
5 #include <vppinfra/clib.h>
7 #include <vppinfra/vector_funcs.h>
11 uword used_elts[VLIB_FRAME_SIZE / 64];
/* Copy to 'dst' the entries of 'elts' whose matching 'indices' value equals
 * 'index', marking the consumed slots in the caller-supplied bitmap, and
 * return the advanced 'dst' pointer. Processes up to 64 elements (n_left).
 * NOTE(review): many lines of this function are elided in this view; the
 * comments below describe only what is visible — confirm against the full
 * file. */
15 static_always_inline u32 *
16 extract_unused_elts_x64 (u32 *elts, u16 *indices, u16 index, int n_left,
/* Vector path: compare all 64 u16 next-indices against 'index' at once,
 * producing a 64-bit match mask (one bit per element). */
20 #if defined(CLIB_HAVE_VEC128)
21 mask = clib_compare_u16_x64 (index, indices);
/* presumably the all-64-match fast path: copy the whole batch wholesale
 * — TODO confirm, the guarding condition is elided here */
26 clib_memcpy_u32 (dst, elts, 64);
/* partial batch: keep only mask bits for the n_left valid elements */
32 mask &= pow2_mask (n_left);
/* AVX-512 path: compress-store 16 u32 elements per iteration, selecting
 * the elements whose mask bits are set */
36 #if defined(CLIB_HAVE_VEC512_COMPRESS)
37 u32x16u *ev = (u32x16u *) elts;
38 for (int i = 0; i < 4; i++)
/* number of elements selected by the low 16 mask bits */
40 int cnt = _popcnt32 ((u16) mask);
41 u32x16_compress_store (ev[i], mask, dst);
/* AVX2-with-compress path: same idea, 8 u32 elements per iteration */
46 #elif defined(CLIB_HAVE_VEC256_COMPRESS)
47 u32x8u *ev = (u32x8u *) elts;
48 for (int i = 0; i < 8; i++)
50 int cnt = _popcnt32 ((u8) mask);
51 u32x8_compress_store (ev[i], mask, dst);
/* no compress-store available: walk the set bits of the mask one by one */
55 #elif defined(CLIB_HAVE_VEC256)
58 u16 bit = count_trailing_zeros (mask);
59 mask = clear_lowest_set_bit (mask);
/* alternate bit-walk variant (surrounding lines elided in this view) */
65 u16 bit = count_trailing_zeros (mask);
/* scalar fallback (no CLIB_HAVE_VEC128): linear scan of the indices */
71 for (int i = 0; i < n_left; i++)
73 if (indices[i] == index)
/* Extract from elts/indices every element destined to next-index 'index',
 * appending the buffer indices to 'dst' and recording consumed positions in
 * d->used_elts. Presumably returns the number of elements extracted — the
 * return statement is elided in this view; confirm against the full file. */
84 static_always_inline u32
85 extract_unused_elts_by_index (extract_data_t *d, u32 *elts, u16 *indices,
86 u16 index, int n_left, u32 *dst)
/* one bitmap uword per 64-element batch */
89 u64 *bmp = d->used_elts
/* full 64-element batches (loop framing elided in this view) */
92 dst = extract_unused_elts_x64 (elts, indices, index, 64, bmp, dst);
/* trailing partial batch of fewer than 64 elements */
102 dst = extract_unused_elts_x64 (elts, indices, index, n_left, bmp, dst);
/* Return the position of the first element not yet extracted, i.e. the
 * first clear bit in d->used_elts, scanning from d->uword_offset. */
107 static_always_inline u32
108 find_first_unused_elt (extract_data_t *d)
110 u64 *ue = d->used_elts + d->uword_offset;
/* skip fully-consumed 64-bit words; the offset/pointer advance inside the
 * loop is elided in this view */
112 while (PREDICT_FALSE (ue[0] == ~0))
/* first zero bit of the current word gives the element index */
118 return d->uword_offset * 64 + count_trailing_zeros (~ue[0]);
/* Enqueue to 'next_index' all buffers whose nexts[] entry matches, writing
 * either directly into the next frame (when it surely fits) or via the
 * caller-supplied 'tmp' staging array, spilling into a second frame when
 * needed. Returns how many of the n_left buffers remain unextracted.
 * NOTE(review): branch framing is elided in this view — comments below
 * describe only the visible lines. */
121 static_always_inline u32
122 enqueue_one (vlib_main_t *vm, vlib_node_runtime_t *node, extract_data_t *d,
123 u16 next_index, u32 *buffers, u16 *nexts, u32 n_buffers,
124 u32 n_left, u32 *tmp)
127 u32 n_extracted, n_free;
130 f = vlib_get_next_frame_internal (vm, node, next_index, 0);
132 n_free = VLIB_FRAME_SIZE - f->n_vectors;
134 /* if frame contains enough space for worst case scenario, we can avoid
/* worst case (all n_left match) fits: extract straight into the frame */
136 if (n_free >= n_left)
137 to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
141 n_extracted = extract_unused_elts_by_index (d, buffers, nexts, next_index,
146 /* indices already written to frame, just close it */
147 vlib_put_next_frame (vm, node, next_index, n_free - n_extracted);
/* extraction went to tmp but the existing frame can still hold it all */
149 else if (n_free >= n_extracted)
151 /* enough space in the existing frame */
152 to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
153 vlib_buffer_copy_indices (to, tmp, n_extracted);
154 vlib_put_next_frame (vm, node, next_index, n_free - n_extracted);
/* overflow case: top off the current frame ... */
159 to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
160 vlib_buffer_copy_indices (to, tmp, n_free);
161 vlib_put_next_frame (vm, node, next_index, 0);
/* ... and put the remainder into a freshly allocated frame */
164 u32 n_2nd_frame = n_extracted - n_free;
165 f = vlib_get_next_frame_internal (vm, node, next_index, 1);
166 to = vlib_frame_vector_args (f);
167 vlib_buffer_copy_indices (to, tmp + n_free, n_2nd_frame);
168 vlib_put_next_frame (vm, node, next_index,
169 VLIB_FRAME_SIZE - n_2nd_frame);
172 return n_left - n_extracted;
/* Multiarch entry point: distribute 'count' buffers to per-next-node frames
 * according to the parallel nexts[] array. Processes full VLIB_FRAME_SIZE
 * chunks, then a partial tail chunk; within a chunk it repeatedly enqueues
 * for the next-index of the first still-unconsumed buffer until all are
 * placed. NOTE(review): loop framing is elided in this view. */
175 void __clib_section (".vlib_buffer_enqueue_to_next_fn")
176 CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_next_fn)
177 (vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 *nexts,
/* staging area for extracted buffer indices */
180 u32 tmp[VLIB_FRAME_SIZE];
/* full-frame-sized chunks first */
184 while (count >= VLIB_FRAME_SIZE)
/* fresh extraction bitmap per chunk */
186 extract_data_t d = {};
187 n_left = VLIB_FRAME_SIZE;
/* seed with the next-index of the first buffer */
189 next_index = nexts[0];
190 n_left = enqueue_one (vm, node, &d, next_index, buffers, nexts,
191 VLIB_FRAME_SIZE, n_left, tmp);
/* continue with the next-index of the first unconsumed element until the
 * chunk is drained (loop framing elided in this view) */
195 next_index = nexts[find_first_unused_elt (&d)];
196 n_left = enqueue_one (vm, node, &d, next_index, buffers, nexts,
197 VLIB_FRAME_SIZE, n_left, tmp);
/* advance to the next chunk */
200 buffers += VLIB_FRAME_SIZE;
201 nexts += VLIB_FRAME_SIZE;
202 count -= VLIB_FRAME_SIZE;
/* trailing partial chunk (count < VLIB_FRAME_SIZE); same algorithm */
207 extract_data_t d = {};
208 next_index = nexts[0];
211 n_left = enqueue_one (vm, node, &d, next_index, buffers, nexts, count,
216 next_index = nexts[find_first_unused_elt (&d)];
217 n_left = enqueue_one (vm, node, &d, next_index, buffers, nexts,
223 CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_next_fn);
/* Multiarch entry point: enqueue 'count' buffers to a single next node,
 * filling the current frame first and spilling into fresh frames when it
 * runs out of space. NOTE(review): the spill loop's framing is elided in
 * this view. */
225 void __clib_section (".vlib_buffer_enqueue_to_single_next_fn")
226 CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_single_next_fn)
227 (vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 next_index,
230 u32 *to_next, n_left_to_next, n_enq;
232 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* common case: everything fits in the current frame */
234 if (PREDICT_TRUE (n_left_to_next >= count))
236 vlib_buffer_copy_indices (to_next, buffers, count);
237 n_left_to_next -= count;
238 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* otherwise top off the current frame first */
242 n_enq = n_left_to_next;
244 vlib_buffer_copy_indices (to_next, buffers, n_enq);
245 n_left_to_next -= n_enq;
/* remainder loop: close the full frame, open a fresh one, and copy up to
 * its capacity each pass (loop body partially elided in this view) */
247 if (PREDICT_FALSE (count > n_enq))
252 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
253 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
254 n_enq = clib_min (n_left_to_next, count);
/* close the final (possibly partial) frame */
257 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* Multiarch entry point: hand off n_packets buffers to the worker threads
 * named in thread_indices[] via per-thread frame-queue elements. When
 * drop_on_congestion is set, buffers headed to congested queues are
 * diverted to drop_list and freed. Returns n_packets - n_drop, i.e. the
 * number of packets actually handed off. NOTE(review): the per-packet loop
 * framing and several branches are elided in this view. */
261 u32 __clib_section (".vlib_buffer_enqueue_to_thread_fn")
262 CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_fn)
263 (vlib_main_t *vm, u32 frame_queue_index, u32 *buffer_indices,
264 u16 *thread_indices, u32 n_packets, int drop_on_congestion)
266 vlib_thread_main_t *tm = vlib_get_thread_main ();
267 vlib_frame_queue_main_t *fqm;
268 vlib_frame_queue_per_thread_data_t *ptd;
269 u32 n_left = n_packets;
/* buffers diverted because their destination queue is congested */
270 u32 drop_list[VLIB_FRAME_SIZE], *dbi = drop_list, n_drop = 0;
/* current handoff frame-queue element, ~0 thread index = none open */
271 vlib_frame_queue_elt_t *hf = 0;
272 u32 n_left_to_next_thread = 0, *to_next_thread = 0;
273 u32 next_thread_index, current_thread_index = ~0;
276 fqm = vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
277 ptd = vec_elt_at_index (fqm->per_thread_data, vm->thread_index);
/* destination thread of the packet at the head of the arrays */
281 next_thread_index = thread_indices[0];
/* destination changed: settle the previous element, open a new one */
283 if (next_thread_index != current_thread_index)
/* congested destination: divert this buffer to the drop list */
285 if (drop_on_congestion &&
286 is_vlib_frame_queue_congested (
287 frame_queue_index, next_thread_index, fqm->queue_hi_thresh,
288 ptd->congested_handoff_queue_by_thread_index))
290 dbi[0] = buffer_indices[0];
/* record how many vectors the previous element now holds */
297 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;
/* get (or create) the cached handoff element for the new thread */
299 hf = vlib_get_worker_handoff_queue_elt (
300 frame_queue_index, next_thread_index,
301 ptd->handoff_queue_elt_by_thread_index);
303 n_left_to_next_thread = VLIB_FRAME_SIZE - hf->n_vectors;
304 to_next_thread = &hf->buffer_index[hf->n_vectors];
305 current_thread_index = next_thread_index;
/* append one buffer index to the open handoff element */
308 to_next_thread[0] = buffer_indices[0];
310 n_left_to_next_thread--;
/* element full: ship it immediately and reset state */
312 if (n_left_to_next_thread == 0)
314 hf->n_vectors = VLIB_FRAME_SIZE;
315 vlib_put_frame_queue_elt (hf);
/* tell the destination thread's main loop to poll its frame queues */
316 vlib_get_main_by_index (current_thread_index)->check_frame_queues =
318 current_thread_index = ~0;
319 ptd->handoff_queue_elt_by_thread_index[next_thread_index] = 0;
/* after the loop: record vector count of any still-open partial element */
331 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;
333 /* Ship frames to the thread nodes */
334 for (i = 0; i < vec_len (ptd->handoff_queue_elt_by_thread_index); i++)
336 if (ptd->handoff_queue_elt_by_thread_index[i])
338 hf = ptd->handoff_queue_elt_by_thread_index[i];
340 * It works better to let the handoff node
341 * rate-adapt, always ship the handoff queue element.
/* the "1 ||" intentionally forces shipping every pending element */
343 if (1 || hf->n_vectors == hf->last_n_vectors)
345 vlib_put_frame_queue_elt (hf);
346 vlib_get_main_by_index (i)->check_frame_queues = 1;
347 ptd->handoff_queue_elt_by_thread_index[i] = 0;
/* dead branch given the forced condition above; kept for reference */
350 hf->last_n_vectors = hf->n_vectors;
/* clear any recorded congestion marker for this thread */
352 ptd->congested_handoff_queue_by_thread_index[i] =
353 (vlib_frame_queue_t *) (~0);
/* free everything diverted due to congestion */
356 if (drop_on_congestion && n_drop)
357 vlib_buffer_free (vm, drop_list, n_drop);
359 return n_packets - n_drop;
365 * Check the frame queue to see if any frames are available.
366 * If so, pull the packets off the frames and put them to
/* Multiarch entry point: drain this thread's handoff frame queue,
 * dispatching each dequeued element as a frame to fqm->node_index.
 * Presumably returns the total number of vectors processed — the final
 * return is elided in this view; confirm against the full file. */
369 u32 __clib_section (".vlib_frame_queue_dequeue_fn")
370 CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
371 (vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
373 u32 thread_id = vm->thread_index;
374 vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
375 vlib_frame_queue_elt_t *elt;
383 ASSERT (vm == vlib_global_main.vlib_mains[thread_id]);
/* no destination node configured yet: nothing to do */
385 if (PREDICT_FALSE (fqm->node_index == ~0))
388 * Gather trace data for frame queues
390 if (PREDICT_FALSE (fq->trace))
392 frame_queue_trace_t *fqt;
393 frame_queue_nelt_counter_t *fqh;
/* snapshot the queue state into the per-thread trace record */
396 fqt = &fqm->frame_queue_traces[thread_id];
398 fqt->nelts = fq->nelts;
399 fqt->head = fq->head;
400 fqt->head_hint = fq->head_hint;
401 fqt->tail = fq->tail;
402 fqt->threshold = fq->vector_threshold;
403 fqt->n_in_use = fqt->tail - fqt->head;
404 if (fqt->n_in_use >= fqt->nelts)
406 // if beyond max then use max
407 fqt->n_in_use = fqt->nelts - 1;
410 /* Record the number of elements in use in the histogram */
411 fqh = &fqm->frame_queue_histogram[thread_id];
412 fqh->count[fqt->n_in_use]++;
414 /* Record a snapshot of the elements in use */
415 for (elix = 0; elix < fqt->nelts; elix++)
/* ring-buffer indexing: nelts is a power of two — TODO confirm */
417 elt = fq->elts + ((fq->head + 1 + elix) & (fq->nelts - 1));
420 fqt->n_vectors[elix] = elt->n_vectors;
/* main dequeue loop (framing elided): empty when head catches tail */
429 if (fq->head == fq->tail)
431 fq->head_hint = fq->head;
/* peek the next ring element */
435 elt = fq->elts + ((fq->head + 1) & (fq->nelts - 1));
439 fq->head_hint = fq->head;
443 from = elt->buffer_index;
444 msg_type = elt->msg_type;
446 ASSERT (msg_type == VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME);
447 ASSERT (elt->n_vectors <= VLIB_FRAME_SIZE);
449 f = vlib_get_frame_to_node (vm, fqm->node_index);
451 /* If the first vector is traced, set the frame trace flag */
452 b = vlib_get_buffer (vm, from[0]);
453 if (b->flags & VLIB_BUFFER_IS_TRACED)
454 f->frame_flags |= VLIB_NODE_FLAG_TRACE;
/* copy the handed-off buffer indices into the new frame and dispatch it */
456 to = vlib_frame_vector_args (f);
458 vlib_buffer_copy_indices (to, from, elt->n_vectors);
460 vectors += elt->n_vectors;
461 f->n_vectors = elt->n_vectors;
462 vlib_put_frame_to_node (vm, fqm->node_index, f);
/* poison the consumed element so stale reuse is detectable */
466 elt->msg_type = 0xfefefefe;
467 CLIB_MEMORY_BARRIER ();
472 * Limit the number of packets pushed into the graph
/* stop draining once the per-call vector budget is reached */
474 if (vectors >= fq->vector_threshold)
476 fq->head_hint = fq->head;
485 #ifndef CLIB_MARCH_VARIANT
/* global table of CPU-variant-selected buffer function pointers; compiled
 * once, in the default (non-march-variant) translation unit only */
486 vlib_buffer_func_main_t vlib_buffer_func_main;
/* Init function: resolve the best CLIB_MARCH variant of each buffer
 * function for the running CPU and publish the pointers in
 * vlib_buffer_func_main so hot-path callers dispatch indirectly. */
488 static clib_error_t *
489 vlib_buffer_funcs_init (vlib_main_t *vm)
491 vlib_buffer_func_main_t *bfm = &vlib_buffer_func_main;
492 bfm->buffer_enqueue_to_next_fn =
493 CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_next_fn);
494 bfm->buffer_enqueue_to_single_next_fn =
495 CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_single_next_fn);
496 bfm->buffer_enqueue_to_thread_fn =
497 CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_fn);
498 bfm->frame_queue_dequeue_fn =
499 CLIB_MARCH_FN_POINTER (vlib_frame_queue_dequeue_fn);
503 VLIB_INIT_FUNCTION (vlib_buffer_funcs_init);