/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * buffer_node.h: VLIB buffer handling node helper macros/inlines
 *
 * Copyright (c) 2008 Eliot Dresselhaus
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#ifndef included_vlib_buffer_node_h
#define included_vlib_buffer_node_h

/** \file
    vlib buffer/node functions
*/
/** \brief Finish enqueueing two buffers forward in the graph.
 Standard dual loop boilerplate element. This is a MACRO,
 with MULTIPLE SIDE EFFECTS. In the ideal case,
 <code>next_index == next0 == next1</code>,
 which means that the speculative enqueue at the top of the dual loop
 has correctly dealt with both packets. In that case, the macro does
 nothing at all.

 @param vm vlib_main_t pointer, varies by thread
 @param node current node vlib_node_runtime_t pointer
 @param next_index speculated next index used for both packets
 @param to_next speculated vector pointer used for both packets
 @param n_left_to_next number of slots left in speculated vector
 @param bi0 first buffer index
 @param bi1 second buffer index
 @param next0 actual next index to be used for the first packet
 @param next1 actual next index to be used for the second packet

 @return @c next_index -- speculative next index to be used for future packets
 @return @c to_next -- speculative frame to be used for future packets
 @return @c n_left_to_next -- number of slots left in speculative frame
*/
#define vlib_validate_buffer_enqueue_x2(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,next0,next1) \
do {                                                                    \
  /* 0 = both speculated correctly, 1 = bi0 wrong, 2 = bi1 wrong, 3 = both wrong */ \
  int enqueue_code = (next0 != next_index) + 2*(next1 != next_index);   \
                                                                        \
  if (PREDICT_FALSE (enqueue_code != 0))                                \
    {                                                                   \
      switch (enqueue_code)                                             \
        {                                                               \
        case 1:                                                         \
          /* A B A: bi1 stays, bi0 is re-routed */                      \
          to_next[-2] = bi1;                                            \
          to_next -= 1;                                                 \
          n_left_to_next += 1;                                          \
          vlib_set_next_frame_buffer (vm, node, next0, bi0);            \
          break;                                                        \
        case 2:                                                         \
          /* A A B: bi0 stays, bi1 is re-routed */                      \
          to_next -= 1;                                                 \
          n_left_to_next += 1;                                          \
          vlib_set_next_frame_buffer (vm, node, next1, bi1);            \
          break;                                                        \
        case 3:                                                         \
          /* A B B or A B C */                                          \
          to_next -= 2;                                                 \
          n_left_to_next += 2;                                          \
          vlib_set_next_frame_buffer (vm, node, next0, bi0);            \
          vlib_set_next_frame_buffer (vm, node, next1, bi1);            \
          if (next0 == next1)                                           \
            {                                                           \
              vlib_put_next_frame (vm, node, next_index,                \
                                   n_left_to_next);                     \
              next_index = next1;                                       \
              vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
            }                                                           \
          break;                                                        \
        }                                                               \
    }                                                                   \
} while (0)
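
/*
 * Usage sketch (illustrative, not part of this header): a typical dual
 * loop speculatively copies both buffer indices into the current
 * "to_next" frame, computes the real next indices, and lets the macro
 * above repair the frame if the speculation was wrong. The classify()
 * helper is hypothetical.
 *
 *   while (n_left_from >= 2 && n_left_to_next >= 2)
 *     {
 *       u32 bi0, bi1, next0, next1;
 *
 *       // speculative enqueue: assume both packets follow next_index
 *       to_next[0] = bi0 = from[0];
 *       to_next[1] = bi1 = from[1];
 *       from += 2; to_next += 2;
 *       n_left_from -= 2; n_left_to_next -= 2;
 *
 *       // hypothetical per-packet decision
 *       next0 = classify (vlib_get_buffer (vm, bi0));
 *       next1 = classify (vlib_get_buffer (vm, bi1));
 *
 *       // fix up the frame if next0/next1 differ from next_index
 *       vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
 *                                        to_next, n_left_to_next,
 *                                        bi0, bi1, next0, next1);
 *     }
 */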
/** \brief Finish enqueueing four buffers forward in the graph.
 Standard quad loop boilerplate element. This is a MACRO,
 with MULTIPLE SIDE EFFECTS. In the ideal case,
 <code>next_index == next0 == next1 == next2 == next3</code>,
 which means that the speculative enqueue at the top of the quad loop
 has correctly dealt with all four packets. In that case, the macro does
 nothing at all.

 @param vm vlib_main_t pointer, varies by thread
 @param node current node vlib_node_runtime_t pointer
 @param next_index speculated next index used for all four packets
 @param to_next speculated vector pointer used for all four packets
 @param n_left_to_next number of slots left in speculated vector
 @param bi0 first buffer index
 @param bi1 second buffer index
 @param bi2 third buffer index
 @param bi3 fourth buffer index
 @param next0 actual next index to be used for the first packet
 @param next1 actual next index to be used for the second packet
 @param next2 actual next index to be used for the third packet
 @param next3 actual next index to be used for the fourth packet

 @return @c next_index -- speculative next index to be used for future packets
 @return @c to_next -- speculative frame to be used for future packets
 @return @c n_left_to_next -- number of slots left in speculative frame
*/
#define vlib_validate_buffer_enqueue_x4(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,bi2,bi3,next0,next1,next2,next3) \
do {                                                                    \
  /* After the fact: check the [speculative] enqueue to "next" */      \
  u32 fix_speculation = (next_index ^ next0) | (next_index ^ next1)    \
    | (next_index ^ next2) | (next_index ^ next3);                     \
  if (PREDICT_FALSE (fix_speculation))                                 \
    {                                                                   \
      /* Undo the speculative enqueue of all four packets */           \
      to_next -= 4;                                                     \
      n_left_to_next += 4;                                              \
                                                                        \
      /* If bi0 belongs to "next", send it there */                     \
      if (next_index == next0)                                          \
        { to_next[0] = bi0; to_next++; n_left_to_next--; }              \
      else /* send it where it needs to go */                           \
        vlib_set_next_frame_buffer (vm, node, next0, bi0);              \
                                                                        \
      if (next_index == next1)                                          \
        { to_next[0] = bi1; to_next++; n_left_to_next--; }              \
      else                                                              \
        vlib_set_next_frame_buffer (vm, node, next1, bi1);              \
                                                                        \
      if (next_index == next2)                                          \
        { to_next[0] = bi2; to_next++; n_left_to_next--; }              \
      else                                                              \
        vlib_set_next_frame_buffer (vm, node, next2, bi2);              \
                                                                        \
      if (next_index == next3)                                          \
        { to_next[0] = bi3; to_next++; n_left_to_next--; }              \
      else                                                              \
        vlib_set_next_frame_buffer (vm, node, next3, bi3);              \
                                                                        \
      /* Change speculation: last 2 packets went to the same node */   \
      if (next2 == next3)                                               \
        {                                                               \
          vlib_put_next_frame (vm, node, next_index, n_left_to_next);   \
          next_index = next3;                                           \
          vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
        }                                                               \
    }                                                                   \
} while (0)
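
/*
 * Usage sketch (illustrative): a quad loop prologue that pairs with
 * this macro speculatively enqueues four packets at once; the XOR
 * accumulation above then detects any mispredicted next index with a
 * single branch. classify() and the loop bounds shown are assumptions.
 *
 *   while (n_left_from >= 8 && n_left_to_next >= 4)
 *     {
 *       u32 bi0, bi1, bi2, bi3, next0, next1, next2, next3;
 *
 *       // speculative enqueue of all four indices
 *       to_next[0] = bi0 = from[0];
 *       to_next[1] = bi1 = from[1];
 *       to_next[2] = bi2 = from[2];
 *       to_next[3] = bi3 = from[3];
 *       from += 4; to_next += 4;
 *       n_left_from -= 4; n_left_to_next -= 4;
 *
 *       next0 = classify (vlib_get_buffer (vm, bi0));
 *       next1 = classify (vlib_get_buffer (vm, bi1));
 *       next2 = classify (vlib_get_buffer (vm, bi2));
 *       next3 = classify (vlib_get_buffer (vm, bi3));
 *
 *       vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
 *                                        to_next, n_left_to_next,
 *                                        bi0, bi1, bi2, bi3,
 *                                        next0, next1, next2, next3);
 *     }
 */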
/** \brief Finish enqueueing one buffer forward in the graph.
 Standard single loop boilerplate element. This is a MACRO,
 with MULTIPLE SIDE EFFECTS. In the ideal case,
 <code>next_index == next0</code>,
 which means that the speculative enqueue at the top of the single loop
 has correctly dealt with the packet in hand. In that case, the macro does
 nothing at all.

 @param vm vlib_main_t pointer, varies by thread
 @param node current node vlib_node_runtime_t pointer
 @param next_index speculated next index used for the packet
 @param to_next speculated vector pointer used for the packet
 @param n_left_to_next number of slots left in speculated vector
 @param bi0 buffer index
 @param next0 actual next index to be used for the packet

 @return @c next_index -- speculative next index to be used for future packets
 @return @c to_next -- speculative frame to be used for future packets
 @return @c n_left_to_next -- number of slots left in speculative frame
*/
#define vlib_validate_buffer_enqueue_x1(vm,node,next_index,to_next,n_left_to_next,bi0,next0) \
do {                                                                    \
  if (PREDICT_FALSE (next0 != next_index))                              \
    {                                                                   \
      vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1);   \
      next_index = next0;                                               \
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
                                                                        \
      to_next[0] = bi0;                                                 \
      to_next += 1;                                                     \
      n_left_to_next -= 1;                                              \
    }                                                                   \
} while (0)
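
/*
 * Usage sketch (illustrative): the single-packet cleanup loop that
 * drains whatever the dual/quad loops leave over; see
 * generic_buffer_node_inline below for this pattern in full.
 * classify() is hypothetical.
 *
 *   while (n_left_from > 0 && n_left_to_next > 0)
 *     {
 *       u32 bi0, next0;
 *
 *       // speculative enqueue of a single index
 *       to_next[0] = bi0 = from[0];
 *       from += 1; to_next += 1;
 *       n_left_from -= 1; n_left_to_next -= 1;
 *
 *       next0 = classify (vlib_get_buffer (vm, bi0));
 *
 *       vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
 *                                        to_next, n_left_to_next,
 *                                        bi0, next0);
 *     }
 */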
always_inline uword
generic_buffer_node_inline (vlib_main_t * vm,
			    vlib_node_runtime_t * node,
			    vlib_frame_t * frame,
			    uword sizeof_trace,
			    void *opaque1,
			    uword opaque2,
			    void (*two_buffers) (vlib_main_t * vm,
						 void *opaque1,
						 uword opaque2,
						 vlib_buffer_t * b0,
						 vlib_buffer_t * b1,
						 u32 * next0, u32 * next1),
			    void (*one_buffer) (vlib_main_t * vm,
						void *opaque1, uword opaque2,
						vlib_buffer_t * b0,
						u32 * next0))
{
  u32 n_left_from, *from, *to_next;
  u32 next_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
				   /* stride */ 1, sizeof_trace);

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from >= 4 && n_left_to_next >= 2)
	{
	  vlib_buffer_t *p0, *p1;
	  u32 pi0, next0;
	  u32 pi1, next1;

	  /* Prefetch next iteration. */
	  {
	    vlib_buffer_t *p2, *p3;

	    p2 = vlib_get_buffer (vm, from[2]);
	    p3 = vlib_get_buffer (vm, from[3]);

	    vlib_prefetch_buffer_header (p2, LOAD);
	    vlib_prefetch_buffer_header (p3, LOAD);

	    CLIB_PREFETCH (p2->data, 64, LOAD);
	    CLIB_PREFETCH (p3->data, 64, LOAD);
	  }

	  /* Speculatively enqueue both packets to next_index. */
	  pi0 = to_next[0] = from[0];
	  pi1 = to_next[1] = from[1];
	  from += 2;
	  to_next += 2;
	  n_left_from -= 2;
	  n_left_to_next -= 2;

	  p0 = vlib_get_buffer (vm, pi0);
	  p1 = vlib_get_buffer (vm, pi1);

	  two_buffers (vm, opaque1, opaque2, p0, p1, &next0, &next1);

	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
					   to_next, n_left_to_next,
					   pi0, pi1, next0, next1);
	}

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  vlib_buffer_t *p0;
	  u32 pi0, next0;

	  pi0 = to_next[0] = from[0];
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  p0 = vlib_get_buffer (vm, pi0);

	  one_buffer (vm, opaque1, opaque2, p0, &next0);

	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next,
					   pi0, next0);
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}
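
/*
 * Usage sketch (illustrative): a node function can delegate its whole
 * dispatch loop to generic_buffer_node_inline by supplying the two
 * per-packet callbacks. my_node_fn, my_two_buffers, my_one_buffer and
 * my_trace_t are hypothetical names.
 *
 *   static uword
 *   my_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
 *               vlib_frame_t * frame)
 *   {
 *     return generic_buffer_node_inline (vm, node, frame,
 *                                        sizeof (my_trace_t),
 *                                        0, 0,  // opaque1, opaque2
 *                                        my_two_buffers, my_one_buffer);
 *   }
 */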
static_always_inline void
vlib_buffer_enqueue_to_next (vlib_main_t * vm, vlib_node_runtime_t * node,
			     u32 * buffers, u16 * nexts, uword count)
{
  u32 *to_next, n_left_to_next, max;
  u16 next_index;

  next_index = nexts[0];
  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
  max = clib_min (n_left_to_next, count);

  while (count)
    {
      u32 n_enqueued;
      if ((nexts[0] != next_index) || n_left_to_next == 0)
	{
	  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
	  next_index = nexts[0];
	  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
	  max = clib_min (n_left_to_next, count);
	}
      /* Count how many consecutive entries share nexts[0]. */
#if defined(CLIB_HAVE_VEC512)
      u16x32 next32 = CLIB_MEM_OVERFLOW_LOAD (u16x32_load_unaligned, nexts);
      next32 = (next32 == u16x32_splat (next32[0]));
      u64 bitmap = u16x32_msb_mask (next32);
      n_enqueued = count_trailing_zeros (~bitmap);
#elif defined(CLIB_HAVE_VEC256)
      u16x16 next16 = CLIB_MEM_OVERFLOW_LOAD (u16x16_load_unaligned, nexts);
      next16 = (next16 == u16x16_splat (next16[0]));
      u64 bitmap = u8x32_msb_mask ((u8x32) next16);
      n_enqueued = count_trailing_zeros (~bitmap) / 2;
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
      u16x8 next8 = CLIB_MEM_OVERFLOW_LOAD (u16x8_load_unaligned, nexts);
      next8 = (next8 == u16x8_splat (next8[0]));
      u64 bitmap = u8x16_msb_mask ((u8x16) next8);
      n_enqueued = count_trailing_zeros (~bitmap) / 2;
#else
      u16 x = 0;
      x |= next_index ^ nexts[1];
      x |= next_index ^ nexts[2];
      x |= next_index ^ nexts[3];
      n_enqueued = (x == 0) ? 4 : 1;
#endif

      if (PREDICT_FALSE (n_enqueued > max))
	n_enqueued = max;

#ifdef CLIB_HAVE_VEC512
      if (n_enqueued >= 32)
	{
	  vlib_buffer_copy_indices (to_next, buffers, 32);
	  nexts += 32; to_next += 32; buffers += 32;
	  n_left_to_next -= 32; count -= 32; max -= 32;
	  continue;
	}
#endif

#ifdef CLIB_HAVE_VEC256
      if (n_enqueued >= 16)
	{
	  vlib_buffer_copy_indices (to_next, buffers, 16);
	  nexts += 16; to_next += 16; buffers += 16;
	  n_left_to_next -= 16; count -= 16; max -= 16;
	  continue;
	}
#endif

#ifdef CLIB_HAVE_VEC128
      if (n_enqueued >= 8)
	{
	  vlib_buffer_copy_indices (to_next, buffers, 8);
	  nexts += 8; to_next += 8; buffers += 8;
	  n_left_to_next -= 8; count -= 8; max -= 8;
	  continue;
	}
#endif

      if (n_enqueued >= 4)
	{
	  vlib_buffer_copy_indices (to_next, buffers, 4);
	  nexts += 4; to_next += 4; buffers += 4;
	  n_left_to_next -= 4; count -= 4; max -= 4;
	  continue;
	}

      /* copy one buffer index */
      to_next[0] = buffers[0];
      nexts += 1; to_next += 1; buffers += 1;
      n_left_to_next -= 1; count -= 1; max -= 1;
    }

  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
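
/*
 * Usage sketch (illustrative): newer nodes typically fill a parallel
 * array of per-packet next indices for the whole frame, then hand the
 * buffer and next arrays to vlib_buffer_enqueue_to_next in one call,
 * instead of using the speculative-enqueue macros above. classify()
 * is hypothetical.
 *
 *   u32 *from = vlib_frame_vector_args (frame);
 *   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
 *   u32 n_left = frame->n_vectors;
 *
 *   while (n_left > 0)
 *     {
 *       next[0] = classify (vlib_get_buffer (vm, from[0]));
 *       from += 1; next += 1; n_left -= 1;
 *     }
 *
 *   vlib_buffer_enqueue_to_next (vm, node, vlib_frame_vector_args (frame),
 *                                nexts, frame->n_vectors);
 */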
static_always_inline void
vlib_buffer_enqueue_to_single_next (vlib_main_t * vm,
				    vlib_node_runtime_t * node, u32 * buffers,
				    u16 next_index, u32 count)
{
  u32 *to_next, n_left_to_next, n_enq;

  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

  if (PREDICT_TRUE (n_left_to_next >= count))
    {
      vlib_buffer_copy_indices (to_next, buffers, count);
      n_left_to_next -= count;
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
      return;
    }

  n_enq = n_left_to_next;
next:
  vlib_buffer_copy_indices (to_next, buffers, n_enq);
  n_left_to_next -= n_enq;

  if (PREDICT_FALSE (count > n_enq))
    {
      count -= n_enq;
      buffers += n_enq;

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
      n_enq = clib_min (n_left_to_next, count);
      goto next;
    }
  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
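
/*
 * Usage sketch (illustrative): when every packet in the frame goes to
 * the same next node, no per-packet next[] array is needed.
 * MY_NODE_NEXT_LOOKUP is a hypothetical next-index value.
 *
 *   u32 *from = vlib_frame_vector_args (frame);
 *   vlib_buffer_enqueue_to_single_next (vm, node, from,
 *                                       MY_NODE_NEXT_LOOKUP,
 *                                       frame->n_vectors);
 */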
static_always_inline u32
vlib_buffer_enqueue_to_thread (vlib_main_t * vm, u32 frame_queue_index,
			       u32 * buffer_indices, u16 * thread_indices,
			       u32 n_packets, int drop_on_congestion)
{
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  vlib_frame_queue_main_t *fqm;
  vlib_frame_queue_per_thread_data_t *ptd;
  u32 n_left = n_packets;
  u32 drop_list[VLIB_FRAME_SIZE], *dbi = drop_list, n_drop = 0;
  vlib_frame_queue_elt_t *hf = 0;
  u32 n_left_to_next_thread = 0, *to_next_thread = 0;
  u32 next_thread_index, current_thread_index = ~0;
  int i;

  fqm = vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
  ptd = vec_elt_at_index (fqm->per_thread_data, vm->thread_index);

  while (n_left)
    {
      next_thread_index = thread_indices[0];

      if (next_thread_index != current_thread_index)
	{
	  if (drop_on_congestion &&
	      is_vlib_frame_queue_congested
	      (frame_queue_index, next_thread_index, fqm->queue_hi_thresh,
	       ptd->congested_handoff_queue_by_thread_index))
	    {
	      dbi[0] = buffer_indices[0];
	      dbi++;
	      n_drop++;
	      goto next;
	    }

	  if (hf)
	    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;

	  hf = vlib_get_worker_handoff_queue_elt (frame_queue_index,
						  next_thread_index,
						  ptd->handoff_queue_elt_by_thread_index);

	  n_left_to_next_thread = VLIB_FRAME_SIZE - hf->n_vectors;
	  to_next_thread = &hf->buffer_index[hf->n_vectors];
	  current_thread_index = next_thread_index;
	}

      to_next_thread[0] = buffer_indices[0];
      to_next_thread++;
      n_left_to_next_thread--;

      if (n_left_to_next_thread == 0)
	{
	  hf->n_vectors = VLIB_FRAME_SIZE;
	  vlib_put_frame_queue_elt (hf);
	  vlib_get_main_by_index (current_thread_index)->check_frame_queues =
	    1;
	  current_thread_index = ~0;
	  ptd->handoff_queue_elt_by_thread_index[next_thread_index] = 0;
	  hf = 0;
	}

    next:
      buffer_indices += 1;
      thread_indices += 1;
      n_left -= 1;
    }

  if (hf)
    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;

  /* Ship frames to the thread nodes */
  for (i = 0; i < vec_len (ptd->handoff_queue_elt_by_thread_index); i++)
    {
      if (ptd->handoff_queue_elt_by_thread_index[i])
	{
	  hf = ptd->handoff_queue_elt_by_thread_index[i];
	  /*
	   * It works better to let the handoff node
	   * rate-adapt, always ship the handoff queue element.
	   */
	  if (1 || hf->n_vectors == hf->last_n_vectors)
	    {
	      vlib_put_frame_queue_elt (hf);
	      vlib_get_main_by_index (i)->check_frame_queues = 1;
	      ptd->handoff_queue_elt_by_thread_index[i] = 0;
	    }
	  else
	    hf->last_n_vectors = hf->n_vectors;
	}
      ptd->congested_handoff_queue_by_thread_index[i] =
	(vlib_frame_queue_t *) (~0);
    }

  if (drop_on_congestion && n_drop)
    vlib_buffer_free (vm, drop_list, n_drop);

  return n_packets - n_drop;
}
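
/*
 * Usage sketch (illustrative): a handoff node computes a destination
 * thread per packet (e.g. from a flow hash) and ships everything in
 * one call. compute_thread_index() and the registered
 * frame_queue_index are hypothetical.
 *
 *   u32 *from = vlib_frame_vector_args (frame);
 *   u16 threads[VLIB_FRAME_SIZE];
 *   u32 i, n_enq;
 *
 *   for (i = 0; i < frame->n_vectors; i++)
 *     threads[i] = compute_thread_index (vlib_get_buffer (vm, from[i]));
 *
 *   n_enq = vlib_buffer_enqueue_to_thread (vm, frame_queue_index, from,
 *                                          threads, frame->n_vectors,
 *                                          1); // drop_on_congestion
 */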
#endif /* included_vlib_buffer_node_h */
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */