/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * buffer_node.h: VLIB buffer handling node helper macros/inlines
 *
 * Copyright (c) 2008 Eliot Dresselhaus
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#ifndef included_vlib_buffer_node_h
#define included_vlib_buffer_node_h

/** \file
    vlib buffer/node functions
*/
/** \brief Finish enqueueing two buffers forward in the graph.
 Standard dual loop boilerplate element. This is a MACRO,
 with MULTIPLE SIDE EFFECTS. In the ideal case,
 <code>next_index == next0 == next1</code>,
 which means that the speculative enqueue at the top of the dual loop
 has correctly dealt with both packets. In that case, the macro does
 nothing at all. (A usage sketch follows the single-buffer macro below.)

 @param vm vlib_main_t pointer, varies by thread
 @param node current node vlib_node_runtime_t pointer
 @param next_index speculated next index used for both packets
 @param to_next speculated vector pointer used for both packets
 @param n_left_to_next number of slots left in speculated vector
 @param bi0 first buffer index
 @param bi1 second buffer index
 @param next0 actual next index to be used for the first packet
 @param next1 actual next index to be used for the second packet

 @return @c next_index -- speculative next index to be used for future packets
 @return @c to_next -- speculative frame to be used for future packets
 @return @c n_left_to_next -- number of slots left in speculative frame
*/
#define vlib_validate_buffer_enqueue_x2(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,next0,next1) \
do {                                                                    \
  int enqueue_code = (next0 != next_index) + 2*(next1 != next_index);  \
                                                                        \
  if (PREDICT_FALSE (enqueue_code != 0))                                \
    {                                                                   \
      switch (enqueue_code)                                             \
        {                                                               \
        case 1:                                                         \
          /* A B A */                                                   \
          to_next[-2] = bi1;                                            \
          to_next -= 1;                                                 \
          n_left_to_next += 1;                                          \
          vlib_set_next_frame_buffer (vm, node, next0, bi0);            \
          break;                                                        \
        case 2:                                                         \
          /* A A B */                                                   \
          to_next -= 1;                                                 \
          n_left_to_next += 1;                                          \
          vlib_set_next_frame_buffer (vm, node, next1, bi1);            \
          break;                                                        \
        case 3:                                                         \
          /* A B B or A B C */                                          \
          to_next -= 2;                                                 \
          n_left_to_next += 2;                                          \
          vlib_set_next_frame_buffer (vm, node, next0, bi0);            \
          vlib_set_next_frame_buffer (vm, node, next1, bi1);            \
          if (next0 == next1)                                           \
            {                                                           \
              vlib_put_next_frame (vm, node, next_index,                \
                                   n_left_to_next);                     \
              next_index = next1;                                       \
              vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
            }                                                           \
        }                                                               \
    }                                                                   \
} while (0)
/** \brief Finish enqueueing four buffers forward in the graph.
 Standard quad loop boilerplate element. This is a MACRO,
 with MULTIPLE SIDE EFFECTS. In the ideal case,
 <code>next_index == next0 == next1 == next2 == next3</code>,
 which means that the speculative enqueue at the top of the quad loop
 has correctly dealt with all four packets. In that case, the macro does
 nothing at all. (A usage sketch follows the single-buffer macro below.)

 @param vm vlib_main_t pointer, varies by thread
 @param node current node vlib_node_runtime_t pointer
 @param next_index speculated next index used for all four packets
 @param to_next speculated vector pointer used for all four packets
 @param n_left_to_next number of slots left in speculated vector
 @param bi0 first buffer index
 @param bi1 second buffer index
 @param bi2 third buffer index
 @param bi3 fourth buffer index
 @param next0 actual next index to be used for the first packet
 @param next1 actual next index to be used for the second packet
 @param next2 actual next index to be used for the third packet
 @param next3 actual next index to be used for the fourth packet

 @return @c next_index -- speculative next index to be used for future packets
 @return @c to_next -- speculative frame to be used for future packets
 @return @c n_left_to_next -- number of slots left in speculative frame
*/
#define vlib_validate_buffer_enqueue_x4(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,bi2,bi3,next0,next1,next2,next3) \
do {                                                                    \
  /* After the fact: check the [speculative] enqueue to "next" */      \
  u32 fix_speculation = (next_index ^ next0) | (next_index ^ next1)    \
    | (next_index ^ next2) | (next_index ^ next3);                     \
  if (PREDICT_FALSE (fix_speculation))                                  \
    {                                                                   \
      /* Undo the whole speculative enqueue */                          \
      to_next -= 4;                                                     \
      n_left_to_next += 4;                                              \
      /* If bi0 belongs to "next", send it there */                     \
      if (next_index == next0)                                          \
        {                                                               \
          to_next[0] = bi0;                                             \
          to_next++;                                                    \
          n_left_to_next--;                                             \
        }                                                               \
      else /* send it where it needs to go */                           \
        vlib_set_next_frame_buffer (vm, node, next0, bi0);              \
      if (next_index == next1)                                          \
        {                                                               \
          to_next[0] = bi1;                                             \
          to_next++;                                                    \
          n_left_to_next--;                                             \
        }                                                               \
      else                                                              \
        vlib_set_next_frame_buffer (vm, node, next1, bi1);              \
      if (next_index == next2)                                          \
        {                                                               \
          to_next[0] = bi2;                                             \
          to_next++;                                                    \
          n_left_to_next--;                                             \
        }                                                               \
      else                                                              \
        vlib_set_next_frame_buffer (vm, node, next2, bi2);              \
      if (next_index == next3)                                          \
        {                                                               \
          to_next[0] = bi3;                                             \
          to_next++;                                                    \
          n_left_to_next--;                                             \
        }                                                               \
      else                                                              \
        {                                                               \
          vlib_set_next_frame_buffer (vm, node, next3, bi3);            \
          /* Change speculation: last 2 packets went to the same node */ \
          if (next2 == next3)                                           \
            {                                                           \
              vlib_put_next_frame (vm, node, next_index, n_left_to_next); \
              next_index = next3;                                       \
              vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
            }                                                           \
        }                                                               \
    }                                                                   \
} while (0)
/** \brief Finish enqueueing one buffer forward in the graph.
 Standard single loop boilerplate element. This is a MACRO,
 with MULTIPLE SIDE EFFECTS. In the ideal case,
 <code>next_index == next0</code>,
 which means that the speculative enqueue at the top of the single loop
 has correctly dealt with the packet in hand. In that case, the macro does
 nothing at all.

 @param vm vlib_main_t pointer, varies by thread
 @param node current node vlib_node_runtime_t pointer
 @param next_index speculated next index used for the packet
 @param to_next speculated vector pointer used for the packet
 @param n_left_to_next number of slots left in speculated vector
 @param bi0 buffer index
 @param next0 actual next index to be used for the packet

 @return @c next_index -- speculative next index to be used for future packets
 @return @c to_next -- speculative frame to be used for future packets
 @return @c n_left_to_next -- number of slots left in speculative frame
*/
#define vlib_validate_buffer_enqueue_x1(vm,node,next_index,to_next,n_left_to_next,bi0,next0) \
do {                                                                    \
  if (PREDICT_FALSE (next0 != next_index))                              \
    {                                                                   \
      vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1);  \
      next_index = next0;                                               \
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
                                                                        \
      to_next[0] = bi0;                                                 \
      to_next += 1;                                                     \
      n_left_to_next -= 1;                                              \
    }                                                                   \
} while (0)
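
/*
 * Illustrative usage sketch (added by the editor, not part of the original
 * header): a minimal dual/single loop dispatch function of the shape these
 * macros are designed for.  The node, its two next indices (0 and 1) and
 * the length-based classification are hypothetical placeholders; a real
 * node registers its next nodes with VLIB_REGISTER_NODE and does real work
 * on each buffer.
 */
static_always_inline uword
example_dual_loop_node_fn_sketch (vlib_main_t * vm,
                                  vlib_node_runtime_t * node,
                                  vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next, next_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      /* Speculate: packets go to the same next node as last time. */
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from >= 2 && n_left_to_next >= 2)
        {
          vlib_buffer_t *b0, *b1;
          u32 bi0, bi1, next0, next1;

          /* Speculatively enqueue both buffers to next_index... */
          bi0 = to_next[0] = from[0];
          bi1 = to_next[1] = from[1];
          from += 2;
          to_next += 2;
          n_left_from -= 2;
          n_left_to_next -= 2;

          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);

          /* Hypothetical classification: next 0 for short packets. */
          next0 = b0->current_length < 64 ? 0 : 1;
          next1 = b1->current_length < 64 ? 0 : 1;

          /* ...then repair the speculation if either guess was wrong. */
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, bi1, next0, next1);
        }

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t *b0;
          u32 bi0, next0;

          bi0 = to_next[0] = from[0];
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          next0 = b0->current_length < 64 ? 0 : 1;

          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}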
/** \brief Generic dual/single loop node dispatch helper.
    Calls the supplied callbacks to pick a next node for each buffer and
    uses the speculative enqueue macros above to build next frames. */
always_inline uword
generic_buffer_node_inline (vlib_main_t * vm,
                            vlib_node_runtime_t * node,
                            vlib_frame_t * frame,
                            uword sizeof_trace,
                            void *opaque1,
                            uword opaque2,
                            void (*two_buffers) (vlib_main_t * vm,
                                                 void *opaque1,
                                                 uword opaque2,
                                                 vlib_buffer_t * b0,
                                                 vlib_buffer_t * b1,
                                                 u32 * next0, u32 * next1),
                            void (*one_buffer) (vlib_main_t * vm,
                                                void *opaque1, uword opaque2,
                                                vlib_buffer_t * b0,
                                                u32 * next0))
{
  u32 n_left_from, *from, *to_next;
  u32 next_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
                                   /* stride */ 1, sizeof_trace);

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          vlib_buffer_t *p0, *p1;
          u32 pi0, next0;
          u32 pi1, next1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t *p2, *p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            CLIB_PREFETCH (p2->data, 64, LOAD);
            CLIB_PREFETCH (p3->data, 64, LOAD);
          }

          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];
          from += 2;
          to_next += 2;
          n_left_from -= 2;
          n_left_to_next -= 2;

          p0 = vlib_get_buffer (vm, pi0);
          p1 = vlib_get_buffer (vm, pi1);

          two_buffers (vm, opaque1, opaque2, p0, p1, &next0, &next1);

          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           pi0, pi1, next0, next1);
        }

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t *p0;
          u32 pi0, next0;

          pi0 = from[0];
          to_next[0] = pi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          p0 = vlib_get_buffer (vm, pi0);

          one_buffer (vm, opaque1, opaque2, p0, &next0);

          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           pi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}
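
/*
 * Illustrative usage sketch (added by the editor, not part of the original
 * header): wrapping generic_buffer_node_inline with per-buffer callbacks.
 * The callback names, the "send everything to next index 0" policy and the
 * zero sizeof_trace are hypothetical; a real node passes the size of its
 * trace record and derives next0/next1 from the packet contents.
 */
static inline void
example_one_buffer_cb_sketch (vlib_main_t * vm, void *opaque1, uword opaque2,
                              vlib_buffer_t * b0, u32 * next0)
{
  next0[0] = 0;                 /* hypothetical: single next node */
}

static inline void
example_two_buffers_cb_sketch (vlib_main_t * vm, void *opaque1,
                               uword opaque2, vlib_buffer_t * b0,
                               vlib_buffer_t * b1, u32 * next0, u32 * next1)
{
  next0[0] = 0;
  next1[0] = 0;
}

static_always_inline uword
example_generic_node_fn_sketch (vlib_main_t * vm, vlib_node_runtime_t * node,
                                vlib_frame_t * frame)
{
  return generic_buffer_node_inline (vm, node, frame,
                                     /* sizeof_trace */ 0,
                                     /* opaque1 */ 0, /* opaque2 */ 0,
                                     example_two_buffers_cb_sketch,
                                     example_one_buffer_cb_sketch);
}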
/** \brief Enqueue a vector of buffers to their per-packet next nodes.
    Takes parallel arrays of buffer indices and next indices and copies
    runs of buffers bound for the same next node in bulk. */
static_always_inline void
vlib_buffer_enqueue_to_next (vlib_main_t * vm, vlib_node_runtime_t * node,
                             u32 * buffers, u16 * nexts, uword count)
{
  u32 *to_next, n_left_to_next, max;
  u16 next_index;

  next_index = nexts[0];
  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
  max = clib_min (n_left_to_next, count);

  while (count)
    {
      u32 n_enqueued;
      if ((nexts[0] != next_index) || n_left_to_next == 0)
        {
          vlib_put_next_frame (vm, node, next_index, n_left_to_next);
          next_index = nexts[0];
          vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
          max = clib_min (n_left_to_next, count);
        }
#if defined(CLIB_HAVE_VEC512)
      u16x32 next32 = u16x32_load_unaligned (nexts);
      next32 = (next32 == u16x32_splat (next32[0]));
      u64 bitmap = u16x32_msb_mask (next32);
      n_enqueued = count_trailing_zeros (~bitmap);
#elif defined(CLIB_HAVE_VEC256)
      u16x16 next16 = u16x16_load_unaligned (nexts);
      next16 = (next16 == u16x16_splat (next16[0]));
      u64 bitmap = u8x32_msb_mask ((u8x32) next16);
      n_enqueued = count_trailing_zeros (~bitmap) / 2;
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
      u16x8 next8 = u16x8_load_unaligned (nexts);
      next8 = (next8 == u16x8_splat (next8[0]));
      u64 bitmap = u8x16_msb_mask ((u8x16) next8);
      n_enqueued = count_trailing_zeros (~bitmap) / 2;
#else
      u16 x = 0;
      x |= next_index ^ nexts[1];
      x |= next_index ^ nexts[2];
      x |= next_index ^ nexts[3];
      n_enqueued = (x == 0) ? 4 : 1;
#endif

      if (PREDICT_FALSE (n_enqueued > max))
        n_enqueued = max;

#ifdef CLIB_HAVE_VEC512
      if (n_enqueued >= 32)
        {
          clib_memcpy (to_next, buffers, 32 * sizeof (u32));
          nexts += 32;
          to_next += 32;
          buffers += 32;
          n_left_to_next -= 32;
          count -= 32;
          max -= 32;
          continue;
        }
#endif

#ifdef CLIB_HAVE_VEC256
      if (n_enqueued >= 16)
        {
          clib_memcpy (to_next, buffers, 16 * sizeof (u32));
          nexts += 16;
          to_next += 16;
          buffers += 16;
          n_left_to_next -= 16;
          count -= 16;
          max -= 16;
          continue;
        }
#endif

#ifdef CLIB_HAVE_VEC128
      if (n_enqueued >= 8)
        {
          clib_memcpy (to_next, buffers, 8 * sizeof (u32));
          nexts += 8;
          to_next += 8;
          buffers += 8;
          n_left_to_next -= 8;
          count -= 8;
          max -= 8;
          continue;
        }
#endif

      if (n_enqueued >= 4)
        {
          clib_memcpy (to_next, buffers, 4 * sizeof (u32));
          nexts += 4;
          to_next += 4;
          buffers += 4;
          n_left_to_next -= 4;
          count -= 4;
          max -= 4;
          continue;
        }

      /* copy one buffer index */
      to_next[0] = buffers[0];

      /* next */
      nexts += 1;
      to_next += 1;
      buffers += 1;
      n_left_to_next -= 1;
      count -= 1;
      max -= 1;
    }
  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
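
/*
 * Illustrative usage sketch (added by the editor, not part of the original
 * header): the "classify first, enqueue once" style that
 * vlib_buffer_enqueue_to_next supports.  The node fills a per-frame nexts[]
 * array and hands the whole vector over in one call; the length-based
 * classification and the two next indices are hypothetical.
 */
static_always_inline uword
example_enqueue_to_next_node_fn_sketch (vlib_main_t * vm,
                                        vlib_node_runtime_t * node,
                                        vlib_frame_t * frame)
{
  u32 *buffers = vlib_frame_vector_args (frame);
  u32 *from = buffers;
  u32 n_left = frame->n_vectors;
  u16 nexts[VLIB_FRAME_SIZE], *next = nexts;

  while (n_left > 0)
    {
      vlib_buffer_t *b0 = vlib_get_buffer (vm, from[0]);

      /* Hypothetical classification: next 0 for short packets. */
      next[0] = b0->current_length < 64 ? 0 : 1;

      from += 1;
      next += 1;
      n_left -= 1;
    }

  /* One call replaces the per-packet speculative enqueue macros. */
  vlib_buffer_enqueue_to_next (vm, node, buffers, nexts, frame->n_vectors);
  return frame->n_vectors;
}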
/** \brief Hand a vector of buffers off to other threads via frame queues.
    Buffers destined for a congested thread are dropped (and freed) when
    drop_on_congestion is set.  Returns the number of packets enqueued. */
static_always_inline u32
vlib_buffer_enqueue_to_thread (vlib_main_t * vm, u32 frame_queue_index,
                               u32 * buffer_indices, u16 * thread_indices,
                               u32 n_packets, int drop_on_congestion)
{
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  vlib_frame_queue_main_t *fqm;
  vlib_frame_queue_per_thread_data_t *ptd;
  u32 n_left = n_packets;
  u32 drop_list[VLIB_FRAME_SIZE], *dbi = drop_list, n_drop = 0;
  vlib_frame_queue_elt_t *hf = 0;
  u32 n_left_to_next_thread = 0, *to_next_thread = 0;
  u32 next_thread_index, current_thread_index = ~0;
  int i;

  fqm = vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
  ptd = vec_elt_at_index (fqm->per_thread_data, vm->thread_index);

  while (n_left)
    {
      next_thread_index = thread_indices[0];

      if (next_thread_index != current_thread_index)
        {
          if (drop_on_congestion &&
              is_vlib_frame_queue_congested
              (frame_queue_index, next_thread_index, fqm->queue_hi_thresh,
               ptd->congested_handoff_queue_by_thread_index))
            {
              dbi[0] = buffer_indices[0];
              dbi++;
              n_drop++;
              goto next;
            }

          if (hf)
            hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;

          hf = vlib_get_worker_handoff_queue_elt (frame_queue_index,
                                                  next_thread_index,
                                                  ptd->handoff_queue_elt_by_thread_index);

          n_left_to_next_thread = VLIB_FRAME_SIZE - hf->n_vectors;
          to_next_thread = &hf->buffer_index[hf->n_vectors];
          current_thread_index = next_thread_index;
        }

      to_next_thread[0] = buffer_indices[0];
      to_next_thread++;
      n_left_to_next_thread--;

      if (n_left_to_next_thread == 0)
        {
          hf->n_vectors = VLIB_FRAME_SIZE;
          vlib_put_frame_queue_elt (hf);
          current_thread_index = ~0;
          ptd->handoff_queue_elt_by_thread_index[next_thread_index] = 0;
          hf = 0;
        }

      /* next */
    next:
      thread_indices += 1;
      buffer_indices += 1;
      n_left -= 1;
    }

  if (hf)
    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;

  /* Ship frames to the thread nodes */
  for (i = 0; i < vec_len (ptd->handoff_queue_elt_by_thread_index); i++)
    {
      if (ptd->handoff_queue_elt_by_thread_index[i])
        {
          hf = ptd->handoff_queue_elt_by_thread_index[i];
          /*
           * It works better to let the handoff node
           * rate-adapt, always ship the handoff queue element.
           */
          if (1 || hf->n_vectors == hf->last_n_vectors)
            {
              vlib_put_frame_queue_elt (hf);
              ptd->handoff_queue_elt_by_thread_index[i] = 0;
            }
          else
            hf->last_n_vectors = hf->n_vectors;
        }
      ptd->congested_handoff_queue_by_thread_index[i] =
        (vlib_frame_queue_t *) (~0);
    }

  if (drop_on_congestion && n_drop)
    vlib_buffer_free (vm, drop_list, n_drop);

  return n_packets - n_drop;
}
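
/*
 * Illustrative usage sketch (added by the editor, not part of the original
 * header): a worker handoff path built on vlib_buffer_enqueue_to_thread.
 * The spreading rule and the drop-on-congestion choice are hypothetical;
 * a real node obtains frame_queue_index from vlib_frame_queue_main_init
 * and usually picks the target thread from a flow hash.
 */
static_always_inline u32
example_handoff_sketch (vlib_main_t * vm, vlib_frame_t * frame,
                        u32 frame_queue_index)
{
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  u32 *buffer_indices = vlib_frame_vector_args (frame);
  u32 *from = buffer_indices;
  u32 n_packets = frame->n_vectors, n_left = n_packets;
  u32 n_workers = tm->n_vlib_mains - 1;
  u16 thread_indices[VLIB_FRAME_SIZE], *ti = thread_indices;

  while (n_left > 0)
    {
      /* Hypothetical spreading rule: spread by buffer index over workers,
         falling back to thread 0 when no workers are configured. */
      ti[0] = n_workers ? 1 + (from[0] % n_workers) : 0;
      from += 1;
      ti += 1;
      n_left -= 1;
    }

  /* Returns the number actually handed off; congested packets are freed. */
  return vlib_buffer_enqueue_to_thread (vm, frame_queue_index,
                                        buffer_indices, thread_indices,
                                        n_packets,
                                        /* drop_on_congestion */ 1);
}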
#endif /* included_vlib_buffer_node_h */
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */