/*
 * vnet/pipeline.h: software pipeline
 *
 * Copyright (c) 2012 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * Usage example:
 *
 *  #define NSTAGES 3 (or whatever)
 *
 *  <Define pipeline stages>
 *
 *  #include <vnet/pipeline.h>
 *
 *  static uword my_node_fn (vlib_main_t * vm,
 *                           vlib_node_runtime_t * node,
 *                           vlib_frame_t * frame)
 *  {
 *      return dispatch_pipeline (vm, node, frame);
 *  }
 */
37 #error files which #include <vnet/pipeline.h> must define NSTAGES
/* Allow the including file to override the inlining qualifier. */
#ifndef STAGE_INLINE
#define STAGE_INLINE inline
#endif
/*
 * A prefetch stride of 2 is quasi-equivalent to doubling the number
 * of stages with every other pipeline stage empty.
 */
50 * This is a typical first pipeline stage, which prefetches
51 * buffer metadata and the first line of pkt data.
53 * #define stage0 generic_stage0
55 static STAGE_INLINE void generic_stage0 (vlib_main_t * vm,
56 vlib_node_runtime_t * node,
59 /* generic default stage 0 here */
60 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
61 vlib_prefetch_buffer_header (b, STORE);
62 CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, STORE);
#if NSTAGES == 2
/*
 * Two-stage pipeline: stage0 (typically a prefetch) runs one packet
 * ahead of last_stage, which picks the next node index for each packet.
 * Returns frame->n_vectors, as a vlib node function must.
 */
static STAGE_INLINE uword
dispatch_pipeline (vlib_main_t * vm,
		   vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
  int pi, pi_limit;

  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Don't run the pipeline deeper than we can enqueue */
      pi_limit = clib_min (n_left_from, n_left_to_next);

      /* Prime the pipeline: early stages only, nothing to enqueue yet */
      for (pi = 0; pi < NSTAGES - 1; pi++)
	{
	  if (pi == pi_limit)
	    break;
	  stage0 (vm, node, from[pi]);
	}

      /* Pipeline full: run every stage, enqueue one packet per turn */
      for (; pi < pi_limit; pi++)
	{
	  stage0 (vm, node, from[pi]);
	  to_next[0] = from[pi - 1];
	  to_next++;
	  n_left_to_next--;
	  next0 = last_stage (vm, node, from[pi - 1]);
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next,
					   from[pi - 1], next0);
	  n_left_from--;
	  /* The enqueue may have consumed the current next frame */
	  if ((int) n_left_to_next < 0 && n_left_from > 0)
	    vlib_get_next_frame (vm, node, next_index, to_next,
				 n_left_to_next);
	}

      /* Drain the pipeline: finish packets already in flight */
      for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
	{
	  if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
	    {
	      to_next[0] = from[pi - 1];
	      to_next++;
	      n_left_to_next--;
	      next0 = last_stage (vm, node, from[pi - 1]);
	      vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					       to_next, n_left_to_next,
					       from[pi - 1], next0);
	      n_left_from--;
	      if ((int) n_left_to_next < 0 && n_left_from > 0)
		vlib_get_next_frame (vm, node, next_index, to_next,
				     n_left_to_next);
	    }
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
      from += pi_limit;
    }
  return frame->n_vectors;
}
#endif
#if NSTAGES == 3
/*
 * Three-stage pipeline: stage0 and stage1 run ahead of last_stage,
 * which picks the next node index for each packet.
 * Returns frame->n_vectors, as a vlib node function must.
 */
static STAGE_INLINE uword
dispatch_pipeline (vlib_main_t * vm,
		   vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
  int pi, pi_limit;

  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Don't run the pipeline deeper than we can enqueue */
      pi_limit = clib_min (n_left_from, n_left_to_next);

      /* Prime the pipeline: early stages only, nothing to enqueue yet */
      for (pi = 0; pi < NSTAGES - 1; pi++)
	{
	  if (pi == pi_limit)
	    break;
	  stage0 (vm, node, from[pi]);
	  if (pi - 1 >= 0)
	    stage1 (vm, node, from[pi - 1]);
	}

      /* Pipeline full: run every stage, enqueue one packet per turn */
      for (; pi < pi_limit; pi++)
	{
	  stage0 (vm, node, from[pi]);
	  stage1 (vm, node, from[pi - 1]);
	  to_next[0] = from[pi - 2];
	  to_next++;
	  n_left_to_next--;
	  next0 = last_stage (vm, node, from[pi - 2]);
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next,
					   from[pi - 2], next0);
	  n_left_from--;
	  /* The enqueue may have consumed the current next frame */
	  if ((int) n_left_to_next < 0 && n_left_from > 0)
	    vlib_get_next_frame (vm, node, next_index, to_next,
				 n_left_to_next);
	}

      /* Drain the pipeline: finish packets already in flight */
      for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
	{
	  if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
	    stage1 (vm, node, from[pi - 1]);
	  if (((pi - 2) >= 0) && ((pi - 2) < pi_limit))
	    {
	      to_next[0] = from[pi - 2];
	      to_next++;
	      n_left_to_next--;
	      next0 = last_stage (vm, node, from[pi - 2]);
	      vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					       to_next, n_left_to_next,
					       from[pi - 2], next0);
	      n_left_from--;
	      if ((int) n_left_to_next < 0 && n_left_from > 0)
		vlib_get_next_frame (vm, node, next_index, to_next,
				     n_left_to_next);
	    }
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
      from += pi_limit;
    }
  return frame->n_vectors;
}
#endif
#if NSTAGES == 4
/*
 * Four-stage pipeline: stage0..stage2 run ahead of last_stage,
 * which picks the next node index for each packet.
 * Returns frame->n_vectors, as a vlib node function must.
 */
static STAGE_INLINE uword
dispatch_pipeline (vlib_main_t * vm,
		   vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
  int pi, pi_limit;

  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Don't run the pipeline deeper than we can enqueue */
      pi_limit = clib_min (n_left_from, n_left_to_next);

      /* Prime the pipeline: early stages only, nothing to enqueue yet */
      for (pi = 0; pi < NSTAGES - 1; pi++)
	{
	  if (pi == pi_limit)
	    break;
	  stage0 (vm, node, from[pi]);
	  if (pi - 1 >= 0)
	    stage1 (vm, node, from[pi - 1]);
	  if (pi - 2 >= 0)
	    stage2 (vm, node, from[pi - 2]);
	}

      /* Pipeline full: run every stage, enqueue one packet per turn */
      for (; pi < pi_limit; pi++)
	{
	  stage0 (vm, node, from[pi]);
	  stage1 (vm, node, from[pi - 1]);
	  stage2 (vm, node, from[pi - 2]);
	  to_next[0] = from[pi - 3];
	  to_next++;
	  n_left_to_next--;
	  next0 = last_stage (vm, node, from[pi - 3]);
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next,
					   from[pi - 3], next0);
	  n_left_from--;
	  /* The enqueue may have consumed the current next frame */
	  if ((int) n_left_to_next < 0 && n_left_from > 0)
	    vlib_get_next_frame (vm, node, next_index, to_next,
				 n_left_to_next);
	}

      /* Drain the pipeline: finish packets already in flight */
      for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
	{
	  if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
	    stage1 (vm, node, from[pi - 1]);
	  if (((pi - 2) >= 0) && ((pi - 2) < pi_limit))
	    stage2 (vm, node, from[pi - 2]);
	  if (((pi - 3) >= 0) && ((pi - 3) < pi_limit))
	    {
	      to_next[0] = from[pi - 3];
	      to_next++;
	      n_left_to_next--;
	      next0 = last_stage (vm, node, from[pi - 3]);
	      vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					       to_next, n_left_to_next,
					       from[pi - 3], next0);
	      n_left_from--;
	      if ((int) n_left_to_next < 0 && n_left_from > 0)
		vlib_get_next_frame (vm, node, next_index, to_next,
				     n_left_to_next);
	    }
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
      from += pi_limit;
    }
  return frame->n_vectors;
}
#endif
#if NSTAGES == 5
/*
 * Five-stage pipeline: stage0..stage3 run ahead of last_stage,
 * which picks the next node index for each packet.
 * Returns frame->n_vectors, as a vlib node function must.
 */
static STAGE_INLINE uword
dispatch_pipeline (vlib_main_t * vm,
		   vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
  int pi, pi_limit;

  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Don't run the pipeline deeper than we can enqueue */
      pi_limit = clib_min (n_left_from, n_left_to_next);

      /* Prime the pipeline: early stages only, nothing to enqueue yet */
      for (pi = 0; pi < NSTAGES - 1; pi++)
	{
	  if (pi == pi_limit)
	    break;
	  stage0 (vm, node, from[pi]);
	  if (pi - 1 >= 0)
	    stage1 (vm, node, from[pi - 1]);
	  if (pi - 2 >= 0)
	    stage2 (vm, node, from[pi - 2]);
	  if (pi - 3 >= 0)
	    stage3 (vm, node, from[pi - 3]);
	}

      /* Pipeline full: run every stage, enqueue one packet per turn */
      for (; pi < pi_limit; pi++)
	{
	  stage0 (vm, node, from[pi]);
	  stage1 (vm, node, from[pi - 1]);
	  stage2 (vm, node, from[pi - 2]);
	  stage3 (vm, node, from[pi - 3]);
	  to_next[0] = from[pi - 4];
	  to_next++;
	  n_left_to_next--;
	  next0 = last_stage (vm, node, from[pi - 4]);
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next,
					   from[pi - 4], next0);
	  n_left_from--;
	  /* The enqueue may have consumed the current next frame */
	  if ((int) n_left_to_next < 0 && n_left_from > 0)
	    vlib_get_next_frame (vm, node, next_index, to_next,
				 n_left_to_next);
	}

      /* Drain the pipeline: finish packets already in flight */
      for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
	{
	  if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
	    stage1 (vm, node, from[pi - 1]);
	  if (((pi - 2) >= 0) && ((pi - 2) < pi_limit))
	    stage2 (vm, node, from[pi - 2]);
	  if (((pi - 3) >= 0) && ((pi - 3) < pi_limit))
	    stage3 (vm, node, from[pi - 3]);
	  if (((pi - 4) >= 0) && ((pi - 4) < pi_limit))
	    {
	      to_next[0] = from[pi - 4];
	      to_next++;
	      n_left_to_next--;
	      next0 = last_stage (vm, node, from[pi - 4]);
	      vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					       to_next, n_left_to_next,
					       from[pi - 4], next0);
	      n_left_from--;
	      if ((int) n_left_to_next < 0 && n_left_from > 0)
		vlib_get_next_frame (vm, node, next_index, to_next,
				     n_left_to_next);
	    }
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
      from += pi_limit;
    }
  return frame->n_vectors;
}
#endif
#if NSTAGES == 6
/*
 * Six-stage pipeline: stage0..stage4 run ahead of last_stage,
 * which picks the next node index for each packet.
 * Returns frame->n_vectors, as a vlib node function must.
 */
static STAGE_INLINE uword
dispatch_pipeline (vlib_main_t * vm,
		   vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
  int pi, pi_limit;

  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Don't run the pipeline deeper than we can enqueue */
      pi_limit = clib_min (n_left_from, n_left_to_next);

      /* Prime the pipeline: early stages only, nothing to enqueue yet */
      for (pi = 0; pi < NSTAGES - 1; pi++)
	{
	  if (pi == pi_limit)
	    break;
	  stage0 (vm, node, from[pi]);
	  if (pi - 1 >= 0)
	    stage1 (vm, node, from[pi - 1]);
	  if (pi - 2 >= 0)
	    stage2 (vm, node, from[pi - 2]);
	  if (pi - 3 >= 0)
	    stage3 (vm, node, from[pi - 3]);
	  if (pi - 4 >= 0)
	    stage4 (vm, node, from[pi - 4]);
	}

      /* Pipeline full: run every stage, enqueue one packet per turn */
      for (; pi < pi_limit; pi++)
	{
	  stage0 (vm, node, from[pi]);
	  stage1 (vm, node, from[pi - 1]);
	  stage2 (vm, node, from[pi - 2]);
	  stage3 (vm, node, from[pi - 3]);
	  stage4 (vm, node, from[pi - 4]);
	  to_next[0] = from[pi - 5];
	  to_next++;
	  n_left_to_next--;
	  next0 = last_stage (vm, node, from[pi - 5]);
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next,
					   from[pi - 5], next0);
	  n_left_from--;
	  /* The enqueue may have consumed the current next frame */
	  if ((int) n_left_to_next < 0 && n_left_from > 0)
	    vlib_get_next_frame (vm, node, next_index, to_next,
				 n_left_to_next);
	}

      /* Drain the pipeline: finish packets already in flight */
      for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
	{
	  if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
	    stage1 (vm, node, from[pi - 1]);
	  if (((pi - 2) >= 0) && ((pi - 2) < pi_limit))
	    stage2 (vm, node, from[pi - 2]);
	  if (((pi - 3) >= 0) && ((pi - 3) < pi_limit))
	    stage3 (vm, node, from[pi - 3]);
	  if (((pi - 4) >= 0) && ((pi - 4) < pi_limit))
	    stage4 (vm, node, from[pi - 4]);
	  if (((pi - 5) >= 0) && ((pi - 5) < pi_limit))
	    {
	      to_next[0] = from[pi - 5];
	      to_next++;
	      n_left_to_next--;
	      next0 = last_stage (vm, node, from[pi - 5]);
	      vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					       to_next, n_left_to_next,
					       from[pi - 5], next0);
	      n_left_from--;
	      if ((int) n_left_to_next < 0 && n_left_from > 0)
		vlib_get_next_frame (vm, node, next_index, to_next,
				     n_left_to_next);
	    }
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
      from += pi_limit;
    }
  return frame->n_vectors;
}
#endif