vlib: add vlib_buffer_enqueue_to_single_next(...) function
[vpp.git] / src / vlib / buffer_node.h
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * buffer_node.h: VLIB buffer handling node helper macros/inlines
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #ifndef included_vlib_buffer_node_h
41 #define included_vlib_buffer_node_h
42
43 /** \file
44     vlib buffer/node functions
45 */
46
47 /** \brief Finish enqueueing two buffers forward in the graph.
48  Standard dual loop boilerplate element. This is a MACRO,
49  with MULTIPLE SIDE EFFECTS. In the ideal case,
50  <code>next_index == next0 == next1</code>,
51  which means that the speculative enqueue at the top of the dual loop
52  has correctly dealt with both packets. In that case, the macro does
53  nothing at all.
54
55  @param vm vlib_main_t pointer, varies by thread
56  @param node current node vlib_node_runtime_t pointer
57  @param next_index speculated next index used for both packets
58  @param to_next speculated vector pointer used for both packets
59  @param n_left_to_next number of slots left in speculated vector
60  @param bi0 first buffer index
61  @param bi1 second buffer index
62  @param next0 actual next index to be used for the first packet
63  @param next1 actual next index to be used for the second packet
64
65  @return @c next_index -- speculative next index to be used for future packets
66  @return @c to_next -- speculative frame to be used for future packets
67  @return @c n_left_to_next -- number of slots left in speculative frame
68 */
69
/*
 * enqueue_code encodes which of the two speculatively enqueued buffers
 * missed the speculated next_index:
 *   0 - both matched: nothing to do (handled by the PREDICT_FALSE guard)
 *   1 - only bi0 missed ("A B A": bi1 stays, bi0 is pulled out)
 *   2 - only bi1 missed ("A A B": bi0 stays, bi1 is pulled out)
 *   3 - both missed ("A B B" or "A B C")
 * In case 3, if both buffers agree on the same new next node, the
 * speculation is re-pointed at that node (put current frame, switch
 * next_index, get a fresh frame) so subsequent packets enqueue cheaply.
 */
#define vlib_validate_buffer_enqueue_x2(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,next0,next1) \
do {                                                                    \
  int enqueue_code = (next0 != next_index) + 2*(next1 != next_index);   \
                                                                        \
  if (PREDICT_FALSE (enqueue_code != 0))                                \
    {                                                                   \
      switch (enqueue_code)                                             \
        {                                                               \
        case 1:                                                         \
          /* A B A */                                                   \
          to_next[-2] = bi1;                                            \
          to_next -= 1;                                                 \
          n_left_to_next += 1;                                          \
          vlib_set_next_frame_buffer (vm, node, next0, bi0);            \
          break;                                                        \
                                                                        \
        case 2:                                                         \
          /* A A B */                                                   \
          to_next -= 1;                                                 \
          n_left_to_next += 1;                                          \
          vlib_set_next_frame_buffer (vm, node, next1, bi1);            \
          break;                                                        \
                                                                        \
        case 3:                                                         \
          /* A B B or A B C */                                          \
          to_next -= 2;                                                 \
          n_left_to_next += 2;                                          \
          vlib_set_next_frame_buffer (vm, node, next0, bi0);            \
          vlib_set_next_frame_buffer (vm, node, next1, bi1);            \
          if (next0 == next1)                                           \
            {                                                           \
              vlib_put_next_frame (vm, node, next_index,                \
                                   n_left_to_next);                     \
              next_index = next1;                                       \
              vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
            }                                                           \
        }                                                               \
    }                                                                   \
} while (0)
109
110
111 /** \brief Finish enqueueing four buffers forward in the graph.
112  Standard quad loop boilerplate element. This is a MACRO,
113  with MULTIPLE SIDE EFFECTS. In the ideal case,
114  <code>next_index == next0 == next1 == next2 == next3</code>,
115  which means that the speculative enqueue at the top of the quad loop
116  has correctly dealt with all four packets. In that case, the macro does
117  nothing at all.
118
119  @param vm vlib_main_t pointer, varies by thread
120  @param node current node vlib_node_runtime_t pointer
121  @param next_index speculated next index used for both packets
122  @param to_next speculated vector pointer used for both packets
123  @param n_left_to_next number of slots left in speculated vector
124  @param bi0 first buffer index
125  @param bi1 second buffer index
126  @param bi2 third buffer index
127  @param bi3 fourth buffer index
128  @param next0 actual next index to be used for the first packet
129  @param next1 actual next index to be used for the second packet
130  @param next2 actual next index to be used for the third packet
131  @param next3 actual next index to be used for the fourth packet
132
133  @return @c next_index -- speculative next index to be used for future packets
134  @return @c to_next -- speculative frame to be used for future packets
135  @return @c n_left_to_next -- number of slots left in speculative frame
136 */
137
/*
 * Strategy: if any of the four buffers missed the speculated next_index,
 * rewind the entire speculative enqueue (4 slots), then re-enqueue each
 * buffer either back into the speculated frame (when it matched) or into
 * its actual next-node frame. If the last two packets agree on a new
 * next node, speculation is re-pointed at that node.
 *
 * Fix: removed the stray semicolon after "while (0)" -- it made
 * "if (x) vlib_validate_buffer_enqueue_x4 (...); else ..." a syntax
 * error and was inconsistent with the x1/x2 variants above.
 */
#define vlib_validate_buffer_enqueue_x4(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,bi2,bi3,next0,next1,next2,next3) \
do {                                                                    \
  /* After the fact: check the [speculative] enqueue to "next" */       \
  u32 fix_speculation = (next_index ^ next0) | (next_index ^ next1)     \
    | (next_index ^ next2) | (next_index ^ next3);                      \
  if (PREDICT_FALSE(fix_speculation))                                   \
    {                                                                   \
      /* rewind... */                                                   \
      to_next -= 4;                                                     \
      n_left_to_next += 4;                                              \
                                                                        \
      /* If bi0 belongs to "next", send it there */                     \
      if (next_index == next0)                                          \
        {                                                               \
          to_next[0] = bi0;                                             \
          to_next++;                                                    \
          n_left_to_next --;                                            \
        }                                                               \
      else              /* send it where it needs to go */              \
        vlib_set_next_frame_buffer (vm, node, next0, bi0);              \
                                                                        \
      if (next_index == next1)                                          \
        {                                                               \
          to_next[0] = bi1;                                             \
          to_next++;                                                    \
          n_left_to_next --;                                            \
        }                                                               \
      else                                                              \
        vlib_set_next_frame_buffer (vm, node, next1, bi1);              \
                                                                        \
      if (next_index == next2)                                          \
        {                                                               \
          to_next[0] = bi2;                                             \
          to_next++;                                                    \
          n_left_to_next --;                                            \
        }                                                               \
      else                                                              \
        vlib_set_next_frame_buffer (vm, node, next2, bi2);              \
                                                                        \
      if (next_index == next3)                                          \
        {                                                               \
          to_next[0] = bi3;                                             \
          to_next++;                                                    \
          n_left_to_next --;                                            \
        }                                                               \
      else                                                              \
        {                                                               \
          vlib_set_next_frame_buffer (vm, node, next3, bi3);            \
                                                                        \
          /* Change speculation: last 2 packets went to the same node */ \
          if (next2 == next3)                                           \
            {                                                           \
              vlib_put_next_frame (vm, node, next_index, n_left_to_next); \
              next_index = next3;                                       \
              vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
            }                                                           \
        }                                                               \
    }                                                                   \
} while (0)
197
198 /** \brief Finish enqueueing one buffer forward in the graph.
199  Standard single loop boilerplate element. This is a MACRO,
200  with MULTIPLE SIDE EFFECTS. In the ideal case,
201  <code>next_index == next0</code>,
202  which means that the speculative enqueue at the top of the single loop
203  has correctly dealt with the packet in hand. In that case, the macro does
204  nothing at all.
205
206  @param vm vlib_main_t pointer, varies by thread
207  @param node current node vlib_node_runtime_t pointer
208  @param next_index speculated next index used for both packets
209  @param to_next speculated vector pointer used for both packets
210  @param n_left_to_next number of slots left in speculated vector
211  @param bi0 first buffer index
212  @param next0 actual next index to be used for the first packet
213
214  @return @c next_index -- speculative next index to be used for future packets
215  @return @c to_next -- speculative frame to be used for future packets
216  @return @c n_left_to_next -- number of slots left in speculative frame
217 */
/*
 * If the speculated next_index was wrong: give back the speculated
 * frame (n_left_to_next + 1, because the caller already consumed one
 * slot speculatively), re-point speculation at next0, open a frame for
 * it, and enqueue bi0 there.
 */
#define vlib_validate_buffer_enqueue_x1(vm,node,next_index,to_next,n_left_to_next,bi0,next0) \
do {                                                                    \
  if (PREDICT_FALSE (next0 != next_index))                              \
    {                                                                   \
      vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1);   \
      next_index = next0;                                               \
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
                                                                        \
      to_next[0] = bi0;                                                 \
      to_next += 1;                                                     \
      n_left_to_next -= 1;                                              \
    }                                                                   \
} while (0)
231
/** \brief Generic graph-node dispatch loop.
 Standard dual/single-loop skeleton: buffers are speculatively enqueued
 to the cached next index, the caller-supplied callbacks pick the actual
 next index for each buffer, and mis-speculated enqueues are patched up
 by the vlib_validate_buffer_enqueue_x2/x1 macros.

 @param vm vlib_main_t pointer, varies by thread
 @param node current node vlib_node_runtime_t pointer
 @param frame frame of buffer indices to process
 @param sizeof_trace per-buffer trace record size (used when tracing)
 @param opaque1 passed through unchanged to both callbacks
 @param opaque2 passed through unchanged to both callbacks
 @param two_buffers callback classifying two buffers per call
 @param one_buffer callback classifying one buffer per call
 @return frame->n_vectors (number of packets processed)
*/
always_inline uword
generic_buffer_node_inline (vlib_main_t * vm,
                            vlib_node_runtime_t * node,
                            vlib_frame_t * frame,
                            uword sizeof_trace,
                            void *opaque1,
                            uword opaque2,
                            void (*two_buffers) (vlib_main_t * vm,
                                                 void *opaque1,
                                                 uword opaque2,
                                                 vlib_buffer_t * b0,
                                                 vlib_buffer_t * b1,
                                                 u32 * next0, u32 * next1),
                            void (*one_buffer) (vlib_main_t * vm,
                                                void *opaque1, uword opaque2,
                                                vlib_buffer_t * b0,
                                                u32 * next0))
{
  u32 n_left_from, *from, *to_next;
  u32 next_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  /* Speculate that packets go where the last batch went. */
  next_index = node->cached_next_index;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
                                   /* stride */ 1, sizeof_trace);

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Dual loop: needs 4 buffers remaining so the next iteration's
         pair can be prefetched, and 2 output slots available. */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          vlib_buffer_t *p0, *p1;
          u32 pi0, next0;
          u32 pi1, next1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t *p2, *p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            CLIB_PREFETCH (p2->data, 64, LOAD);
            CLIB_PREFETCH (p3->data, 64, LOAD);
          }

          /* Speculatively enqueue both buffers to next_index. */
          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];
          from += 2;
          to_next += 2;
          n_left_from -= 2;
          n_left_to_next -= 2;

          p0 = vlib_get_buffer (vm, pi0);
          p1 = vlib_get_buffer (vm, pi1);

          two_buffers (vm, opaque1, opaque2, p0, p1, &next0, &next1);

          /* Fix up the speculation if next0/next1 disagree. */
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           pi0, pi1, next0, next1);
        }

      /* Single loop: drain the remainder one buffer at a time. */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t *p0;
          u32 pi0, next0;

          pi0 = from[0];
          to_next[0] = pi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          p0 = vlib_get_buffer (vm, pi0);

          one_buffer (vm, opaque1, opaque2, p0, &next0);

          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           pi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}
330
/*
 * Enqueue a vector of buffers, each with an individual next index.
 * nexts[i] names the next node for buffers[i]; count is the number of
 * buffers. Runs of identical next indices are detected (with SIMD when
 * available) and copied into the open frame in bulk.
 */
static_always_inline void
vlib_buffer_enqueue_to_next (vlib_main_t * vm, vlib_node_runtime_t * node,
                             u32 * buffers, u16 * nexts, uword count)
{
  u32 *to_next, n_left_to_next, max;
  u16 next_index;

  next_index = nexts[0];
  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
  /* max = how many buffers may be copied without switching frames. */
  max = clib_min (n_left_to_next, count);

  while (count)
    {
      u32 n_enqueued;
      /* Next destination changed, or the open frame is full: flush it
         and open a frame for the new next index. */
      if ((nexts[0] != next_index) || n_left_to_next == 0)
        {
          vlib_put_next_frame (vm, node, next_index, n_left_to_next);
          next_index = nexts[0];
          vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
          max = clib_min (n_left_to_next, count);
        }
      /* Count leading entries of nexts[] equal to nexts[0].
         NOTE(review): the vector loads below may read past the end of
         nexts[] when fewer than a full vector of entries remain; the
         result is clamped to max before use, but confirm callers pass
         suitably padded (frame-sized) arrays. */
#if defined(CLIB_HAVE_VEC512)
      u16x32 next32 = u16x32_load_unaligned (nexts);
      next32 = (next32 == u16x32_splat (next32[0]));
      u64 bitmap = u16x32_msb_mask (next32);
      n_enqueued = count_trailing_zeros (~bitmap);
#elif defined(CLIB_HAVE_VEC256)
      u16x16 next16 = u16x16_load_unaligned (nexts);
      next16 = (next16 == u16x16_splat (next16[0]));
      u64 bitmap = u8x32_msb_mask ((u8x32) next16);
      /* msb mask is per byte, nexts are u16 -> divide by 2. */
      n_enqueued = count_trailing_zeros (~bitmap) / 2;
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
      u16x8 next8 = u16x8_load_unaligned (nexts);
      next8 = (next8 == u16x8_splat (next8[0]));
      u64 bitmap = u8x16_msb_mask ((u8x16) next8);
      n_enqueued = count_trailing_zeros (~bitmap) / 2;
#else
      u16 x = 0;
      /* NOTE(review): max = clib_min (n_left_to_next, count) <= count,
         so "count + 3 < max" appears to be always false and this scalar
         fallback enqueues one buffer per iteration -- confirm whether a
         condition such as "max > 3" was intended. */
      if (count + 3 < max)
        {
          x |= next_index ^ nexts[1];
          x |= next_index ^ nexts[2];
          x |= next_index ^ nexts[3];
          n_enqueued = (x == 0) ? 4 : 1;
        }
      else
        n_enqueued = 1;
#endif

      /* Clamp the run length to what fits / what remains. */
      if (PREDICT_FALSE (n_enqueued > max))
        n_enqueued = max;

      /* Bulk-copy in progressively smaller chunks. */
#ifdef CLIB_HAVE_VEC512
      if (n_enqueued >= 32)
        {
          clib_memcpy_fast (to_next, buffers, 32 * sizeof (u32));
          nexts += 32;
          to_next += 32;
          buffers += 32;
          n_left_to_next -= 32;
          count -= 32;
          max -= 32;
          continue;
        }
#endif

#ifdef CLIB_HAVE_VEC256
      if (n_enqueued >= 16)
        {
          clib_memcpy_fast (to_next, buffers, 16 * sizeof (u32));
          nexts += 16;
          to_next += 16;
          buffers += 16;
          n_left_to_next -= 16;
          count -= 16;
          max -= 16;
          continue;
        }
#endif

#ifdef CLIB_HAVE_VEC128
      if (n_enqueued >= 8)
        {
          clib_memcpy_fast (to_next, buffers, 8 * sizeof (u32));
          nexts += 8;
          to_next += 8;
          buffers += 8;
          n_left_to_next -= 8;
          count -= 8;
          max -= 8;
          continue;
        }
#endif

      if (n_enqueued >= 4)
        {
          clib_memcpy_fast (to_next, buffers, 4 * sizeof (u32));
          nexts += 4;
          to_next += 4;
          buffers += 4;
          n_left_to_next -= 4;
          count -= 4;
          max -= 4;
          continue;
        }

      /* copy */
      to_next[0] = buffers[0];

      /* next */
      nexts += 1;
      to_next += 1;
      buffers += 1;
      n_left_to_next -= 1;
      count -= 1;
      max -= 1;
    }
  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
450
451 static_always_inline void
452 vlib_buffer_enqueue_to_single_next (vlib_main_t * vm,
453                                     vlib_node_runtime_t * node, u32 * buffers,
454                                     u16 next_index, u32 count)
455 {
456   u32 *to_next, n_left_to_next, n_enq;
457
458   vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
459
460   if (PREDICT_TRUE (n_left_to_next >= count))
461     {
462       clib_memcpy_fast (to_next, buffers, count * sizeof (u32));
463       n_left_to_next -= count;
464       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
465       return;
466     }
467
468   n_enq = n_left_to_next;
469 next:
470   clib_memcpy_fast (to_next, buffers, n_enq * sizeof (u32));
471   n_left_to_next -= n_enq;
472
473   if (PREDICT_FALSE (count > n_enq))
474     {
475       count -= n_enq;
476       buffers += n_enq;
477
478       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
479       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
480       n_enq = clib_min (n_left_to_next, count);
481       goto next;
482     }
483   vlib_put_next_frame (vm, node, next_index, n_left_to_next);
484 }
485
/*
 * Hand off buffers to other threads via per-thread frame queues.
 * thread_indices[i] names the destination thread for buffer_indices[i].
 * When drop_on_congestion is non-zero, buffers destined for a congested
 * queue are collected and freed instead of enqueued.
 * Returns the number of packets actually handed off (n_packets - drops).
 */
static_always_inline u32
vlib_buffer_enqueue_to_thread (vlib_main_t * vm, u32 frame_queue_index,
                               u32 * buffer_indices, u16 * thread_indices,
                               u32 n_packets, int drop_on_congestion)
{
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  vlib_frame_queue_main_t *fqm;
  vlib_frame_queue_per_thread_data_t *ptd;
  u32 n_left = n_packets;
  u32 drop_list[VLIB_FRAME_SIZE], *dbi = drop_list, n_drop = 0;
  vlib_frame_queue_elt_t *hf = 0;
  u32 n_left_to_next_thread = 0, *to_next_thread = 0;
  u32 next_thread_index, current_thread_index = ~0;
  int i;

  fqm = vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
  ptd = vec_elt_at_index (fqm->per_thread_data, vm->thread_index);

  while (n_left)
    {
      next_thread_index = thread_indices[0];

      /* Destination thread changed: finish the current handoff element
         and start (or resume) one for the new thread. */
      if (next_thread_index != current_thread_index)
        {

          if (drop_on_congestion &&
              is_vlib_frame_queue_congested
              (frame_queue_index, next_thread_index, fqm->queue_hi_thresh,
               ptd->congested_handoff_queue_by_thread_index))
            {
              /* Queue congested: collect buffer for batch free below. */
              dbi[0] = buffer_indices[0];
              dbi++;
              n_drop++;
              goto next;
            }

          /* Record how full the previous element got before switching. */
          if (hf)
            hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;

          hf = vlib_get_worker_handoff_queue_elt (frame_queue_index,
                                                  next_thread_index,
                                                  ptd->handoff_queue_elt_by_thread_index);

          n_left_to_next_thread = VLIB_FRAME_SIZE - hf->n_vectors;
          to_next_thread = &hf->buffer_index[hf->n_vectors];
          current_thread_index = next_thread_index;
        }

      to_next_thread[0] = buffer_indices[0];
      to_next_thread++;
      n_left_to_next_thread--;

      /* Element full: ship it immediately and force a re-fetch on the
         next packet for this thread. */
      if (n_left_to_next_thread == 0)
        {
          hf->n_vectors = VLIB_FRAME_SIZE;
          vlib_put_frame_queue_elt (hf);
          current_thread_index = ~0;
          ptd->handoff_queue_elt_by_thread_index[next_thread_index] = 0;
          hf = 0;
        }

      /* next */
    next:
      thread_indices += 1;
      buffer_indices += 1;
      n_left -= 1;
    }

  /* Finalize the count of the last partially-filled element. */
  if (hf)
    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;

  /* Ship frames to the thread nodes */
  for (i = 0; i < vec_len (ptd->handoff_queue_elt_by_thread_index); i++)
    {
      if (ptd->handoff_queue_elt_by_thread_index[i])
        {
          hf = ptd->handoff_queue_elt_by_thread_index[i];
          /*
           * It works better to let the handoff node
           * rate-adapt, always ship the handoff queue element.
           * (The "1 ||" deliberately disables the unchanged-count
           * deferral branch below.)
           */
          if (1 || hf->n_vectors == hf->last_n_vectors)
            {
              vlib_put_frame_queue_elt (hf);
              ptd->handoff_queue_elt_by_thread_index[i] = 0;
            }
          else
            hf->last_n_vectors = hf->n_vectors;
        }
      /* Reset congestion tracking for the next invocation. */
      ptd->congested_handoff_queue_by_thread_index[i] =
        (vlib_frame_queue_t *) (~0);
    }

  if (drop_on_congestion && n_drop)
    vlib_buffer_free (vm, drop_list, n_drop);

  return n_packets - n_drop;
}
584
585 #endif /* included_vlib_buffer_node_h */
586
587 /*
588  * fd.io coding-style-patch-verification: ON
589  *
590  * Local Variables:
591  * eval: (c-set-style "gnu")
592  * End:
593  */