VAPI: bugfixes
[vpp.git] / src / vlib / buffer_node.h
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * buffer_node.h: VLIB buffer handling node helper macros/inlines
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #ifndef included_vlib_buffer_node_h
41 #define included_vlib_buffer_node_h
42
43 /** \file
44     vlib buffer/node functions
45 */
46
/** \brief Check, and if necessary repair, the speculative enqueue of
 two buffers (dual loop boilerplate).

 This is a MACRO with MULTIPLE SIDE EFFECTS: it may modify
 @c next_index, @c to_next and @c n_left_to_next in the caller's scope.
 The caller is expected to have already written @c bi0 and @c bi1 to
 the frame for @c next_index and advanced @c to_next by two. When
 <code>next_index == next0 == next1</code> that speculation was right
 and the macro does nothing at all.

 Otherwise the mis-speculated slot(s) are given back, each stray buffer
 is sent to its real next with vlib_set_next_frame_buffer(), and, when
 both buffers missed and agree with each other, the speculation itself
 is switched over to that next.

 @param vm vlib_main_t pointer, varies by thread
 @param node current node vlib_node_runtime_t pointer
 @param next_index speculated next index used for both packets
 @param to_next speculated vector pointer used for both packets
 @param n_left_to_next number of slots left in speculated vector
 @param bi0 first buffer index
 @param bi1 second buffer index
 @param next0 actual next index to be used for the first packet
 @param next1 actual next index to be used for the second packet

 @return @c next_index -- speculative next index to be used for future packets
 @return @c to_next -- speculative frame to be used for future packets
 @return @c n_left_to_next -- number of slots left in speculative frame
*/

#define vlib_validate_buffer_enqueue_x2(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,next0,next1) \
do {                                                                    \
  /* bit 0 set: bi0 missed the speculation; bit 1 set: bi1 missed */    \
  int x2_miss_mask = (next0 != next_index)                              \
    | ((next1 != next_index) << 1);                                     \
                                                                        \
  if (PREDICT_FALSE (x2_miss_mask != 0))                                \
    {                                                                   \
      if (x2_miss_mask == 1)                                            \
        {                                                               \
          /* A B A: bi1 takes over bi0's slot, bi0 goes to next0 */     \
          to_next[-2] = bi1;                                            \
          to_next--;                                                    \
          n_left_to_next++;                                             \
          vlib_set_next_frame_buffer (vm, node, next0, bi0);            \
        }                                                               \
      else if (x2_miss_mask == 2)                                       \
        {                                                               \
          /* A A B: bi0 stays where it is, bi1 goes to next1 */         \
          to_next--;                                                    \
          n_left_to_next++;                                             \
          vlib_set_next_frame_buffer (vm, node, next1, bi1);            \
        }                                                               \
      else                                                              \
        {                                                               \
          /* A B B or A B C: neither buffer belongs in this frame */    \
          to_next -= 2;                                                 \
          n_left_to_next += 2;                                          \
          vlib_set_next_frame_buffer (vm, node, next0, bi0);            \
          vlib_set_next_frame_buffer (vm, node, next1, bi1);            \
          if (next0 == next1)                                           \
            {                                                           \
              /* A B B: both agree, so speculate on B from now on */    \
              vlib_put_next_frame (vm, node, next_index,                \
                                   n_left_to_next);                     \
              next_index = next1;                                       \
              vlib_get_next_frame (vm, node, next_index, to_next,       \
                                   n_left_to_next);                     \
            }                                                           \
        }                                                               \
    }                                                                   \
} while (0)
109
110
/** \brief Finish enqueueing four buffers forward in the graph.
 Standard quad loop boilerplate element. This is a MACRO,
 with MULTIPLE SIDE EFFECTS. In the ideal case,
 <code>next_index == next0 == next1 == next2 == next3</code>,
 which means that the speculative enqueue at the top of the quad loop
 has correctly dealt with all four packets. In that case, the macro does
 nothing at all.

 Otherwise the four speculative slots are rewound and each buffer is
 handled in order: buffers whose next matches @c next_index are written
 back to the speculative frame, the others are sent to their own next
 with vlib_set_next_frame_buffer(). If the fourth buffer missed and the
 third and fourth agree, the speculation is switched to @c next3.

 The expansion deliberately has no trailing semicolon, so the macro
 behaves as a single statement (for example in an unbraced if/else).

 @param vm vlib_main_t pointer, varies by thread
 @param node current node vlib_node_runtime_t pointer
 @param next_index speculated next index used for all four packets
 @param to_next speculated vector pointer used for all four packets
 @param n_left_to_next number of slots left in speculated vector
 @param bi0 first buffer index
 @param bi1 second buffer index
 @param bi2 third buffer index
 @param bi3 fourth buffer index
 @param next0 actual next index to be used for the first packet
 @param next1 actual next index to be used for the second packet
 @param next2 actual next index to be used for the third packet
 @param next3 actual next index to be used for the fourth packet

 @return @c next_index -- speculative next index to be used for future packets
 @return @c to_next -- speculative frame to be used for future packets
 @return @c n_left_to_next -- number of slots left in speculative frame
*/

#define vlib_validate_buffer_enqueue_x4(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,bi2,bi3,next0,next1,next2,next3) \
do {                                                                    \
  /* After the fact: check the [speculative] enqueue to "next". */      \
  /* Non-zero iff at least one nextN differs from next_index. */        \
  u32 fix_speculation = (next_index ^ next0) | (next_index ^ next1)     \
    | (next_index ^ next2) | (next_index ^ next3);                      \
  if (PREDICT_FALSE(fix_speculation))                                   \
    {                                                                   \
      /* rewind... */                                                   \
      to_next -= 4;                                                     \
      n_left_to_next += 4;                                              \
                                                                        \
      /* If bi0 belongs to "next", send it there */                     \
      if (next_index == next0)                                          \
        {                                                               \
          to_next[0] = bi0;                                             \
          to_next++;                                                    \
          n_left_to_next --;                                            \
        }                                                               \
      else              /* send it where it needs to go */              \
        vlib_set_next_frame_buffer (vm, node, next0, bi0);              \
                                                                        \
      if (next_index == next1)                                          \
        {                                                               \
          to_next[0] = bi1;                                             \
          to_next++;                                                    \
          n_left_to_next --;                                            \
        }                                                               \
      else                                                              \
        vlib_set_next_frame_buffer (vm, node, next1, bi1);              \
                                                                        \
      if (next_index == next2)                                          \
        {                                                               \
          to_next[0] = bi2;                                             \
          to_next++;                                                    \
          n_left_to_next --;                                            \
        }                                                               \
      else                                                              \
        vlib_set_next_frame_buffer (vm, node, next2, bi2);              \
                                                                        \
      if (next_index == next3)                                          \
        {                                                               \
          to_next[0] = bi3;                                             \
          to_next++;                                                    \
          n_left_to_next --;                                            \
        }                                                               \
      else                                                              \
        {                                                               \
          vlib_set_next_frame_buffer (vm, node, next3, bi3);            \
                                                                        \
          /* Change speculation: last 2 packets went to the same node*/ \
          if (next2 == next3)                                           \
            {                                                           \
              vlib_put_next_frame (vm, node, next_index, n_left_to_next); \
              next_index = next3;                                       \
              vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
            }                                                           \
        }                                                               \
    }                                                                   \
 } while(0)
197
/** \brief Check, and if necessary repair, the speculative enqueue of
 one buffer (single loop boilerplate).

 This is a MACRO with MULTIPLE SIDE EFFECTS: it may modify
 @c next_index, @c to_next and @c n_left_to_next in the caller's scope.
 The caller is expected to have already written @c bi0 to the frame for
 @c next_index and advanced @c to_next by one. When
 <code>next_index == next0</code> that speculation was right and the
 macro does nothing at all.

 Otherwise the current frame is put back without @c bi0, the
 speculation is switched to @c next0, and @c bi0 is enqueued in the
 frame obtained for @c next0.

 @param vm vlib_main_t pointer, varies by thread
 @param node current node vlib_node_runtime_t pointer
 @param next_index speculated next index used for the packet
 @param to_next speculated vector pointer used for the packet
 @param n_left_to_next number of slots left in speculated vector
 @param bi0 buffer index
 @param next0 actual next index to be used for the packet

 @return @c next_index -- speculative next index to be used for future packets
 @return @c to_next -- speculative frame to be used for future packets
 @return @c n_left_to_next -- number of slots left in speculative frame
*/
#define vlib_validate_buffer_enqueue_x1(vm,node,next_index,to_next,n_left_to_next,bi0,next0) \
do {                                                                    \
  if (PREDICT_FALSE (next0 != next_index))                              \
    {                                                                   \
      /* the "+ 1" hands back the slot bi0 occupied speculatively */    \
      vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1);   \
                                                                        \
      /* speculate on bi0's real next from here on */                   \
      next_index = next0;                                               \
      vlib_get_next_frame (vm, node, next_index, to_next,               \
                           n_left_to_next);                             \
                                                                        \
      /* enqueue bi0 where it belongs */                                \
      *to_next++ = bi0;                                                 \
      n_left_to_next--;                                                 \
    }                                                                   \
} while (0)
231
232 always_inline uword
233 generic_buffer_node_inline (vlib_main_t * vm,
234                             vlib_node_runtime_t * node,
235                             vlib_frame_t * frame,
236                             uword sizeof_trace,
237                             void *opaque1,
238                             uword opaque2,
239                             void (*two_buffers) (vlib_main_t * vm,
240                                                  void *opaque1,
241                                                  uword opaque2,
242                                                  vlib_buffer_t * b0,
243                                                  vlib_buffer_t * b1,
244                                                  u32 * next0, u32 * next1),
245                             void (*one_buffer) (vlib_main_t * vm,
246                                                 void *opaque1, uword opaque2,
247                                                 vlib_buffer_t * b0,
248                                                 u32 * next0))
249 {
250   u32 n_left_from, *from, *to_next;
251   u32 next_index;
252
253   from = vlib_frame_vector_args (frame);
254   n_left_from = frame->n_vectors;
255   next_index = node->cached_next_index;
256
257   if (node->flags & VLIB_NODE_FLAG_TRACE)
258     vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
259                                    /* stride */ 1, sizeof_trace);
260
261   while (n_left_from > 0)
262     {
263       u32 n_left_to_next;
264
265       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
266
267       while (n_left_from >= 4 && n_left_to_next >= 2)
268         {
269           vlib_buffer_t *p0, *p1;
270           u32 pi0, next0;
271           u32 pi1, next1;
272
273           /* Prefetch next iteration. */
274           {
275             vlib_buffer_t *p2, *p3;
276
277             p2 = vlib_get_buffer (vm, from[2]);
278             p3 = vlib_get_buffer (vm, from[3]);
279
280             vlib_prefetch_buffer_header (p2, LOAD);
281             vlib_prefetch_buffer_header (p3, LOAD);
282
283             CLIB_PREFETCH (p2->data, 64, LOAD);
284             CLIB_PREFETCH (p3->data, 64, LOAD);
285           }
286
287           pi0 = to_next[0] = from[0];
288           pi1 = to_next[1] = from[1];
289           from += 2;
290           to_next += 2;
291           n_left_from -= 2;
292           n_left_to_next -= 2;
293
294           p0 = vlib_get_buffer (vm, pi0);
295           p1 = vlib_get_buffer (vm, pi1);
296
297           two_buffers (vm, opaque1, opaque2, p0, p1, &next0, &next1);
298
299           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
300                                            to_next, n_left_to_next,
301                                            pi0, pi1, next0, next1);
302         }
303
304       while (n_left_from > 0 && n_left_to_next > 0)
305         {
306           vlib_buffer_t *p0;
307           u32 pi0, next0;
308
309           pi0 = from[0];
310           to_next[0] = pi0;
311           from += 1;
312           to_next += 1;
313           n_left_from -= 1;
314           n_left_to_next -= 1;
315
316           p0 = vlib_get_buffer (vm, pi0);
317
318           one_buffer (vm, opaque1, opaque2, p0, &next0);
319
320           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
321                                            to_next, n_left_to_next,
322                                            pi0, next0);
323         }
324
325       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
326     }
327
328   return frame->n_vectors;
329 }
330
331 static_always_inline void
332 vlib_buffer_enqueue_to_next (vlib_main_t * vm, vlib_node_runtime_t * node,
333                              u32 * buffers, u16 * nexts, uword count)
334 {
335   u32 *to_next, n_left_to_next, max;
336   u16 next_index;
337
338   next_index = nexts[0];
339   vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
340   max = clib_min (n_left_to_next, count);
341
342   while (count)
343     {
344       u32 n_enqueued;
345       if ((nexts[0] != next_index) || n_left_to_next == 0)
346         {
347           vlib_put_next_frame (vm, node, next_index, n_left_to_next);
348           next_index = nexts[0];
349           vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
350           max = clib_min (n_left_to_next, count);
351         }
352 #if defined(CLIB_HAVE_VEC512)
353       u16x32 next32 = u16x32_load_unaligned (nexts);
354       next32 = (next32 == u16x32_splat (next32[0]));
355       u64 bitmap = u16x32_msb_mask (next32);
356       n_enqueued = count_trailing_zeros (~bitmap);
357 #elif defined(CLIB_HAVE_VEC256)
358       u16x16 next16 = u16x16_load_unaligned (nexts);
359       next16 = (next16 == u16x16_splat (next16[0]));
360       u64 bitmap = u8x32_msb_mask ((u8x32) next16);
361       n_enqueued = count_trailing_zeros (~bitmap) / 2;
362 #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
363       u16x8 next8 = u16x8_load_unaligned (nexts);
364       next8 = (next8 == u16x8_splat (next8[0]));
365       u64 bitmap = u8x16_msb_mask ((u8x16) next8);
366       n_enqueued = count_trailing_zeros (~bitmap) / 2;
367 #else
368       u16 x = 0;
369       x |= next_index ^ nexts[1];
370       x |= next_index ^ nexts[2];
371       x |= next_index ^ nexts[3];
372       n_enqueued = (x == 0) ? 4 : 1;
373 #endif
374
375       if (PREDICT_FALSE (n_enqueued > max))
376         n_enqueued = max;
377
378 #ifdef CLIB_HAVE_VEC512
379       if (n_enqueued >= 32)
380         {
381           clib_memcpy (to_next, buffers, 32 * sizeof (u32));
382           nexts += 32;
383           to_next += 32;
384           buffers += 32;
385           n_left_to_next -= 32;
386           count -= 32;
387           max -= 32;
388           continue;
389         }
390 #endif
391
392 #ifdef CLIB_HAVE_VEC256
393       if (n_enqueued >= 16)
394         {
395           clib_memcpy (to_next, buffers, 16 * sizeof (u32));
396           nexts += 16;
397           to_next += 16;
398           buffers += 16;
399           n_left_to_next -= 16;
400           count -= 16;
401           max -= 16;
402           continue;
403         }
404 #endif
405
406 #ifdef CLIB_HAVE_VEC128
407       if (n_enqueued >= 8)
408         {
409           clib_memcpy (to_next, buffers, 8 * sizeof (u32));
410           nexts += 8;
411           to_next += 8;
412           buffers += 8;
413           n_left_to_next -= 8;
414           count -= 8;
415           max -= 8;
416           continue;
417         }
418 #endif
419
420       if (n_enqueued >= 4)
421         {
422           clib_memcpy (to_next, buffers, 4 * sizeof (u32));
423           nexts += 4;
424           to_next += 4;
425           buffers += 4;
426           n_left_to_next -= 4;
427           count -= 4;
428           max -= 4;
429           continue;
430         }
431
432       /* copy */
433       to_next[0] = buffers[0];
434
435       /* next */
436       nexts += 1;
437       to_next += 1;
438       buffers += 1;
439       n_left_to_next -= 1;
440       count -= 1;
441       max -= 1;
442     }
443   vlib_put_next_frame (vm, node, next_index, n_left_to_next);
444 }
445
446 static_always_inline u32
447 vlib_buffer_enqueue_to_thread (vlib_main_t * vm, u32 frame_queue_index,
448                                u32 * buffer_indices, u16 * thread_indices,
449                                u32 n_packets, int drop_on_congestion)
450 {
451   vlib_thread_main_t *tm = vlib_get_thread_main ();
452   vlib_frame_queue_main_t *fqm;
453   vlib_frame_queue_per_thread_data_t *ptd;
454   u32 n_left = n_packets;
455   u32 drop_list[VLIB_FRAME_SIZE], *dbi = drop_list, n_drop = 0;
456   vlib_frame_queue_elt_t *hf = 0;
457   u32 n_left_to_next_thread = 0, *to_next_thread = 0;
458   u32 next_thread_index, current_thread_index = ~0;
459   int i;
460
461   fqm = vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
462   ptd = vec_elt_at_index (fqm->per_thread_data, vm->thread_index);
463
464   while (n_left)
465     {
466       next_thread_index = thread_indices[0];
467
468       if (next_thread_index != current_thread_index)
469         {
470
471           if (drop_on_congestion &&
472               is_vlib_frame_queue_congested
473               (frame_queue_index, next_thread_index, fqm->queue_hi_thresh,
474                ptd->congested_handoff_queue_by_thread_index))
475             {
476               dbi[0] = buffer_indices[0];
477               dbi++;
478               n_drop++;
479               goto next;
480             }
481
482           if (hf)
483             hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;
484
485           hf = vlib_get_worker_handoff_queue_elt (frame_queue_index,
486                                                   next_thread_index,
487                                                   ptd->handoff_queue_elt_by_thread_index);
488
489           n_left_to_next_thread = VLIB_FRAME_SIZE - hf->n_vectors;
490           to_next_thread = &hf->buffer_index[hf->n_vectors];
491           current_thread_index = next_thread_index;
492         }
493
494       to_next_thread[0] = buffer_indices[0];
495       to_next_thread++;
496       n_left_to_next_thread--;
497
498       if (n_left_to_next_thread == 0)
499         {
500           hf->n_vectors = VLIB_FRAME_SIZE;
501           vlib_put_frame_queue_elt (hf);
502           current_thread_index = ~0;
503           ptd->handoff_queue_elt_by_thread_index[next_thread_index] = 0;
504           hf = 0;
505         }
506
507       /* next */
508     next:
509       thread_indices += 1;
510       buffer_indices += 1;
511       n_left -= 1;
512     }
513
514   if (hf)
515     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;
516
517   /* Ship frames to the thread nodes */
518   for (i = 0; i < vec_len (ptd->handoff_queue_elt_by_thread_index); i++)
519     {
520       if (ptd->handoff_queue_elt_by_thread_index[i])
521         {
522           hf = ptd->handoff_queue_elt_by_thread_index[i];
523           /*
524            * It works better to let the handoff node
525            * rate-adapt, always ship the handoff queue element.
526            */
527           if (1 || hf->n_vectors == hf->last_n_vectors)
528             {
529               vlib_put_frame_queue_elt (hf);
530               ptd->handoff_queue_elt_by_thread_index[i] = 0;
531             }
532           else
533             hf->last_n_vectors = hf->n_vectors;
534         }
535       ptd->congested_handoff_queue_by_thread_index[i] =
536         (vlib_frame_queue_t *) (~0);
537     }
538
539   if (drop_on_congestion && n_drop)
540     vlib_buffer_free (vm, drop_list, n_drop);
541
542   return n_packets - n_drop;
543 }
544
545 #endif /* included_vlib_buffer_node_h */
546
547 /*
548  * fd.io coding-style-patch-verification: ON
549  *
550  * Local Variables:
551  * eval: (c-set-style "gnu")
552  * End:
553  */