/* vpp.git: src/vlib/buffer_node.h — commit: "Cleanup of handoff code" */
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * buffer_node.h: VLIB buffer handling node helper macros/inlines
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #ifndef included_vlib_buffer_node_h
41 #define included_vlib_buffer_node_h
42
43 /** \file
44     vlib buffer/node functions
45 */
46
47 /** \brief Finish enqueueing two buffers forward in the graph.
48  Standard dual loop boilerplate element. This is a MACRO,
49  with MULTIPLE SIDE EFFECTS. In the ideal case,
50  <code>next_index == next0 == next1</code>,
51  which means that the speculative enqueue at the top of the dual loop
52  has correctly dealt with both packets. In that case, the macro does
53  nothing at all.
54
55  @param vm vlib_main_t pointer, varies by thread
56  @param node current node vlib_node_runtime_t pointer
57  @param next_index speculated next index used for both packets
58  @param to_next speculated vector pointer used for both packets
59  @param n_left_to_next number of slots left in speculated vector
60  @param bi0 first buffer index
61  @param bi1 second buffer index
62  @param next0 actual next index to be used for the first packet
63  @param next1 actual next index to be used for the second packet
64
65  @return @c next_index -- speculative next index to be used for future packets
66  @return @c to_next -- speculative frame to be used for future packets
67  @return @c n_left_to_next -- number of slots left in speculative frame
68 */
69
#define vlib_validate_buffer_enqueue_x2(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,next0,next1) \
do {                                                                    \
  /* 2-bit dispatch code: bit 0 set if bi0 was mispredicted,        */  \
  /* bit 1 set if bi1 was.  0 means both speculative enqueues stand. */ \
  int enqueue_code = (next0 != next_index) + 2*(next1 != next_index);   \
                                                                        \
  if (PREDICT_FALSE (enqueue_code != 0))                                \
    {                                                                   \
      switch (enqueue_code)                                             \
        {                                                               \
        case 1:                                                         \
          /* A B A: only bi0 mispredicted.  Slide bi1 down into bi0's */ \
          /* slot, hand back the freed slot, and route bi0 singly.    */ \
          to_next[-2] = bi1;                                            \
          to_next -= 1;                                                 \
          n_left_to_next += 1;                                          \
          vlib_set_next_frame_buffer (vm, node, next0, bi0);            \
          break;                                                        \
                                                                        \
        case 2:                                                         \
          /* A A B: only bi1 mispredicted.  bi0's slot is already     */ \
          /* correct; give back bi1's slot and route bi1 singly.      */ \
          to_next -= 1;                                                 \
          n_left_to_next += 1;                                          \
          vlib_set_next_frame_buffer (vm, node, next1, bi1);            \
          break;                                                        \
                                                                        \
        case 3:                                                         \
          /* A B B or A B C: both mispredicted.  Give back both slots */ \
          /* and route each buffer individually.                      */ \
          to_next -= 2;                                                 \
          n_left_to_next += 2;                                          \
          vlib_set_next_frame_buffer (vm, node, next0, bi0);            \
          vlib_set_next_frame_buffer (vm, node, next1, bi1);            \
          /* A B B: both went to the same (new) next — re-aim the     */ \
          /* speculation at that node for subsequent packets.         */ \
          if (next0 == next1)                                           \
            {                                                           \
              vlib_put_next_frame (vm, node, next_index,                \
                                   n_left_to_next);                     \
              next_index = next1;                                       \
              vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
            }                                                           \
        }                                                               \
    }                                                                   \
} while (0)
109
110
111 /** \brief Finish enqueueing four buffers forward in the graph.
112  Standard quad loop boilerplate element. This is a MACRO,
113  with MULTIPLE SIDE EFFECTS. In the ideal case,
114  <code>next_index == next0 == next1 == next2 == next3</code>,
115  which means that the speculative enqueue at the top of the quad loop
116  has correctly dealt with all four packets. In that case, the macro does
117  nothing at all.
118
119  @param vm vlib_main_t pointer, varies by thread
120  @param node current node vlib_node_runtime_t pointer
121  @param next_index speculated next index used for both packets
122  @param to_next speculated vector pointer used for both packets
123  @param n_left_to_next number of slots left in speculated vector
124  @param bi0 first buffer index
125  @param bi1 second buffer index
126  @param bi2 third buffer index
127  @param bi3 fourth buffer index
128  @param next0 actual next index to be used for the first packet
129  @param next1 actual next index to be used for the second packet
130  @param next2 actual next index to be used for the third packet
131  @param next3 actual next index to be used for the fourth packet
132
133  @return @c next_index -- speculative next index to be used for future packets
134  @return @c to_next -- speculative frame to be used for future packets
135  @return @c n_left_to_next -- number of slots left in speculative frame
136 */
137
#define vlib_validate_buffer_enqueue_x4(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,bi2,bi3,next0,next1,next2,next3) \
do {                                                                    \
  /* After the fact: check the [speculative] enqueue to "next".       */ \
  /* Nonzero iff at least one of next0..next3 differs from next_index. */ \
  u32 fix_speculation = (next_index ^ next0) | (next_index ^ next1)     \
    | (next_index ^ next2) | (next_index ^ next3);                      \
  if (PREDICT_FALSE(fix_speculation))                                   \
    {                                                                   \
      /* rewind: give back all four speculatively-consumed slots */     \
      to_next -= 4;                                                     \
      n_left_to_next += 4;                                              \
                                                                        \
      /* If bi0 belongs to "next", send it there */                     \
      if (next_index == next0)                                          \
        {                                                               \
          to_next[0] = bi0;                                             \
          to_next++;                                                    \
          n_left_to_next --;                                            \
        }                                                               \
      else              /* send it where it needs to go */              \
        vlib_set_next_frame_buffer (vm, node, next0, bi0);              \
                                                                        \
      if (next_index == next1)                                          \
        {                                                               \
          to_next[0] = bi1;                                             \
          to_next++;                                                    \
          n_left_to_next --;                                            \
        }                                                               \
      else                                                              \
        vlib_set_next_frame_buffer (vm, node, next1, bi1);              \
                                                                        \
      if (next_index == next2)                                          \
        {                                                               \
          to_next[0] = bi2;                                             \
          to_next++;                                                    \
          n_left_to_next --;                                            \
        }                                                               \
      else                                                              \
        vlib_set_next_frame_buffer (vm, node, next2, bi2);              \
                                                                        \
      if (next_index == next3)                                          \
        {                                                               \
          to_next[0] = bi3;                                             \
          to_next++;                                                    \
          n_left_to_next --;                                            \
        }                                                               \
      else                                                              \
        {                                                               \
          vlib_set_next_frame_buffer (vm, node, next3, bi3);            \
                                                                        \
          /* Change speculation: last 2 packets went to the same node */ \
          if (next2 == next3)                                           \
            {                                                           \
              vlib_put_next_frame (vm, node, next_index, n_left_to_next); \
              next_index = next3;                                       \
              vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
            }                                                           \
        }                                                               \
    }                                                                   \
} while (0)
197
198 /** \brief Finish enqueueing one buffer forward in the graph.
199  Standard single loop boilerplate element. This is a MACRO,
200  with MULTIPLE SIDE EFFECTS. In the ideal case,
201  <code>next_index == next0</code>,
202  which means that the speculative enqueue at the top of the single loop
203  has correctly dealt with the packet in hand. In that case, the macro does
204  nothing at all.
205
206  @param vm vlib_main_t pointer, varies by thread
207  @param node current node vlib_node_runtime_t pointer
208  @param next_index speculated next index used for both packets
209  @param to_next speculated vector pointer used for both packets
210  @param n_left_to_next number of slots left in speculated vector
211  @param bi0 first buffer index
212  @param next0 actual next index to be used for the first packet
213
214  @return @c next_index -- speculative next index to be used for future packets
215  @return @c to_next -- speculative frame to be used for future packets
216  @return @c n_left_to_next -- number of slots left in speculative frame
217 */
#define vlib_validate_buffer_enqueue_x1(vm,node,next_index,to_next,n_left_to_next,bi0,next0) \
do {                                                                    \
  if (PREDICT_FALSE (next0 != next_index))                              \
    {                                                                   \
      /* bi0 was speculatively written into the wrong frame.  Close   */ \
      /* that frame, handing back the one slot bi0 consumed (+1),     */ \
      /* then re-aim speculation at next0 and enqueue bi0 there.      */ \
      vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1);   \
      next_index = next0;                                               \
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
                                                                        \
      to_next[0] = bi0;                                                 \
      to_next += 1;                                                     \
      n_left_to_next -= 1;                                              \
    }                                                                   \
} while (0)
231
/** \brief Generic graph-node dispatch loop.
 *
 * Standard speculative-enqueue dual/single loop: buffers are copied to
 * the frame for the cached next index up front, then the per-buffer
 * callbacks choose the real next index and the validate_enqueue macros
 * repair any misprediction.
 *
 * @param vm            vlib_main_t pointer, varies by thread
 * @param node          current node runtime
 * @param frame         frame of buffer indices to process
 * @param sizeof_trace  per-packet trace record size, used when tracing
 * @param opaque1       passed through to the callbacks, uninterpreted here
 * @param opaque2       passed through to the callbacks, uninterpreted here
 * @param two_buffers   callback classifying two buffers at once
 * @param one_buffer    callback classifying a single buffer
 * @return number of vectors processed (frame->n_vectors)
 */
always_inline uword
generic_buffer_node_inline (vlib_main_t * vm,
                            vlib_node_runtime_t * node,
                            vlib_frame_t * frame,
                            uword sizeof_trace,
                            void *opaque1,
                            uword opaque2,
                            void (*two_buffers) (vlib_main_t * vm,
                                                 void *opaque1,
                                                 uword opaque2,
                                                 vlib_buffer_t * b0,
                                                 vlib_buffer_t * b1,
                                                 u32 * next0, u32 * next1),
                            void (*one_buffer) (vlib_main_t * vm,
                                                void *opaque1, uword opaque2,
                                                vlib_buffer_t * b0,
                                                u32 * next0))
{
  u32 n_left_from, *from, *to_next;
  u32 next_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  /* Speculate: most packets go where the last one went. */
  next_index = node->cached_next_index;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
                                   /* stride */ 1, sizeof_trace);

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Dual loop: needs >= 4 input buffers so the prefetch below
         always has two look-ahead buffers to touch. */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          vlib_buffer_t *p0, *p1;
          u32 pi0, next0;
          u32 pi1, next1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t *p2, *p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            CLIB_PREFETCH (p2->data, 64, LOAD);
            CLIB_PREFETCH (p3->data, 64, LOAD);
          }

          /* Speculatively enqueue both buffers to next_index's frame. */
          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];
          from += 2;
          to_next += 2;
          n_left_from -= 2;
          n_left_to_next -= 2;

          p0 = vlib_get_buffer (vm, pi0);
          p1 = vlib_get_buffer (vm, pi1);

          two_buffers (vm, opaque1, opaque2, p0, p1, &next0, &next1);

          /* Fix up the speculation if either next differs. */
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           pi0, pi1, next0, next1);
        }

      /* Single loop: remaining buffers, one at a time. */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t *p0;
          u32 pi0, next0;

          pi0 = from[0];
          to_next[0] = pi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          p0 = vlib_get_buffer (vm, pi0);

          one_buffer (vm, opaque1, opaque2, p0, &next0);

          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           pi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}
330
/** \brief Enqueue buffers to per-buffer next nodes, batching runs.
 *
 * Each buffers[i] is sent to next node nexts[i].  Runs of identical
 * next indices are detected (with SIMD where available) and copied in
 * chunks of up to 32/16/8/4 buffer indices at a time; a change of next
 * index closes the current frame and opens one for the new next.
 *
 * @param vm       vlib_main_t pointer
 * @param node     current node runtime
 * @param buffers  vector of buffer indices to enqueue
 * @param nexts    per-buffer next-node indices, parallel to buffers
 * @param count    number of entries in buffers/nexts
 *
 * NOTE(review): the vector loads below read a fixed 32/16/8 u16s from
 * nexts, and the scalar fallback reads nexts[1..3], regardless of how
 * many entries remain.  The result is clamped by 'max' so correctness
 * is unaffected, but this over-reads past the logical end of nexts —
 * presumably callers pass VLIB_FRAME_SIZE-sized arrays; confirm.
 */
static_always_inline void
vlib_buffer_enqueue_to_next (vlib_main_t * vm, vlib_node_runtime_t * node,
                             u32 * buffers, u16 * nexts, uword count)
{
  u32 *to_next, n_left_to_next, max;
  u16 next_index;

  next_index = nexts[0];
  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
  /* max = largest chunk we may copy without overrunning frame or input */
  max = clib_min (n_left_to_next, count);

  while (count)
    {
      u32 n_enqueued;
      /* Next index changed, or current frame is full: rotate frames. */
      if ((nexts[0] != next_index) || n_left_to_next == 0)
        {
          vlib_put_next_frame (vm, node, next_index, n_left_to_next);
          next_index = nexts[0];
          vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
          max = clib_min (n_left_to_next, count);
        }
      /* Count the leading run of entries equal to nexts[0]:
         compare a vector of nexts against a splat of nexts[0], take
         the MSB mask, and count trailing ones (zeros of ~bitmap). */
#if defined(CLIB_HAVE_VEC512)
      u16x32 next32 = u16x32_load_unaligned (nexts);
      next32 = (next32 == u16x32_splat (next32[0]));
      u64 bitmap = u16x32_msb_mask (next32);
      n_enqueued = count_trailing_zeros (~bitmap);
#elif defined(CLIB_HAVE_VEC256)
      u16x16 next16 = u16x16_load_unaligned (nexts);
      next16 = (next16 == u16x16_splat (next16[0]));
      /* byte-granular mask, so two mask bits per u16 entry */
      u64 bitmap = u8x32_msb_mask ((u8x32) next16);
      n_enqueued = count_trailing_zeros (~bitmap) / 2;
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
      u16x8 next8 = u16x8_load_unaligned (nexts);
      next8 = (next8 == u16x8_splat (next8[0]));
      u64 bitmap = u8x16_msb_mask ((u8x16) next8);
      n_enqueued = count_trailing_zeros (~bitmap) / 2;
#else
      /* Scalar fallback: check whether the next 4 all match. */
      u16 x = 0;
      x |= next_index ^ nexts[1];
      x |= next_index ^ nexts[2];
      x |= next_index ^ nexts[3];
      n_enqueued = (x == 0) ? 4 : 1;
#endif

      /* Clamp the run to what fits in the frame / remains in input. */
      if (PREDICT_FALSE (n_enqueued > max))
        n_enqueued = max;

#ifdef CLIB_HAVE_VEC512
      if (n_enqueued >= 32)
        {
          clib_memcpy (to_next, buffers, 32 * sizeof (u32));
          nexts += 32;
          to_next += 32;
          buffers += 32;
          n_left_to_next -= 32;
          count -= 32;
          max -= 32;
          continue;
        }
#endif

#ifdef CLIB_HAVE_VEC256
      if (n_enqueued >= 16)
        {
          clib_memcpy (to_next, buffers, 16 * sizeof (u32));
          nexts += 16;
          to_next += 16;
          buffers += 16;
          n_left_to_next -= 16;
          count -= 16;
          max -= 16;
          continue;
        }
#endif

#ifdef CLIB_HAVE_VEC128
      if (n_enqueued >= 8)
        {
          clib_memcpy (to_next, buffers, 8 * sizeof (u32));
          nexts += 8;
          to_next += 8;
          buffers += 8;
          n_left_to_next -= 8;
          count -= 8;
          max -= 8;
          continue;
        }
#endif

      if (n_enqueued >= 4)
        {
          clib_memcpy (to_next, buffers, 4 * sizeof (u32));
          nexts += 4;
          to_next += 4;
          buffers += 4;
          n_left_to_next -= 4;
          count -= 4;
          max -= 4;
          continue;
        }

      /* copy */
      to_next[0] = buffers[0];

      /* next */
      nexts += 1;
      to_next += 1;
      buffers += 1;
      n_left_to_next -= 1;
      count -= 1;
      max -= 1;
    }
  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
445
/** \brief Hand off buffers to other threads via frame queues.
 *
 * Each buffer_indices[i] is appended to the handoff frame-queue element
 * for thread thread_indices[i].  Runs destined for the same thread reuse
 * the cached element; full elements (VLIB_FRAME_SIZE) are shipped
 * immediately, and all partially-filled elements are shipped at the end.
 *
 * @param vm                vlib_main_t pointer
 * @param frame_queue_index which frame-queue set to hand off on
 * @param buffer_indices    buffer indices to hand off
 * @param thread_indices    destination thread per buffer, parallel array
 * @param n_left            number of entries in both arrays
 */
static_always_inline void
vlib_buffer_enqueue_to_thread (vlib_main_t * vm, u32 frame_queue_index,
                               u32 * buffer_indices, u16 * thread_indices,
                               u32 n_left)
{
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  /* Per-thread caches, lazily sized below on first use. */
  static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_thread_index =
    0;
  static __thread vlib_frame_queue_t **congested_handoff_queue_by_thread_index
    = 0;
  vlib_frame_queue_elt_t *hf = 0;
  u32 n_left_to_next_thread = 0, *to_next_thread = 0;
  u32 next_thread_index, current_thread_index = ~0;
  int i;

  if (PREDICT_FALSE (handoff_queue_elt_by_thread_index == 0))
    {
      vec_validate (handoff_queue_elt_by_thread_index, tm->n_vlib_mains - 1);
      vec_validate_init_empty (congested_handoff_queue_by_thread_index,
                               tm->n_vlib_mains - 1,
                               (vlib_frame_queue_t *) (~0));
    }

  while (n_left)
    {
      next_thread_index = thread_indices[0];

      /* Destination changed: finalize the element we were filling and
         fetch (or create) the element for the new destination thread. */
      if (next_thread_index != current_thread_index)
        {
          if (hf)
            hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;

          hf = vlib_get_worker_handoff_queue_elt (frame_queue_index,
                                                  next_thread_index,
                                                  handoff_queue_elt_by_thread_index);

          n_left_to_next_thread = VLIB_FRAME_SIZE - hf->n_vectors;
          to_next_thread = &hf->buffer_index[hf->n_vectors];
          current_thread_index = next_thread_index;
        }

      to_next_thread[0] = buffer_indices[0];
      to_next_thread++;
      n_left_to_next_thread--;

      /* Element full: ship it now and drop it from the cache so the
         next buffer for this thread starts a fresh element. */
      if (n_left_to_next_thread == 0)
        {
          hf->n_vectors = VLIB_FRAME_SIZE;
          vlib_put_frame_queue_elt (hf);
          current_thread_index = ~0;
          handoff_queue_elt_by_thread_index[next_thread_index] = 0;
          hf = 0;
        }

      /* next */
      thread_indices += 1;
      buffer_indices += 1;
      n_left -= 1;
    }

  /* Record the fill level of the last partially-filled element. */
  if (hf)
    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;

  /* Ship frames to the thread nodes */
  for (i = 0; i < vec_len (handoff_queue_elt_by_thread_index); i++)
    {
      if (handoff_queue_elt_by_thread_index[i])
        {
          hf = handoff_queue_elt_by_thread_index[i];
          /*
           * It works better to let the handoff node
           * rate-adapt, always ship the handoff queue element.
           */
          /* '1 ||' deliberately disables the rate-adapt branch below;
             the else arm is currently dead code kept for reference. */
          if (1 || hf->n_vectors == hf->last_n_vectors)
            {
              vlib_put_frame_queue_elt (hf);
              handoff_queue_elt_by_thread_index[i] = 0;
            }
          else
            hf->last_n_vectors = hf->n_vectors;
        }
      /* NOTE(review): congestion vector is reset here but never read in
         this function — presumably consumed elsewhere or vestigial. */
      congested_handoff_queue_by_thread_index[i] =
        (vlib_frame_queue_t *) (~0);
    }
}
531
532 #endif /* included_vlib_buffer_node_h */
533
534 /*
535  * fd.io coding-style-patch-verification: ON
536  *
537  * Local Variables:
538  * eval: (c-set-style "gnu")
539  * End:
540  */