A Protocol Independent Hierarchical FIB (VPP-352)
[vpp.git] / vnet / vnet / handoff.c
1
2 /*
3  * Copyright (c) 2016 Cisco and/or its affiliates.
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <vnet/vnet.h>
18 #include <vppinfra/xxhash.h>
19 #include <vlib/threads.h>
20 #include <vnet/handoff.h>
21
/* Per-interface handoff configuration: the set of worker threads that
 * may receive packets arriving on this interface.
 * NOTE(review): "inteface" is a typo in the original type name; it is
 * kept because the name is used throughout this file. */
typedef struct
{
  /* Bitmap of eligible worker indices as supplied via CLI/API;
   * owned by this structure once enabled (see
   * interface_handoff_enable_disable). */
  uword *workers_bitmap;
  /* Same worker set flattened into a vector, so the dispatch hash can
   * pick a worker by simple indexing. */
  u32 *workers;
} per_inteface_handoff_data_t;
27
/* Global state for the worker-handoff feature. */
typedef struct
{
  /* Cached next-node index (not read by the node functions below). */
  u32 cached_next_index;

  /* Count of vnet worker threads and the vlib_main index of the first
   * one; both filled in from the "workers" thread registration in
   * handoff_init(). */
  u32 num_workers;
  u32 first_worker_index;

  /* Per-interface handoff configuration, indexed by sw_if_index. */
  per_inteface_handoff_data_t *if_data;

  /* convenience variables */
  vlib_main_t *vlib_main;
  vnet_main_t *vnet_main;
} handoff_main_t;

handoff_main_t handoff_main;
42
/* Per-packet trace record captured by the worker-handoff node. */
typedef struct
{
  u32 sw_if_index;		/* RX interface of the packet */
  u32 next_worker_index;	/* worker chosen by the LB hash */
  u32 buffer_index;		/* vlib buffer index of the packet */
} worker_handoff_trace_t;
49
50 /* packet trace format function */
51 static u8 *
52 format_worker_handoff_trace (u8 * s, va_list * args)
53 {
54   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
55   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
56   worker_handoff_trace_t *t = va_arg (*args, worker_handoff_trace_t *);
57
58   s =
59     format (s, "worker-handoff: sw_if_index %d, next_worker %d, buffer 0x%x",
60             t->sw_if_index, t->next_worker_index, t->buffer_index);
61   return s;
62 }
63
vlib_node_registration_t handoff_node;

/* Input-side handoff node: hash each packet's ethernet header and
 * enqueue the buffer onto the frame queue of the worker thread
 * selected for its RX interface.  Runs on the thread that received
 * the packets; the chosen workers drain the queues via
 * handoff-dispatch. */
static uword
worker_handoff_node_fn (vlib_main_t * vm,
			vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  handoff_main_t *hm = &handoff_main;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  u32 n_left_from, *from;
  /* Per-thread caches of in-progress frame queue elements, one slot
   * per worker; allocated lazily on first use below. */
  static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
  static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
    = 0;
  vlib_frame_queue_elt_t *hf = 0;
  int i;
  u32 n_left_to_next_worker = 0, *to_next_worker = 0;
  u32 next_worker_index = 0;
  u32 current_worker_index = ~0;

  /* First call on this thread: size the per-worker caches. */
  if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
    {
      vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);

      vec_validate_init_empty (congested_handoff_queue_by_worker_index,
			       hm->first_worker_index + hm->num_workers - 1,
			       (vlib_frame_queue_t *) (~0));
    }

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  while (n_left_from > 0)
    {
      u32 bi0;
      vlib_buffer_t *b0;
      u32 sw_if_index0;
      u32 hash;
      u64 hash_key;
      per_inteface_handoff_data_t *ihd0;
      u32 index0;

      bi0 = from[0];
      from += 1;
      n_left_from -= 1;

      b0 = vlib_get_buffer (vm, bi0);
      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
      ASSERT (hm->if_data);
      ihd0 = vec_elt_at_index (hm->if_data, sw_if_index0);

      next_worker_index = hm->first_worker_index;

      /*
       * Force unknown traffic onto worker 0,
       * and into ethernet-input. $$$$ add more hashes.
       */

      /* Compute ingress LB hash over the ethernet header */
      hash_key = eth_get_key ((ethernet_header_t *) b0->data);
      hash = (u32) clib_xxhash (hash_key);

      /* if input node did not specify next index, then packet
         should go to ethernet-input */
      if (PREDICT_FALSE ((b0->flags & BUFFER_HANDOFF_NEXT_VALID) == 0))
	vnet_buffer (b0)->handoff.next_index =
	  HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT;
      else if (vnet_buffer (b0)->handoff.next_index ==
	       HANDOFF_DISPATCH_NEXT_IP4_INPUT
	       || vnet_buffer (b0)->handoff.next_index ==
	       HANDOFF_DISPATCH_NEXT_IP6_INPUT
	       || vnet_buffer (b0)->handoff.next_index ==
	       HANDOFF_DISPATCH_NEXT_MPLS_INPUT)
	/* L3 next nodes expect the buffer to start at the L3 header */
	vlib_buffer_advance (b0, (sizeof (ethernet_header_t)));

      /* Map the hash onto this interface's worker set; use a mask
         when the set size is a power of two, modulo otherwise. */
      if (PREDICT_TRUE (is_pow2 (vec_len (ihd0->workers))))
	index0 = hash & (vec_len (ihd0->workers) - 1);
      else
	index0 = hash % vec_len (ihd0->workers);

      next_worker_index += ihd0->workers[index0];

      /* Switching workers: close out the previous worker's frame
         queue element (if any) and open/reuse one for the new
         worker. */
      if (next_worker_index != current_worker_index)
	{
	  if (hf)
	    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;

	  hf = dpdk_get_handoff_queue_elt (next_worker_index,
					   handoff_queue_elt_by_worker_index);

	  n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
	  to_next_worker = &hf->buffer_index[hf->n_vectors];
	  current_worker_index = next_worker_index;
	}

      /* enqueue to correct worker thread */
      to_next_worker[0] = bi0;
      to_next_worker++;
      n_left_to_next_worker--;

      /* Element full: ship it now and forget the cached pointer so a
         fresh element is fetched for the next packet. */
      if (n_left_to_next_worker == 0)
	{
	  hf->n_vectors = VLIB_FRAME_SIZE;
	  vlib_put_handoff_queue_elt (hf);
	  current_worker_index = ~0;
	  handoff_queue_elt_by_worker_index[next_worker_index] = 0;
	  hf = 0;
	}

      if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
			 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
	{
	  worker_handoff_trace_t *t =
	    vlib_add_trace (vm, node, b0, sizeof (*t));
	  t->sw_if_index = sw_if_index0;
	  t->next_worker_index = next_worker_index - hm->first_worker_index;
	  t->buffer_index = bi0;
	}

    }

  /* Record the fill level of the last element we were writing into. */
  if (hf)
    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;

  /* Ship frames to the worker nodes */
  for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
    {
      if (handoff_queue_elt_by_worker_index[i])
	{
	  hf = handoff_queue_elt_by_worker_index[i];
	  /*
	   * It works better to let the handoff node
	   * rate-adapt, always ship the handoff queue element.
	   */
	  if (1 || hf->n_vectors == hf->last_n_vectors)
	    {
	      vlib_put_handoff_queue_elt (hf);
	      handoff_queue_elt_by_worker_index[i] = 0;
	    }
	  else
	    hf->last_n_vectors = hf->n_vectors;
	}
      /* Reset congestion markers for the next dispatch. */
      congested_handoff_queue_by_worker_index[i] =
	(vlib_frame_queue_t *) (~0);
    }
  hf = 0;
  current_worker_index = ~0;
  return frame->n_vectors;
}
211
/* *INDENT-OFF* */
/* Registration for the worker-handoff node.  Buffers normally leave
 * via a frame queue rather than a next node; the single error-drop
 * next arc exists for completeness. */
VLIB_REGISTER_NODE (worker_handoff_node) = {
  .function = worker_handoff_node_fn,
  .name = "worker-handoff",
  .vector_size = sizeof (u32),
  .format_trace = format_worker_handoff_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "error-drop",
  },
};
/* *INDENT-ON* */
226
VLIB_NODE_FUNCTION_MULTIARCH (worker_handoff_node, worker_handoff_node_fn)
/* Enable or disable worker handoff on a hardware interface.
 *
 * @param vm             vlib main (unused here beyond the signature)
 * @param sw_if_index    interface to configure; must be a hardware interface
 * @param bitmap         set of worker indices (relative to the first worker).
 *                       On enable, ownership of the bitmap transfers to
 *                       hm->if_data[sw_if_index]; the caller must not free it.
 *                       NOTE(review): on disable (or on the error returns
 *                       below) the bitmap is NOT stored or freed here —
 *                       the caller retains ownership in those cases.
 * @param enable_disable non-zero to redirect interface RX into the
 *                       worker-handoff node, zero to restore normal RX
 * @return 0 on success, or a VNET_API_ERROR_* code
 */
     int interface_handoff_enable_disable (vlib_main_t * vm, u32 sw_if_index,
					   uword * bitmap, int enable_disable)
{
  handoff_main_t *hm = &handoff_main;
  vnet_sw_interface_t *sw;
  vnet_main_t *vnm = vnet_get_main ();
  per_inteface_handoff_data_t *d;
  int i, rv;
  /* ~0 restores the interface's default RX path */
  u32 node_index = enable_disable ? worker_handoff_node.index : ~0;

  if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
    return VNET_API_ERROR_INVALID_SW_IF_INDEX;

  sw = vnet_get_sw_interface (vnm, sw_if_index);
  if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
    return VNET_API_ERROR_INVALID_SW_IF_INDEX;

  /* Every requested worker must exist */
  if (clib_bitmap_last_set (bitmap) >= hm->num_workers)
    return VNET_API_ERROR_INVALID_WORKER;

  vec_validate (hm->if_data, sw_if_index);
  d = vec_elt_at_index (hm->if_data, sw_if_index);

  /* Drop any previous configuration for this interface */
  vec_free (d->workers);
  vec_free (d->workers_bitmap);

  if (enable_disable)
    {
      d->workers_bitmap = bitmap;
      /* Flatten the bitmap into a vector for fast hash-based lookup */
      /* *INDENT-OFF* */
      clib_bitmap_foreach (i, bitmap,
        ({
          vec_add1(d->workers, i);
        }));
      /* *INDENT-ON* */
    }

  rv = vnet_hw_interface_rx_redirect_to_node (vnm, sw_if_index, node_index);
  return rv;
}
268
269 static clib_error_t *
270 set_interface_handoff_command_fn (vlib_main_t * vm,
271                                   unformat_input_t * input,
272                                   vlib_cli_command_t * cmd)
273 {
274   u32 sw_if_index = ~0;
275   int enable_disable = 1;
276   uword *bitmap = 0;
277
278   int rv = 0;
279
280   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
281     {
282       if (unformat (input, "disable"))
283         enable_disable = 0;
284       else if (unformat (input, "workers %U", unformat_bitmap_list, &bitmap))
285         ;
286       else if (unformat (input, "%U", unformat_vnet_sw_interface,
287                          vnet_get_main (), &sw_if_index))
288         ;
289       else
290         break;
291     }
292
293   if (sw_if_index == ~0)
294     return clib_error_return (0, "Please specify an interface...");
295
296   if (bitmap == 0)
297     return clib_error_return (0, "Please specify list of workers...");
298
299   rv =
300     interface_handoff_enable_disable (vm, sw_if_index, bitmap,
301                                       enable_disable);
302
303   switch (rv)
304     {
305     case 0:
306       break;
307
308     case VNET_API_ERROR_INVALID_SW_IF_INDEX:
309       return clib_error_return (0, "Invalid interface");
310       break;
311
312     case VNET_API_ERROR_INVALID_WORKER:
313       return clib_error_return (0, "Invalid worker(s)");
314       break;
315
316     case VNET_API_ERROR_UNIMPLEMENTED:
317       return clib_error_return (0,
318                                 "Device driver doesn't support redirection");
319       break;
320
321     default:
322       return clib_error_return (0, "unknown return value %d", rv);
323     }
324   return 0;
325 }
326
/* *INDENT-OFF* */
/* CLI: "set interface handoff <interface-name> workers <workers-list>"
 * (append "disable" to turn handoff off for the interface). */
VLIB_CLI_COMMAND (set_interface_handoff_command, static) = {
  .path = "set interface handoff",
  .short_help =
  "set interface handoff <interface-name> workers <workers-list>",
  .function = set_interface_handoff_command_fn,
};
/* *INDENT-ON* */
335
/* Per-packet trace record captured by the handoff-dispatch node. */
typedef struct
{
  u32 buffer_index;	/* vlib buffer index of the packet */
  u32 next_index;	/* next node the packet was dispatched to */
  u32 sw_if_index;	/* RX interface of the packet */
} handoff_dispatch_trace_t;
342
343 /* packet trace format function */
344 static u8 *
345 format_handoff_dispatch_trace (u8 * s, va_list * args)
346 {
347   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
348   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
349   handoff_dispatch_trace_t *t = va_arg (*args, handoff_dispatch_trace_t *);
350
351   s = format (s, "handoff-dispatch: sw_if_index %d next_index %d buffer 0x%x",
352               t->sw_if_index, t->next_index, t->buffer_index);
353   return s;
354 }
355

vlib_node_registration_t handoff_dispatch_node;

/* Error counter definitions for the handoff-dispatch node.
 * Currently only a placeholder counter is defined. */
#define foreach_handoff_dispatch_error \
_(EXAMPLE, "example packets")

typedef enum
{
#define _(sym,str) HANDOFF_DISPATCH_ERROR_##sym,
  foreach_handoff_dispatch_error
#undef _
    HANDOFF_DISPATCH_N_ERROR,
} handoff_dispatch_error_t;

/* Human-readable strings, index-aligned with handoff_dispatch_error_t */
static char *handoff_dispatch_error_strings[] = {
#define _(sym,string) string,
  foreach_handoff_dispatch_error
#undef _
};
375
/* Worker-side dispatch node: runs on each worker thread and forwards
 * buffers received over the handoff frame queue to the next node
 * recorded in vnet_buffer(b)->handoff.next_index by the handing-off
 * thread.  Standard vlib dual-loop with speculative enqueue. */
static uword
handoff_dispatch_node_fn (vlib_main_t * vm,
			  vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  handoff_dispatch_next_t next_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Dual loop: process two packets per iteration while at least
         four remain, so the next pair can be prefetched. */
      while (n_left_from >= 4 && n_left_to_next >= 2)
	{
	  u32 bi0, bi1;
	  vlib_buffer_t *b0, *b1;
	  u32 next0, next1;
	  u32 sw_if_index0, sw_if_index1;

	  /* Prefetch next iteration. */
	  {
	    vlib_buffer_t *p2, *p3;

	    p2 = vlib_get_buffer (vm, from[2]);
	    p3 = vlib_get_buffer (vm, from[3]);

	    vlib_prefetch_buffer_header (p2, LOAD);
	    vlib_prefetch_buffer_header (p3, LOAD);
	  }

	  /* speculatively enqueue b0 and b1 to the current next frame */
	  to_next[0] = bi0 = from[0];
	  to_next[1] = bi1 = from[1];
	  from += 2;
	  to_next += 2;
	  n_left_from -= 2;
	  n_left_to_next -= 2;

	  b0 = vlib_get_buffer (vm, bi0);
	  b1 = vlib_get_buffer (vm, bi1);

	  /* Next node was chosen by the handing-off thread */
	  next0 = vnet_buffer (b0)->handoff.next_index;
	  next1 = vnet_buffer (b1)->handoff.next_index;

	  if (PREDICT_FALSE (vm->trace_main.trace_active_hint))
	    {
	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
		{
		  vlib_trace_buffer (vm, node, next0, b0,	/* follow_chain */
				     0);
		  handoff_dispatch_trace_t *t =
		    vlib_add_trace (vm, node, b0, sizeof (*t));
		  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
		  t->sw_if_index = sw_if_index0;
		  t->next_index = next0;
		  t->buffer_index = bi0;
		}
	      if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
		{
		  vlib_trace_buffer (vm, node, next1, b1,	/* follow_chain */
				     0);
		  handoff_dispatch_trace_t *t =
		    vlib_add_trace (vm, node, b1, sizeof (*t));
		  sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
		  t->sw_if_index = sw_if_index1;
		  t->next_index = next1;
		  t->buffer_index = bi1;
		}
	    }

	  /* verify speculative enqueues, maybe switch current next frame */
	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
					   to_next, n_left_to_next,
					   bi0, bi1, next0, next1);
	}

      /* Single loop: remaining packets one at a time */
      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0;
	  vlib_buffer_t *b0;
	  u32 next0;
	  u32 sw_if_index0;

	  /* speculatively enqueue b0 to the current next frame */
	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  b0 = vlib_get_buffer (vm, bi0);

	  next0 = vnet_buffer (b0)->handoff.next_index;

	  if (PREDICT_FALSE (vm->trace_main.trace_active_hint))
	    {
	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
		{
		  vlib_trace_buffer (vm, node, next0, b0,	/* follow_chain */
				     0);
		  handoff_dispatch_trace_t *t =
		    vlib_add_trace (vm, node, b0, sizeof (*t));
		  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
		  t->sw_if_index = sw_if_index0;
		  t->next_index = next0;
		  t->buffer_index = bi0;
		}
	    }

	  /* verify speculative enqueue, maybe switch current next frame */
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next,
					   bi0, next0);
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}
502
/* *INDENT-OFF* */
/* Registration for the handoff-dispatch node.  The next-node arcs
 * must stay index-aligned with the HANDOFF_DISPATCH_NEXT_* enum used
 * when the handing-off thread stamps handoff.next_index. */
VLIB_REGISTER_NODE (handoff_dispatch_node) = {
  .function = handoff_dispatch_node_fn,
  .name = "handoff-dispatch",
  .vector_size = sizeof (u32),
  .format_trace = format_handoff_dispatch_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .flags = VLIB_NODE_FLAG_IS_HANDOFF,

  .n_errors = ARRAY_LEN(handoff_dispatch_error_strings),
  .error_strings = handoff_dispatch_error_strings,

  .n_next_nodes = HANDOFF_DISPATCH_N_NEXT,

  .next_nodes = {
        [HANDOFF_DISPATCH_NEXT_DROP] = "error-drop",
        [HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT] = "ethernet-input",
        [HANDOFF_DISPATCH_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
        [HANDOFF_DISPATCH_NEXT_IP6_INPUT] = "ip6-input",
        [HANDOFF_DISPATCH_NEXT_MPLS_INPUT] = "mpls-input",
  },
};
/* *INDENT-ON* */
526
VLIB_NODE_FUNCTION_MULTIARCH (handoff_dispatch_node, handoff_dispatch_node_fn)
/* Init function: discover the standard "workers" thread pool, record
 * its size and first index in handoff_main, and publish the
 * handoff-dispatch node index to the thread main structure. */
     clib_error_t *handoff_init (vlib_main_t * vm)
{
  handoff_main_t *hm = &handoff_main;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  clib_error_t *error;
  uword *p;

  /* Thread subsystem must be initialized first */
  if ((error = vlib_call_init_function (vm, threads_init)))
    return error;

  vlib_thread_registration_t *tr;
  /* Only the standard vnet worker threads are supported */
  p = hash_get_mem (tm->thread_registrations_by_name, "workers");
  if (p)
    {
      tr = (vlib_thread_registration_t *) p[0];
      if (tr)
	{
	  hm->num_workers = tr->count;
	  hm->first_worker_index = tr->first_index;
	}
    }
  /* NOTE(review): with no "workers" registration, num_workers stays 0
     and first_worker_index stays 0 (zero-initialized global). */

  hm->vlib_main = vm;
  hm->vnet_main = &vnet_main;

  /* Exactly one node may claim the handoff-dispatch role */
  ASSERT (tm->handoff_dispatch_node_index == ~0);
  tm->handoff_dispatch_node_index = handoff_dispatch_node.index;

  return 0;
}

VLIB_INIT_FUNCTION (handoff_init);
561
562 /*
563  * fd.io coding-style-patch-verification: ON
564  *
565  * Local Variables:
566  * eval: (c-set-style "gnu")
567  * End:
568  */