6cb8d02b4315a79cfb7bdc75df913a737be5ddbd
[vpp.git] / vnet / vnet / handoff.c
1
2 /*
3  * Copyright (c) 2016 Cisco and/or its affiliates.
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <vnet/vnet.h>
18 #include <vppinfra/xxhash.h>
19 #include <vlib/threads.h>
20 #include <vnet/handoff.h>
21
/* Per-interface handoff configuration: which workers may receive traffic
   from this interface.
   NOTE(review): "inteface" is a typo for "interface", but the name is used
   throughout this file — renaming is a wider refactor, left as-is. */
typedef struct {
  uword * workers_bitmap;   /* bitmap of eligible workers (relative indices) */
  u32 * workers;            /* vector of worker indices built from the bitmap */
} per_inteface_handoff_data_t;
26
/* Module state for the handoff feature. */
typedef struct {
  u32 cached_next_index;    /* NOTE(review): set nowhere in this file — confirm use */
  u32 num_workers;          /* count of "workers" threads (from thread registration) */
  u32 first_worker_index;   /* vlib_main index of the first worker thread */

  /* per-sw_if_index handoff data, indexed by sw_if_index */
  per_inteface_handoff_data_t * if_data;

  /* convenience variables */
  vlib_main_t * vlib_main;
  vnet_main_t * vnet_main;
} handoff_main_t;

handoff_main_t handoff_main;
40
/* Trace record captured per handed-off packet. */
typedef struct {
  u32 sw_if_index;          /* rx interface of the packet */
  u32 next_worker_index;    /* chosen worker, relative to first_worker_index */
  u32 buffer_index;         /* vlib buffer index */
} worker_handoff_trace_t;
46
47 /* packet trace format function */
48 static u8 * format_worker_handoff_trace (u8 * s, va_list * args)
49 {
50   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
51   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
52   worker_handoff_trace_t * t = va_arg (*args, worker_handoff_trace_t *);
53
54   s = format (s, "worker-handoff: sw_if_index %d, next_worker %d, buffer 0x%x",
55               t->sw_if_index, t->next_worker_index, t->buffer_index);
56   return s;
57 }
58
/* NOTE(review): the node registered below is worker_handoff_node; this
   forward declaration of handoff_node is not referenced in this file —
   confirm it is needed elsewhere before removing. */
vlib_node_registration_t handoff_node;
60
/*
 * worker-handoff node: runs on an input thread and distributes packets
 * across the worker threads configured for the rx interface.  The worker
 * is chosen by hashing the ethernet header (eth_get_key + clib_xxhash) so
 * a given flow always lands on the same worker.  Packets are shipped to
 * workers through per-worker handoff frame-queue elements.
 */
static uword
worker_handoff_node_fn (vlib_main_t * vm,
                        vlib_node_runtime_t * node,
                        vlib_frame_t * frame)
{
  handoff_main_t * hm = &handoff_main;
  vlib_thread_main_t * tm = vlib_get_thread_main();
  u32 n_left_from, * from;
  /* Lazily-allocated per-thread caches: one in-progress frame-queue
     element per destination worker, plus a congestion-marker array. */
  static __thread vlib_frame_queue_elt_t ** handoff_queue_elt_by_worker_index;
  static __thread vlib_frame_queue_t ** congested_handoff_queue_by_worker_index = 0;
  vlib_frame_queue_elt_t * hf = 0;
  int i;
  u32 n_left_to_next_worker = 0, * to_next_worker = 0;
  u32 next_worker_index = 0;
  u32 current_worker_index = ~0;

  if (PREDICT_FALSE(handoff_queue_elt_by_worker_index == 0))
    {
      vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);

      vec_validate_init_empty (congested_handoff_queue_by_worker_index,
                               hm->first_worker_index + hm->num_workers - 1,
                               (vlib_frame_queue_t *)(~0));
    }

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  while (n_left_from > 0)
    {
      u32 bi0;
      vlib_buffer_t * b0;
      u32 sw_if_index0;
      u32 hash;
      u64 hash_key;
      per_inteface_handoff_data_t * ihd0;
      u32 index0;

      bi0 = from[0];
      from += 1;
      n_left_from -= 1;

      b0 = vlib_get_buffer (vm, bi0);
      sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
      ASSERT (hm->if_data);
      ihd0 = vec_elt_at_index (hm->if_data, sw_if_index0);

      next_worker_index = hm->first_worker_index;

      /*
       * Force unknown traffic onto worker 0,
       * and into ethernet-input. $$$$ add more hashes.
       */

      /* Compute ingress LB hash */
      hash_key = eth_get_key ((ethernet_header_t *) b0->data);
      hash = (u32) clib_xxhash (hash_key);

      /* if input node did not specify next index, then packet
         should go to ethernet-input */
      if (PREDICT_FALSE ((b0->flags & BUFFER_HANDOFF_NEXT_VALID) == 0))
        vnet_buffer(b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT;
      else if (vnet_buffer(b0)->handoff.next_index == HANDOFF_DISPATCH_NEXT_IP4_INPUT ||
               vnet_buffer(b0)->handoff.next_index == HANDOFF_DISPATCH_NEXT_IP6_INPUT ||
               vnet_buffer(b0)->handoff.next_index == HANDOFF_DISPATCH_NEXT_MPLS_INPUT)
        /* L3 next nodes expect the buffer to start at the L3 header */
        vlib_buffer_advance (b0, (sizeof(ethernet_header_t)));

      /* Map the hash onto the interface's worker set; mask is cheaper than
         modulo when the set size is a power of two. */
      if (PREDICT_TRUE (is_pow2 (vec_len (ihd0->workers))))
        index0 = hash & (vec_len (ihd0->workers) - 1);
      else
        index0 = hash % vec_len (ihd0->workers);

      next_worker_index += ihd0->workers[index0];

      if (next_worker_index != current_worker_index)
        {
          /* Destination worker changed: record how many slots were used in
             the previous worker's element, then fetch (or create) the
             in-progress element for the new worker. */
          if (hf)
            hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;

          hf = dpdk_get_handoff_queue_elt(next_worker_index,
                                          handoff_queue_elt_by_worker_index);

          n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
          to_next_worker = &hf->buffer_index[hf->n_vectors];
          current_worker_index = next_worker_index;
        }

      /* enqueue to correct worker thread */
      to_next_worker[0] = bi0;
      to_next_worker++;
      n_left_to_next_worker--;

      if (n_left_to_next_worker == 0)
        {
          /* Element full: ship it now; a fresh element will be fetched on
             the next packet for this worker. */
          hf->n_vectors = VLIB_FRAME_SIZE;
          vlib_put_handoff_queue_elt(hf);
          current_worker_index = ~0;
          handoff_queue_elt_by_worker_index[next_worker_index] = 0;
          hf = 0;
        }

      if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
                        && (b0->flags & VLIB_BUFFER_IS_TRACED)))
        {
          worker_handoff_trace_t *t =
             vlib_add_trace (vm, node, b0, sizeof (*t));
          t->sw_if_index = sw_if_index0;
          /* trace stores the worker index relative to the first worker */
          t->next_worker_index = next_worker_index - hm->first_worker_index;
          t->buffer_index = bi0;
        }

    }

  /* Close out the element currently being filled, if any. */
  if (hf)
    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;

  /* Ship frames to the worker nodes */
  for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
    {
      if (handoff_queue_elt_by_worker_index[i])
        {
          hf = handoff_queue_elt_by_worker_index[i];
          /*
           * It works better to let the handoff node
           * rate-adapt, always ship the handoff queue element.
           */
          if (1 || hf->n_vectors == hf->last_n_vectors)
            {
              vlib_put_handoff_queue_elt(hf);
              handoff_queue_elt_by_worker_index[i] = 0;
            }
          else
            hf->last_n_vectors = hf->n_vectors;
        }
      /* reset congestion markers for the next frame */
      congested_handoff_queue_by_worker_index[i] = (vlib_frame_queue_t *)(~0);
    }
  hf = 0;
  current_worker_index = ~0;
  return frame->n_vectors;
}
201
/* Node registration for worker-handoff.  Traffic leaves this node via
   handoff frame queues rather than next frames, so the only declared next
   node is error-drop. */
VLIB_REGISTER_NODE (worker_handoff_node) = {
  .function = worker_handoff_node_fn,
  .name = "worker-handoff",
  .vector_size = sizeof (u32),
  .format_trace = format_worker_handoff_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_next_nodes = 1,
  .next_nodes = {
        [0] = "error-drop",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (worker_handoff_node, worker_handoff_node_fn)
216
/*
 * Enable or disable worker handoff on a hardware interface.
 *
 * sw_if_index     - interface to (un)configure; must be a hardware interface
 * bitmap          - set of eligible workers, indices relative to the first
 *                   worker thread
 * enable_disable  - non-zero to enable, zero to disable
 *
 * Returns 0 on success or a VNET_API_ERROR_* code.
 */
int interface_handoff_enable_disable (vlib_main_t * vm, u32 sw_if_index,
                                      uword * bitmap, int enable_disable)
{
  handoff_main_t * hm = &handoff_main;
  vnet_sw_interface_t * sw;
  vnet_main_t * vnm = vnet_get_main();
  per_inteface_handoff_data_t * d;
  int i, rv;
  /* Redirect interface rx into the worker-handoff node on enable; ~0
     restores the default rx path on disable. */
  u32 node_index = enable_disable ? worker_handoff_node.index : ~0;

  if (pool_is_free_index (vnm->interface_main.sw_interfaces,
                          sw_if_index))
    return VNET_API_ERROR_INVALID_SW_IF_INDEX;

  sw = vnet_get_sw_interface (vnm, sw_if_index);
  if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
    return VNET_API_ERROR_INVALID_SW_IF_INDEX;

  /* every requested worker must actually exist */
  if (clib_bitmap_last_set(bitmap) >= hm->num_workers)
    return VNET_API_ERROR_INVALID_WORKER;

  vec_validate (hm->if_data, sw_if_index);
  d = vec_elt_at_index(hm->if_data, sw_if_index);

  /* drop any previous configuration for this interface */
  vec_free (d->workers);
  vec_free (d->workers_bitmap);

  if (enable_disable)
    {
      /* Takes ownership of 'bitmap': it is stored here and freed on the
         next reconfiguration.
         NOTE(review): on the disable path (and on the error returns above)
         the caller's bitmap is neither stored nor freed — confirm ownership
         with callers; this looks like a leak. */
      d->workers_bitmap = bitmap;
      clib_bitmap_foreach (i, bitmap,
        ({
          vec_add1(d->workers, i);
        }));
    }

  rv = vnet_hw_interface_rx_redirect_to_node (vnm, sw_if_index, node_index);
  return rv;
}
256
257 static clib_error_t *
258 set_interface_handoff_command_fn (vlib_main_t * vm,
259                                   unformat_input_t * input,
260                                   vlib_cli_command_t * cmd)
261 {
262   u32 sw_if_index = ~0;
263   int enable_disable = 1;
264   uword * bitmap = 0;
265
266   int rv = 0;
267
268   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
269     if (unformat (input, "disable"))
270       enable_disable = 0;
271     else if (unformat (input, "workers %U", unformat_bitmap_list,
272                        &bitmap))
273       ;
274     else if (unformat (input, "%U", unformat_vnet_sw_interface,
275                        vnet_get_main(), &sw_if_index))
276       ;
277     else
278       break;
279   }
280
281   if (sw_if_index == ~0)
282     return clib_error_return (0, "Please specify an interface...");
283
284   if (bitmap == 0)
285     return clib_error_return (0, "Please specify list of workers...");
286
287   rv = interface_handoff_enable_disable (vm, sw_if_index, bitmap, enable_disable);
288
289   switch(rv) {
290     case 0:
291       break;
292
293     case VNET_API_ERROR_INVALID_SW_IF_INDEX:
294       return clib_error_return (0, "Invalid interface");
295       break;
296
297     case VNET_API_ERROR_INVALID_WORKER:
298       return clib_error_return (0, "Invalid worker(s)");
299       break;
300
301     case VNET_API_ERROR_UNIMPLEMENTED:
302       return clib_error_return (0, "Device driver doesn't support redirection");
303       break;
304
305     default:
306       return clib_error_return (0, "unknown return value %d", rv);
307   }
308   return 0;
309 }
310
/* CLI registration: "set interface handoff <interface-name> workers
   <workers-list>" (add "disable" to revert). */
VLIB_CLI_COMMAND (set_interface_handoff_command, static) = {
    .path = "set interface handoff",
    .short_help =
    "set interface handoff <interface-name> workers <workers-list>",
    .function = set_interface_handoff_command_fn,
};
317
/* Trace record captured per dispatched packet on the worker side. */
typedef struct {
  u32 buffer_index;   /* vlib buffer index */
  u32 next_index;     /* next node chosen by the handoff sender */
  u32 sw_if_index;    /* rx interface of the packet */
} handoff_dispatch_trace_t;
323
324 /* packet trace format function */
325 static u8 * format_handoff_dispatch_trace (u8 * s, va_list * args)
326 {
327   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
328   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
329   handoff_dispatch_trace_t * t = va_arg (*args, handoff_dispatch_trace_t *);
330
331   s = format (s, "handoff-dispatch: sw_if_index %d next_index %d buffer 0x%x",
332       t->sw_if_index,
333       t->next_index,
334       t->buffer_index);
335   return s;
336 }
337
338
vlib_node_registration_t handoff_dispatch_node;

/* Error counter table for the handoff-dispatch node; the _() x-macro below
   expands once into enum symbols and once into the string table, keeping
   the two in sync. */
#define foreach_handoff_dispatch_error \
_(EXAMPLE, "example packets")

typedef enum {
#define _(sym,str) HANDOFF_DISPATCH_ERROR_##sym,
  foreach_handoff_dispatch_error
#undef _
  HANDOFF_DISPATCH_N_ERROR,
} handoff_dispatch_error_t;

static char * handoff_dispatch_error_strings[] = {
#define _(sym,string) string,
  foreach_handoff_dispatch_error
#undef _
};
356
/*
 * handoff-dispatch node: runs on worker threads.  Packets arrive here via
 * the handoff frame queue; each is forwarded to the next node that the
 * sending thread recorded in vnet_buffer(b)->handoff.next_index.
 */
static uword
handoff_dispatch_node_fn (vlib_main_t * vm,
                  vlib_node_runtime_t * node,
                  vlib_frame_t * frame)
{
  u32 n_left_from, * from, * to_next;
  handoff_dispatch_next_t next_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index,
                           to_next, n_left_to_next);

      /* Dual loop: process two packets per iteration while prefetching the
         two buffers after them. */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          u32 bi0, bi1;
          vlib_buffer_t * b0, * b1;
          u32 next0, next1;
          u32 sw_if_index0, sw_if_index1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t * p2, * p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);
          }

          /* speculatively enqueue b0 and b1 to the current next frame */
          to_next[0] = bi0 = from[0];
          to_next[1] = bi1 = from[1];
          from += 2;
          to_next += 2;
          n_left_from -= 2;
          n_left_to_next -= 2;

          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);

          /* next index was chosen by the handoff sender */
          next0 = vnet_buffer(b0)->handoff.next_index;
          next1 = vnet_buffer(b1)->handoff.next_index;

          if (PREDICT_FALSE(vm->trace_main.trace_active_hint))
            {
            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
              {
                vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
                handoff_dispatch_trace_t *t =
                  vlib_add_trace (vm, node, b0, sizeof (*t));
                sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
                t->sw_if_index = sw_if_index0;
                t->next_index = next0;
                t->buffer_index = bi0;
              }
            if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
              {
                vlib_trace_buffer (vm, node, next1, b1, /* follow_chain */ 0);
                handoff_dispatch_trace_t *t =
                  vlib_add_trace (vm, node, b1, sizeof (*t));
                sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
                t->sw_if_index = sw_if_index1;
                t->next_index = next1;
                t->buffer_index = bi1;
              }
            }

          /* verify speculative enqueues, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, bi1, next0, next1);
        }

      /* Single loop: drain the remainder one packet at a time. */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t * b0;
          u32 next0;
          u32 sw_if_index0;

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);

          next0 = vnet_buffer(b0)->handoff.next_index;

          if (PREDICT_FALSE(vm->trace_main.trace_active_hint))
            {
            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
              {
                vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
                handoff_dispatch_trace_t *t =
                  vlib_add_trace (vm, node, b0, sizeof (*t));
                sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
                t->sw_if_index = sw_if_index0;
                t->next_index = next0;
                t->buffer_index = bi0;
              }
            }

          /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}
482
/* Node registration for handoff-dispatch.  The next-node table must match
   the HANDOFF_DISPATCH_NEXT_* enum values used by the sending thread. */
VLIB_REGISTER_NODE (handoff_dispatch_node) = {
  .function = handoff_dispatch_node_fn,
  .name = "handoff-dispatch",
  .vector_size = sizeof (u32),
  .format_trace = format_handoff_dispatch_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .flags = VLIB_NODE_FLAG_IS_HANDOFF,

  .n_errors = ARRAY_LEN(handoff_dispatch_error_strings),
  .error_strings = handoff_dispatch_error_strings,

  .n_next_nodes = HANDOFF_DISPATCH_N_NEXT,

  .next_nodes = {
        [HANDOFF_DISPATCH_NEXT_DROP] = "error-drop",
        [HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT] = "ethernet-input",
        [HANDOFF_DISPATCH_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
        [HANDOFF_DISPATCH_NEXT_IP6_INPUT] = "ip6-input",
        [HANDOFF_DISPATCH_NEXT_MPLS_INPUT] = "mpls-gre-input",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (handoff_dispatch_node, handoff_dispatch_node_fn)
506
507 clib_error_t *handoff_init (vlib_main_t *vm)
508 {
509   handoff_main_t * hm = &handoff_main;
510   vlib_thread_main_t * tm = vlib_get_thread_main();
511   uword * p;
512
513   vlib_thread_registration_t * tr;
514   /* Only the standard vnet worker threads are supported */
515   p = hash_get_mem (tm->thread_registrations_by_name, "workers");
516   tr = (vlib_thread_registration_t *) p[0];
517   if (tr)
518     {
519       hm->num_workers = tr->count;
520       hm->first_worker_index = tr->first_index;
521     }
522
523   hm->vlib_main = vm;
524   hm->vnet_main = &vnet_main;
525
526   return 0;
527 }
528
529 VLIB_INIT_FUNCTION (handoff_init);