bond: Add bonding driver and LACP protocol
[vpp.git] / src / vnet / bonding / node.c
1 /*
2  *------------------------------------------------------------------
3  * Copyright (c) 2017 Cisco and/or its affiliates.
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *------------------------------------------------------------------
16  */
17
18 #define _GNU_SOURCE
19 #include <stdint.h>
20 #include <vnet/llc/llc.h>
21 #include <vnet/snap/snap.h>
22 #include <vnet/bonding/node.h>
23
24 bond_main_t bond_main;
25
26 #define foreach_bond_input_error \
27   _(NONE, "no error")            \
28   _(IF_DOWN, "interface down")   \
29   _(NO_SLAVE, "no slave")        \
30   _(NO_BOND, "no bond interface")\
31   _(PASS_THRU, "pass through")
32
33 typedef enum
34 {
35 #define _(f,s) BOND_INPUT_ERROR_##f,
36   foreach_bond_input_error
37 #undef _
38     BOND_INPUT_N_ERROR,
39 } bond_input_error_t;
40
41 static char *bond_input_error_strings[] = {
42 #define _(n,s) s,
43   foreach_bond_input_error
44 #undef _
45 };
46
47 static u8 *
48 format_bond_input_trace (u8 * s, va_list * args)
49 {
50   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
51   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
52   bond_packet_trace_t *t = va_arg (*args, bond_packet_trace_t *);
53   vnet_hw_interface_t *hw, *hw1;
54   vnet_main_t *vnm = vnet_get_main ();
55
56   hw = vnet_get_sup_hw_interface (vnm, t->sw_if_index);
57   hw1 = vnet_get_sup_hw_interface (vnm, t->bond_sw_if_index);
58   s = format (s, "src %U, dst %U, %s -> %s",
59               format_ethernet_address, t->ethernet.src_address,
60               format_ethernet_address, t->ethernet.dst_address,
61               hw->name, hw1->name);
62
63   return s;
64 }
65
66 static_always_inline u8
67 packet_is_cdp (ethernet_header_t * eth)
68 {
69   llc_header_t *llc;
70   snap_header_t *snap;
71
72   llc = (llc_header_t *) (eth + 1);
73   snap = (snap_header_t *) (llc + 1);
74
75   return ((eth->type == htons (ETHERNET_TYPE_CDP)) ||
76           ((llc->src_sap == 0xAA) && (llc->control == 0x03) &&
77            (snap->protocol == htons (0x2000)) &&
78            (snap->oui[0] == 0) && (snap->oui[1] == 0) &&
79            (snap->oui[2] == 0x0C)));
80 }
81
82 static inline void
83 bond_sw_if_index_rewrite (vlib_main_t * vm, vlib_node_runtime_t * node,
84                           slave_if_t * sif, ethernet_header_t * eth,
85                           vlib_buffer_t * b0)
86 {
87   bond_if_t *bif;
88   u16 thread_index = vlib_get_thread_index ();
89   u16 *ethertype_p, ethertype;
90   ethernet_vlan_header_t *vlan;
91
92   if (PREDICT_TRUE (sif != 0))
93     {
94       bif = bond_get_master_by_sw_if_index (sif->group);
95       if (PREDICT_TRUE (bif != 0))
96         {
97           if (PREDICT_TRUE (vec_len (bif->slaves) >= 1))
98             {
99               if (PREDICT_TRUE (bif->admin_up == 1))
100                 {
101                   if (!ethernet_frame_is_tagged (ntohs (eth->type)))
102                     {
103                       // Let some layer2 packets pass through.
104                       if (PREDICT_TRUE ((eth->type !=
105                                          htons (ETHERNET_TYPE_SLOW_PROTOCOLS))
106                                         && !packet_is_cdp (eth)
107                                         && (eth->type !=
108                                             htons
109                                             (ETHERNET_TYPE_802_1_LLDP))))
110                         {
111                           // Change the physical interface to
112                           // bond interface
113                           vnet_buffer (b0)->sw_if_index[VLIB_RX] =
114                             bif->sw_if_index;
115
116                           /* increase rx counters */
117                           vlib_increment_simple_counter
118                             (vnet_main.interface_main.sw_if_counters +
119                              VNET_INTERFACE_COUNTER_RX, thread_index,
120                              bif->sw_if_index, 1);
121                         }
122                       else
123                         {
124                           vlib_error_count (vm, node->node_index,
125                                             BOND_INPUT_ERROR_PASS_THRU, 1);
126                         }
127                     }
128                   else
129                     {
130                       vlan = (void *) (eth + 1);
131                       ethertype_p = &vlan->type;
132                       if (*ethertype_p == ntohs (ETHERNET_TYPE_VLAN))
133                         {
134                           vlan++;
135                           ethertype_p = &vlan->type;
136                         }
137                       ethertype = *ethertype_p;
138                       if (PREDICT_TRUE ((ethertype !=
139                                          htons (ETHERNET_TYPE_SLOW_PROTOCOLS))
140                                         && (ethertype !=
141                                             htons (ETHERNET_TYPE_CDP))
142                                         && (ethertype !=
143                                             htons
144                                             (ETHERNET_TYPE_802_1_LLDP))))
145                         {
146                           // Change the physical interface to
147                           // bond interface
148                           vnet_buffer (b0)->sw_if_index[VLIB_RX] =
149                             bif->sw_if_index;
150
151                           /* increase rx counters */
152                           vlib_increment_simple_counter
153                             (vnet_main.interface_main.sw_if_counters +
154                              VNET_INTERFACE_COUNTER_RX, thread_index,
155                              bif->sw_if_index, 1);
156                         }
157                       else
158                         {
159                           vlib_error_count (vm, node->node_index,
160                                             BOND_INPUT_ERROR_PASS_THRU, 1);
161                         }
162                     }
163                 }
164               else
165                 {
166                   vlib_error_count (vm, node->node_index,
167                                     BOND_INPUT_ERROR_IF_DOWN, 1);
168                 }
169             }
170           else
171             {
172               vlib_error_count (vm, node->node_index,
173                                 BOND_INPUT_ERROR_NO_SLAVE, 1);
174             }
175         }
176       else
177         {
178           vlib_error_count (vm, node->node_index,
179                             BOND_INPUT_ERROR_NO_BOND, 1);
180         }
181     }
182   else
183     {
184       vlib_error_count (vm, node->node_index, BOND_INPUT_ERROR_NO_SLAVE, 1);
185     }
186
187 }
188
189 static uword
190 bond_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
191                vlib_frame_t * frame)
192 {
193   u32 bi0, bi1, bi2, bi3;
194   vlib_buffer_t *b0, *b1, *b2, *b3;
195   u32 next_index;
196   u32 *from, *to_next, n_left_from, n_left_to_next;
197   ethernet_header_t *eth, *eth1, *eth2, *eth3;
198   u32 next0, next1, next2, next3;
199   bond_packet_trace_t *t0;
200   uword n_trace = vlib_get_trace_count (vm, node);
201   u32 sw_if_index, sw_if_index1, sw_if_index2, sw_if_index3;
202   slave_if_t *sif, *sif1, *sif2, *sif3;
203   u16 thread_index = vlib_get_thread_index ();
204
205   /* Vector of buffer / pkt indices we're supposed to process */
206   from = vlib_frame_vector_args (frame);
207
208   /* Number of buffers / pkts */
209   n_left_from = frame->n_vectors;
210
211   /* Speculatively send the first buffer to the last disposition we used */
212   next_index = node->cached_next_index;
213
214   while (n_left_from > 0)
215     {
216       /* set up to enqueue to our disposition with index = next_index */
217       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
218
219       while (n_left_from >= 12 && n_left_to_next >= 4)
220         {
221           // Prefetch next iteration
222           {
223             vlib_buffer_t *b4, *b5, *b6, *b7;
224
225             b4 = vlib_get_buffer (vm, from[4]);
226             b5 = vlib_get_buffer (vm, from[5]);
227             b6 = vlib_get_buffer (vm, from[6]);
228             b7 = vlib_get_buffer (vm, from[7]);
229
230             vlib_prefetch_buffer_header (b4, STORE);
231             vlib_prefetch_buffer_header (b5, STORE);
232             vlib_prefetch_buffer_header (b6, STORE);
233             vlib_prefetch_buffer_header (b7, STORE);
234
235             CLIB_PREFETCH (b4->data, CLIB_CACHE_LINE_BYTES, LOAD);
236             CLIB_PREFETCH (b5->data, CLIB_CACHE_LINE_BYTES, LOAD);
237             CLIB_PREFETCH (b6->data, CLIB_CACHE_LINE_BYTES, LOAD);
238             CLIB_PREFETCH (b7->data, CLIB_CACHE_LINE_BYTES, LOAD);
239           }
240
241           next0 = 0;
242           next1 = 0;
243           next2 = 0;
244           next3 = 0;
245
246           bi0 = from[0];
247           bi1 = from[1];
248           bi2 = from[2];
249           bi3 = from[3];
250
251           to_next[0] = bi0;
252           to_next[1] = bi1;
253           to_next[2] = bi2;
254           to_next[3] = bi3;
255
256           from += 4;
257           to_next += 4;
258           n_left_from -= 4;
259           n_left_to_next -= 4;
260
261           b0 = vlib_get_buffer (vm, bi0);
262           b1 = vlib_get_buffer (vm, bi1);
263           b2 = vlib_get_buffer (vm, bi2);
264           b3 = vlib_get_buffer (vm, bi3);
265
266           vnet_feature_next (vnet_buffer (b0)->sw_if_index[VLIB_RX], &next0,
267                              b0);
268           vnet_feature_next (vnet_buffer (b1)->sw_if_index[VLIB_RX], &next1,
269                              b1);
270           vnet_feature_next (vnet_buffer (b2)->sw_if_index[VLIB_RX], &next2,
271                              b2);
272           vnet_feature_next (vnet_buffer (b3)->sw_if_index[VLIB_RX], &next3,
273                              b3);
274
275           eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
276           eth1 = (ethernet_header_t *) vlib_buffer_get_current (b1);
277           eth2 = (ethernet_header_t *) vlib_buffer_get_current (b2);
278           eth3 = (ethernet_header_t *) vlib_buffer_get_current (b3);
279
280           sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
281           sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
282           sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX];
283           sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX];
284
285           // sw_if_index points to the physical interface
286           sif = bond_get_slave_by_sw_if_index (sw_if_index);
287           sif1 = bond_get_slave_by_sw_if_index (sw_if_index1);
288           sif2 = bond_get_slave_by_sw_if_index (sw_if_index2);
289           sif3 = bond_get_slave_by_sw_if_index (sw_if_index3);
290
291           bond_sw_if_index_rewrite (vm, node, sif, eth, b0);
292           bond_sw_if_index_rewrite (vm, node, sif1, eth1, b1);
293           bond_sw_if_index_rewrite (vm, node, sif2, eth2, b2);
294           bond_sw_if_index_rewrite (vm, node, sif3, eth3, b3);
295
296           if (PREDICT_FALSE (n_trace > 0))
297             {
298               vlib_trace_buffer (vm, node, next0, b0, 0 /* follow_chain */ );
299               vlib_set_trace_count (vm, node, --n_trace);
300               t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
301               t0->ethernet = *eth;
302               t0->sw_if_index = sw_if_index;
303               t0->bond_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
304
305               if (PREDICT_TRUE (n_trace > 0))
306                 {
307                   vlib_trace_buffer (vm, node, next1, b1,
308                                      0 /* follow_chain */ );
309                   vlib_set_trace_count (vm, node, --n_trace);
310                   t0 = vlib_add_trace (vm, node, b1, sizeof (*t0));
311                   t0->ethernet = *eth1;
312                   t0->sw_if_index = sw_if_index1;
313                   t0->bond_sw_if_index =
314                     vnet_buffer (b1)->sw_if_index[VLIB_RX];
315
316                   if (PREDICT_TRUE (n_trace > 0))
317                     {
318                       vlib_trace_buffer (vm, node, next1, b2,
319                                          0 /* follow_chain */ );
320                       vlib_set_trace_count (vm, node, --n_trace);
321                       t0 = vlib_add_trace (vm, node, b2, sizeof (*t0));
322                       t0->ethernet = *eth2;
323                       t0->sw_if_index = sw_if_index2;
324                       t0->bond_sw_if_index =
325                         vnet_buffer (b2)->sw_if_index[VLIB_RX];
326
327                       if (PREDICT_TRUE (n_trace > 0))
328                         {
329                           vlib_trace_buffer (vm, node, next1, b2,
330                                              0 /* follow_chain */ );
331                           vlib_set_trace_count (vm, node, --n_trace);
332                           t0 = vlib_add_trace (vm, node, b3, sizeof (*t0));
333                           t0->ethernet = *eth3;
334                           t0->sw_if_index = sw_if_index3;
335                           t0->bond_sw_if_index =
336                             vnet_buffer (b3)->sw_if_index[VLIB_RX];
337                         }
338                     }
339                 }
340             }
341
342           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
343           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1);
344           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2);
345           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3);
346
347           /* verify speculative enqueue, maybe switch current next frame */
348           vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
349                                            to_next, n_left_to_next,
350                                            bi0, bi1, bi2, bi3, next0, next1,
351                                            next2, next3);
352         }
353
354       while (n_left_from > 0 && n_left_to_next > 0)
355         {
356           // Prefetch next iteration
357           if (n_left_from > 1)
358             {
359               vlib_buffer_t *p2;
360
361               p2 = vlib_get_buffer (vm, from[1]);
362               vlib_prefetch_buffer_header (p2, STORE);
363               CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
364             }
365
366           next0 = 0;
367           bi0 = from[0];
368           to_next[0] = bi0;
369           from += 1;
370           to_next += 1;
371           n_left_from -= 1;
372           n_left_to_next -= 1;
373
374           b0 = vlib_get_buffer (vm, bi0);
375           vnet_feature_next (vnet_buffer (b0)->sw_if_index[VLIB_RX], &next0,
376                              b0);
377
378           eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
379
380           sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
381           // sw_if_index points to the physical interface
382           sif = bond_get_slave_by_sw_if_index (sw_if_index);
383           bond_sw_if_index_rewrite (vm, node, sif, eth, b0);
384
385           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
386
387           /* verify speculative enqueue, maybe switch current next frame */
388           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
389                                            to_next, n_left_to_next,
390                                            bi0, next0);
391         }
392       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
393     }
394
395   vlib_node_increment_counter (vm, bond_input_node.index,
396                                BOND_INPUT_ERROR_NONE, frame->n_vectors);
397
398   vnet_device_increment_rx_packets (thread_index, frame->n_vectors);
399
400   return frame->n_vectors;
401 }
402
403 static clib_error_t *
404 bond_input_init (vlib_main_t * vm)
405 {
406   return 0;
407 }
408
409 /* *INDENT-OFF* */
410 VLIB_REGISTER_NODE (bond_input_node) = {
411   .function = bond_input_fn,
412   .name = "bond-input",
413   .vector_size = sizeof (u32),
414   .format_buffer = format_ethernet_header_with_length,
415   .format_trace = format_bond_input_trace,
416   .type = VLIB_NODE_TYPE_INTERNAL,
417   .n_errors = BOND_INPUT_N_ERROR,
418   .error_strings = bond_input_error_strings,
419   .n_next_nodes = 0,
420   .next_nodes =
421   {
422     [0] = "error-drop"
423   }
424 };
425
426 VLIB_INIT_FUNCTION (bond_input_init);
427
428 VNET_FEATURE_INIT (bond_input, static) =
429 {
430   .arc_name = "device-input",
431   .node_name = "bond-input",
432   .runs_before = VNET_FEATURES ("ethernet-input"),
433 };
434 VLIB_NODE_FUNCTION_MULTIARCH (bond_input_node, bond_input_fn)
435 /* *INDENT-ON* */
436
437 static clib_error_t *
438 bond_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
439 {
440   bond_main_t *bm = &bond_main;
441   slave_if_t *sif;
442   vlib_main_t *vm = bm->vlib_main;
443
444   sif = bond_get_slave_by_sw_if_index (sw_if_index);
445   if (sif)
446     {
447       sif->port_enabled = flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP;
448       if (sif->port_enabled == 0)
449         {
450           if (sif->lacp_enabled == 0)
451             {
452               bond_disable_collecting_distributing (vm, sif);
453             }
454         }
455       else
456         {
457           if (sif->lacp_enabled == 0)
458             {
459               bond_enable_collecting_distributing (vm, sif);
460             }
461         }
462     }
463
464   return 0;
465 }
466
467 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (bond_sw_interface_up_down);
468
469 static clib_error_t *
470 bond_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
471 {
472   bond_main_t *bm = &bond_main;
473   slave_if_t *sif;
474   vnet_sw_interface_t *sw;
475   vlib_main_t *vm = bm->vlib_main;
476   vnet_interface_main_t *im = &vnm->interface_main;
477
478   sw = pool_elt_at_index (im->sw_interfaces, hw_if_index);
479   sif = bond_get_slave_by_sw_if_index (sw->sw_if_index);
480   if (sif)
481     {
482       if (!(flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
483         {
484           if (sif->lacp_enabled == 0)
485             {
486               bond_disable_collecting_distributing (vm, sif);
487             }
488         }
489       else
490         {
491           if (sif->lacp_enabled == 0)
492             {
493               bond_enable_collecting_distributing (vm, sif);
494             }
495         }
496     }
497
498   return 0;
499 }
500
501 VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (bond_hw_interface_up_down);
502
503 /*
504  * fd.io coding-style-patch-verification: ON
505  *
506  * Local Variables:
507  * eval: (c-set-style "gnu")
508  * End:
509  */