/*
 *------------------------------------------------------------------
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#define _GNU_SOURCE
#include <stdint.h>
#include <vnet/llc/llc.h>
#include <vnet/snap/snap.h>
#include <vnet/bonding/node.h>

#ifndef CLIB_MARCH_VARIANT
bond_main_t bond_main;
#endif /* CLIB_MARCH_VARIANT */

#define foreach_bond_input_error \
  _(NONE, "no error")            \
  _(IF_DOWN, "interface down")   \
  _(PASS_THRU, "pass through (CDP, LLDP, slow protocols)")

typedef enum
{
#define _(f,s) BOND_INPUT_ERROR_##f,
  foreach_bond_input_error
#undef _
    BOND_INPUT_N_ERROR,
} bond_input_error_t;

static char *bond_input_error_strings[] = {
#define _(n,s) s,
  foreach_bond_input_error
#undef _
};

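/* Format a bond-input packet trace: source/destination MAC plus the
 * slave (RX) interface and the bond interface the packet was mapped to. */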
static u8 *
format_bond_input_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  bond_packet_trace_t *t = va_arg (*args, bond_packet_trace_t *);

  s = format (s, "src %U, dst %U, %U -> %U",
              format_ethernet_address, t->ethernet.src_address,
              format_ethernet_address, t->ethernet.dst_address,
              format_vnet_sw_if_index_name, vnet_get_main (),
              t->sw_if_index,
              format_vnet_sw_if_index_name, vnet_get_main (),
              t->bond_sw_if_index);

  return s;
}

typedef enum
{
  BOND_INPUT_NEXT_DROP,
  BOND_INPUT_N_NEXT,
} bond_output_next_t;

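/*
 * Identify CDP packets either by the CDP ethertype or by an LLC/SNAP
 * encapsulation with SAP 0xAA, the Cisco OUI (00-00-0C) and SNAP
 * protocol id 0x2000.
 */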
static_always_inline u8
packet_is_cdp (ethernet_header_t * eth)
{
  llc_header_t *llc;
  snap_header_t *snap;

  llc = (llc_header_t *) (eth + 1);
  snap = (snap_header_t *) (llc + 1);

  return ((eth->type == htons (ETHERNET_TYPE_CDP)) ||
          ((llc->src_sap == 0xAA) && (llc->control == 0x03) &&
           (snap->protocol == htons (0x2000)) &&
           (snap->oui[0] == 0) && (snap->oui[1] == 0) &&
           (snap->oui[2] == 0x0C)));
}

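/*
 * Count the packet and its bytes toward the bond rx counters, then rewrite
 * the buffer's RX sw_if_index from the slave interface to the bond interface.
 * Slow-protocol (LACP), CDP and LLDP frames are left untouched so they pass
 * through on the slave interface; for tagged frames the ethertype past one or
 * two VLAN tags is inspected before deciding.
 */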
static_always_inline void
bond_sw_if_idx_rewrite (vlib_main_t * vm, vlib_node_runtime_t * node,
                        vlib_buffer_t * b, u32 bond_sw_if_index,
                        u32 * n_rx_packets, u32 * n_rx_bytes)
{
  u16 *ethertype_p, ethertype;
  ethernet_vlan_header_t *vlan;
  ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current (b);

  (*n_rx_packets)++;
  *n_rx_bytes += b->current_length;
  ethertype = clib_mem_unaligned (&eth->type, u16);
  if (!ethernet_frame_is_tagged (ntohs (ethertype)))
    {
      // Let some layer2 packets pass through.
      if (PREDICT_TRUE ((ethertype != htons (ETHERNET_TYPE_SLOW_PROTOCOLS))
                        && !packet_is_cdp (eth)
                        && (ethertype != htons (ETHERNET_TYPE_802_1_LLDP))))
        {
          /* Change the physical interface to bond interface */
          vnet_buffer (b)->sw_if_index[VLIB_RX] = bond_sw_if_index;
          return;
        }
    }
  else
    {
      vlan = (void *) (eth + 1);
      ethertype_p = &vlan->type;
      ethertype = clib_mem_unaligned (ethertype_p, u16);
      if (ethertype == ntohs (ETHERNET_TYPE_VLAN))
        {
          vlan++;
          ethertype_p = &vlan->type;
        }
      ethertype = clib_mem_unaligned (ethertype_p, u16);
      if (PREDICT_TRUE ((ethertype != htons (ETHERNET_TYPE_SLOW_PROTOCOLS))
                        && (ethertype != htons (ETHERNET_TYPE_CDP))
                        && (ethertype != htons (ETHERNET_TYPE_802_1_LLDP))))
        {
          /* Change the physical interface to bond interface */
          vnet_buffer (b)->sw_if_index[VLIB_RX] = bond_sw_if_index;
          return;
        }
    }

  vlib_error_count (vm, node->node_index, BOND_INPUT_ERROR_PASS_THRU, 1);
  return;
}

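/*
 * Resolve the bond interface that owns a slave and advance the buffer to the
 * next node on the feature arc. The last slave looked up is cached in
 * *last_slave_sw_if_index so consecutive packets from the same slave skip the
 * pool lookups; if the bond is admin down, the IF_DOWN error is latched for
 * the caller.
 */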
static_always_inline void
bond_update_next (vlib_main_t * vm, vlib_node_runtime_t * node,
                  u32 * last_slave_sw_if_index, u32 slave_sw_if_index,
                  u32 * bond_sw_if_index, vlib_buffer_t * b,
                  u32 * next_index, vlib_error_t * error)
{
  slave_if_t *sif;
  bond_if_t *bif;

  *next_index = BOND_INPUT_NEXT_DROP;
  *error = 0;

  if (PREDICT_TRUE (*last_slave_sw_if_index == slave_sw_if_index))
    goto next;

  *last_slave_sw_if_index = slave_sw_if_index;

  sif = bond_get_slave_by_sw_if_index (slave_sw_if_index);
  ASSERT (sif);

  bif = bond_get_master_by_dev_instance (sif->bif_dev_instance);

  ASSERT (bif);
  ASSERT (vec_len (bif->slaves));

  if (PREDICT_FALSE (bif->admin_up == 0))
    {
      *bond_sw_if_index = slave_sw_if_index;
      *error = node->errors[BOND_INPUT_ERROR_IF_DOWN];
    }

  *bond_sw_if_index = bif->sw_if_index;

next:
  vnet_feature_next (next_index, b);
}

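/*
 * Advance four buffers along the feature arc in one go; used on the fast
 * path when all four packets arrived on the already-resolved slave.
 */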
static_always_inline void
bond_update_next_x4 (vlib_buffer_t * b0, vlib_buffer_t * b1,
                     vlib_buffer_t * b2, vlib_buffer_t * b3)
{
  u32 tmp0, tmp1, tmp2, tmp3;

  tmp0 = tmp1 = tmp2 = tmp3 = BOND_INPUT_NEXT_DROP;
  vnet_feature_next (&tmp0, b0);
  vnet_feature_next (&tmp1, b1);
  vnet_feature_next (&tmp2, b2);
  vnet_feature_next (&tmp3, b3);
}

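/*
 * bond-input runs as a device-input feature on every slave interface. The
 * main loop handles packets four at a time: when all four share the last
 * resolved slave it takes a fast path that only rewrites the RX sw_if_index,
 * otherwise it falls back to per-packet resolution via bond_update_next().
 */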
VLIB_NODE_FN (bond_input_node) (vlib_main_t * vm,
                                vlib_node_runtime_t * node,
                                vlib_frame_t * frame)
{
  u16 thread_index = vm->thread_index;
  u32 *from, n_left;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u32 sw_if_indices[VLIB_FRAME_SIZE], *sw_if_index;
  u16 nexts[VLIB_FRAME_SIZE], *next;
  u32 last_slave_sw_if_index = ~0;
  u32 bond_sw_if_index = 0;
  vlib_error_t error = 0;
  u32 next_index = 0;
  u32 n_rx_bytes = 0, n_rx_packets = 0;

  /* Vector of buffer / pkt indices we're supposed to process */
  from = vlib_frame_vector_args (frame);

  /* Number of buffers / pkts */
  n_left = frame->n_vectors;

  vlib_get_buffers (vm, from, bufs, n_left);

  b = bufs;
  next = nexts;
  sw_if_index = sw_if_indices;

  while (n_left >= 4)
    {
      u32 x = 0;
      /* Prefetch next iteration */
      if (PREDICT_TRUE (n_left >= 16))
        {
          vlib_prefetch_buffer_data (b[8], LOAD);
          vlib_prefetch_buffer_data (b[9], LOAD);
          vlib_prefetch_buffer_data (b[10], LOAD);
          vlib_prefetch_buffer_data (b[11], LOAD);

          vlib_prefetch_buffer_header (b[12], LOAD);
          vlib_prefetch_buffer_header (b[13], LOAD);
          vlib_prefetch_buffer_header (b[14], LOAD);
          vlib_prefetch_buffer_header (b[15], LOAD);
        }

      sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
      sw_if_index[1] = vnet_buffer (b[1])->sw_if_index[VLIB_RX];
      sw_if_index[2] = vnet_buffer (b[2])->sw_if_index[VLIB_RX];
      sw_if_index[3] = vnet_buffer (b[3])->sw_if_index[VLIB_RX];

      x |= sw_if_index[0] ^ last_slave_sw_if_index;
      x |= sw_if_index[1] ^ last_slave_sw_if_index;
      x |= sw_if_index[2] ^ last_slave_sw_if_index;
      x |= sw_if_index[3] ^ last_slave_sw_if_index;

      if (PREDICT_TRUE (x == 0))
        {
          /*
           * Optimize to call update_next only if there is a feature arc
           * after bond-input. Test feature count greater than 1 because
           * bond-input itself is a feature arc for this slave interface.
           */
          ASSERT ((vnet_buffer (b[0])->feature_arc_index ==
                   vnet_buffer (b[1])->feature_arc_index) &&
                  (vnet_buffer (b[0])->feature_arc_index ==
                   vnet_buffer (b[2])->feature_arc_index) &&
                  (vnet_buffer (b[0])->feature_arc_index ==
                   vnet_buffer (b[3])->feature_arc_index));
          if (PREDICT_FALSE (vnet_get_feature_count
                             (vnet_buffer (b[0])->feature_arc_index,
                              last_slave_sw_if_index) > 1))
            bond_update_next_x4 (b[0], b[1], b[2], b[3]);

          next[0] = next[1] = next[2] = next[3] = next_index;
          if (next_index == BOND_INPUT_NEXT_DROP)
            {
              b[0]->error = error;
              b[1]->error = error;
              b[2]->error = error;
              b[3]->error = error;
            }
          else
            {
              bond_sw_if_idx_rewrite (vm, node, b[0], bond_sw_if_index,
                                      &n_rx_packets, &n_rx_bytes);
              bond_sw_if_idx_rewrite (vm, node, b[1], bond_sw_if_index,
                                      &n_rx_packets, &n_rx_bytes);
              bond_sw_if_idx_rewrite (vm, node, b[2], bond_sw_if_index,
                                      &n_rx_packets, &n_rx_bytes);
              bond_sw_if_idx_rewrite (vm, node, b[3], bond_sw_if_index,
                                      &n_rx_packets, &n_rx_bytes);
            }
        }
      else
        {
          bond_update_next (vm, node, &last_slave_sw_if_index, sw_if_index[0],
                            &bond_sw_if_index, b[0], &next_index, &error);
          next[0] = next_index;
          if (next_index == BOND_INPUT_NEXT_DROP)
            b[0]->error = error;
          else
            bond_sw_if_idx_rewrite (vm, node, b[0], bond_sw_if_index,
                                    &n_rx_packets, &n_rx_bytes);

          bond_update_next (vm, node, &last_slave_sw_if_index, sw_if_index[1],
                            &bond_sw_if_index, b[1], &next_index, &error);
          next[1] = next_index;
          if (next_index == BOND_INPUT_NEXT_DROP)
            b[1]->error = error;
          else
            bond_sw_if_idx_rewrite (vm, node, b[1], bond_sw_if_index,
                                    &n_rx_packets, &n_rx_bytes);

          bond_update_next (vm, node, &last_slave_sw_if_index, sw_if_index[2],
                            &bond_sw_if_index, b[2], &next_index, &error);
          next[2] = next_index;
          if (next_index == BOND_INPUT_NEXT_DROP)
            b[2]->error = error;
          else
            bond_sw_if_idx_rewrite (vm, node, b[2], bond_sw_if_index,
                                    &n_rx_packets, &n_rx_bytes);

          bond_update_next (vm, node, &last_slave_sw_if_index, sw_if_index[3],
                            &bond_sw_if_index, b[3], &next_index, &error);
          next[3] = next_index;
          if (next_index == BOND_INPUT_NEXT_DROP)
            b[3]->error = error;
          else
            bond_sw_if_idx_rewrite (vm, node, b[3], bond_sw_if_index,
                                    &n_rx_packets, &n_rx_bytes);
        }

      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);
      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[1]);
      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[2]);
      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[3]);

      /* next */
      n_left -= 4;
      b += 4;
      sw_if_index += 4;
      next += 4;
    }

  while (n_left)
    {
      sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
      bond_update_next (vm, node, &last_slave_sw_if_index, sw_if_index[0],
                        &bond_sw_if_index, b[0], &next_index, &error);
      next[0] = next_index;
      if (next_index == BOND_INPUT_NEXT_DROP)
        b[0]->error = error;
      else
        bond_sw_if_idx_rewrite (vm, node, b[0], bond_sw_if_index,
                                &n_rx_packets, &n_rx_bytes);

      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);

      /* next */
      n_left -= 1;
      b += 1;
      sw_if_index += 1;
      next += 1;
    }

  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
    {
      n_left = frame->n_vectors;        /* number of packets to process */
      b = bufs;
      sw_if_index = sw_if_indices;
      bond_packet_trace_t *t0;

      while (n_left)
        {
          if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
            {
              t0 = vlib_add_trace (vm, node, b[0], sizeof (*t0));
              t0->sw_if_index = sw_if_index[0];
              clib_memcpy_fast (&t0->ethernet, vlib_buffer_get_current (b[0]),
                                sizeof (ethernet_header_t));
              t0->bond_sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
            }
          /* next */
          n_left--;
          b++;
          sw_if_index++;
        }
    }

  /* increase rx counters */
  vlib_increment_combined_counter
    (vnet_main.interface_main.combined_sw_if_counters +
     VNET_INTERFACE_COUNTER_RX, thread_index, bond_sw_if_index, n_rx_packets,
     n_rx_bytes);

  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
  vlib_node_increment_counter (vm, bond_input_node.index,
                               BOND_INPUT_ERROR_NONE, frame->n_vectors);

  return frame->n_vectors;
}

static clib_error_t *
bond_input_init (vlib_main_t * vm)
{
  return 0;
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bond_input_node) = {
  .name = "bond-input",
  .vector_size = sizeof (u32),
  .format_buffer = format_ethernet_header_with_length,
  .format_trace = format_bond_input_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = BOND_INPUT_N_ERROR,
  .error_strings = bond_input_error_strings,
  .n_next_nodes = BOND_INPUT_N_NEXT,
  .next_nodes =
  {
    [BOND_INPUT_NEXT_DROP] = "error-drop"
  }
};

VLIB_INIT_FUNCTION (bond_input_init);

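/* Register bond-input on the device-input arc so it sees slave packets
 * before ethernet-input. */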
VNET_FEATURE_INIT (bond_input, static) =
{
  .arc_name = "device-input",
  .node_name = "bond-input",
  .runs_before = VNET_FEATURES ("ethernet-input"),
};
/* *INDENT-ON* */

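/*
 * Admin up/down callback for slave interfaces: outside of LACP mode, a slave
 * is enabled for collecting/distributing only when it is both admin up and
 * has link up.
 */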
static clib_error_t *
bond_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
{
  bond_main_t *bm = &bond_main;
  slave_if_t *sif;
  vlib_main_t *vm = bm->vlib_main;

  sif = bond_get_slave_by_sw_if_index (sw_if_index);
  if (sif)
    {
      if (sif->lacp_enabled)
        return 0;

      /* port_enabled is both admin up and hw link up */
      sif->port_enabled = ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) &&
                           vnet_sw_interface_is_link_up (vnm, sw_if_index));
      if (sif->port_enabled == 0)
        bond_disable_collecting_distributing (vm, sif);
      else
        bond_enable_collecting_distributing (vm, sif);
    }

  return 0;
}

VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (bond_sw_interface_up_down);

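/*
 * Hardware link up/down callback: mirrors the admin up/down handler, keyed
 * on carrier state plus the slave's admin state.
 */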
static clib_error_t *
bond_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
{
  bond_main_t *bm = &bond_main;
  slave_if_t *sif;
  vnet_sw_interface_t *sw;
  vlib_main_t *vm = bm->vlib_main;

  sw = vnet_get_hw_sw_interface (vnm, hw_if_index);
  sif = bond_get_slave_by_sw_if_index (sw->sw_if_index);
  if (sif)
    {
      if (sif->lacp_enabled)
        return 0;

      /* port_enabled is both admin up and hw link up */
      sif->port_enabled = ((flags & VNET_HW_INTERFACE_FLAG_LINK_UP) &&
                           vnet_sw_interface_is_admin_up (vnm,
                                                          sw->sw_if_index));
      if (sif->port_enabled == 0)
        bond_disable_collecting_distributing (vm, sif);
      else
        bond_enable_collecting_distributing (vm, sif);
    }

  return 0;
}

VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (bond_hw_interface_up_down);

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */