bonding lacp: replace slave string with member
src/vnet/bonding/node.c
/*
 *------------------------------------------------------------------
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#define _GNU_SOURCE
#include <stdint.h>
#include <vnet/llc/llc.h>
#include <vnet/snap/snap.h>
#include <vnet/bonding/node.h>

#ifndef CLIB_MARCH_VARIANT
bond_main_t bond_main;
#endif /* CLIB_MARCH_VARIANT */

#define foreach_bond_input_error \
  _(NONE, "no error")            \
  _(IF_DOWN, "interface down")   \
  _(PASSIVE_IF, "traffic received on passive interface")   \
  _(PASS_THRU, "pass through (CDP, LLDP, slow protocols)")

typedef enum
{
#define _(f,s) BOND_INPUT_ERROR_##f,
  foreach_bond_input_error
#undef _
    BOND_INPUT_N_ERROR,
} bond_input_error_t;

static char *bond_input_error_strings[] = {
#define _(n,s) s,
  foreach_bond_input_error
#undef _
};

static u8 *
format_bond_input_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  bond_packet_trace_t *t = va_arg (*args, bond_packet_trace_t *);

  s = format (s, "src %U, dst %U, %U -> %U",
              format_ethernet_address, t->ethernet.src_address,
              format_ethernet_address, t->ethernet.dst_address,
              format_vnet_sw_if_index_name, vnet_get_main (),
              t->sw_if_index,
              format_vnet_sw_if_index_name, vnet_get_main (),
              t->bond_sw_if_index);

  return s;
}

typedef enum
{
  BOND_INPUT_NEXT_DROP,
  BOND_INPUT_N_NEXT,
} bond_output_next_t;

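/*
 * CDP is carried either directly with the Cisco ethertype or as an
 * 802.3 frame using LLC/SNAP encapsulation: SSAP 0xAA, control 0x03,
 * Cisco OUI 00-00-0C and SNAP protocol id 0x2000.
 */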
static_always_inline u8
packet_is_cdp (ethernet_header_t * eth)
{
  llc_header_t *llc;
  snap_header_t *snap;

  llc = (llc_header_t *) (eth + 1);
  snap = (snap_header_t *) (llc + 1);

  return ((eth->type == htons (ETHERNET_TYPE_CDP)) ||
          ((llc->src_sap == 0xAA) && (llc->control == 0x03) &&
           (snap->protocol == htons (0x2000)) &&
           (snap->oui[0] == 0) && (snap->oui[1] == 0) &&
           (snap->oui[2] == 0x0C)));
}

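/*
 * Rewrite the RX sw_if_index from the member interface to the bond
 * interface and account for the packet in the rx counters. Slow
 * protocol (LACP), CDP and LLDP frames are left untouched so that they
 * are still seen as received on the member interface; the ethertype
 * check looks past up to two VLAN tags.
 */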
static inline void
bond_sw_if_idx_rewrite (vlib_main_t * vm, vlib_node_runtime_t * node,
                        vlib_buffer_t * b, u32 bond_sw_if_index,
                        u32 * n_rx_packets, u32 * n_rx_bytes)
{
  u16 *ethertype_p, ethertype;
  ethernet_vlan_header_t *vlan;
  ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current (b);

  (*n_rx_packets)++;
  *n_rx_bytes += b->current_length;
  ethertype = clib_mem_unaligned (&eth->type, u16);
  if (!ethernet_frame_is_tagged (ntohs (ethertype)))
    {
      // Let some layer2 packets pass through.
      if (PREDICT_TRUE ((ethertype != htons (ETHERNET_TYPE_SLOW_PROTOCOLS))
                        && !packet_is_cdp (eth)
                        && (ethertype != htons (ETHERNET_TYPE_802_1_LLDP))))
        {
          /* Change the physical interface to bond interface */
          vnet_buffer (b)->sw_if_index[VLIB_RX] = bond_sw_if_index;
          return;
        }
    }
  else
    {
      vlan = (void *) (eth + 1);
      ethertype_p = &vlan->type;
      ethertype = clib_mem_unaligned (ethertype_p, u16);
      if (ethertype == ntohs (ETHERNET_TYPE_VLAN))
        {
          vlan++;
          ethertype_p = &vlan->type;
        }
      ethertype = clib_mem_unaligned (ethertype_p, u16);
      if (PREDICT_TRUE ((ethertype != htons (ETHERNET_TYPE_SLOW_PROTOCOLS))
                        && (ethertype != htons (ETHERNET_TYPE_CDP))
                        && (ethertype != htons (ETHERNET_TYPE_802_1_LLDP))))
        {
          /* Change the physical interface to bond interface */
          vnet_buffer (b)->sw_if_index[VLIB_RX] = bond_sw_if_index;
          return;
        }
    }

  vlib_error_count (vm, node->node_index, BOND_INPUT_ERROR_PASS_THRU, 1);
  return;
}

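/*
 * Work out the next node and the owning bond sw_if_index for a packet
 * received on member_sw_if_index. The result is cached through
 * last_member_sw_if_index so that consecutive packets from the same
 * member skip the member/bond lookups. Traffic arriving on the passive
 * member of an active-backup bond is dropped; an interface-down error
 * is noted when the bond itself is administratively down.
 */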
static inline void
bond_update_next (vlib_main_t * vm, vlib_node_runtime_t * node,
                  u32 * last_member_sw_if_index, u32 member_sw_if_index,
                  u32 * bond_sw_if_index, vlib_buffer_t * b,
                  u32 * next_index, vlib_error_t * error)
{
  member_if_t *mif;
  bond_if_t *bif;

  *next_index = BOND_INPUT_NEXT_DROP;
  *error = 0;

  if (PREDICT_TRUE (*last_member_sw_if_index == member_sw_if_index))
    goto next;

  *last_member_sw_if_index = member_sw_if_index;

  mif = bond_get_member_by_sw_if_index (member_sw_if_index);
  ALWAYS_ASSERT (mif);

  bif = bond_get_bond_if_by_dev_instance (mif->bif_dev_instance);

  ALWAYS_ASSERT (bif);
  ASSERT (vec_len (bif->members));

  if (PREDICT_FALSE (bif->admin_up == 0))
    {
      *bond_sw_if_index = member_sw_if_index;
      *error = node->errors[BOND_INPUT_ERROR_IF_DOWN];
    }

  if (PREDICT_FALSE ((bif->mode == BOND_MODE_ACTIVE_BACKUP) &&
                     vec_len (bif->active_members) &&
                     (member_sw_if_index != bif->active_members[0])))
    {
      *bond_sw_if_index = member_sw_if_index;
      *error = node->errors[BOND_INPUT_ERROR_PASSIVE_IF];
      return;
    }

  *bond_sw_if_index = bif->sw_if_index;

next:
  vnet_feature_next (next_index, b);
}

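/*
 * Advance the feature arc on four buffers at once. The next indices
 * returned here are deliberately ignored; the caller reuses the next
 * index already computed by bond_update_next for this member interface.
 */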
static_always_inline void
bond_update_next_x4 (vlib_buffer_t * b0, vlib_buffer_t * b1,
                     vlib_buffer_t * b2, vlib_buffer_t * b3)
{
  u32 tmp0, tmp1, tmp2, tmp3;

  tmp0 = tmp1 = tmp2 = tmp3 = BOND_INPUT_NEXT_DROP;
  vnet_feature_next (&tmp0, b0);
  vnet_feature_next (&tmp1, b1);
  vnet_feature_next (&tmp2, b2);
  vnet_feature_next (&tmp3, b3);
}

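/*
 * bond-input is a device-input feature (registered below) that runs on
 * member interfaces. The loop processes the frame in quads with a fast
 * path for the common case where all four packets arrive on the same
 * member as the previous packet: the cached next index and bond
 * sw_if_index are reused and only the RX sw_if_index rewrite (plus the
 * feature arc advance, when more features follow) is done per packet.
 * Any mix of members falls back to per-packet bond_update_next.
 */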
VLIB_NODE_FN (bond_input_node) (vlib_main_t * vm,
                                vlib_node_runtime_t * node,
                                vlib_frame_t * frame)
{
  u16 thread_index = vm->thread_index;
  u32 *from, n_left;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u32 sw_if_indices[VLIB_FRAME_SIZE], *sw_if_index;
  u16 nexts[VLIB_FRAME_SIZE], *next;
  u32 last_member_sw_if_index = ~0;
  u32 bond_sw_if_index = 0;
  vlib_error_t error = 0;
  u32 next_index = 0;
  u32 n_rx_bytes = 0, n_rx_packets = 0;

  /* Vector of buffer / pkt indices we're supposed to process */
  from = vlib_frame_vector_args (frame);

  /* Number of buffers / pkts */
  n_left = frame->n_vectors;

  vlib_get_buffers (vm, from, bufs, n_left);

  b = bufs;
  next = nexts;
  sw_if_index = sw_if_indices;

  while (n_left >= 4)
    {
      u32 x = 0;
      /* Prefetch next iteration */
      if (PREDICT_TRUE (n_left >= 16))
        {
          vlib_prefetch_buffer_data (b[8], LOAD);
          vlib_prefetch_buffer_data (b[9], LOAD);
          vlib_prefetch_buffer_data (b[10], LOAD);
          vlib_prefetch_buffer_data (b[11], LOAD);

          vlib_prefetch_buffer_header (b[12], LOAD);
          vlib_prefetch_buffer_header (b[13], LOAD);
          vlib_prefetch_buffer_header (b[14], LOAD);
          vlib_prefetch_buffer_header (b[15], LOAD);
        }

      sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
      sw_if_index[1] = vnet_buffer (b[1])->sw_if_index[VLIB_RX];
      sw_if_index[2] = vnet_buffer (b[2])->sw_if_index[VLIB_RX];
      sw_if_index[3] = vnet_buffer (b[3])->sw_if_index[VLIB_RX];

      x |= sw_if_index[0] ^ last_member_sw_if_index;
      x |= sw_if_index[1] ^ last_member_sw_if_index;
      x |= sw_if_index[2] ^ last_member_sw_if_index;
      x |= sw_if_index[3] ^ last_member_sw_if_index;

      if (PREDICT_TRUE (x == 0))
        {
          /*
           * Optimize to call update_next only if there is a feature arc
           * after bond-input. Test feature count greater than 1 because
           * bond-input itself is a feature arc for this member interface.
           */
          ASSERT ((vnet_buffer (b[0])->feature_arc_index ==
                   vnet_buffer (b[1])->feature_arc_index) &&
                  (vnet_buffer (b[0])->feature_arc_index ==
                   vnet_buffer (b[2])->feature_arc_index) &&
                  (vnet_buffer (b[0])->feature_arc_index ==
                   vnet_buffer (b[3])->feature_arc_index));
          if (PREDICT_FALSE (vnet_get_feature_count
                             (vnet_buffer (b[0])->feature_arc_index,
                              last_member_sw_if_index) > 1))
            bond_update_next_x4 (b[0], b[1], b[2], b[3]);

          next[0] = next[1] = next[2] = next[3] = next_index;
          if (next_index == BOND_INPUT_NEXT_DROP)
            {
              b[0]->error = error;
              b[1]->error = error;
              b[2]->error = error;
              b[3]->error = error;
            }
          else
            {
              bond_sw_if_idx_rewrite (vm, node, b[0], bond_sw_if_index,
                                      &n_rx_packets, &n_rx_bytes);
              bond_sw_if_idx_rewrite (vm, node, b[1], bond_sw_if_index,
                                      &n_rx_packets, &n_rx_bytes);
              bond_sw_if_idx_rewrite (vm, node, b[2], bond_sw_if_index,
                                      &n_rx_packets, &n_rx_bytes);
              bond_sw_if_idx_rewrite (vm, node, b[3], bond_sw_if_index,
                                      &n_rx_packets, &n_rx_bytes);
            }
        }
      else
        {
          bond_update_next (vm, node, &last_member_sw_if_index,
                            sw_if_index[0], &bond_sw_if_index, b[0],
                            &next_index, &error);
          next[0] = next_index;
          if (next_index == BOND_INPUT_NEXT_DROP)
            b[0]->error = error;
          else
            bond_sw_if_idx_rewrite (vm, node, b[0], bond_sw_if_index,
                                    &n_rx_packets, &n_rx_bytes);

          bond_update_next (vm, node, &last_member_sw_if_index,
                            sw_if_index[1], &bond_sw_if_index, b[1],
                            &next_index, &error);
          next[1] = next_index;
          if (next_index == BOND_INPUT_NEXT_DROP)
            b[1]->error = error;
          else
            bond_sw_if_idx_rewrite (vm, node, b[1], bond_sw_if_index,
                                    &n_rx_packets, &n_rx_bytes);

          bond_update_next (vm, node, &last_member_sw_if_index,
                            sw_if_index[2], &bond_sw_if_index, b[2],
                            &next_index, &error);
          next[2] = next_index;
          if (next_index == BOND_INPUT_NEXT_DROP)
            b[2]->error = error;
          else
            bond_sw_if_idx_rewrite (vm, node, b[2], bond_sw_if_index,
                                    &n_rx_packets, &n_rx_bytes);

          bond_update_next (vm, node, &last_member_sw_if_index,
                            sw_if_index[3], &bond_sw_if_index, b[3],
                            &next_index, &error);
          next[3] = next_index;
          if (next_index == BOND_INPUT_NEXT_DROP)
            b[3]->error = error;
          else
            bond_sw_if_idx_rewrite (vm, node, b[3], bond_sw_if_index,
                                    &n_rx_packets, &n_rx_bytes);
        }

      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);
      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[1]);
      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[2]);
      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[3]);

      /* next */
      n_left -= 4;
      b += 4;
      sw_if_index += 4;
      next += 4;
    }

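  /* Handle any tail of the frame (fewer than four packets) one at a time. */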
  while (n_left)
    {
      sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
      bond_update_next (vm, node, &last_member_sw_if_index, sw_if_index[0],
                        &bond_sw_if_index, b[0], &next_index, &error);
      next[0] = next_index;
      if (next_index == BOND_INPUT_NEXT_DROP)
        b[0]->error = error;
      else
        bond_sw_if_idx_rewrite (vm, node, b[0], bond_sw_if_index,
                                &n_rx_packets, &n_rx_bytes);

      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);

      /* next */
      n_left -= 1;
      b += 1;
      sw_if_index += 1;
      next += 1;
    }

  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
    {
      n_left = frame->n_vectors;        /* number of packets to process */
      b = bufs;
      sw_if_index = sw_if_indices;
      bond_packet_trace_t *t0;

      while (n_left)
        {
          if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
            {
              t0 = vlib_add_trace (vm, node, b[0], sizeof (*t0));
              t0->sw_if_index = sw_if_index[0];
              clib_memcpy_fast (&t0->ethernet, vlib_buffer_get_current (b[0]),
                                sizeof (ethernet_header_t));
              t0->bond_sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
            }
          /* next */
          n_left--;
          b++;
          sw_if_index++;
        }
    }

  /* increase rx counters */
  vlib_increment_combined_counter
    (vnet_main.interface_main.combined_sw_if_counters +
     VNET_INTERFACE_COUNTER_RX, thread_index, bond_sw_if_index, n_rx_packets,
     n_rx_bytes);

  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
  vlib_node_increment_counter (vm, bond_input_node.index,
                               BOND_INPUT_ERROR_NONE, frame->n_vectors);

  return frame->n_vectors;
}

static clib_error_t *
bond_input_init (vlib_main_t * vm)
{
  return 0;
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bond_input_node) = {
  .name = "bond-input",
  .vector_size = sizeof (u32),
  .format_buffer = format_ethernet_header_with_length,
  .format_trace = format_bond_input_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = BOND_INPUT_N_ERROR,
  .error_strings = bond_input_error_strings,
  .n_next_nodes = BOND_INPUT_N_NEXT,
  .next_nodes =
  {
    [BOND_INPUT_NEXT_DROP] = "error-drop"
  }
};

VLIB_INIT_FUNCTION (bond_input_init);

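/*
 * Insert bond-input on the device-input arc ahead of ethernet-input so
 * that member RX traffic is attributed to the bond interface before any
 * further L2/L3 processing.
 */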
VNET_FEATURE_INIT (bond_input, static) =
{
  .arc_name = "device-input",
  .node_name = "bond-input",
  .runs_before = VNET_FEATURES ("ethernet-input"),
};
/* *INDENT-ON* */

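/*
 * Track admin state changes on member interfaces. Members running LACP
 * are skipped here; their collecting/distributing state is managed by
 * the LACP protocol. For static bonds, port_enabled reflects admin up
 * combined with hardware link up.
 */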
static clib_error_t *
bond_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
{
  bond_main_t *bm = &bond_main;
  member_if_t *mif;
  vlib_main_t *vm = bm->vlib_main;

  mif = bond_get_member_by_sw_if_index (sw_if_index);
  if (mif)
    {
      if (mif->lacp_enabled)
        return 0;

      /* port_enabled is both admin up and hw link up */
      mif->port_enabled = ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) &&
                           vnet_sw_interface_is_link_up (vnm, sw_if_index));
      if (mif->port_enabled == 0)
        bond_disable_collecting_distributing (vm, mif);
      else
        bond_enable_collecting_distributing (vm, mif);
    }

  return 0;
}

VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (bond_sw_interface_up_down);

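/*
 * Same handling as above, triggered by hardware link state changes on
 * member interfaces.
 */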
static clib_error_t *
bond_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
{
  bond_main_t *bm = &bond_main;
  member_if_t *mif;
  vnet_sw_interface_t *sw;
  vlib_main_t *vm = bm->vlib_main;

  sw = vnet_get_hw_sw_interface (vnm, hw_if_index);
  mif = bond_get_member_by_sw_if_index (sw->sw_if_index);
  if (mif)
    {
      if (mif->lacp_enabled)
        return 0;

      /* port_enabled is both admin up and hw link up */
      mif->port_enabled = ((flags & VNET_HW_INTERFACE_FLAG_LINK_UP) &&
                           vnet_sw_interface_is_admin_up (vnm,
                                                          sw->sw_if_index));
      if (mif->port_enabled == 0)
        bond_disable_collecting_distributing (vm, mif);
      else
        bond_enable_collecting_distributing (vm, mif);
    }

  return 0;
}

VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (bond_hw_interface_up_down);

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */