bond: send gratuitous arp when the active slave went down in active-backup mode 15/12915/1
author Steven <sluong@cisco.com>
Tue, 5 Jun 2018 18:09:32 +0000 (11:09 -0700)
committer Steven <sluong@cisco.com>
Tue, 5 Jun 2018 18:09:32 +0000 (11:09 -0700)
- Modify the APIs send_ip6_na and send_ip4_garp to take a sw_if_index instead
of a vnet_hw_interface_t, and add a call to ethernet_build_rewrite to support
subinterfaces/VLANs
- Add code to the bonding driver to send an event to bond_process when the first
interface becomes active or when the active interface goes down
- Create a bond_process to walk the interface and the corresponding
subinterfaces to send garp/ip6_na when an event is received.
- Minor cleanup in bonding/node.c

Note: the dpdk bonding driver does not send garp/ip6_na for subinterfaces. No
attempt is made to fix that here, but the infrastructure is now in place and
support should be easy to add.

Change-Id: If3ecc4cd0fb3051330f7fa11ca0dab3e18557ce1
Signed-off-by: Steven <sluong@cisco.com>
src/plugins/dpdk/device/common.c
src/plugins/gbp/gbp_endpoint.c
src/vnet/bonding/cli.c
src/vnet/bonding/device.c
src/vnet/bonding/node.c
src/vnet/bonding/node.h
src/vnet/ethernet/arp.c
src/vnet/ethernet/arp_packet.h
src/vnet/ip/ip6.h
src/vnet/ip/ip6_neighbor.c

index dad4332..99dd181 100644 (file)
@@ -222,7 +222,6 @@ static uword
 send_garp_na_process (vlib_main_t * vm,
                      vlib_node_runtime_t * rt, vlib_frame_t * f)
 {
-  vnet_main_t *vnm = vnet_get_main ();
   uword event_type, *event_data = 0;
 
   while (1)
@@ -238,11 +237,9 @@ send_garp_na_process (vlib_main_t * vm,
          if (i < 5)            /* wait 0.2 sec for link to settle, max total 1 sec */
            vlib_process_suspend (vm, 0.2);
          dpdk_device_t *xd = &dpdk_main.devices[dpdk_port];
-         u32 hw_if_index = xd->hw_if_index;
-         vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
          dpdk_update_link_state (xd, vlib_time_now (vm));
-         send_ip4_garp (vm, hi);
-         send_ip6_na (vm, hi);
+         send_ip4_garp (vm, xd->sw_if_index);
+         send_ip6_na (vm, xd->sw_if_index);
        }
       vec_reset_length (event_data);
     }
index 91505da..cffa6da 100644 (file)
@@ -179,13 +179,11 @@ gbp_endpoint_update (u32 sw_if_index,
   if (ip46_address_is_ip4 (&gbpe->ge_key->gek_ip))
     send_ip4_garp_w_addr (vlib_get_main (),
                          &gbpe->ge_key->gek_ip.ip4,
-                         vnet_get_sup_hw_interface
-                         (vnet_get_main (), gepg->gepg_uplink_sw_if_index));
+                         gepg->gepg_uplink_sw_if_index);
   else
     send_ip6_na_w_addr (vlib_get_main (),
                        &gbpe->ge_key->gek_ip.ip6,
-                       vnet_get_sup_hw_interface
-                       (vnet_get_main (), gepg->gepg_uplink_sw_if_index));
+                       gepg->gepg_uplink_sw_if_index);
 
   return (0);
 }
index 2799bb8..91c6e2c 100644 (file)
 void
 bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
 {
+  bond_main_t *bm = &bond_main;
   bond_if_t *bif;
   int i;
   uword p;
+  u8 switching_active = 0;
 
   bif = bond_get_master_by_dev_instance (sif->bif_dev_instance);
   clib_spinlock_lock_if_init (&bif->lockp);
@@ -35,8 +37,18 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
     p = *vec_elt_at_index (bif->active_slaves, i);
     if (p == sif->sw_if_index)
       {
+       /* Are we disabling the very 1st slave? */
+       if (sif->sw_if_index == *vec_elt_at_index (bif->active_slaves, 0))
+         switching_active = 1;
+
        vec_del1 (bif->active_slaves, i);
        hash_unset (bif->active_slave_by_sw_if_index, sif->sw_if_index);
+
+       /* We got a new slave just becoming active? */
+       if ((vec_len (bif->active_slaves) >= 1) &&
+           (bif->mode == BOND_MODE_ACTIVE_BACKUP) && switching_active)
+         vlib_process_signal_event (bm->vlib_main, bond_process_node.index,
+                                    BOND_SEND_GARP_NA, bif->hw_if_index);
        break;
       }
   }
@@ -47,6 +59,7 @@ void
 bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
 {
   bond_if_t *bif;
+  bond_main_t *bm = &bond_main;
 
   bif = bond_get_master_by_dev_instance (sif->bif_dev_instance);
   clib_spinlock_lock_if_init (&bif->lockp);
@@ -55,6 +68,12 @@ bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
       hash_set (bif->active_slave_by_sw_if_index, sif->sw_if_index,
                sif->sw_if_index);
       vec_add1 (bif->active_slaves, sif->sw_if_index);
+
+      /* First slave becomes active? */
+      if ((vec_len (bif->active_slaves) == 1) &&
+         (bif->mode == BOND_MODE_ACTIVE_BACKUP))
+       vlib_process_signal_event (bm->vlib_main, bond_process_node.index,
+                                  BOND_SEND_GARP_NA, bif->hw_if_index);
     }
   clib_spinlock_unlock_if_init (&bif->lockp);
 }
index 8ddec80..1ade1c2 100644 (file)
@@ -23,6 +23,8 @@
 #include <vnet/ip/ip6_hop_by_hop_packet.h>
 #include <vnet/bonding/node.h>
 #include <vppinfra/lb_hash_hash.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/arp_packet.h>
 
 #define foreach_bond_tx_error     \
   _(NONE, "no error")             \
@@ -700,6 +702,52 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
   return frame->n_vectors;
 }
 
+static walk_rc_t		/* per-sw-interface callback passed to vnet_hw_interface_walk_sw by bond_process */
+bond_active_interface_switch_cb (vnet_main_t * vnm, u32 sw_if_index,
+                                void *arg)	/* vnm and arg are not used here */
+{
+  bond_main_t *bm = &bond_main;
+
+  send_ip4_garp (bm->vlib_main, sw_if_index);	/* gratuitous ARP for this sw interface */
+  send_ip6_na (bm->vlib_main, sw_if_index);	/* unsolicited IPv6 neighbor advertisement */
+
+  return (WALK_CONTINUE);	/* always asks the walk to continue */
+}
+
+static uword			/* function of the "bond-process" process node */
+bond_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  uword event_type, *event_data = 0;
+
+  while (1)
+    {
+      u32 i;
+      u32 hw_if_index;
+
+      vlib_process_wait_for_event (vm);
+      event_type = vlib_process_get_events (vm, &event_data);
+      ASSERT (event_type == BOND_SEND_GARP_NA);	/* the only event type this node expects */
+      for (i = 0; i < vec_len (event_data); i++)
+       {
+         hw_if_index = event_data[i];	/* signallers pass bif->hw_if_index as event data */
+         /* walk hw interface to process all subinterfaces */
+         vnet_hw_interface_walk_sw (vnm, hw_if_index,
+                                    bond_active_interface_switch_cb, 0);
+       }
+      vec_reset_length (event_data);	/* length reset so the vector is reused next iteration */
+    }
+  return 0;			/* not reached: the loop above has no exit */
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (bond_process_node) = {
+  .function = bond_process,
+  .type = VLIB_NODE_TYPE_PROCESS,
+  .name = "bond-process",
+};
+/* *INDENT-ON* */
+
 /* *INDENT-OFF* */
 VNET_DEVICE_CLASS (bond_dev_class) = {
   .name = "bond",
index 9479fe9..5842da3 100644 (file)
@@ -67,7 +67,7 @@ typedef enum
 {
   BOND_INPUT_NEXT_DROP,
   BOND_INPUT_N_NEXT,
-} l2output_next_t;
+} bond_output_next_t;
 
 static_always_inline u8
 packet_is_cdp (ethernet_header_t * eth)
@@ -329,7 +329,6 @@ VLIB_NODE_FN (bond_input_node) (vlib_main_t * vm,
       n_left = frame->n_vectors;       /* number of packets to process */
       b = bufs;
       sw_if_index = sw_if_indices;
-      next = nexts;
       bond_packet_trace_t *t0;
 
       while (n_left)
@@ -346,7 +345,6 @@ VLIB_NODE_FN (bond_input_node) (vlib_main_t * vm,
          n_left--;
          b++;
          sw_if_index++;
-         next++;
        }
     }
 
index 5c6ff32..7b8e1ad 100644 (file)
@@ -70,6 +70,11 @@ typedef enum
 #undef _
 } bond_load_balance_t;
 
+typedef enum			/* event types signalled to the bond_process node */
+{
+  BOND_SEND_GARP_NA = 1,	/* request GARP / IPv6 NA for a bond interface */
+} bond_send_garp_na_process_event_t;
+
 typedef struct
 {
   u8 hw_addr_set;
@@ -325,6 +330,7 @@ typedef struct
 } bond_load_balance_func_t;
 
 extern vlib_node_registration_t bond_input_node;
+extern vlib_node_registration_t bond_process_node;
 extern vnet_device_class_t bond_dev_class;
 extern bond_main_t bond_main;
 
index 249e3b6..682bc2a 100644 (file)
@@ -2526,22 +2526,22 @@ ethernet_arp_change_mac (u32 sw_if_index)
 }
 
 void
-send_ip4_garp (vlib_main_t * vm, const vnet_hw_interface_t * hi)
+send_ip4_garp (vlib_main_t * vm, u32 sw_if_index)
 {
   ip4_main_t *i4m = &ip4_main;
-  ip4_address_t *ip4_addr =
-    ip4_interface_first_address (i4m, hi->sw_if_index, 0);
+  ip4_address_t *ip4_addr = ip4_interface_first_address (i4m, sw_if_index, 0);
 
-  send_ip4_garp_w_addr (vm, ip4_addr, hi);
+  send_ip4_garp_w_addr (vm, ip4_addr, sw_if_index);
 }
 
 void
 send_ip4_garp_w_addr (vlib_main_t * vm,
-                     const ip4_address_t * ip4_addr,
-                     const vnet_hw_interface_t * hi)
+                     const ip4_address_t * ip4_addr, u32 sw_if_index)
 {
   ip4_main_t *i4m = &ip4_main;
-  u32 sw_if_index = hi->sw_if_index;
+  vnet_main_t *vnm = vnet_get_main ();
+  u8 *rewrite, rewrite_len;
+  vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
 
   if (ip4_addr)
     {
@@ -2563,11 +2563,14 @@ send_ip4_garp_w_addr (vlib_main_t * vm,
 
       /* Setup MAC header with ARP Etype and broadcast DMAC */
       vlib_buffer_t *b = vlib_get_buffer (vm, bi);
-      vlib_buffer_advance (b, -sizeof (ethernet_header_t));
+      rewrite =
+       ethernet_build_rewrite (vnm, sw_if_index, VNET_LINK_ARP,
+                               VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST);
+      rewrite_len = vec_len (rewrite);
+      vlib_buffer_advance (b, -rewrite_len);
       ethernet_header_t *e = vlib_buffer_get_current (b);
-      e->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
-      clib_memcpy (e->src_address, hi->hw_address, sizeof (e->src_address));
-      memset (e->dst_address, 0xff, sizeof (e->dst_address));
+      clib_memcpy (e->dst_address, rewrite, rewrite_len);
+      vec_free (rewrite);
 
       /* Send GARP packet out the specified interface */
       vnet_buffer (b)->sw_if_index[VLIB_RX] =
index d2a23a5..e5f424d 100644 (file)
@@ -168,10 +168,9 @@ ethernet_arp_ip4_entry_t *ip4_neighbors_pool (void);
 ethernet_arp_ip4_entry_t *ip4_neighbor_entries (u32 sw_if_index);
 u8 *format_ethernet_arp_ip4_entry (u8 * s, va_list * va);
 
-void send_ip4_garp (vlib_main_t * vm, const vnet_hw_interface_t * hi);
+void send_ip4_garp (vlib_main_t * vm, u32 sw_if_index);
 void send_ip4_garp_w_addr (vlib_main_t * vm,
-                          const ip4_address_t * ip4_addr,
-                          const vnet_hw_interface_t * hi);
+                          const ip4_address_t * ip4_addr, u32 sw_if_index);
 
 #endif /* included_ethernet_arp_packet_h */
 
index 4b24774..6f6724b 100644 (file)
@@ -407,10 +407,9 @@ int vnet_ip6_nd_term (vlib_main_t * vm,
                      ethernet_header_t * eth,
                      ip6_header_t * ip, u32 sw_if_index, u16 bd_index);
 
-void send_ip6_na (vlib_main_t * vm, const vnet_hw_interface_t * hi);
+void send_ip6_na (vlib_main_t * vm, u32 sw_if_index);
 void send_ip6_na_w_addr (vlib_main_t * vm,
-                        const ip6_address_t * addr,
-                        const vnet_hw_interface_t * hi);
+                        const ip6_address_t * addr, u32 sw_if_index);
 
 u8 *format_ip6_forward_next_trace (u8 * s, va_list * args);
 
index 1b37e54..3daea6b 100644 (file)
@@ -4980,22 +4980,23 @@ ethernet_ndp_change_mac (u32 sw_if_index)
 }
 
 void
-send_ip6_na (vlib_main_t * vm, const vnet_hw_interface_t * hi)
+send_ip6_na (vlib_main_t * vm, u32 sw_if_index)
 {
   ip6_main_t *i6m = &ip6_main;
-  u32 sw_if_index = hi->sw_if_index;
   ip6_address_t *ip6_addr = ip6_interface_first_address (i6m, sw_if_index);
 
-  send_ip6_na_w_addr (vm, ip6_addr, hi);
+  send_ip6_na_w_addr (vm, ip6_addr, sw_if_index);
 }
 
 void
 send_ip6_na_w_addr (vlib_main_t * vm,
-                   const ip6_address_t * ip6_addr,
-                   const vnet_hw_interface_t * hi)
+                   const ip6_address_t * ip6_addr, u32 sw_if_index)
 {
   ip6_main_t *i6m = &ip6_main;
-  u32 sw_if_index = hi->sw_if_index;
+  vnet_main_t *vnm = vnet_get_main ();
+  u8 *rewrite, rewrite_len;
+  vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  u8 dst_address[6];
 
   if (ip6_addr)
     {
@@ -5026,12 +5027,15 @@ send_ip6_na_w_addr (vlib_main_t * vm,
 
       /* Setup MAC header with IP6 Etype and mcast DMAC */
       vlib_buffer_t *b = vlib_get_buffer (vm, bi);
-      vlib_buffer_advance (b, -sizeof (ethernet_header_t));
-      ethernet_header_t *e = vlib_buffer_get_current (b);
-      e->type = clib_host_to_net_u16 (ETHERNET_TYPE_IP6);
-      clib_memcpy (e->src_address, hi->hw_address, sizeof (e->src_address));
-      ip6_multicast_ethernet_address (e->dst_address,
+      ip6_multicast_ethernet_address (dst_address,
                                      IP6_MULTICAST_GROUP_ID_all_hosts);
+      rewrite =
+       ethernet_build_rewrite (vnm, sw_if_index, VNET_LINK_IP6, dst_address);
+      rewrite_len = vec_len (rewrite);
+      vlib_buffer_advance (b, -rewrite_len);
+      ethernet_header_t *e = vlib_buffer_get_current (b);
+      clib_memcpy (e->dst_address, rewrite, rewrite_len);
+      vec_free (rewrite);
 
       /* Send unsolicited ND advertisement packet out the specified interface */
       vnet_buffer (b)->sw_if_index[VLIB_RX] =