Send GARP/NA on bonded intf slave up/down if in active-backup mode 09/7309/4
authorJohn Lo <loj@cisco.com>
Mon, 26 Jun 2017 05:40:20 +0000 (01:40 -0400)
committerDamjan Marion <dmarion.lists@gmail.com>
Thu, 6 Jul 2017 13:24:26 +0000 (13:24 +0000)
If a bonded interface is in active-backup mode and configured with
IPv4 and/or IPv6 addresses, on slave interface link up/down, send
a GARP packet if configured with an IPv4 address and an unsolicited
NA if configured with an IPv6 address. These packets can help with
faster route convergence in the next hop router/switch.

Change-Id: I68ccb11a4a40cda414704fa08ee0171c952befa2
Signed-off-by: John Lo <loj@cisco.com>
src/plugins/dpdk/device/common.c
src/plugins/dpdk/device/dpdk.h
src/plugins/dpdk/device/init.c
src/vnet/ethernet/arp.c
src/vnet/ethernet/arp_packet.h
src/vnet/ip/ip6.h
src/vnet/ip/ip6_neighbor.c

index 1a9688e..df52c58 100644 (file)
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include <vnet/vnet.h>
 #include <vppinfra/vec.h>
 #include <vppinfra/format.h>
 #include <vlib/unix/cj.h>
 #include <assert.h>
 
+#include <vnet/ip/ip.h>
 #include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/arp_packet.h>
 #include <dpdk/device/dpdk.h>
 
 #include <dpdk/device/dpdk_priv.h>
@@ -178,6 +181,65 @@ dpdk_device_stop (dpdk_device_t * xd)
     }
 }
 
+/*
+ * Link state change callback, registered for bond slave ports.
+ *
+ * port_id - DPDK port whose link state changed
+ * type    - DPDK event type; anything other than RTE_ETH_EVENT_INTR_LSC
+ *           is logged and ignored
+ * param   - unused
+ *
+ * When a slave's link state differs from the state recorded in
+ * DPDK_DEVICE_FLAG_BOND_SLAVE_UP, and its bonded interface is in
+ * active-backup mode with link up, send_garp_na_process is signalled with
+ * the hw_if_index of the bonded interface. The recorded slave state is
+ * updated afterwards in all cases.
+ */
+void
+dpdk_port_state_callback (uint8_t port_id,
+                         enum rte_eth_event_type type, void *param)
+{
+  struct rte_eth_link link;
+  vlib_main_t *vm = vlib_get_main ();
+  dpdk_device_t *xd = &dpdk_main.devices[port_id];
+
+  RTE_SET_USED (param);
+  if (type != RTE_ETH_EVENT_INTR_LSC)
+    {
+      clib_warning ("Unknown event %d received for port %d", type, port_id);
+      return;
+    }
+
+  rte_eth_link_get_nowait (port_id, &link);
+  u8 link_up = link.link_status;
+
+  if (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE)
+    {
+      u8 bd_port = xd->bond_port;
+      /* Device entry of the bonded interface this slave belongs to */
+      dpdk_device_t *bxd = &dpdk_main.devices[bd_port];
+      int bd_mode = rte_eth_bond_mode_get (bd_port);
+
+      if ((link_up && !(xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE_UP)) ||
+         (!link_up && (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE_UP)))
+       {
+         /* The BondEthernet instance number is kept in port_id of the
+            bonded device, not of the slave */
+         clib_warning ("Port %d state to %s, "
+                       "slave of port %d BondEthernet%d in mode %d",
+                       port_id, (link_up) ? "UP" : "DOWN",
+                       bd_port, bxd->port_id, bd_mode);
+         if (bd_mode == BONDING_MODE_ACTIVE_BACKUP)
+           {
+             rte_eth_link_get_nowait (bd_port, &link);
+             if (link.link_status)     /* bonded interface up */
+               {
+                 /* NOTE(review): the process is signalled directly from
+                    this callback - confirm the callback runs in a context
+                    where vlib_process_signal_event is safe to call */
+                 u32 hw_if_index = bxd->hw_if_index;
+                 vlib_process_signal_event
+                   (vm, send_garp_na_process_node_index, SEND_GARP_NA,
+                    hw_if_index);
+               }
+           }
+       }
+      if (link_up)             /* Update slave link status */
+       xd->flags |= DPDK_DEVICE_FLAG_BOND_SLAVE_UP;
+      else
+       xd->flags &= ~DPDK_DEVICE_FLAG_BOND_SLAVE_UP;
+    }
+  else                         /* Should not happen as callback not setup for "normal" links */
+    {
+      if (link_up)
+       clib_warning ("Port %d Link Up - speed %u Mbps - %s",
+                     port_id, (unsigned) link.link_speed,
+                     (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+                     "full-duplex" : "half-duplex");
+      else
+       clib_warning ("Port %d Link Down\n\n", port_id);
+    }
+}
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
index d82ba5d..c6fd738 100644 (file)
@@ -173,6 +173,8 @@ typedef struct
 #define DPDK_DEVICE_FLAG_MAYBE_MULTISEG     (1 << 4)
 #define DPDK_DEVICE_FLAG_HAVE_SUBIF         (1 << 5)
 #define DPDK_DEVICE_FLAG_HQOS               (1 << 6)
+#define DPDK_DEVICE_FLAG_BOND_SLAVE         (1 << 7)
+#define DPDK_DEVICE_FLAG_BOND_SLAVE_UP      (1 << 8)
 
   u16 nb_tx_desc;
     CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
@@ -197,6 +199,10 @@ typedef struct
   /* af_packet or BondEthernet instance number */
   u8 port_id;
 
+  /* Bonded interface port# of a slave -
+     only valid if DPDK_DEVICE_FLAG_BOND_SLAVE bit is set */
+  u8 bond_port;
+
   struct rte_eth_link link;
   f64 time_last_link_update;
 
@@ -408,6 +414,8 @@ typedef struct
 void dpdk_device_setup (dpdk_device_t * xd);
 void dpdk_device_start (dpdk_device_t * xd);
 void dpdk_device_stop (dpdk_device_t * xd);
+void dpdk_port_state_callback (uint8_t port_id,
+                              enum rte_eth_event_type type, void *param);
 
 #define foreach_dpdk_error                                             \
   _(NONE, "no error")                                                  \
index 9096807..d9ab075 100755 (executable)
@@ -1373,8 +1373,10 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
     /*
      * Extra set up for bond interfaces:
      *  1. Setup MACs for bond interfaces and their slave links which was set
-     *     in dpdk_device_setup() but needs to be done again here to take effect.
-     *  2. Set up info for bond interface related CLI support.
+     *     in dpdk_device_setup() but needs to be done again here to take
+     *     effect.
+     *  2. Set up info and register slave link state change callback handling.
+     *  3. Set up info for bond interface related CLI support.
      */
     int nports = rte_eth_dev_count ();
     if (nports > 0)
@@ -1399,7 +1401,8 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
                      (slink[0], (struct ether_addr *) addr);
 
                    /* Set MAC of bounded interface to that of 1st slave link */
-                   clib_warning ("Set MAC for bond dev# %d", i);
+                   clib_warning ("Set MAC for bond port %d BondEthernet%d",
+                                 i, xd->port_id);
                    rv = rte_eth_bond_mac_address_set
                      (i, (struct ether_addr *) addr);
                    if (rv)
@@ -1428,34 +1431,38 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
                        /* Add MAC to all slave links except the first one */
                        if (nlink)
                          {
-                           clib_warning ("Add MAC for slave dev# %d", slave);
+                           clib_warning ("Add MAC for slave port %d", slave);
                            rv = rte_eth_dev_mac_addr_add
                              (slave, (struct ether_addr *) addr, 0);
                            if (rv)
                              clib_warning ("Add MAC addr failure rv=%d", rv);
                          }
+                       /* Setup slave link state change callback handling */
+                       rte_eth_dev_callback_register
+                         (slave, RTE_ETH_EVENT_INTR_LSC,
+                          dpdk_port_state_callback, NULL);
+                       dpdk_device_t *sxd = &dm->devices[slave];
+                       sxd->flags |= DPDK_DEVICE_FLAG_BOND_SLAVE;
+                       sxd->bond_port = i;
                        /* Set slaves bitmap for bonded interface */
                        bhi->bond_info = clib_bitmap_set
                          (bhi->bond_info, sdev->hw_if_index, 1);
-                       /* Set slave link flags on slave interface */
+                       /* Set MACs and slave link flags on slave interface */
                        shi = vnet_get_hw_interface (vnm, sdev->hw_if_index);
                        ssi = vnet_get_sw_interface
                          (vnm, sdev->vlib_sw_if_index);
                        sei = pool_elt_at_index
                          (em->interfaces, shi->hw_instance);
-
                        shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE;
                        ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE;
                        clib_memcpy (shi->hw_address, addr, 6);
                        clib_memcpy (sei->address, addr, 6);
-
                        /* Set l3 packet size allowed as the lowest of slave */
                        if (bhi->max_l3_packet_bytes[VLIB_RX] >
                            shi->max_l3_packet_bytes[VLIB_RX])
                          bhi->max_l3_packet_bytes[VLIB_RX] =
                            bhi->max_l3_packet_bytes[VLIB_TX] =
                            shi->max_l3_packet_bytes[VLIB_RX];
-
                        /* Set max packet size allowed as the lowest of slave */
                        if (bhi->max_packet_bytes > shi->max_packet_bytes)
                          bhi->max_packet_bytes = shi->max_packet_bytes;
index d5dc9cc..df68175 100644 (file)
@@ -110,6 +110,9 @@ typedef struct
 
 static const u8 vrrp_prefix[] = { 0x00, 0x00, 0x5E, 0x00, 0x01 };
 
+/* Node index for send_garp_na_process; assigned when that process starts */
+u32 send_garp_na_process_node_index;
+
 static void
 set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
                                    * a);
@@ -2378,6 +2381,86 @@ ethernet_arp_change_mac (u32 sw_if_index)
   /* *INDENT-ON* */
 }
 
+/*
+ * Send a gratuitous ARP out the given hardware interface for the address
+ * returned by ip4_interface_first_address on its software interface.
+ *
+ * Does nothing if no such address exists or if no packet could be obtained
+ * from the ARP request packet template.
+ */
+static void
+send_ip4_garp (vlib_main_t * vm, vnet_hw_interface_t * hi)
+{
+  ip4_main_t *i4m = &ip4_main;
+  u32 sw_if_index = hi->sw_if_index;
+  ip4_address_t *ip4_addr = ip4_interface_first_address (i4m, sw_if_index, 0);
+
+  if (!ip4_addr)
+    return;
+
+  clib_warning ("Sending GARP for IP4 address %U on sw_if_idex %d",
+               format_ip4_address, ip4_addr, sw_if_index);
+
+  /* Form GARP packet for output - Gratuitous ARP is an ARP request packet
+     where the interface IP/MAC pair is used for both source and request
+     MAC/IP pairs in the request */
+  u32 bi = 0;
+  ethernet_arp_header_t *h = vlib_packet_template_get_packet
+    (vm, &i4m->ip4_arp_request_packet_template, &bi);
+
+  /* No packet available from the template: nothing to send */
+  if (!h)
+    return;
+
+  clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
+              sizeof (h->ip4_over_ethernet[0].ethernet));
+  clib_memcpy (h->ip4_over_ethernet[1].ethernet, hi->hw_address,
+              sizeof (h->ip4_over_ethernet[1].ethernet));
+  h->ip4_over_ethernet[0].ip4 = ip4_addr[0];
+  h->ip4_over_ethernet[1].ip4 = ip4_addr[0];
+
+  /* Setup MAC header with ARP Etype and broadcast DMAC */
+  vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+  vlib_buffer_advance (b, -sizeof (ethernet_header_t));
+  ethernet_header_t *e = vlib_buffer_get_current (b);
+  e->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
+  clib_memcpy (e->src_address, hi->hw_address, sizeof (e->src_address));
+  memset (e->dst_address, 0xff, sizeof (e->dst_address));
+
+  /* Send GARP packet out the specified interface */
+  vnet_buffer (b)->sw_if_index[VLIB_RX] =
+    vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+  vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+  u32 *to_next = vlib_frame_vector_args (f);
+  to_next[0] = bi;
+  f->n_vectors = 1;
+  vlib_put_frame_to_node (vm, hi->output_node_index, f);
+}
+
+static vlib_node_registration_t send_garp_na_proc_node;
+
+/*
+ * Process node body: waits for SEND_GARP_NA events and, for every hardware
+ * interface index delivered with the event, sends an IPv4 gratuitous ARP
+ * and an IPv6 unsolicited neighbor advertisement.
+ *
+ * Publishes its own node index in send_garp_na_process_node_index before
+ * entering the event loop. The loop never exits.
+ */
+static uword
+send_garp_na_process (vlib_main_t * vm,
+                     vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  uword event_type, *event_data = 0;
+
+  send_garp_na_process_node_index = send_garp_na_proc_node.index;
+
+  while (1)
+    {
+      vlib_process_wait_for_event (vm);
+      event_type = vlib_process_get_events (vm, &event_data);
+      if (event_type == SEND_GARP_NA)
+       {
+         /* The event vector may carry more than one interface index;
+            handle every entry rather than only the first */
+         uword i;
+         for (i = 0; i < vec_len (event_data); i++)
+           {
+             u32 hw_if_index = event_data[i];
+             vnet_hw_interface_t *hi =
+               vnet_get_hw_interface (vnm, hw_if_index);
+             send_ip4_garp (vm, hi);
+             send_ip6_na (vm, hi);
+           }
+       }
+      /* Keep the vector allocation for reuse by the next wakeup */
+      vec_reset_length (event_data);
+    }
+  return 0;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (send_garp_na_proc_node, static) = {
+    .function = send_garp_na_process,  /* waits for SEND_GARP_NA events */
+    .type = VLIB_NODE_TYPE_PROCESS,
+    .name = "send-garp-na-process",
+};
+/* *INDENT-ON* */
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
index 17e64f4..d740b84 100644 (file)
@@ -167,6 +167,15 @@ typedef struct
 ethernet_arp_ip4_entry_t *ip4_neighbor_entries (u32 sw_if_index);
 u8 *format_ethernet_arp_ip4_entry (u8 * s, va_list * va);
 
+/* Node index for send_garp_na_process */
+extern u32 send_garp_na_process_node_index;
+
+/* Event type for send_garp_na_process.
+   Declared as a typedef so that including this header defines a type,
+   not a variable, in each translation unit. */
+typedef enum
+{
+  SEND_GARP_NA = 1,
+} dpdk_send_garp_na_process_event_t;
+
 #endif /* included_ethernet_arp_packet_h */
 
 /*
index d623c95..cf52994 100644 (file)
@@ -375,6 +375,8 @@ int vnet_ip6_nd_term (vlib_main_t * vm,
                      ethernet_header_t * eth,
                      ip6_header_t * ip, u32 sw_if_index, u16 bd_index);
 
+void send_ip6_na (vlib_main_t * vm, vnet_hw_interface_t * hi);
+
 u8 *format_ip6_forward_next_trace (u8 * s, va_list * args);
 
 u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0);
index ba7ea14..b8f6f9b 100644 (file)
@@ -4192,6 +4192,59 @@ ethernet_ndp_change_mac (u32 sw_if_index)
   /* *INDENT-ON* */
 }
 
+/*
+ * Send an unsolicited neighbor advertisement out the given hardware
+ * interface for the address returned by ip6_interface_first_address on its
+ * software interface. The packet is built from the neighbor solicitation
+ * packet template and sent to the link-local all-hosts multicast group.
+ *
+ * Does nothing if no such address exists or if no packet could be obtained
+ * from the template.
+ */
+void
+send_ip6_na (vlib_main_t * vm, vnet_hw_interface_t * hi)
+{
+  ip6_main_t *i6m = &ip6_main;
+  u32 sw_if_index = hi->sw_if_index;
+  ip6_address_t *ip6_addr = ip6_interface_first_address (i6m, sw_if_index);
+
+  if (!ip6_addr)
+    return;
+
+  clib_warning
+    ("Sending unsolicitated NA IP6 address %U on sw_if_idex %d",
+     format_ip6_address, ip6_addr, sw_if_index);
+
+  /* Form unsolicited neighbor advertisement packet from NS pkt template */
+  int bogus_length;
+  u32 bi = 0;
+  icmp6_neighbor_solicitation_header_t *h =
+    vlib_packet_template_get_packet (vm,
+                                    &i6m->discover_neighbor_packet_template,
+                                    &bi);
+
+  /* No packet available from the template: nothing to send */
+  if (!h)
+    return;
+
+  ip6_set_reserved_multicast_address (&h->ip.dst_address,
+                                     IP6_MULTICAST_SCOPE_link_local,
+                                     IP6_MULTICAST_GROUP_ID_all_hosts);
+  h->ip.src_address = ip6_addr[0];
+  h->neighbor.icmp.type = ICMP6_neighbor_advertisement;
+  h->neighbor.target_address = ip6_addr[0];
+  h->neighbor.advertisement_flags = clib_host_to_net_u32
+    (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
+
+  /* An advertisement carries the target link-layer address option; the
+     option type inherited from the solicitation template must be replaced */
+  h->link_layer_option.header.type =
+    ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address;
+
+  /* Bound the copy by the destination field, as send_ip4_garp does */
+  clib_memcpy (h->link_layer_option.ethernet_address,
+              hi->hw_address,
+              sizeof (h->link_layer_option.ethernet_address));
+
+  /* Checksum is computed last, after every ICMP field has been written */
+  h->neighbor.icmp.checksum =
+    ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
+  ASSERT (bogus_length == 0);
+
+  /* Setup MAC header with IP6 Etype and mcast DMAC */
+  vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+  vlib_buffer_advance (b, -sizeof (ethernet_header_t));
+  ethernet_header_t *e = vlib_buffer_get_current (b);
+  e->type = clib_host_to_net_u16 (ETHERNET_TYPE_IP6);
+  clib_memcpy (e->src_address, hi->hw_address, sizeof (e->src_address));
+  ip6_multicast_ethernet_address (e->dst_address,
+                                 IP6_MULTICAST_GROUP_ID_all_hosts);
+
+  /* Send unsolicited ND advertisement packet out the specified interface */
+  vnet_buffer (b)->sw_if_index[VLIB_RX] =
+    vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+  vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+  u32 *to_next = vlib_frame_vector_args (f);
+  to_next[0] = bi;
+  f->n_vectors = 1;
+  vlib_put_frame_to_node (vm, hi->output_node_index, f);
+}
+
 /*
  * fd.io coding-style-patch-verification: ON
  *