Change ARP and IP6-ND nodes to use interface-output node for output 66/766/2
authorJohn Lo <[email protected]>
Tue, 12 Apr 2016 22:20:39 +0000 (18:20 -0400)
committerGerrit Code Review <[email protected]>
Thu, 14 Apr 2016 23:06:54 +0000 (23:06 +0000)
The current mechanism for setting up arp-input and ip6-discover-neighbor
output nodes for interfaces using their interface link up/down callback
function is inefficient and has potential timing issue, as observed for
bonded interface. Now both nodes will setup output interface sw_if_index
in the the sw_if_index[VLIB_TX] field of current packet buffer and then
use the interface-ouput node to tx the packet.

One side effect is that vlib_node_add_next_with_slot() needs to be
modified to allow the same output node-id to be put at the specified
slot, even if another slot contain that same node-id already exist. This
requirement is caused by BVI support where all loopback interfaces set
up as BVIs will have the same output node-id being l2-input while, for
output-interface node, the output slot must match the hw_if_index of the
interface.

Change-Id: I18bd1d4fe9bea047018796f7b8a4d4c20ee31d6e
Signed-off-by: John Lo <[email protected]>
vlib/vlib/node.c
vnet/vnet/ethernet/arp.c
vnet/vnet/interface_output.c
vnet/vnet/ip/ip6.h
vnet/vnet/ip/ip6_forward.c
vnet/vnet/l2/l2_input.c

index 40ef7c7..573b3a7 100644 (file)
@@ -177,10 +177,9 @@ vlib_node_add_next_with_slot (vlib_main_t * vm,
 
   if ((p = hash_get (node->next_slot_by_node, next_node_index)))
     {
-      /* Next already exists: slot must match. */
-      if (slot != ~0)
-       ASSERT (slot == p[0]);
-      return p[0];
+      /* Next already exists: use it if slot not specified or the same. */
+      if ((slot == ~0) || (slot == p[0]))
+         return p[0];
     }
 
   if (slot == ~0)
@@ -190,7 +189,7 @@ vlib_node_add_next_with_slot (vlib_main_t * vm,
   vec_validate (node->n_vectors_by_next_node, slot);
 
   node->next_nodes[slot] = next_node_index;
-  hash_set (node->next_slot_by_node, next_node_index, slot);
+  if (!p) hash_set (node->next_slot_by_node, next_node_index, slot);
 
   vlib_node_runtime_update (vm, node_index, slot);
 
index 205023a..a5b015c 100644 (file)
@@ -70,8 +70,6 @@ typedef struct {
   uword * mac_changes_by_address;
   pending_resolution_t * mac_changes;
 
-  u32 * arp_input_next_index_by_hw_if_index;
-
   ethernet_arp_ip4_entry_t * ip4_entry_pool;
 
   mhash_t ip4_entry_by_key;
@@ -627,6 +625,7 @@ int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm,
 /* Either we drop the packet or we send a reply to the sender. */
 typedef enum {
   ARP_INPUT_NEXT_DROP,
+  ARP_INPUT_NEXT_REPLY_TX,
   ARP_INPUT_N_NEXT,
 } arp_input_next_t;
 
@@ -704,12 +703,11 @@ static void unset_random_arp_entry (void)
                                     e->key.fib_index, &delme);
 }
   
-static u32 arp_unnumbered (vlib_buffer_t * p0, 
-                           u32 pi0,
-                           ethernet_header_t * eth0,
-                           ip_interface_address_t * ifa0)
+static void arp_unnumbered (vlib_buffer_t * p0, 
+                      u32 pi0,
+                      ethernet_header_t * eth0,
+                      ip_interface_address_t * ifa0)
 {
-  ethernet_arp_main_t * am = &ethernet_arp_main;
   vlib_main_t * vm = vlib_get_main();
   vnet_main_t * vnm = vnet_get_main();
   vnet_interface_main_t * vim = &vnm->interface_main;
@@ -833,13 +831,11 @@ static u32 arp_unnumbered (vlib_buffer_t * p0,
         }
     }
 
-  hi = vnet_get_sup_hw_interface (vnm, broadcast_swifs[0]);
+  /* The regular path outputs the original pkt.. */
+  vnet_buffer (p0)->sw_if_index[VLIB_TX] = broadcast_swifs[0];
 
   vec_free (broadcast_swifs);
   vec_free (buffers);
-
-  /* The regular path outputs the original pkt.. */
-  return vec_elt (am->arp_input_next_index_by_hw_if_index, hi->hw_if_index);
 }
 
 static uword
@@ -982,14 +978,9 @@ arp_input (vlib_main_t * vm,
          vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
          hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
 
-          /* Can happen in a multi-core env. */
-          if (PREDICT_FALSE(hw_if0->hw_if_index >= vec_len (am->arp_input_next_index_by_hw_if_index)))
-            {
-              error0 = ETHERNET_ARP_ERROR_missing_interface_address;
-              goto drop2;
-            }
-
-         next0 = vec_elt (am->arp_input_next_index_by_hw_if_index, hw_if0->hw_if_index);
+         /* Send reply back through input interface */
+         vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+         next0 = ARP_INPUT_NEXT_REPLY_TX;
 
          arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
 
@@ -1015,7 +1006,7 @@ arp_input (vlib_main_t * vm,
                   goto drop2;
                 }
               if (is_unnum0)
-                next0 = arp_unnumbered (p0, pi0, eth0, ifa0);
+                arp_unnumbered (p0, pi0, eth0, ifa0);
               else
                 vlib_buffer_advance (p0, -adj0->rewrite_header.data_bytes);
             }
@@ -1117,32 +1108,13 @@ VLIB_REGISTER_NODE (arp_input_node,static) = {
   .n_next_nodes = ARP_INPUT_N_NEXT,
   .next_nodes = {
     [ARP_INPUT_NEXT_DROP] = "error-drop",
+    [ARP_INPUT_NEXT_REPLY_TX] = "interface-output",
   },
 
   .format_buffer = format_ethernet_arp_header,
   .format_trace = format_ethernet_arp_input_trace,
 };
 
-clib_error_t *
-ethernet_arp_hw_interface_link_up_down (vnet_main_t * vnm,
-                                       u32 hw_if_index,
-                                       u32 flags)
-{
-  ethernet_arp_main_t * am = &ethernet_arp_main;
-  vnet_hw_interface_t * hw_if;
-
-  hw_if = vnet_get_hw_interface (vnm, hw_if_index);
-
-  /* Fill in lookup tables with default table (0). */
-  vec_validate_init_empty (am->arp_input_next_index_by_hw_if_index, hw_if_index, ~0);
-  am->arp_input_next_index_by_hw_if_index[hw_if_index]
-    = vlib_node_add_next (vnm->vlib_main, arp_input_node.index, hw_if->output_node_index);
-
-  return 0;
-}
-
-VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (ethernet_arp_hw_interface_link_up_down);
-
 static int
 ip4_arp_entry_sort (void *a1, void *a2)
 {
index 84dc039..033e9fc 100644 (file)
@@ -1147,11 +1147,6 @@ VLIB_REGISTER_NODE (punt_buffers,static) = {
   .validate_frame = validate_error_frame,
 };
 
-static clib_error_t *
-vnet_per_buffer_interface_output_hw_interface_add_del (vnet_main_t * vnm,
-                                                      u32 hw_if_index,
-                                                      u32 is_create);
-
 VLIB_REGISTER_NODE (vnet_per_buffer_interface_output_node,static) = {
   .function = vnet_per_buffer_interface_output,
   .name = "interface-output",
index b32b1b8..ff65d3a 100644 (file)
@@ -163,8 +163,6 @@ typedef struct ip6_main_t {
   /* Template used to generate IP6 neighbor solicitation packets. */
   vlib_packet_template_t discover_neighbor_packet_template;
 
-  u32 * discover_neighbor_next_index_by_hw_if_index;
-
   /* ip6 lookup table config parameters */
   u32 lookup_table_nbuckets;
   uword lookup_table_size;
index a79bae6..4a161e7 100644 (file)
@@ -1768,6 +1768,7 @@ void ip6_register_protocol (u32 protocol, u32 node_index)
 
 typedef enum {
   IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
+  IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX,
   IP6_DISCOVER_NEIGHBOR_N_NEXT,
 } ip6_discover_neighbor_next_t;
 
@@ -1933,11 +1934,7 @@ ip6_discover_neighbor (vlib_main_t * vm,
                                      sizeof (ethernet_header_t));
            vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
 
-            /* $$$$ hack in case next0 == 0 */
-            b0->error = node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_DROP];
-           next0 = 
-              vec_elt (im->discover_neighbor_next_index_by_hw_if_index, 
-                       hw_if0->hw_if_index);
+           next0 = IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX;
 
            vlib_set_next_frame_buffer (vm, node, next0, bi0);
          }
@@ -1969,31 +1966,10 @@ VLIB_REGISTER_NODE (ip6_discover_neighbor_node) = {
   .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT,
   .next_nodes = {
     [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop",
+    [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output",
   },
 };
 
-clib_error_t *
-ip6_discover_neighbor_hw_interface_link_up_down (vnet_main_t * vnm,
-                                                 u32 hw_if_index,
-                                                 u32 flags)
-{
-  vlib_main_t * vm = vnm->vlib_main;
-  ip6_main_t * im = &ip6_main;
-  vnet_hw_interface_t * hw_if;
-
-  hw_if = vnet_get_hw_interface (vnm, hw_if_index);
-
-  vec_validate_init_empty 
-    (im->discover_neighbor_next_index_by_hw_if_index, hw_if_index, 0);
-  im->discover_neighbor_next_index_by_hw_if_index[hw_if_index]
-    = vlib_node_add_next (vm, ip6_discover_neighbor_node.index,
-                          hw_if->output_node_index);
-  return 0;
-}
-
-VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION 
-(ip6_discover_neighbor_hw_interface_link_up_down);
-
 clib_error_t *
 ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index)
 {
index a42fcae..3d3d51f 100644 (file)
 #include <vppinfra/cache.h>
 
 extern clib_error_t *
-ethernet_arp_hw_interface_link_up_down (vnet_main_t * vnm,
-                                        u32 hw_if_index,
-                                        u32 flags);
-
-extern clib_error_t *
-ip6_discover_neighbor_hw_interface_link_up_down (vnet_main_t * vnm,
-                                                 u32 hw_if_index,
-                                                 u32 flags);
+vnet_per_buffer_interface_output_hw_interface_add_del (
+    vnet_main_t * vnm, u32 hw_if_index, u32 is_create);
 
 // Feature graph node names
 static char * l2input_feat_names[] = {
@@ -567,9 +561,9 @@ u32 set_int_l2_mode (vlib_main_t * vm,
       mac = *((u64 *)hi->hw_address);
       l2fib_del_entry (mac, config->bd_index);
 
-      // Let ARP and NDP know that the output node index changed
-      ethernet_arp_hw_interface_link_up_down(vnet_main, hi->hw_if_index, 0);
-      ip6_discover_neighbor_hw_interface_link_up_down(vnet_main, hi->hw_if_index, 0);
+      // Let interface-output node know that the output node index changed
+      vnet_per_buffer_interface_output_hw_interface_add_del(
+         vnet_main, hi->hw_if_index, 0);
     } 
     l2_if_adjust--;
   } else if (config->xconnect) {
@@ -653,10 +647,10 @@ u32 set_int_l2_mode (vlib_main_t * vm,
         // Disable learning by default. no use since l2fib entry is static.
         config->feature_bitmap &= ~L2INPUT_FEAT_LEARN;
 
-        // Let ARP and NDP know that the output_index_node changed so they
-        // can send requests via BVI to BD
-        ethernet_arp_hw_interface_link_up_down(vnet_main, hi->hw_if_index, 0);
-        ip6_discover_neighbor_hw_interface_link_up_down(vnet_main, hi->hw_if_index, 0);
+       // Let interface-output node know that the output node index changed
+        // so output can be sent via BVI to BD
+       vnet_per_buffer_interface_output_hw_interface_add_del(
+           vnet_main, hi->hw_if_index, 0);
       }
 
       // Add interface to bridge-domain flood vector