Gratuitous ARP packet handling
[vpp.git] / src / vnet / ethernet / arp.c
index 49a16f7..37e3e88 100644 (file)
@@ -545,6 +545,62 @@ arp_adj_fib_add (ethernet_arp_ip4_entry_t * e, u32 fib_index)
   fib_table_lock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ);
 }
 
+/* Remove the /32 adj-fib path for ARP entry e from table fib_index and
+ * release that table's FIB_SOURCE_ADJ lock; a no-op when e has no FIB
+ * entry. Uses the caller's fib_index (not shadowed by a local lookup) so
+ * the unlock pairs with the fib_table_lock in arp_adj_fib_add. */
+void
+arp_adj_fib_remove (ethernet_arp_ip4_entry_t * e, u32 fib_index)
+{
+  if (FIB_NODE_INDEX_INVALID != e->fib_entry_index)
+    {
+      fib_prefix_t pfx = {
+       .fp_len = 32,
+       .fp_proto = FIB_PROTOCOL_IP4,
+       .fp_addr.ip4 = e->ip4_address,
+      };
+      fib_table_entry_path_remove (fib_index, &pfx,
+                                  FIB_SOURCE_ADJ,
+                                  DPO_PROTO_IP4,
+                                  &pfx.fp_addr,
+                                  e->sw_if_index, ~0, 1,
+                                  FIB_ROUTE_PATH_FLAG_NONE);
+      fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ);
+    }
+}
+
+static ethernet_arp_ip4_entry_t *
+force_reuse_arp_entry (void)
+{
+  ethernet_arp_ip4_entry_t *e;
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  u32 count = 0;
+  u32 index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
+  if (index == ~0)             /* Try again from elt 0 */
+    index = pool_next_index (am->ip4_entry_pool, index);
+  /* Scan from the delete rotor for the first entry not flagged static.
+   * NOTE(review): index == ~0 inside the loop returns NULL, no wrap. */
+  do
+    {
+      if ((count++ == 100) || (index == ~0))
+       return NULL;            /* 100 entries tried, or no next index */
+      e = pool_elt_at_index (am->ip4_entry_pool, index);
+      am->arp_delete_rotor = index;
+      index = pool_next_index (am->ip4_entry_pool, index);
+    }
+  while (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC);
+  /* Detach the chosen entry: unhash it from its interface, remove its
+   * adj-fib and walk its adjacencies with arp_mk_incomplete_walk */
+  hash_unset
+    (am->ethernet_arp_by_sw_if_index[e->sw_if_index].arp_entries,
+     e->ip4_address.as_u32);
+  arp_adj_fib_remove
+    (e, ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index));
+  adj_nbr_walk_nh4 (e->sw_if_index,
+                   &e->ip4_address, arp_mk_incomplete_walk, NULL);
+  return e;                    /* no pool_put here; element is handed back */
+}
+
 static int
 vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
                                         vnet_arp_set_ip4_over_ethernet_rpc_args_t
@@ -582,12 +638,18 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
 
   if (make_new_arp_cache_entry)
     {
-      pool_get (am->ip4_entry_pool, e);
-
-      if (NULL == arp_int->arp_entries)
+      if (am->limit_arp_cache_size &&
+         pool_elts (am->ip4_entry_pool) >= am->limit_arp_cache_size)
        {
-         arp_int->arp_entries = hash_create (0, sizeof (u32));
+         e = force_reuse_arp_entry ();
+         if (NULL == e)
+           return -2;
        }
+      else
+       pool_get (am->ip4_entry_pool, e);
+
+      if (NULL == arp_int->arp_entries)
+       arp_int->arp_entries = hash_create (0, sizeof (u32));
 
       hash_set (arp_int->arp_entries, a->ip4.as_u32, e - am->ip4_entry_pool);
 
@@ -802,6 +864,7 @@ typedef enum
   _ (l3_type_not_ip4, "L3 type not IP4")                               \
   _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
   _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
+  _ (l3_dst_address_unset, "IP4 destination address is unset")          \
   _ (l3_src_address_is_local, "IP4 source address matches local interface") \
   _ (l3_src_address_learned, "ARP request IP4 source address learned")  \
   _ (replies_received, "ARP replies received")                         \
@@ -820,38 +883,6 @@ typedef enum
     ETHERNET_ARP_N_ERROR,
 } ethernet_arp_input_error_t;
 
-
-static void
-unset_random_arp_entry (void)
-{
-  ethernet_arp_main_t *am = &ethernet_arp_main;
-  ethernet_arp_ip4_entry_t *e;
-  vnet_main_t *vnm = vnet_get_main ();
-  ethernet_arp_ip4_over_ethernet_address_t delme;
-  u32 index;
-
-  index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
-  am->arp_delete_rotor = index;
-
-  /* Try again from elt 0, could happen if an intfc goes down */
-  if (index == ~0)
-    {
-      index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
-      am->arp_delete_rotor = index;
-    }
-
-  /* Nothing left in the pool */
-  if (index == ~0)
-    return;
-
-  e = pool_elt_at_index (am->ip4_entry_pool, index);
-
-  clib_memcpy (&delme.ethernet, e->ethernet_address, 6);
-  delme.ip4.as_u32 = e->ip4_address.as_u32;
-
-  vnet_arp_unset_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
-}
-
 static int
 arp_unnumbered (vlib_buffer_t * p0,
                u32 input_sw_if_index, u32 conn_sw_if_index)
@@ -881,10 +912,6 @@ static u32
 arp_learn (vnet_main_t * vnm,
           ethernet_arp_main_t * am, u32 sw_if_index, void *addr)
 {
-  if (am->limit_arp_cache_size &&
-      pool_elts (am->ip4_entry_pool) >= am->limit_arp_cache_size)
-    unset_random_arp_entry ();
-
   vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index, addr, 0, 0);
   return (ETHERNET_ARP_ERROR_l3_src_address_learned);
 }
@@ -954,6 +981,9 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
            (arp0->l3_type !=
             clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
             ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
+         error0 =
+           (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ?
+            ETHERNET_ARP_ERROR_l3_dst_address_unset : error0);
 
          sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
 
@@ -1084,7 +1114,23 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
              }
          }
 
-         if (!(FIB_ENTRY_FLAG_CONNECTED & dst_flags))
+         if (fib_entry_is_sourced (dst_fei, FIB_SOURCE_ADJ))
+           {
+             /*
+              * We matched an adj-fib on the source subnet (a /32 previously
+              * added as a result of ARP). If this request is a gratuitous
+              * ARP, then learn from it.
+              * The check for matching an adj-fib is to prevent hosts
+              * from spamming us with gratuitous ARPs that might otherwise
+              * blow our ARP cache.
+              */
+             if (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
+                 arp0->ip4_over_ethernet[1].ip4.as_u32)
+               error0 = arp_learn (vnm, am, sw_if_index0,
+                                   &arp0->ip4_over_ethernet[0]);
+             goto drop2;
+           }
+         else if (!(FIB_ENTRY_FLAG_CONNECTED & dst_flags))
            {
              error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
              goto drop1;
@@ -1126,11 +1172,17 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
          /* Learn or update sender's mapping only for replies to addresses
           * that are local to the subnet */
          if (arp0->opcode ==
-             clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply) &&
-             dst_is_local0)
+             clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
            {
-             error0 = arp_learn (vnm, am, sw_if_index0,
-                                 &arp0->ip4_over_ethernet[0]);
+             if (dst_is_local0)
+               error0 = arp_learn (vnm, am, sw_if_index0,
+                                   &arp0->ip4_over_ethernet[0]);
+             else
+               /* a reply for a non-local destination could be a GARP.
+                * GARPs for hosts we know were handled above, so this one
+                * we drop */
+               error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
+
              goto drop1;
            }
          else if (arp0->opcode ==
@@ -1201,9 +1253,8 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
          continue;
 
        drop1:
-         if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
-             (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
-              arp0->ip4_over_ethernet[1].ip4.as_u32))
+         if (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
+             arp0->ip4_over_ethernet[1].ip4.as_u32)
            {
              error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
              goto drop2;
@@ -1641,30 +1692,6 @@ arp_add_del_interface_address (ip4_main_t * im,
     }
 }
 
-void
-arp_adj_fib_remove (ethernet_arp_ip4_entry_t * e, u32 fib_index)
-{
-  if (FIB_NODE_INDEX_INVALID != e->fib_entry_index)
-    {
-      fib_prefix_t pfx = {
-       .fp_len = 32,
-       .fp_proto = FIB_PROTOCOL_IP4,
-       .fp_addr.ip4 = e->ip4_address,
-      };
-      u32 fib_index;
-
-      fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
-
-      fib_table_entry_path_remove (fib_index, &pfx,
-                                  FIB_SOURCE_ADJ,
-                                  DPO_PROTO_IP4,
-                                  &pfx.fp_addr,
-                                  e->sw_if_index, ~0, 1,
-                                  FIB_ROUTE_PATH_FLAG_NONE);
-      fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ);
-    }
-}
-
 static void
 arp_table_bind (ip4_main_t * im,
                uword opaque,
@@ -1761,9 +1788,8 @@ arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e)
 {
   ethernet_arp_main_t *am = &ethernet_arp_main;
 
-  arp_adj_fib_remove (e,
-                     ip4_fib_table_get_index_for_sw_if_index
-                     (e->sw_if_index));
+  arp_adj_fib_remove
+    (e, ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index));
   hash_unset (eai->arp_entries, e->ip4_address.as_u32);
   pool_put (am->ip4_entry_pool, e);
 }
@@ -1786,10 +1812,9 @@ vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
 
   if (NULL != e)
     {
-      arp_entry_free (eai, e);
-
       adj_nbr_walk_nh4 (e->sw_if_index,
                        &e->ip4_address, arp_mk_incomplete_walk, NULL);
+      arp_entry_free (eai, e);
     }
 
   return 0;
@@ -2526,22 +2551,22 @@ ethernet_arp_change_mac (u32 sw_if_index)
 }
 
 void
-send_ip4_garp (vlib_main_t * vm, const vnet_hw_interface_t * hi)
+send_ip4_garp (vlib_main_t * vm, u32 sw_if_index)
 {
   ip4_main_t *i4m = &ip4_main;
-  ip4_address_t *ip4_addr =
-    ip4_interface_first_address (i4m, hi->sw_if_index, 0);
+  ip4_address_t *ip4_addr = ip4_interface_first_address (i4m, sw_if_index, 0);
 
-  send_ip4_garp_w_addr (vm, ip4_addr, hi);
+  send_ip4_garp_w_addr (vm, ip4_addr, sw_if_index);
 }
 
 void
 send_ip4_garp_w_addr (vlib_main_t * vm,
-                     const ip4_address_t * ip4_addr,
-                     const vnet_hw_interface_t * hi)
+                     const ip4_address_t * ip4_addr, u32 sw_if_index)
 {
   ip4_main_t *i4m = &ip4_main;
-  u32 sw_if_index = hi->sw_if_index;
+  vnet_main_t *vnm = vnet_get_main ();
+  u8 *rewrite, rewrite_len;
+  vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
 
   if (ip4_addr)
     {
@@ -2563,11 +2588,14 @@ send_ip4_garp_w_addr (vlib_main_t * vm,
 
       /* Setup MAC header with ARP Etype and broadcast DMAC */
       vlib_buffer_t *b = vlib_get_buffer (vm, bi);
-      vlib_buffer_advance (b, -sizeof (ethernet_header_t));
+      rewrite =
+       ethernet_build_rewrite (vnm, sw_if_index, VNET_LINK_ARP,
+                               VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST);
+      rewrite_len = vec_len (rewrite);
+      vlib_buffer_advance (b, -rewrite_len);
       ethernet_header_t *e = vlib_buffer_get_current (b);
-      e->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
-      clib_memcpy (e->src_address, hi->hw_address, sizeof (e->src_address));
-      memset (e->dst_address, 0xff, sizeof (e->dst_address));
+      clib_memcpy (e->dst_address, rewrite, rewrite_len);
+      vec_free (rewrite);
 
       /* Send GARP packet out the specified interface */
       vnet_buffer (b)->sw_if_index[VLIB_RX] =