vcl: RX event may lost when accept session repeatedly
[vpp.git] / src / vnet / ethernet / arp.c
index 724cfa8..6b1069e 100644 (file)
@@ -29,6 +29,8 @@
 #include <vnet/mpls/mpls.h>
 #include <vnet/l2/feat_bitmap.h>
 
+#include <vlibmemory/api.h>
+
 /**
  * @file
  * @brief IPv4 ARP.
@@ -38,8 +40,6 @@
  */
 
 
-void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
-
 /**
  * @brief Per-interface ARP configuration and state
  */
@@ -50,6 +50,14 @@ typedef struct ethernet_arp_interface_t_
    * Since this hash table is per-interface, the key is only the IPv4 address.
    */
   uword *arp_entries;
+  /**
+   * Is ARP enabled on this interface
+   */
+  u32 enabled;
+  /**
+   * Is Proxy ARP enabled on this interface
+   */
+  u32 proxy_enabled;
 } ethernet_arp_interface_t;
 
 typedef struct
@@ -97,6 +105,9 @@ typedef struct
 
   uword wc_ip4_arp_publisher_node;
   uword wc_ip4_arp_publisher_et;
+
+  /* ARP feature arc index */
+  u8 feature_arc_index;
 } ethernet_arp_main_t;
 
 static ethernet_arp_main_t ethernet_arp_main;
@@ -253,7 +264,6 @@ format_ethernet_arp_ip4_entry (u8 * s, va_list * va)
   vnet_main_t *vnm = va_arg (*va, vnet_main_t *);
   ethernet_arp_ip4_entry_t *e = va_arg (*va, ethernet_arp_ip4_entry_t *);
   vnet_sw_interface_t *si;
-  u8 *flags = 0;
 
   if (!e)
     return format (s, "%=12s%=16s%=6s%=20s%=24s", "Time", "IP4",
@@ -261,24 +271,12 @@ format_ethernet_arp_ip4_entry (u8 * s, va_list * va)
 
   si = vnet_get_sw_interface (vnm, e->sw_if_index);
 
-  if (e->flags & IP_NEIGHBOR_FLAG_STATIC)
-    flags = format (flags, "S");
-
-  if (e->flags & IP_NEIGHBOR_FLAG_DYNAMIC)
-    flags = format (flags, "D");
-
-  if (e->flags & IP_NEIGHBOR_FLAG_NO_FIB_ENTRY)
-    flags = format (flags, "N");
-
-  s = format (s, "%=12U%=16U%=6s%=20U%U",
-             format_vlib_time, vnm->vlib_main, e->time_last_updated,
-             format_ip4_address, &e->ip4_address,
-             flags ? (char *) flags : "",
-             format_mac_address_t, &e->mac,
-             format_vnet_sw_interface_name, vnm, si);
-
-  vec_free (flags);
-  return s;
+  return format (s, "%=12U%=16U%=6U%=20U%U",
+                format_vlib_time, vnm->vlib_main, e->time_last_updated,
+                format_ip4_address, &e->ip4_address,
+                format_ip_neighbor_flags, e->flags,
+                format_mac_address_t, &e->mac,
+                format_vnet_sw_interface_name, vnm, si);
 }
 
 typedef struct
@@ -442,6 +440,81 @@ arp_mk_incomplete_walk (adj_index_t ai, void *ctx)
   return (ADJ_WALK_RC_CONTINUE);
 }
 
+/**
+ * @brief Report whether ARP is enabled on an interface.
+ *
+ * An interface whose index lies beyond the end of the per-interface
+ * vector has no ARP state and is reported as not enabled.
+ *
+ * @param am          ARP main
+ * @param sw_if_index interface to query
+ * @return the interface's 'enabled' field, or 0 when it has no state
+ */
+static int
+arp_is_enabled (ethernet_arp_main_t * am, u32 sw_if_index)
+{
+  return (sw_if_index < vec_len (am->ethernet_arp_by_sw_if_index) ?
+         am->ethernet_arp_by_sw_if_index[sw_if_index].enabled : 0);
+}
+
+/**
+ * @brief Enable ARP on an interface; a no-op when it is already enabled.
+ *
+ * @param am          ARP main
+ * @param sw_if_index interface to enable ARP on
+ */
+static void
+arp_enable (ethernet_arp_main_t * am, u32 sw_if_index)
+{
+  if (!arp_is_enabled (am, sw_if_index))
+    {
+      /* make sure per-interface state exists, then mark it enabled */
+      vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
+      am->ethernet_arp_by_sw_if_index[sw_if_index].enabled = 1;
+
+      /* on the "arp" arc: switch "arp-reply" on and "arp-disabled" off */
+      vnet_feature_enable_disable ("arp", "arp-reply", sw_if_index, 1,
+                                  NULL, 0);
+      vnet_feature_enable_disable ("arp", "arp-disabled", sw_if_index, 0,
+                                  NULL, 0);
+    }
+}
+
+static int
+vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
+                                          vnet_arp_set_ip4_over_ethernet_rpc_args_t
+                                          * args);
+
+/**
+ * @brief Disable ARP on an interface and flush the entries it holds.
+ *
+ * A no-op when ARP is not enabled on the interface. Otherwise the
+ * "arp-disabled" feature is switched on and "arp-reply" switched off,
+ * every entry in the interface's ARP hash is flushed, and the interface
+ * is marked as not enabled.
+ *
+ * @param am          ARP main
+ * @param sw_if_index interface to disable ARP on
+ */
+static void
+arp_disable (ethernet_arp_main_t * am, u32 sw_if_index)
+{
+  ethernet_arp_interface_t *eai;
+  ethernet_arp_ip4_entry_t *e;
+  u32 i, *to_delete = 0;
+  hash_pair_t *pair;
+
+  if (!arp_is_enabled (am, sw_if_index))
+    return;
+
+  vnet_feature_enable_disable ("arp", "arp-disabled", sw_if_index, 1, NULL,
+                              0);
+  vnet_feature_enable_disable ("arp", "arp-reply", sw_if_index, 0, NULL, 0);
+
+  eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
+
+
+  /*
+   * First pass: only record the pool index of each entry.
+   * NOTE(review): presumably the flush below modifies eai->arp_entries,
+   * which is why entries are not flushed while walking the hash - confirm.
+   */
+  /* *INDENT-OFF* */
+  hash_foreach_pair (pair, eai->arp_entries,
+  ({
+    e = pool_elt_at_index(am->ip4_entry_pool,
+                          pair->value[0]);
+    vec_add1 (to_delete, e - am->ip4_entry_pool);
+  }));
+  /* *INDENT-ON* */
+
+  /* Second pass: flush each recorded entry */
+  for (i = 0; i < vec_len (to_delete); i++)
+    {
+      e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]);
+
+      /* describe the entry to flush by its address, interface and MAC */
+      vnet_arp_set_ip4_over_ethernet_rpc_args_t delme = {
+       .ip4.as_u32 = e->ip4_address.as_u32,
+       .sw_if_index = e->sw_if_index,
+       .flags = ETHERNET_ARP_ARGS_FLUSH,
+      };
+      mac_address_copy (&delme.mac, &e->mac);
+
+      vnet_arp_flush_ip4_over_ethernet_internal (vnet_get_main (), &delme);
+    }
+
+  vec_free (to_delete);
+
+  /* cleared last, after all entries have been flushed */
+  eai->enabled = 0;
+}
+
 void
 arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
 {
@@ -452,7 +525,7 @@ arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
 
   adj = adj_get (ai);
 
-  vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
+  arp_enable (am, sw_if_index);
   arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
   e = arp_entry_find (arp_int, &adj->sub_type.nbr.next_hop.ip4);
 
@@ -520,7 +593,7 @@ arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
         * Complete the remaining fields of the adj's rewrite to direct the
         * complete of the rewrite at switch time by copying in the IP
         * dst address's bytes.
-        * Ofset is 2 bytes into the MAC desintation address.
+        * Offset is 2 bytes into the MAC destination address.
         */
        adj_mcast_update_rewrite (ai, rewrite, offset);
 
@@ -627,7 +700,7 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
   ethernet_arp_interface_t *arp_int;
   u32 sw_if_index = args->sw_if_index;
 
-  vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
+  arp_enable (am, sw_if_index);
 
   arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
 
@@ -871,10 +944,10 @@ vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm,
 /* Either we drop the packet or we send a reply to the sender. */
 typedef enum
 {
-  ARP_INPUT_NEXT_DROP,
-  ARP_INPUT_NEXT_REPLY_TX,
-  ARP_INPUT_N_NEXT,
-} arp_input_next_t;
+  ARP_REPLY_NEXT_DROP,
+  ARP_REPLY_NEXT_REPLY_TX,
+  ARP_REPLY_N_NEXT,
+} arp_reply_next_t;
 
 #define foreach_ethernet_arp_error                                     \
   _ (replies_sent, "ARP replies sent")                                 \
@@ -900,7 +973,7 @@ typedef enum
   foreach_ethernet_arp_error
 #undef _
     ETHERNET_ARP_N_ERROR,
-} ethernet_arp_input_error_t;
+} ethernet_arp_reply_error_t;
 
 static int
 arp_unnumbered (vlib_buffer_t * p0,
@@ -936,14 +1009,18 @@ arp_learn (vnet_main_t * vnm,
   return (ETHERNET_ARP_ERROR_l3_src_address_learned);
 }
 
+/**
+ * Next-node indices of the arp-input node; the node registration maps
+ * DROP to "error-drop" and DISABLED to "arp-disabled".
+ */
+typedef enum arp_input_next_t_
+{
+  ARP_INPUT_NEXT_DROP,
+  ARP_INPUT_NEXT_DISABLED,
+  ARP_INPUT_N_NEXT,
+} arp_input_next_t;
+
 static uword
 arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 {
+  u32 n_left_from, next_index, *from, *to_next, n_left_to_next;
   ethernet_arp_main_t *am = &ethernet_arp_main;
-  vnet_main_t *vnm = vnet_get_main ();
-  ip4_main_t *im4 = &ip4_main;
-  u32 n_left_from, next_index, *from, *to_next;
-  u32 n_replies_sent = 0, n_proxy_arp_replies_sent = 0;
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -956,43 +1033,26 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 
   while (n_left_from > 0)
     {
-      u32 n_left_to_next;
-
       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
 
       while (n_left_from > 0 && n_left_to_next > 0)
        {
+         const ethernet_arp_header_t *arp0;
+         arp_input_next_t next0;
          vlib_buffer_t *p0;
-         vnet_hw_interface_t *hw_if0;
-         ethernet_arp_header_t *arp0;
-         ethernet_header_t *eth_rx, *eth_tx;
-         const ip4_address_t *if_addr0;
-         ip4_address_t proxy_src;
-         u32 pi0, error0, next0, sw_if_index0, conn_sw_if_index0, fib_index0;
-         u8 is_request0, dst_is_local0, is_unnum0, is_vrrp_reply0;
-         ethernet_proxy_arp_t *pa;
-         fib_node_index_t dst_fei, src_fei;
-         const fib_prefix_t *pfx0;
-         fib_entry_flag_t src_flags, dst_flags;
-         u8 *rewrite0, rewrite0_len;
+         u32 pi0, error0;
 
-         pi0 = from[0];
-         to_next[0] = pi0;
+         pi0 = to_next[0] = from[0];
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;
-         pa = 0;
 
          p0 = vlib_get_buffer (vm, pi0);
          arp0 = vlib_buffer_get_current (p0);
-         /* Fill in ethernet header. */
-         eth_rx = ethernet_buffer_get_header (p0);
-
-         is_request0 = arp0->opcode
-           == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
 
          error0 = ETHERNET_ARP_ERROR_replies_sent;
+         next0 = ARP_INPUT_NEXT_DROP;
 
          error0 =
            (arp0->l2_type !=
@@ -1006,33 +1066,239 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
            (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ?
             ETHERNET_ARP_ERROR_l3_dst_address_unset : error0);
 
-         sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+         if (ETHERNET_ARP_ERROR_replies_sent == error0)
+           {
+             next0 = ARP_INPUT_NEXT_DISABLED;
+             vnet_feature_arc_start (am->feature_arc_index,
+                                     vnet_buffer (p0)->sw_if_index[VLIB_RX],
+                                     &next0, p0);
+           }
+         else
+           p0->error = node->errors[error0];
 
-         /* not playing the ARP game if the interface is not IPv4 enabled */
-         error0 =
-           (im4->ip_enabled_by_sw_if_index[sw_if_index0] == 0 ?
-            ETHERNET_ARP_ERROR_interface_not_ip_enabled : error0);
+         vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                          n_left_to_next, pi0, next0);
+       }
 
-         if (error0)
-           goto drop2;
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/**
+ * Next-node indices of the arp-disabled node; dropping is the only
+ * disposition.
+ */
+typedef enum arp_disabled_next_t_
+{
+  ARP_DISABLED_NEXT_DROP,
+  ARP_DISABLED_N_NEXT,
+} arp_disabled_next_t;
+
+/*
+ * Errors of the arp-disabled node, listed as _(symbol, description).
+ * The list is expanded below once into the error enum and once into the
+ * matching table of description strings.
+ */
+#define foreach_arp_disabled_error                                     \
+  _ (DISABLED, "ARP Disabled on this interface")                    \
+
+/* Error codes: ARP_DISABLED_ERROR_<symbol>, terminated by the count */
+typedef enum
+{
+#define _(sym,string) ARP_DISABLED_ERROR_##sym,
+  foreach_arp_disabled_error
+#undef _
+    ARP_DISABLED_N_ERROR,
+} arp_disabled_error_t;
+
+/* Description strings, in the same order as arp_disabled_error_t */
+static char *arp_disabled_error_strings[] = {
+#define _(sym,string) string,
+  foreach_arp_disabled_error
+#undef _
+};
+
+/**
+ * @brief Node function of "arp-disabled": drop every packet it is handed.
+ *
+ * Each buffer is tagged with the ARP_DISABLED_ERROR_DISABLED error and
+ * enqueued to the ARP_DISABLED_NEXT_DROP next node. When tracing is on,
+ * the buffers of the frame are traced before being processed.
+ *
+ * @param vm    vlib main
+ * @param node  this node's runtime
+ * @param frame frame of buffer indices to process
+ * @return the number of buffers in the frame
+ */
+static uword
+arp_disabled (vlib_main_t * vm,
+             vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 n_left_from, next_index, *from, *to_next, n_left_to_next;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+                                  /* stride */ 1,
+                                  sizeof (ethernet_arp_input_trace_t));
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         /* every packet gets the same disposition; set it exactly once */
+         arp_disabled_next_t next0 = ARP_DISABLED_NEXT_DROP;
+         u32 error0 = ARP_DISABLED_ERROR_DISABLED;
+         vlib_buffer_t *p0;
+         u32 pi0;
+
+         pi0 = to_next[0] = from[0];
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         p0 = vlib_get_buffer (vm, pi0);
+         p0->error = node->errors[error0];
+
+         vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                          n_left_to_next, pi0, next0);
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/**
+ * @brief Turn a received ARP packet into an ARP reply, in place.
+ *
+ * Builds an ethernet rewrite addressed to the source of the received
+ * frame, rewrites the ARP header as a reply that carries the interface's
+ * hardware address and the supplied IPv4 address, and directs the buffer
+ * out of the given interface.
+ *
+ * @param vnm          vnet main
+ * @param p0           buffer holding the ARP packet; its current data is
+ *                     rewound by the length of the rewrite
+ * @param sw_if_index0 interface the reply is sent on
+ * @param if_addr0     IPv4 address placed in the reply as the sender address
+ * @param arp0         ARP header to rewrite
+ * @param eth_rx       ethernet header of the received frame
+ * @return ARP_REPLY_NEXT_REPLY_TX
+ */
+static_always_inline u32
+arp_mk_reply (vnet_main_t * vnm,
+             vlib_buffer_t * p0,
+             u32 sw_if_index0,
+             const ip4_address_t * if_addr0,
+             ethernet_arp_header_t * arp0, ethernet_header_t * eth_rx)
+{
+  vnet_hw_interface_t *hw_if0;
+  u8 *rewrite0, rewrite0_len;
+  ethernet_header_t *eth_tx;
+  u32 next0;
+
+  /* Send a reply.
+     An adjacency to the sender is not always present,
+     so we use the interface to build us a rewrite string
+     which will contain all the necessary tags. */
+  rewrite0 = ethernet_build_rewrite (vnm, sw_if_index0,
+                                    VNET_LINK_ARP, eth_rx->src_address);
+  rewrite0_len = vec_len (rewrite0);
+
+  /* Rewind current data by the length of the rewrite string. */
+  vlib_buffer_advance (p0, -rewrite0_len);
+  eth_tx = vlib_buffer_get_current (p0);
+
+  hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+
+  /* Hardware must be ethernet-like; check before its address is copied. */
+  ASSERT (vec_len (hw_if0->hw_address) == 6);
+
+  /* Send reply back through input interface */
+  vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+  next0 = ARP_REPLY_NEXT_REPLY_TX;
+
+  arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
+
+  /* the requester (slot 0 of the request) becomes the target (slot 1) */
+  arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
+
+  /* the sender is now this interface's MAC and the supplied address */
+  mac_address_from_bytes (&arp0->ip4_over_ethernet[0].mac,
+                         hw_if0->hw_address);
+  clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) =
+    if_addr0->data_u32;
+
+  /* the rx and tx ethernet headers will overlap in the case
+   * when we received a tagged VLAN=0 packet, but we are sending
+   * back untagged */
+  clib_memcpy_fast (eth_tx, rewrite0, vec_len (rewrite0));
+  vec_free (rewrite0);
+
+  return (next0);
+}
+
+/*
+ * Classification returned by arp_dst_fib_check() for the FIB entry
+ * matching an ARP packet's destination address.
+ */
+enum arp_dst_fib_type
+{
+  ARP_DST_FIB_NONE,		/* no source qualified */
+  ARP_DST_FIB_ADJ,		/* the entry is sourced by FIB_SOURCE_ADJ */
+  ARP_DST_FIB_CONN		/* a source has FIB_ENTRY_FLAG_CONNECTED set */
+};
+
+/*
+ * we're looking for FIB sources that indicate the destination
+ * is attached. There may be interposed DPO prior to the one
+ * we are looking for
+ *
+ * Walks the sources added to the entry; for each one its flags are
+ * written to *flags, and the walk stops at the first source for which
+ * either the entry is sourced by FIB_SOURCE_ADJ (ARP_DST_FIB_ADJ) or the
+ * source's flags include FIB_ENTRY_FLAG_CONNECTED (ARP_DST_FIB_CONN).
+ * Returns ARP_DST_FIB_NONE when no source qualifies.
+ *
+ * NOTE(review): *flags is only written inside the walk, so it looks like
+ * it is left untouched when the entry has no added source - confirm
+ * callers do not read it in the ARP_DST_FIB_NONE case.
+ */
+static enum arp_dst_fib_type
+arp_dst_fib_check (const fib_node_index_t fei, fib_entry_flag_t * flags)
+{
+  const fib_entry_t *entry = fib_entry_get (fei);
+  const fib_entry_src_t *entry_src;
+  fib_source_t src;
+  /* *INDENT-OFF* */
+  FOR_EACH_SRC_ADDED(entry, entry_src, src,
+  ({
+    *flags = fib_entry_get_flags_for_source (fei, src);
+    if (fib_entry_is_sourced (fei, FIB_SOURCE_ADJ))
+        return ARP_DST_FIB_ADJ;
+      else if (FIB_ENTRY_FLAG_CONNECTED & *flags)
+        return ARP_DST_FIB_CONN;
+  }))
+  /* *INDENT-ON* */
+
+  return ARP_DST_FIB_NONE;
+}
+
+static uword
+arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 n_left_from, next_index, *from, *to_next;
+  u32 n_replies_sent = 0;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+                                  /* stride */ 1,
+                                  sizeof (ethernet_arp_input_trace_t));
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         vlib_buffer_t *p0;
+         ethernet_arp_header_t *arp0;
+         ethernet_header_t *eth_rx;
+         const ip4_address_t *if_addr0;
+         u32 pi0, error0, next0, sw_if_index0, conn_sw_if_index0, fib_index0;
+         u8 dst_is_local0, is_vrrp_reply0;
+         fib_node_index_t dst_fei, src_fei;
+         const fib_prefix_t *pfx0;
+         fib_entry_flag_t src_flags, dst_flags;
+
+         pi0 = from[0];
+         to_next[0] = pi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         p0 = vlib_get_buffer (vm, pi0);
+         arp0 = vlib_buffer_get_current (p0);
+         /* Fill in ethernet header. */
+         eth_rx = ethernet_buffer_get_header (p0);
+
+         next0 = ARP_REPLY_NEXT_DROP;
+         error0 = ETHERNET_ARP_ERROR_replies_sent;
+         sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
 
          /* Check that IP address is local and matches incoming interface. */
          fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
          if (~0 == fib_index0)
            {
              error0 = ETHERNET_ARP_ERROR_interface_no_table;
-             goto drop2;
+             goto drop;
 
            }
-         dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
-                                         &arp0->ip4_over_ethernet[1].ip4,
-                                         32);
-         dst_flags = fib_entry_get_flags (dst_fei);
-
-         conn_sw_if_index0 = fib_entry_get_resolving_interface (dst_fei);
-
-         /* Honor unnumbered interface, if any */
-         is_unnum0 = sw_if_index0 != conn_sw_if_index0;
 
          {
            /*
@@ -1086,7 +1352,7 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
                        * So don't drop immediately here, instead go see if this
                        * is a proxy ARP case.
                        */
-                      goto drop1;
+                      goto next_feature;
                     }
                   /* A Source must also be local to subnet of matching
                    * interface address. */
@@ -1131,12 +1397,16 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
                 * (i.e. a /32)
                 */
                error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
-               goto drop2;
+               goto drop;
              }
          }
 
-         if (fib_entry_is_sourced (dst_fei, FIB_SOURCE_ADJ))
+         dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
+                                         &arp0->ip4_over_ethernet[1].ip4,
+                                         32);
+         switch (arp_dst_fib_check (dst_fei, &dst_flags))
            {
+           case ARP_DST_FIB_ADJ:
              /*
               * We matched an adj-fib on ths source subnet (a /32 previously
               * added as a result of ARP). If this request is a gratuitous
@@ -1149,22 +1419,14 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
                  arp0->ip4_over_ethernet[1].ip4.as_u32)
                error0 = arp_learn (vnm, am, sw_if_index0,
                                    &arp0->ip4_over_ethernet[0]);
-             goto drop2;
-           }
-         else if (!(FIB_ENTRY_FLAG_CONNECTED & dst_flags))
-           {
+             goto drop;
+           case ARP_DST_FIB_CONN:
+             /* destination is connected, continue to process */
+             break;
+           case ARP_DST_FIB_NONE:
+             /* destination is not connected, stop here */
              error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
-             goto drop1;
-           }
-
-         if (sw_if_index0 != fib_entry_get_resolving_interface (src_fei))
-           {
-             /*
-              * The interface the ARP was received on is not the interface
-              * on which the covering prefix is configured. Maybe this is a
-              * case for unnumbered.
-              */
-             is_unnum0 = 1;
+             goto next_feature;
            }
 
          dst_is_local0 = (FIB_ENTRY_FLAG_LOCAL & dst_flags);
@@ -1187,7 +1449,7 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
               arp0->ip4_over_ethernet[0].mac.bytes) && !is_vrrp_reply0)
            {
              error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
-             goto drop2;
+             goto drop;
            }
 
          /* Learn or update sender's mapping only for replies to addresses
@@ -1204,65 +1466,40 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
                 * we drop */
                error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
 
-             goto drop1;
+             goto next_feature;
            }
          else if (arp0->opcode ==
                   clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request) &&
                   (dst_is_local0 == 0))
            {
-             goto drop1;
+             goto next_feature;
            }
 
-       send_reply:
-         /* Send a reply.
-            An adjacency to the sender is not always present,
-            so we use the interface to build us a rewrite string
-            which will contain all the necessary tags. */
-         rewrite0 = ethernet_build_rewrite (vnm, sw_if_index0,
-                                            VNET_LINK_ARP,
-                                            eth_rx->src_address);
-         rewrite0_len = vec_len (rewrite0);
-
-         /* Figure out how much to rewind current data from adjacency. */
-         vlib_buffer_advance (p0, -rewrite0_len);
-         eth_tx = vlib_buffer_get_current (p0);
-
-         vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
-         hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
-
-         /* Send reply back through input interface */
-         vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
-         next0 = ARP_INPUT_NEXT_REPLY_TX;
-
-         arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
-
-         arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
-
-         mac_address_from_bytes (&arp0->ip4_over_ethernet[0].mac,
-                                 hw_if0->hw_address);
-         clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) =
-           if_addr0->data_u32;
-
-         /* Hardware must be ethernet-like. */
-         ASSERT (vec_len (hw_if0->hw_address) == 6);
-
-         /* the rx nd tx ethernet headers wil overlap in the case
-          * when we received a tagged VLAN=0 packet, but we are sending
-          * back untagged */
-         clib_memcpy_fast (eth_tx, rewrite0, vec_len (rewrite0));
-         vec_free (rewrite0);
-
-         if (NULL == pa)
+         /* Honor unnumbered interface, if any */
+         conn_sw_if_index0 = fib_entry_get_resolving_interface (dst_fei);
+         if (sw_if_index0 != conn_sw_if_index0 ||
+             sw_if_index0 != fib_entry_get_resolving_interface (src_fei))
            {
-             if (is_unnum0)
+             /*
+              * The interface the ARP is sent to or was received on is not the
+              * interface on which the covering prefix is configured.
+              * Maybe this is a case for unnumbered.
+              */
+             if (!arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0))
                {
-                 if (!arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0))
-                   {
-                     error0 = ETHERNET_ARP_ERROR_unnumbered_mismatch;
-                     goto drop2;
-                   }
+                 error0 = ETHERNET_ARP_ERROR_unnumbered_mismatch;
+                 goto drop;
                }
            }
+         if (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
+             arp0->ip4_over_ethernet[1].ip4.as_u32)
+           {
+             error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
+             goto drop;
+           }
+
+         next0 = arp_mk_reply (vnm, p0, sw_if_index0,
+                               if_addr0, arp0, eth_rx);
 
          /* We are going to reply to this request, so, in the absence of
             errors, learn the sender */
@@ -1270,34 +1507,92 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
            error0 = arp_learn (vnm, am, sw_if_index0,
                                &arp0->ip4_over_ethernet[1]);
 
+         n_replies_sent += 1;
+         goto enqueue;
+
+       next_feature:
+         vnet_feature_next (&next0, p0);
+         goto enqueue;
+
+       drop:
+         p0->error = node->errors[error0];
+
+       enqueue:
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                           n_left_to_next, pi0, next0);
+       }
 
-         n_replies_sent += 1;
-         continue;
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
 
-       drop1:
-         if (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
-             arp0->ip4_over_ethernet[1].ip4.as_u32)
+  vlib_error_count (vm, node->node_index,
+                   ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);
+
+  return frame->n_vectors;
+}
+
+static uword
+arp_proxy (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 n_left_from, next_index, *from, *to_next;
+  u32 n_arp_replies_sent = 0;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+                                  /* stride */ 1,
+                                  sizeof (ethernet_arp_input_trace_t));
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         vlib_buffer_t *p0;
+         ethernet_arp_header_t *arp0;
+         ethernet_header_t *eth_rx;
+         ip4_address_t proxy_src;
+         u32 pi0, error0, next0, sw_if_index0, fib_index0;
+         u8 is_request0;
+         ethernet_proxy_arp_t *pa;
+
+         pi0 = from[0];
+         to_next[0] = pi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         p0 = vlib_get_buffer (vm, pi0);
+         arp0 = vlib_buffer_get_current (p0);
+         /* Fill in ethernet header. */
+         eth_rx = ethernet_buffer_get_header (p0);
+
+         is_request0 = arp0->opcode
+           == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
+
+         error0 = ETHERNET_ARP_ERROR_replies_sent;
+         sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+         next0 = ARP_REPLY_NEXT_DROP;
+
+         fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
+         if (~0 == fib_index0)
            {
-             error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
-             goto drop2;
+             error0 = ETHERNET_ARP_ERROR_interface_no_table;
            }
-         /* See if proxy arp is configured for the address */
-         if (is_request0)
+
+         if (0 == error0 && is_request0)
            {
-             vnet_sw_interface_t *si;
              u32 this_addr = clib_net_to_host_u32
                (arp0->ip4_over_ethernet[1].ip4.as_u32);
-             u32 fib_index0;
-
-             si = vnet_get_sw_interface (vnm, sw_if_index0);
-
-             if (!(si->flags & VNET_SW_INTERFACE_FLAG_PROXY_ARP))
-               goto drop2;
-
-             fib_index0 = vec_elt (im4->fib_index_by_sw_if_index,
-                                   sw_if_index0);
 
              vec_foreach (pa, am->proxy_arps)
              {
@@ -1314,18 +1609,18 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
                    /*
                     * change the interface address to the proxied
                     */
-                   if_addr0 = &proxy_src;
-                   is_unnum0 = 0;
-                   n_proxy_arp_replies_sent++;
-                   goto send_reply;
+                   n_arp_replies_sent++;
+
+                   next0 =
+                     arp_mk_reply (vnm, p0, sw_if_index0, &proxy_src, arp0,
+                                   eth_rx);
                  }
              }
            }
-
-       drop2:
-
-         next0 = ARP_INPUT_NEXT_DROP;
-         p0->error = node->errors[error0];
+         else
+           {
+             p0->error = node->errors[error0];
+           }
 
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                           n_left_to_next, pi0, next0);
@@ -1335,12 +1630,8 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
     }
 
   vlib_error_count (vm, node->node_index,
-                   ETHERNET_ARP_ERROR_replies_sent,
-                   n_replies_sent - n_proxy_arp_replies_sent);
+                   ETHERNET_ARP_ERROR_replies_sent, n_arp_replies_sent);
 
-  vlib_error_count (vm, node->node_index,
-                   ETHERNET_ARP_ERROR_proxy_arp_replies_sent,
-                   n_proxy_arp_replies_sent);
   return frame->n_vectors;
 }
 
@@ -1351,6 +1642,7 @@ static char *ethernet_arp_error_strings[] = {
 };
 
 /* *INDENT-OFF* */
+
 VLIB_REGISTER_NODE (arp_input_node, static) =
 {
   .function = arp_input,
@@ -1361,11 +1653,97 @@ VLIB_REGISTER_NODE (arp_input_node, static) =
   .n_next_nodes = ARP_INPUT_N_NEXT,
   .next_nodes = {
     [ARP_INPUT_NEXT_DROP] = "error-drop",
-    [ARP_INPUT_NEXT_REPLY_TX] = "interface-output",
+    [ARP_INPUT_NEXT_DISABLED] = "arp-disabled",
+  },
+  .format_buffer = format_ethernet_arp_header,
+  .format_trace = format_ethernet_arp_input_trace,
+};
+
+/* Node that drops ARP packets, counting them as ARP-disabled errors */
+VLIB_REGISTER_NODE (arp_disabled_node, static) =
+{
+  .function = arp_disabled,
+  .name = "arp-disabled",
+  .vector_size = sizeof (u32),
+  .n_errors = ARP_DISABLED_N_ERROR,
+  .error_strings = arp_disabled_error_strings,
+  .n_next_nodes = ARP_DISABLED_N_NEXT,
+  .next_nodes = {
+    /* indexed with this node's own next enum, matching .n_next_nodes */
+    [ARP_DISABLED_NEXT_DROP] = "error-drop",
+  },
+  .format_buffer = format_ethernet_arp_header,
+  .format_trace = format_ethernet_arp_input_trace,
+};
+
+/*
+ * "arp-reply" node: runs arp_reply; packets are either dropped or sent
+ * to "interface-output". Shares the ethernet ARP error strings.
+ */
+VLIB_REGISTER_NODE (arp_reply_node, static) =
+{
+  .function = arp_reply,
+  .name = "arp-reply",
+  .vector_size = sizeof (u32),
+  .n_errors = ETHERNET_ARP_N_ERROR,
+  .error_strings = ethernet_arp_error_strings,
+  .n_next_nodes = ARP_REPLY_N_NEXT,
+  .next_nodes = {
+    [ARP_REPLY_NEXT_DROP] = "error-drop",
+    [ARP_REPLY_NEXT_REPLY_TX] = "interface-output",
   },
   .format_buffer = format_ethernet_arp_header,
   .format_trace = format_ethernet_arp_input_trace,
 };
+
+/*
+ * "arp-proxy" node: runs arp_proxy. It reuses the arp-reply next-node
+ * enum and the ethernet ARP error strings.
+ */
+VLIB_REGISTER_NODE (arp_proxy_node, static) =
+{
+  .function = arp_proxy,
+  .name = "arp-proxy",
+  .vector_size = sizeof (u32),
+  .n_errors = ETHERNET_ARP_N_ERROR,
+  .error_strings = ethernet_arp_error_strings,
+  .n_next_nodes = ARP_REPLY_N_NEXT,
+  .next_nodes = {
+    [ARP_REPLY_NEXT_DROP] = "error-drop",
+    [ARP_REPLY_NEXT_REPLY_TX] = "interface-output",
+  },
+  .format_buffer = format_ethernet_arp_header,
+  .format_trace = format_ethernet_arp_input_trace,
+};
+
+/*
+ * Built-in ARP rx feature path definition.
+ * The "arp" arc starts at the "arp-input" node and ends at "error-drop";
+ * its index is stored in ethernet_arp_main.feature_arc_index, which
+ * arp_input passes to vnet_feature_arc_start().
+ */
+VNET_FEATURE_ARC_INIT (arp_feat, static) =
+{
+  .arc_name = "arp",
+  .start_nodes = VNET_FEATURES ("arp-input"),
+  .last_in_arc = "error-drop",
+  .arc_index_ptr = &ethernet_arp_main.feature_arc_index,
+};
+
+/* "arp-reply" on the "arp" arc, ordered ahead of "arp-disabled" */
+VNET_FEATURE_INIT (arp_reply_feat_node, static) =
+{
+  .arc_name = "arp",
+  .node_name = "arp-reply",
+  .runs_before = VNET_FEATURES ("arp-disabled"),
+};
+
+/* "arp-proxy" on the "arp" arc, between "arp-reply" and "arp-disabled" */
+VNET_FEATURE_INIT (arp_proxy_feat_node, static) =
+{
+  .arc_name = "arp",
+  .node_name = "arp-proxy",
+  .runs_after = VNET_FEATURES ("arp-reply"),
+  .runs_before = VNET_FEATURES ("arp-disabled"),
+};
+
+/* "arp-disabled" on the "arp" arc, ordered ahead of "error-drop" */
+VNET_FEATURE_INIT (arp_disabled_feat_node, static) =
+{
+  .arc_name = "arp",
+  .node_name = "arp-disabled",
+  .runs_before = VNET_FEATURES ("error-drop"),
+};
+
+/* "error-drop" terminates the "arp" arc (it is the arc's last_in_arc) */
+VNET_FEATURE_INIT (arp_drop_feat_node, static) =
+{
+  .arc_name = "arp",
+  .node_name = "error-drop",
+  .runs_before = 0,    /* last feature */
+};
+
 /* *INDENT-ON* */
 
 static int
@@ -1570,7 +1948,7 @@ vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
 
 /**
  * @brief publish wildcard arp event
- * @param sw_if_index The interface on which the ARP entires are acted
+ * @param sw_if_index The interface on which the ARP entries are acted
  */
 static int
 vnet_arp_wc_publish (u32 sw_if_index,
@@ -1658,6 +2036,23 @@ vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
   return (0);
 }
 
+/*
+ * arp_enable_disable_interface
+ *
+ * callback that enables or disables ARP on an interface
+ */
+static void
+arp_enable_disable_interface (ip4_main_t * im,
+                             uword opaque, u32 sw_if_index, u32 is_enable)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+
+  if (is_enable)
+    arp_enable (am, sw_if_index);
+  else
+    arp_disable (am, sw_if_index);
+}
+
 /*
  * arp_add_del_interface_address
  *
@@ -1761,12 +2156,8 @@ ethernet_arp_init (vlib_main_t * vm)
 {
   ethernet_arp_main_t *am = &ethernet_arp_main;
   ip4_main_t *im = &ip4_main;
-  clib_error_t *error;
   pg_node_t *pn;
 
-  if ((error = vlib_call_init_function (vm, ethernet_init)))
-    return error;
-
   ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index);
 
   pn = pg_get_node (arp_input_node.index);
@@ -1802,6 +2193,11 @@ ethernet_arp_init (vlib_main_t * vm)
   cb.function_opaque = 0;
   vec_add1 (im->add_del_interface_address_callbacks, cb);
 
+  ip4_enable_disable_interface_callback_t cbe;
+  cbe.function = arp_enable_disable_interface;
+  cbe.function_opaque = 0;
+  vec_add1 (im->enable_disable_interface_callbacks, cbe);
+
   ip4_table_bind_callback_t cbt;
   cbt.function = arp_table_bind;
   cbt.function_opaque = 0;
@@ -1809,8 +2205,12 @@ ethernet_arp_init (vlib_main_t * vm)
 
   return 0;
 }
-
-VLIB_INIT_FUNCTION (ethernet_arp_init);
+/* *INDENT-OFF* */
+VLIB_INIT_FUNCTION (ethernet_arp_init) =
+{
+  .runs_after = VLIB_INITS("ethernet_init"),
+};
+/* *INDENT-ON* */
 
 static void
 arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e)
@@ -1859,7 +2259,7 @@ vnet_arp_populate_ip4_over_ethernet_internal (vnet_main_t * vnm,
   ethernet_arp_ip4_entry_t *e;
   ethernet_arp_interface_t *eai;
 
-  vec_validate (am->ethernet_arp_by_sw_if_index, args->sw_if_index);
+  arp_enable (am, args->sw_if_index);
   eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
 
   e = arp_entry_find (eai, &args->ip4);
@@ -1994,6 +2394,38 @@ proxy_arp_walk (proxy_arp_walk_t cb, void *data)
   }
 }
 
+int
+vnet_proxy_arp_enable_disable (vnet_main_t * vnm, u32 sw_if_index, u8 enable)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ethernet_arp_interface_t *eai;
+
+  vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
+
+  eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
+
+  if (enable)
+    {
+      if (!eai->proxy_enabled)
+       {
+         vnet_feature_enable_disable ("arp", "arp-proxy",
+                                      sw_if_index, 1, NULL, 0);
+       }
+      eai->proxy_enabled = 1;
+    }
+  else
+    {
+      if (eai->proxy_enabled)
+       {
+         vnet_feature_enable_disable ("arp", "arp-proxy",
+                                      sw_if_index, 0, NULL, 0);
+       }
+      eai->proxy_enabled = 0;
+    }
+
+  return (0);
+}
+
 int
 vnet_proxy_arp_add_del (ip4_address_t * lo_addr,
                        ip4_address_t * hi_addr, u32 fib_index, int is_del)
@@ -2031,39 +2463,17 @@ vnet_proxy_arp_add_del (ip4_address_t * lo_addr,
   return 0;
 }
 
-/*
- * Remove any proxy arp entries asdociated with the
- * specificed fib.
- */
-int
-vnet_proxy_arp_fib_reset (u32 fib_id)
+void
+proxy_arp_intfc_walk (proxy_arp_intf_walk_t cb, void *data)
 {
   ethernet_arp_main_t *am = &ethernet_arp_main;
-  ethernet_proxy_arp_t *pa;
-  u32 *entries_to_delete = 0;
-  u32 fib_index;
-  int i;
-
-  fib_index = fib_table_find (FIB_PROTOCOL_IP4, fib_id);
-  if (~0 == fib_index)
-    return VNET_API_ERROR_NO_SUCH_ENTRY;
+  ethernet_arp_interface_t *eai;
 
-  vec_foreach (pa, am->proxy_arps)
+  vec_foreach (eai, am->ethernet_arp_by_sw_if_index)
   {
-    if (pa->fib_index == fib_index)
-      {
-       vec_add1 (entries_to_delete, pa - am->proxy_arps);
-      }
+    if (eai->proxy_enabled)
+      cb (eai - am->ethernet_arp_by_sw_if_index, data);
   }
-
-  for (i = 0; i < vec_len (entries_to_delete); i++)
-    {
-      vec_delete (am->proxy_arps, 1, entries_to_delete[i]);
-    }
-
-  vec_free (entries_to_delete);
-
-  return 0;
 }
 
 static clib_error_t *
@@ -2209,15 +2619,15 @@ set_int_proxy_arp_command_fn (vlib_main_t * vm,
 {
   vnet_main_t *vnm = vnet_get_main ();
   u32 sw_if_index;
-  vnet_sw_interface_t *si;
   int enable = 0;
-  int intfc_set = 0;
+
+  sw_if_index = ~0;
 
   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
     {
       if (unformat (input, "%U", unformat_vnet_sw_interface,
                    vnm, &sw_if_index))
-       intfc_set = 1;
+       ;
       else if (unformat (input, "enable") || unformat (input, "on"))
        enable = 1;
       else if (unformat (input, "disable") || unformat (input, "off"))
@@ -2226,16 +2636,11 @@ set_int_proxy_arp_command_fn (vlib_main_t * vm,
        break;
     }
 
-  if (intfc_set == 0)
+  if (~0 == sw_if_index)
     return clib_error_return (0, "unknown input '%U'",
                              format_unformat_error, input);
 
-  si = vnet_get_sw_interface (vnm, sw_if_index);
-  ASSERT (si);
-  if (enable)
-    si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP;
-  else
-    si->flags &= ~VNET_SW_INTERFACE_FLAG_PROXY_ARP;
+  vnet_proxy_arp_enable_disable (vnm, sw_if_index, enable);
 
   return 0;
 }
@@ -2614,14 +3019,15 @@ send_ip4_garp_w_addr (vlib_main_t * vm,
 }
 
 /*
- * Remove any arp entries asociated with the specificed interface
+ * Remove any arp entries associated with the specified interface
  */
 static clib_error_t *
 vnet_arp_delete_sw_interface (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
 {
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+
   if (!is_add && sw_if_index != ~0)
     {
-      ethernet_arp_main_t *am = &ethernet_arp_main;
       ethernet_arp_ip4_entry_t *e;
       /* *INDENT-OFF* */
       pool_foreach (e, am->ip4_entry_pool, ({
@@ -2634,6 +3040,12 @@ vnet_arp_delete_sw_interface (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
         vnet_arp_unset_ip4_over_ethernet_internal (vnm, &args);
       }));
       /* *INDENT-ON* */
+      arp_disable (am, sw_if_index);
+    }
+  else if (is_add)
+    {
+      vnet_feature_enable_disable ("arp", "arp-disabled",
+                                  sw_if_index, 1, NULL, 0);
     }
 
   return (NULL);