Move CLI examples from wiki to code - VPP-165
[vpp.git] / vnet / vnet / ip / ip4_forward.c
index 6bfe1cf..751260a 100644 (file)
 
 #include <vnet/vnet.h>
 #include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>    /* for ethernet_header_t */
-#include <vnet/ethernet/arp_packet.h>  /* for ethernet_arp_header_t */
+/** for ethernet_header_t */
+#include <vnet/ethernet/ethernet.h>
+/** for ethernet_arp_header_t */
+#include <vnet/ethernet/arp_packet.h>  
 #include <vnet/ppp/ppp.h>
-#include <vnet/srp/srp.h>      /* for srp_hw_interface_class */
-#include <vnet/api_errno.h>     /* for API error numbers */
+/** for srp_hw_interface_class */
+#include <vnet/srp/srp.h>
+/** for API error numbers */
+#include <vnet/api_errno.h>     
+
+/** @file
+    vnet ip4 forwarding
+*/
 
 /* This is really, really simple but stupid fib. */
 u32
@@ -71,7 +79,7 @@ ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
          goto done;
        }
     }
-    
+
   /* Nothing matches in table. */
   ai = lm->miss_adj_index;
 
@@ -79,6 +87,11 @@ ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
   return ai;
 }
 
+/** @brief Create FIB from table ID and init all hashing.
+    @param im - @ref ip4_main_t
+    @param table_id - table ID
+    @return fib - @ref ip4_fib_t
+*/
 static ip4_fib_t *
 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
 {
@@ -87,6 +100,7 @@ create_fib_with_table_id (ip4_main_t * im, u32 table_id)
   vec_add2 (im->fibs, fib, 1);
   fib->table_id = table_id;
   fib->index = fib - im->fibs;
+  /* IP_FLOW_HASH_DEFAULT is net value of 5 tuple flags without "reverse" bit */
   fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
   fib->fwd_classify_table_index = ~0;
   fib->rev_classify_table_index = ~0;
@@ -94,20 +108,41 @@ create_fib_with_table_id (ip4_main_t * im, u32 table_id)
   return fib;
 }
 
+/** @brief Find existing or Create new FIB based on index
+    @param im @ref ip4_main_t
+    @param table_index_or_id - overloaded parameter referring
+           to the table or a table's index in the FIB vector
+    @param flags - used to check if table_index_or_id was a table or
+           an index (detected by @ref IP4_ROUTE_FLAG_FIB_INDEX)
+    @return either the existing or a new ip4_fib_t entry
+*/
 ip4_fib_t *
-find_ip4_fib_by_table_index_or_id (ip4_main_t * im, 
+find_ip4_fib_by_table_index_or_id (ip4_main_t * im,
                                    u32 table_index_or_id, u32 flags)
 {
   uword * p, fib_index;
 
   fib_index = table_index_or_id;
+  /* If this isn't a FIB_INDEX ... */
   if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
     {
+      /* If passed ~0 then request the next table available */
+      if (table_index_or_id == ~0) {
+        table_index_or_id = 0;
+        while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
+          table_index_or_id++;
+        }
+       /* Create the next table and return the ip4_fib_t associated with it */
+       return create_fib_with_table_id (im, table_index_or_id);
+      }
+      /* A specific table_id was requested.. */
       p = hash_get (im->fib_index_by_table_id, table_index_or_id);
+      /* ... and if it doesn't exist create it else grab its index */
       if (! p)
        return create_fib_with_table_id (im, table_index_or_id);
       fib_index = p[0];
     }
+  /* Return the ip4_fib_t associated with this index */
   return vec_elt_at_index (im->fibs, fib_index);
 }
 
@@ -181,7 +216,12 @@ ip4_fib_set_adj_index (ip4_main_t * im,
                        fib->new_hash_values);
 
       p = hash_get (hash, dst_address_u32);
-      memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
+      /* hash_get should never return NULL here */
+      if (p)
+          clib_memcpy (p, fib->new_hash_values, 
+                       vec_bytes (fib->new_hash_values));
+      else
+          ASSERT(0);
     }
 }
 
@@ -242,7 +282,9 @@ void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
   old_adj_index = fib->old_hash_values[0];
 
   /* Avoid spurious reference count increments */
-  if (old_adj_index == adj_index)
+  if (old_adj_index == adj_index
+      && adj_index != ~0
+      && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
     {
       ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
       if (adj->share_count > 0)
@@ -260,6 +302,85 @@ void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
     ip_del_adjacency (lm, old_adj_index);
 }
 
+
+u32
+ip4_route_get_next_hop_adj (ip4_main_t * im,
+                           u32 fib_index,
+                           ip4_address_t *next_hop,
+                           u32 next_hop_sw_if_index,
+                           u32 explicit_fib_index)
+{
+  ip_lookup_main_t * lm = &im->lookup_main;
+  vnet_main_t * vnm = vnet_get_main();
+  uword * nh_hash, * nh_result;
+  int is_interface_next_hop;
+  u32 nh_adj_index;
+  ip4_fib_t * fib;
+
+  fib = vec_elt_at_index (im->fibs, fib_index);
+
+  is_interface_next_hop = next_hop->data_u32 == 0;
+  if (is_interface_next_hop)
+    {
+      nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
+      if (nh_result)
+         nh_adj_index = *nh_result;
+      else
+        {
+          ip_adjacency_t * adj;
+          adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
+                                  &nh_adj_index);
+          ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
+          ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
+          hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
+       }
+    }
+  else if (next_hop_sw_if_index == ~0)
+    {
+      /* next-hop is recursive. we always need a indirect adj
+       * for recursive paths. Any LPM we perform now will give
+       * us a valid adj, but without tracking the next-hop we
+       * have no way to keep it valid.
+       */
+      ip_adjacency_t add_adj;
+      memset (&add_adj, 0, sizeof(add_adj));
+      add_adj.n_adj = 1;
+      add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
+      add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
+      add_adj.explicit_fib_index = explicit_fib_index;
+      ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
+    }
+  else
+    {
+      nh_hash = fib->adj_index_by_dst_address[32];
+      nh_result = hash_get (nh_hash, next_hop->data_u32);
+
+      /* Next hop must be known. */
+      if (! nh_result)
+        {
+         ip_adjacency_t * adj;
+
+         /* no /32 exists, get the longest prefix match */
+         nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
+                                                   next_hop, 0);
+         adj = ip_get_adjacency (lm, nh_adj_index);
+         /* if ARP interface adjacency is present, we need to
+            install ARP adjaceny for specific next hop */
+         if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
+             adj->arp.next_hop.ip4.as_u32 == 0)
+           {
+             nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
+           }
+       }
+      else
+        {
+         nh_adj_index = *nh_result;
+       }
+    }
+
+  return (nh_adj_index);
+}
+
 void
 ip4_add_del_route_next_hop (ip4_main_t * im,
                            u32 flags,
@@ -277,11 +398,9 @@ ip4_add_del_route_next_hop (ip4_main_t * im,
   u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
   u32 dst_adj_index, nh_adj_index;
   uword * dst_hash, * dst_result;
-  uword * nh_hash, * nh_result;
   ip_adjacency_t * dst_adj;
   ip_multipath_adjacency_t * old_mp, * new_mp;
   int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
-  int is_interface_next_hop;
   clib_error_t * error = 0;
 
   if (explicit_fib_index == (u32)~0)
@@ -290,41 +409,14 @@ ip4_add_del_route_next_hop (ip4_main_t * im,
       fib_index = explicit_fib_index;
 
   fib = vec_elt_at_index (im->fibs, fib_index);
-  
+
   /* Lookup next hop to be added or deleted. */
-  is_interface_next_hop = next_hop->data_u32 == 0;
   if (adj_index == (u32)~0)
     {
-      if (is_interface_next_hop)
-        {
-          nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
-          if (nh_result)
-            nh_adj_index = *nh_result;
-          else
-            {
-              ip_adjacency_t * adj;
-              adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
-                                      &nh_adj_index);
-              ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
-              ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
-              hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
-            }
-        }
-      else
-        {
-          nh_hash = fib->adj_index_by_dst_address[32];
-          nh_result = hash_get (nh_hash, next_hop->data_u32);
-          
-          /* Next hop must be known. */
-          if (! nh_result)
-            {
-              vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB;
-              error = clib_error_return (0, "next-hop %U/32 not in FIB",
-                                         format_ip4_address, next_hop);
-              goto done;
-            }
-          nh_adj_index = *nh_result;
-        }
+       nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index,
+                                                 next_hop,
+                                                 next_hop_sw_if_index,
+                                                 explicit_fib_index);
     }
   else
     {
@@ -369,6 +461,29 @@ ip4_add_del_route_next_hop (ip4_main_t * im,
       goto done;
     }
 
+  /* Destination is not known and default weight is set so add route
+     to existing non-multipath adjacency */
+  if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
+    {
+      /* create / delete additional mapping of existing adjacency */
+      ip4_add_del_route_args_t a;
+
+      a.table_index_or_table_id = fib_index;
+      a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
+                | IP4_ROUTE_FLAG_FIB_INDEX
+                | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
+                | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
+                            | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
+      a.dst_address = dst_address[0];
+      a.dst_address_length = dst_address_length;
+      a.adj_index = nh_adj_index;
+      a.add_adj = 0;
+      a.n_add_adj = 0;
+
+      ip4_add_del_route (im, &a);
+      goto done;
+    }
+
   old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
 
   if (! ip_multipath_adjacency_add_del_next_hop
@@ -393,6 +508,8 @@ ip4_add_del_route_next_hop (ip4_main_t * im,
   if (old_mp != new_mp)
     {
       ip4_add_del_route_args_t a;
+      ip_adjacency_t * adj;
+
       a.table_index_or_table_id = fib_index;
       a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
                 | IP4_ROUTE_FLAG_FIB_INDEX
@@ -405,6 +522,10 @@ ip4_add_del_route_next_hop (ip4_main_t * im,
       a.n_add_adj = 0;
 
       ip4_add_del_route (im, &a);
+
+      adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
+      if (adj->n_adj == 1)
+        adj->share_count += is_del ? -1 : 1;
     }
 
  done:
@@ -506,7 +627,7 @@ void ip4_maybe_remap_adjacencies (ip4_main_t * im,
            else
              {
                /* Remap to new adjacency. */
-               memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
+               clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
 
                /* Set new adjacency value. */
                fib->new_hash_values[0] = p->value[0] = m - 1;
@@ -581,11 +702,18 @@ void ip4_delete_matching_routes (ip4_main_t * im,
   ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
 }
 
+void
+ip4_forward_next_trace (vlib_main_t * vm,
+                        vlib_node_runtime_t * node,
+                        vlib_frame_t * frame,
+                        vlib_rx_or_tx_t which_adj_index);
+
 always_inline uword
 ip4_lookup_inline (vlib_main_t * vm,
                   vlib_node_runtime_t * node,
                   vlib_frame_t * frame,
-                  int lookup_for_responses_to_locally_received_packets)
+                  int lookup_for_responses_to_locally_received_packets,
+                  int is_indirect)
 {
   ip4_main_t * im = &ip4_main;
   ip_lookup_main_t * lm = &im->lookup_main;
@@ -612,6 +740,7 @@ ip4_lookup_inline (vlib_main_t * vm,
          ip_adjacency_t * adj0, * adj1;
          ip4_fib_mtrie_t * mtrie0, * mtrie1;
          ip4_fib_mtrie_leaf_t leaf0, leaf1;
+         ip4_address_t * dst_addr0, *dst_addr1;
          __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
          __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
           u32 flow_hash_config0, flow_hash_config1;
@@ -641,6 +770,20 @@ ip4_lookup_inline (vlib_main_t * vm,
          ip0 = vlib_buffer_get_current (p0);
          ip1 = vlib_buffer_get_current (p1);
 
+         if (is_indirect)
+           {
+             ip_adjacency_t * iadj0, * iadj1;
+             iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
+             iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
+             dst_addr0 = &iadj0->indirect.next_hop.ip4;
+             dst_addr1 = &iadj1->indirect.next_hop.ip4;
+           }
+         else
+           {
+             dst_addr0 = &ip0->dst_address;
+             dst_addr1 = &ip1->dst_address;
+           }
+
          fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
          fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
@@ -656,8 +799,8 @@ ip4_lookup_inline (vlib_main_t * vm,
 
              leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
 
-             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
-             leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 0);
+             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
+             leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
            }
 
          tcp0 = (void *) (ip0 + 1);
@@ -670,20 +813,20 @@ ip4_lookup_inline (vlib_main_t * vm,
 
          if (! lookup_for_responses_to_locally_received_packets)
            {
-             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
-             leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 1);
+             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
+             leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
            }
 
          if (! lookup_for_responses_to_locally_received_packets)
            {
-             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
-             leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 2);
+             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
+             leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
            }
 
          if (! lookup_for_responses_to_locally_received_packets)
            {
-             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
-             leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 3);
+             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
+             leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
            }
 
          if (lookup_for_responses_to_locally_received_packets)
@@ -702,10 +845,10 @@ ip4_lookup_inline (vlib_main_t * vm,
            }
 
          ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
-                                                          &ip0->dst_address,
+                                                          dst_addr0,
                                                           /* no_default_route */ 0));
          ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
-                                                          &ip1->dst_address,
+                                                          dst_addr1,
                                                           /* no_default_route */ 0));
          adj0 = ip_get_adjacency (lm, adj_index0);
          adj1 = ip_get_adjacency (lm, adj_index1);
@@ -741,6 +884,15 @@ ip4_lookup_inline (vlib_main_t * vm,
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
          vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
 
+         if (is_indirect)
+           {
+             /* ARP for next-hop not packet's destination address */
+             if (adj0->lookup_next_index == IP_LOOKUP_NEXT_ARP)
+               ip0->dst_address.as_u32 = dst_addr0->as_u32;
+              if (adj1->lookup_next_index == IP_LOOKUP_NEXT_ARP)
+                ip1->dst_address.as_u32 = dst_addr1->as_u32;
+           }
+
           vlib_increment_combined_counter 
               (cm, cpu_index, adj_index0, 1,
                vlib_buffer_length_in_chain (vm, p0) 
@@ -801,6 +953,7 @@ ip4_lookup_inline (vlib_main_t * vm,
          ip_adjacency_t * adj0;
          ip4_fib_mtrie_t * mtrie0;
          ip4_fib_mtrie_leaf_t leaf0;
+         ip4_address_t * dst_addr0;
          __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
           u32 flow_hash_config0, hash_c0;
 
@@ -811,6 +964,17 @@ ip4_lookup_inline (vlib_main_t * vm,
 
          ip0 = vlib_buffer_get_current (p0);
 
+         if (is_indirect)
+           {
+             ip_adjacency_t * iadj0;
+             iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
+             dst_addr0 = &iadj0->indirect.next_hop.ip4;
+           }
+         else
+           {
+             dst_addr0 = &ip0->dst_address;
+           }
+
          fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
@@ -821,7 +985,7 @@ ip4_lookup_inline (vlib_main_t * vm,
 
              leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
 
-             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
+             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
            }
 
          tcp0 = (void *) (ip0 + 1);
@@ -830,13 +994,13 @@ ip4_lookup_inline (vlib_main_t * vm,
                         || ip0->protocol == IP_PROTOCOL_UDP);
 
          if (! lookup_for_responses_to_locally_received_packets)
-           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
+           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
 
          if (! lookup_for_responses_to_locally_received_packets)
-           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
+           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
 
          if (! lookup_for_responses_to_locally_received_packets)
-           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
+           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
 
          if (lookup_for_responses_to_locally_received_packets)
            adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
@@ -848,7 +1012,7 @@ ip4_lookup_inline (vlib_main_t * vm,
            }
 
          ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
-                                                          &ip0->dst_address,
+                                                          dst_addr0,
                                                           /* no_default_route */ 0));
 
          adj0 = ip_get_adjacency (lm, adj_index0);
@@ -872,6 +1036,13 @@ ip4_lookup_inline (vlib_main_t * vm,
 
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
 
+          if (is_indirect)
+            {
+              /* ARP for next-hop not packet's destination address */
+              if (adj0->lookup_next_index == IP_LOOKUP_NEXT_ARP)
+                ip0->dst_address.as_u32 = dst_addr0->as_u32;
+            }
+
           vlib_increment_combined_counter 
               (cm, cpu_index, adj_index0, 1,
                vlib_buffer_length_in_chain (vm, p0)
@@ -898,15 +1069,52 @@ ip4_lookup_inline (vlib_main_t * vm,
       vlib_put_next_frame (vm, node, next, n_left_to_next);
     }
 
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    ip4_forward_next_trace(vm, node, frame, VLIB_TX);
+
   return frame->n_vectors;
 }
 
+/** @brief IPv4 lookup node.
+    @node ip4-lookup
+
+    This is the main IPv4 lookup dispatch node.
+
+    @param vm vlib_main_t corresponding to the current thread
+    @param node vlib_node_runtime_t
+    @param frame vlib_frame_t whose contents should be dispatched
+
+    @par Graph mechanics: buffer metadata, next index usage
+
+    @em Uses:
+    - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
+        - Indicates the @c sw_if_index value of the interface that the
+         packet was received on.
+    - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
+        - When the value is @c ~0 then the node performs a longest prefix
+          match (LPM) for the packet destination address in the FIB attached
+          to the receive interface.
+        - Otherwise perform LPM for the packet destination address in the
+          indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
+          value (0, 1, ...) and not a VRF id.
+
+    @em Sets:
+    - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
+        - The lookup result adjacency index.
+
+    <em>Next Index:</em>
+    - Dispatches the packet to the node index found in
+      ip_adjacency_t @c adj->lookup_next_index
+      (where @c adj is the lookup result adjacency).
+*/
 static uword
 ip4_lookup (vlib_main_t * vm,
            vlib_node_runtime_t * node,
            vlib_frame_t * frame)
 {
-  return ip4_lookup_inline (vm, node, frame, /* lookup_for_responses_to_locally_received_packets */ 0);
+  return ip4_lookup_inline (vm, node, frame,
+                           /* lookup_for_responses_to_locally_received_packets */ 0,
+                           /* is_indirect */ 0);
 
 }
 
@@ -934,6 +1142,8 @@ void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
       n = IP_LOOKUP_NEXT_ARP;
       node_index = ip4_arp_node.index;
       adj->if_address_index = if_address_index;
+      adj->arp.next_hop.ip4.as_u32 = 0;
+      ip46_address_reset(&adj->arp.next_hop);
       packet_type = VNET_L3_PACKET_TYPE_ARP;
     }
   else
@@ -1190,6 +1400,133 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
  
 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
 
+/* Built-in ip4 unicast rx feature path definition */
+VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
+  .node_name = "ip4-inacl", 
+  .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
+  .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
+};
+
+VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
+  .node_name = "ip4-source-check-via-rx",
+  .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
+  .feature_index = 
+  &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
+};
+
+VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
+  .node_name = "ip4-source-check-via-any",
+  .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
+  .feature_index = 
+  &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
+};
+
+VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
+  .node_name = "ip4-source-and-port-range-check-rx",
+  .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
+  .feature_index =
+  &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
+};
+
+VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
+  .node_name = "ip4-policer-classify",
+  .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
+  .feature_index =
+  &ip4_main.ip4_unicast_rx_feature_policer_classify,
+};
+
+VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
+  .node_name = "ipsec-input-ip4",
+  .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
+  .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
+};
+
+VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
+  .node_name = "vpath-input-ip4",
+  .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
+  .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
+};
+
+VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
+  .node_name = "ip4-lookup",
+  .runs_before = 0, /* not before any other features */
+  .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
+};
+
+/* Built-in ip4 multicast rx feature path definition */
+VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
+  .node_name = "vpath-input-ip4",
+  .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
+  .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
+};
+
+VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
+  .node_name = "ip4-lookup-multicast",
+  .runs_before = 0, /* not before any other features */
+  .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
+};
+
+static char * rx_feature_start_nodes[] = 
+  { "ip4-input", "ip4-input-no-checksum"};
+
+static char * tx_feature_start_nodes[] = 
+{ "ip4-rewrite-transit"};
+
+/* Source and port-range check ip4 tx feature path definition */
+VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
+  .node_name = "ip4-source-and-port-range-check-tx",
+  .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
+  .feature_index =
+  &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
+
+};
+
+/* Built-in ip4 tx feature path definition */
+VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
+  .node_name = "interface-output",
+  .runs_before = 0, /* not before any other features */
+  .feature_index = &ip4_main.ip4_tx_feature_interface_output,
+};
+
+
+static clib_error_t *
+ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
+{
+  ip_lookup_main_t * lm = &im->lookup_main;
+  clib_error_t * error;
+  vnet_cast_t cast;
+  ip_config_main_t * cm;
+  vnet_config_main_t * vcm;
+  char **feature_start_nodes;
+  int feature_start_len;
+
+  for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
+    {
+      cm = &lm->feature_config_mains[cast];
+      vcm = &cm->config_main;
+
+      if (cast < VNET_IP_TX_FEAT)
+        {
+          feature_start_nodes = rx_feature_start_nodes;
+          feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
+        }
+      else
+        {
+          feature_start_nodes = tx_feature_start_nodes;
+          feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
+        }
+      
+      if ((error = ip_feature_init_cast (vm, cm, vcm, 
+                                         feature_start_nodes,
+                                         feature_start_len,
+                                         cast,
+                                         1 /* is_ip4 */)))
+        return error;
+    }
+
+  return 0;
+}
+
 static clib_error_t *
 ip4_sw_interface_add_del (vnet_main_t * vnm,
                          u32 sw_if_index,
@@ -1199,61 +1536,40 @@ ip4_sw_interface_add_del (vnet_main_t * vnm,
   ip4_main_t * im = &ip4_main;
   ip_lookup_main_t * lm = &im->lookup_main;
   u32 ci, cast;
+  u32 feature_index;
 
-  for (cast = 0; cast < VNET_N_CAST; cast++)
+  for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
     {
-      ip_config_main_t * cm = &lm->rx_config_mains[cast];
+      ip_config_main_t * cm = &lm->feature_config_mains[cast];
       vnet_config_main_t * vcm = &cm->config_main;
 
-      if (! vcm->node_index_by_feature_index)
-       {
-         if (cast == VNET_UNICAST)
-           {
-             static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
-             static char * feature_nodes[] = {
-               [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
-               [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
-               [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
-               [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
-               [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
-               [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
-             };
-
-             vnet_config_init (vm, vcm,
-                               start_nodes, ARRAY_LEN (start_nodes),
-                               feature_nodes, ARRAY_LEN (feature_nodes));
-           }
-         else
-           {
-             static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
-             static char * feature_nodes[] = {
-               [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
-               [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
-             };
-
-             vnet_config_init (vm, vcm,
-                               start_nodes, ARRAY_LEN (start_nodes),
-                               feature_nodes, ARRAY_LEN (feature_nodes));
-           }
-       }
-
       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
       ci = cm->config_index_by_sw_if_index[sw_if_index];
 
+      if (cast == VNET_IP_RX_UNICAST_FEAT)
+        feature_index = im->ip4_unicast_rx_feature_lookup;
+      else if (cast == VNET_IP_RX_MULTICAST_FEAT)
+        feature_index = im->ip4_multicast_rx_feature_lookup;
+      else
+        feature_index = im->ip4_tx_feature_interface_output;
+
       if (is_add)
-       ci = vnet_config_add_feature (vm, vcm,
+        ci = vnet_config_add_feature (vm, vcm, 
                                      ci,
-                                     IP4_RX_FEATURE_LOOKUP,
+                                      feature_index,
                                      /* config data */ 0,
                                      /* # bytes of config data */ 0);
       else
        ci = vnet_config_del_feature (vm, vcm,
                                      ci,
-                                     IP4_RX_FEATURE_LOOKUP,
+                                      feature_index,
                                      /* config data */ 0,
                                      /* # bytes of config data */ 0);
 
       cm->config_index_by_sw_if_index[sw_if_index] = ci;
+      /* 
+       * note: do not update the tx feature count here.
+       */
     }
 
   return /* no error */ 0;
@@ -1261,29 +1577,44 @@ ip4_sw_interface_add_del (vnet_main_t * vnm,
 
 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
 
+static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
+
 VLIB_REGISTER_NODE (ip4_lookup_node) = {
   .function = ip4_lookup,
   .name = "ip4-lookup",
   .vector_size = sizeof (u32),
 
-  .n_next_nodes = IP_LOOKUP_N_NEXT,
-  .next_nodes = {
-    [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
-    [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
-    [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
-    [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
-    [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
-    [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
-    [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify",
-    [IP_LOOKUP_NEXT_MAP] = "ip4-map",
-    [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
-    [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
-    [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop",
-    [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", 
-    [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", 
-  },
+  .format_trace = format_ip4_lookup_trace,
+
+  .n_next_nodes = IP4_LOOKUP_N_NEXT,
+  .next_nodes = IP4_LOOKUP_NEXT_NODES,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
+
+static uword
+ip4_indirect (vlib_main_t * vm,
+               vlib_node_runtime_t * node,
+               vlib_frame_t * frame)
+{
+  return ip4_lookup_inline (vm, node, frame,
+                           /* lookup_for_responses_to_locally_received_packets */ 0,
+                           /* is_indirect */ 1);
+}
+
+VLIB_REGISTER_NODE (ip4_indirect_node) = {
+  .function = ip4_indirect,
+  .name = "ip4-indirect",
+  .vector_size = sizeof (u32),
+  .sibling_of = "ip4-lookup",
+  .format_trace = format_ip4_lookup_trace,
+
+  .n_next_nodes = 0,
 };
 
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect);
+
+
 /* Global IP4 main. */
 ip4_main_t ip4_main;
 
@@ -1291,6 +1622,7 @@ clib_error_t *
 ip4_lookup_init (vlib_main_t * vm)
 {
   ip4_main_t * im = &ip4_main;
+  clib_error_t * error;
   uword i;
 
   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
@@ -1341,7 +1673,9 @@ ip4_lookup_init (vlib_main_t * vm)
                               "ip4 arp");
   }
 
-  return 0;
+  error = ip4_feature_init (vm, im);
+
+  return error;
 }
 
 VLIB_INIT_FUNCTION (ip4_lookup_init);
@@ -1357,33 +1691,52 @@ typedef struct {
 } ip4_forward_next_trace_t;
 
 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
+  uword indent = format_get_indent (s);
+  s = format (s, "%U%U",
+                format_white_space, indent,
+                format_ip4_header, t->packet_data);
+  return s;
+}
+
+static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
 {
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
   vnet_main_t * vnm = vnet_get_main();
   ip4_main_t * im = &ip4_main;
-  ip_adjacency_t * adj;
   uword indent = format_get_indent (s);
 
-  adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
-             t->fib_index, t->adj_index, format_ip_adjacency,
-             vnm, &im->lookup_main, t->adj_index, t->flow_hash);
-  switch (adj->lookup_next_index)
-    {
-    case IP_LOOKUP_NEXT_REWRITE:
-      s = format (s, "\n%U%U",
-                 format_white_space, indent,
-                 format_ip_adjacency_packet_data,
-                 vnm, &im->lookup_main, t->adj_index,
-                 t->packet_data, sizeof (t->packet_data));
-      break;
+              t->fib_index, t->adj_index, format_ip_adjacency,
+              vnm, &im->lookup_main, t->adj_index, t->flow_hash);
+  s = format (s, "\n%U%U",
+              format_white_space, indent,
+              format_ip4_header, t->packet_data);
+  return s;
+}
 
-    default:
-      break;
-    }
+static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
+  vnet_main_t * vnm = vnet_get_main();
+  ip4_main_t * im = &ip4_main;
+  uword indent = format_get_indent (s);
 
+  s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
+              t->fib_index, t->adj_index, format_ip_adjacency,
+              vnm, &im->lookup_main, t->adj_index, t->flow_hash);
+  s = format (s, "\n%U%U",
+              format_white_space, indent,
+              format_ip_adjacency_packet_data,
+              vnm, &im->lookup_main, t->adj_index,
+              t->packet_data, sizeof (t->packet_data));
   return s;
 }
 
@@ -1421,9 +1774,12 @@ ip4_forward_next_trace (vlib_main_t * vm,
          t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
          t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
          t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
-         t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
-                             vnet_buffer(b0)->sw_if_index[VLIB_RX]);
-         memcpy (t0->packet_data,
+         t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
+             vnet_buffer(b0)->sw_if_index[VLIB_TX] :
+             vec_elt (im->fib_index_by_sw_if_index,
+                      vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+
+         clib_memcpy (t0->packet_data,
                  vlib_buffer_get_current (b0),
                  sizeof (t0->packet_data));
        }
@@ -1432,9 +1788,11 @@ ip4_forward_next_trace (vlib_main_t * vm,
          t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
          t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
          t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
-         t1->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
-                             vnet_buffer(b1)->sw_if_index[VLIB_RX]);
-         memcpy (t1->packet_data,
+         t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
+             vnet_buffer(b1)->sw_if_index[VLIB_TX] :
+             vec_elt (im->fib_index_by_sw_if_index,
+                      vnet_buffer(b1)->sw_if_index[VLIB_RX]);
+         clib_memcpy (t1->packet_data,
                  vlib_buffer_get_current (b1),
                  sizeof (t1->packet_data));
        }
@@ -1457,9 +1815,11 @@ ip4_forward_next_trace (vlib_main_t * vm,
          t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
          t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
          t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
-         t0->fib_index = vec_elt (im->fib_index_by_sw_if_index, 
-                             vnet_buffer(b0)->sw_if_index[VLIB_RX]);
-         memcpy (t0->packet_data,
+         t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
+             vnet_buffer(b0)->sw_if_index[VLIB_TX] :
+             vec_elt (im->fib_index_by_sw_if_index,
+                      vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+         clib_memcpy (t0->packet_data,
                  vlib_buffer_get_current (b0),
                  sizeof (t0->packet_data));
        }
@@ -1522,6 +1882,8 @@ VLIB_REGISTER_NODE (ip4_drop_node,static) = {
   },
 };
 
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
+
 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
   .function = ip4_punt,
   .name = "ip4-punt",
@@ -1535,6 +1897,8 @@ VLIB_REGISTER_NODE (ip4_punt_node,static) = {
   },
 };
 
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
+
 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
   .function = ip4_miss,
   .name = "ip4-miss",
@@ -1548,6 +1912,8 @@ VLIB_REGISTER_NODE (ip4_miss_node,static) = {
   },
 };
 
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss);
+
 /* Compute TCP/UDP/ICMP4 checksum in software. */
 u16
 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
@@ -1683,8 +2049,10 @@ ip4_local (vlib_main_t * vm,
          leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
          leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
 
-         proto0 = ip0->protocol;
-         proto1 = ip1->protocol;
+         /* Treat IP frag packets as "experimental" protocol for now
+            until support of IP frag reassembly is implemented */
+         proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
+         proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
          is_udp0 = proto0 == IP_PROTOCOL_UDP;
          is_udp1 = proto1 == IP_PROTOCOL_UDP;
          is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
@@ -1876,7 +2244,9 @@ ip4_local (vlib_main_t * vm,
 
          leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
 
-         proto0 = ip0->protocol;
+         /* Treat IP frag packets as "experimental" protocol for now
+            until support of IP frag reassembly is implemented */
+         proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
          is_udp0 = proto0 == IP_PROTOCOL_UDP;
          is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
 
@@ -1986,6 +2356,8 @@ VLIB_REGISTER_NODE (ip4_local_node,static) = {
   },
 };
 
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
+
 void ip4_register_protocol (u32 protocol, u32 node_index)
 {
   vlib_main_t * vm = vlib_get_main();
@@ -2084,6 +2456,10 @@ ip4_arp (vlib_main_t * vm,
          adj0 = ip_get_adjacency (lm, adj_index0);
          ip0 = vlib_buffer_get_current (p0);
 
+         /* If packet destination is not local, send ARP to next hop */
+         if (adj0->arp.next_hop.ip4.as_u32)
+           ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
+
          /* 
           * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
           * rewrite to this packet, we need to skip it here.
@@ -2166,10 +2542,15 @@ ip4_arp (vlib_main_t * vm,
            hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
 
            /* Src ethernet address in ARP header. */
-           memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
+           clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
                    sizeof (h0->ip4_over_ethernet[0].ethernet));
 
-           ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
+           if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) {
+               //No source address available
+               p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
+               vlib_buffer_free(vm, &bi0, 1);
+               continue;
+           }
 
            /* Copy in destination address we are requesting. */
            h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
@@ -2196,6 +2577,7 @@ static char * ip4_arp_error_strings[] = {
   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
+  [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
 };
 
 VLIB_REGISTER_NODE (ip4_arp_node) = {
@@ -2279,7 +2661,7 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
 
   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
 
-  memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
+  clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
 
   h->ip4_over_ethernet[0].ip4 = src[0];
   h->ip4_over_ethernet[1].ip4 = dst[0];
@@ -2305,6 +2687,7 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
 typedef enum {
   IP4_REWRITE_NEXT_DROP,
   IP4_REWRITE_NEXT_ARP,
+  IP4_REWRITE_NEXT_ICMP_ERROR,
 } ip4_rewrite_next_t;
 
 always_inline uword
@@ -2318,6 +2701,7 @@ ip4_rewrite_inline (vlib_main_t * vm,
   u32 n_left_from, n_left_to_next, * to_next, next_index;
   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
+  ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
 
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
@@ -2335,6 +2719,7 @@ ip4_rewrite_inline (vlib_main_t * vm,
          u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
          u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
           u32 next0_override, next1_override;
+          u32 tx_sw_if_index0, tx_sw_if_index1;
       
           if (rewrite_for_locally_received_packets)
               next0_override = next1_override = 0;
@@ -2374,6 +2759,7 @@ ip4_rewrite_inline (vlib_main_t * vm,
          ip1 = vlib_buffer_get_current (p1);
 
          error0 = error1 = IP4_ERROR_NONE;
+          next0 = next1 = IP4_REWRITE_NEXT_DROP;
 
          /* Decrement TTL & update checksum.
             Works either endian, so no need for byte swap. */
@@ -2400,8 +2786,26 @@ ip4_rewrite_inline (vlib_main_t * vm,
              ip0->ttl = ttl0;
              ip1->ttl = ttl1;
 
-             error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
-             error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
+              /*
+               * If the ttl drops below 1 when forwarding, generate
+               * an ICMP response.
+               */
+              if (PREDICT_FALSE(ttl0 <= 0))
+                {
+                  error0 = IP4_ERROR_TIME_EXPIRED;
+                  vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
+                  icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
+                              ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
+                  next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
+                }
+              if (PREDICT_FALSE(ttl1 <= 0))
+                {
+                  error1 = IP4_ERROR_TIME_EXPIRED;
+                  vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
+                  icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
+                              ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
+                  next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
+                }
 
              /* Verify checksum. */
              ASSERT (ip0->checksum == ip4_header_checksum (ip0));
@@ -2437,14 +2841,25 @@ ip4_rewrite_inline (vlib_main_t * vm,
           /* Worth pipelining. No guarantee that adj0,1 are hot... */
          rw_len0 = adj0[0].rewrite_header.data_bytes;
          rw_len1 = adj1[0].rewrite_header.data_bytes;
-         next0 = (error0 == IP4_ERROR_NONE) 
-            ? adj0[0].rewrite_header.next_index : 0;
+          vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
+          vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
+
+          /* Check MTU of outgoing interface. */
+          error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
+                    ? IP4_ERROR_MTU_EXCEEDED
+                    : error0);
+          error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
+                    ? IP4_ERROR_MTU_EXCEEDED
+                    : error1);
+
+          next0 = (error0 == IP4_ERROR_NONE)
+            ? adj0[0].rewrite_header.next_index : next0;
 
           if (rewrite_for_locally_received_packets)
               next0 = next0 && next0_override ? next0_override : next0;
 
-         next1 = (error1 == IP4_ERROR_NONE)
-            ? adj1[0].rewrite_header.next_index : 0;
+          next1 = (error1 == IP4_ERROR_NONE)
+            ? adj1[0].rewrite_header.next_index : next1;
 
           if (rewrite_for_locally_received_packets)
               next1 = next1 && next1_override ? next1_override : next1;
@@ -2466,25 +2881,51 @@ ip4_rewrite_inline (vlib_main_t * vm,
                    /* packet increment */ 0,
                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
 
-         /* Check MTU of outgoing interface. */
-         error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
-                   ? IP4_ERROR_MTU_EXCEEDED
-                   : error0);
-         error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
-                   ? IP4_ERROR_MTU_EXCEEDED
-                   : error1);
-
-         p0->current_data -= rw_len0;
-         p1->current_data -= rw_len1;
+          /* Don't adjust the buffer for ttl issue; icmp-error node wants
+           * to see the IP headerr */
+          if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
+            {
+              p0->current_data -= rw_len0;
+              p0->current_length += rw_len0;
+              tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+              vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+                  tx_sw_if_index0;
+
+              if (PREDICT_FALSE 
+                  (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
+                                    tx_sw_if_index0)))
+                {
+                  p0->current_config_index = 
+                    vec_elt (cm->config_index_by_sw_if_index, 
+                             tx_sw_if_index0);
+                  vnet_get_config_data (&cm->config_main,
+                                        &p0->current_config_index,
+                                        &next0,
+                                        /* # bytes of config data */ 0);
+                }
+            }
+          if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
+            {
+              p1->current_data -= rw_len1;
+              p1->current_length += rw_len1;
 
-         p0->current_length += rw_len0;
-         p1->current_length += rw_len1;
+              tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
+              vnet_buffer (p1)->sw_if_index[VLIB_TX] =
+                  tx_sw_if_index1;
 
-         vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
-         vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
-      
-         p0->error = error_node->errors[error0];
-         p1->error = error_node->errors[error1];
+              if (PREDICT_FALSE 
+                  (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
+                                    tx_sw_if_index1)))
+                {
+                  p1->current_config_index = 
+                    vec_elt (cm->config_index_by_sw_if_index, 
+                             tx_sw_if_index1);
+                  vnet_get_config_data (&cm->config_main,
+                                        &p1->current_config_index,
+                                        &next1,
+                                        /* # bytes of config data */ 0);
+                }
+            }
 
          /* Guess we are only writing on simple Ethernet header. */
          vnet_rewrite_two_headers (adj0[0], adj1[0],
@@ -2503,6 +2944,7 @@ ip4_rewrite_inline (vlib_main_t * vm,
          ip4_header_t * ip0;
          u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
           u32 next0_override;
+          u32 tx_sw_if_index0;
       
           if (rewrite_for_locally_received_packets)
               next0_override = 0;
@@ -2521,7 +2963,7 @@ ip4_rewrite_inline (vlib_main_t * vm,
          ip0 = vlib_buffer_get_current (p0);
 
          error0 = IP4_ERROR_NONE;
-          next0 = 0;            /* drop on error */
+          next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
 
          /* Decrement TTL & update checksum. */
          if (! rewrite_for_locally_received_packets)
@@ -2542,7 +2984,18 @@ ip4_rewrite_inline (vlib_main_t * vm,
 
              ASSERT (ip0->checksum == ip4_header_checksum (ip0));
 
-             error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
+              if (PREDICT_FALSE(ttl0 <= 0))
+                {
+                  /*
+                   * If the ttl drops below 1 when forwarding, generate
+                   * an ICMP response.
+                   */
+                  error0 = IP4_ERROR_TIME_EXPIRED;
+                  next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
+                  vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
+                  icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
+                              ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
+                }
            }
 
           if (rewrite_for_locally_received_packets)
@@ -2571,6 +3024,7 @@ ip4_rewrite_inline (vlib_main_t * vm,
           
           /* Update packet buffer attributes/set output interface. */
           rw_len0 = adj0[0].rewrite_header.data_bytes;
+          vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
           
           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
               vlib_increment_combined_counter 
@@ -2584,15 +3038,33 @@ ip4_rewrite_inline (vlib_main_t * vm,
                     > adj0[0].rewrite_header.max_l3_packet_bytes
                     ? IP4_ERROR_MTU_EXCEEDED
                     : error0);
-          
+
          p0->error = error_node->errors[error0];
-          p0->current_data -= rw_len0;
-          p0->current_length += rw_len0;
-          vnet_buffer (p0)->sw_if_index[VLIB_TX] = 
-            adj0[0].rewrite_header.sw_if_index;
-          
-          next0 = (error0 == IP4_ERROR_NONE)
-            ? adj0[0].rewrite_header.next_index : 0;
+
+          /* Don't adjust the buffer for ttl issue; icmp-error node wants
+           * to see the IP headerr */
+          if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
+            {
+              p0->current_data -= rw_len0;
+              p0->current_length += rw_len0;
+              tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+
+              vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+              next0 = adj0[0].rewrite_header.next_index;
+
+              if (PREDICT_FALSE 
+                  (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
+                                    tx_sw_if_index0)))
+                  {
+                    p0->current_config_index = 
+                      vec_elt (cm->config_index_by_sw_if_index, 
+                               tx_sw_if_index0);
+                    vnet_get_config_data (&cm->config_main,
+                                          &p0->current_config_index,
+                                          &next0,
+                                          /* # bytes of config data */ 0);
+                  }
+            }
 
           if (rewrite_for_locally_received_packets)
               next0 = next0 && next0_override ? next0_override : next0;
@@ -2617,6 +3089,38 @@ ip4_rewrite_inline (vlib_main_t * vm,
   return frame->n_vectors;
 }
 
+
+/** @brief IPv4 transit rewrite node.
+    @node ip4-rewrite-transit
+
+    This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
+    header checksum, fetch the ip adjacency, check the outbound mtu,
+    apply the adjacency rewrite, and send pkts to the adjacency
+    rewrite header's rewrite_next_index.
+
+    @param vm vlib_main_t corresponding to the current thread
+    @param node vlib_node_runtime_t
+    @param frame vlib_frame_t whose contents should be dispatched
+
+    @par Graph mechanics: buffer metadata, next index usage
+
+    @em Uses:
+    - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
+        - the rewrite adjacency index
+    - <code>adj->lookup_next_index</code>
+        - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
+          the packet will be dropped. 
+    - <code>adj->rewrite_header</code>
+        - Rewrite string length, rewrite string, next_index
+
+    @em Sets:
+    - <code>b->current_data, b->current_length</code>
+        - Updated net of applying the rewrite string
+
+    <em>Next Indices:</em>
+    - <code> adj->rewrite_header.next_index </code>
+      or @c error-drop 
+*/
 static uword
 ip4_rewrite_transit (vlib_main_t * vm,
                     vlib_node_runtime_t * node,
@@ -2626,6 +3130,39 @@ ip4_rewrite_transit (vlib_main_t * vm,
                             /* rewrite_for_locally_received_packets */ 0);
 }
 
+/** @brief IPv4 local rewrite node.
+    @node ip4-rewrite-local
+
+    This is the IPv4 local rewrite node. Fetch the ip adjacency, check
+    the outbound interface mtu, apply the adjacency rewrite, and send
+    pkts to the adjacency rewrite header's rewrite_next_index. Deal
+    with hemorrhoids of the form "some clown sends an icmp4 w/ src =
+    dst = interface addr."
+
+    @param vm vlib_main_t corresponding to the current thread
+    @param node vlib_node_runtime_t
+    @param frame vlib_frame_t whose contents should be dispatched
+
+    @par Graph mechanics: buffer metadata, next index usage
+
+    @em Uses:
+    - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
+        - the rewrite adjacency index
+    - <code>adj->lookup_next_index</code>
+        - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
+          the packet will be dropped. 
+    - <code>adj->rewrite_header</code>
+        - Rewrite string length, rewrite string, next_index
+
+    @em Sets:
+    - <code>b->current_data, b->current_length</code>
+        - Updated net of applying the rewrite string
+
+    <em>Next Indices:</em>
+    - <code> adj->rewrite_header.next_index </code>
+      or @c error-drop 
+*/
+
 static uword
 ip4_rewrite_local (vlib_main_t * vm,
                   vlib_node_runtime_t * node,
@@ -2640,31 +3177,32 @@ VLIB_REGISTER_NODE (ip4_rewrite_node) = {
   .name = "ip4-rewrite-transit",
   .vector_size = sizeof (u32),
 
-  .format_trace = format_ip4_forward_next_trace,
+  .format_trace = format_ip4_rewrite_trace,
 
-  .n_next_nodes = 2,
+  .n_next_nodes = 3,
   .next_nodes = {
     [IP4_REWRITE_NEXT_DROP] = "error-drop",
     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
+    [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
   },
 };
 
-VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = {
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit);
+
+VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
   .function = ip4_rewrite_local,
   .name = "ip4-rewrite-local",
   .vector_size = sizeof (u32),
 
   .sibling_of = "ip4-rewrite-transit",
 
-  .format_trace = format_ip4_forward_next_trace,
+  .format_trace = format_ip4_rewrite_trace,
 
-  .n_next_nodes = 2,
-  .next_nodes = {
-    [IP4_REWRITE_NEXT_DROP] = "error-drop",
-    [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
-  },
+  .n_next_nodes = 0,
 };
 
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local);
+
 static clib_error_t *
 add_del_interface_table (vlib_main_t * vm,
                         unformat_input_t * input,
@@ -2707,6 +3245,20 @@ add_del_interface_table (vlib_main_t * vm,
   return error;
 }
 
+/*?
+ * Place the indicated interface into the supplied VRF
+ *
+ * @cliexpar
+ * @cliexstart{set interface ip table}
+ *
+ *  vpp# set interface ip table GigabitEthernet2/0/0 2
+ *
+ * Interface addresses added after setting the interface IP table end up in the indicated VRF table.
+ * Predictable but potentially counter-intuitive results occur if you provision interface addresses in multiple FIBs.
+ * Upon RX, packets will be processed in the last IP table ID provisioned.
+ * It might be marginally useful to evade source RPF drops to put an interface address into multiple FIBs.
+ * @cliexend
+ ?*/
 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
   .path = "set interface ip table",
   .function = add_del_interface_table,
@@ -2926,6 +3478,9 @@ ip4_lookup_multicast (vlib_main_t * vm,
       vlib_put_next_frame (vm, node, next, n_left_to_next);
     }
 
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+      ip4_forward_next_trace(vm, node, frame, VLIB_TX);
+
   return frame->n_vectors;
 }
 
@@ -2933,25 +3488,14 @@ VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
   .function = ip4_lookup_multicast,
   .name = "ip4-lookup-multicast",
   .vector_size = sizeof (u32),
+  .sibling_of = "ip4-lookup",
+  .format_trace = format_ip4_lookup_trace,
 
-  .n_next_nodes = IP_LOOKUP_N_NEXT,
-  .next_nodes = {
-    [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
-    [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
-    [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
-    [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
-    [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
-    [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
-    [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify",
-    [IP_LOOKUP_NEXT_MAP] = "ip4-map",
-    [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
-    [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
-    [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop",
-    [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop", 
-    [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop", 
-  },
+  .n_next_nodes = 0,
 };
 
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast);
+
 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
   .function = ip4_drop,
   .name = "ip4-multicast",
@@ -3175,3 +3719,44 @@ VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
     .function = set_ip_classify_command_fn,
 };
 
+
+#define TEST_CODE 1
+#if TEST_CODE > 0
+
+static clib_error_t *
+set_interface_output_feature_command_fn (vlib_main_t * vm,
+                                         unformat_input_t * input,
+                                         vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  u32 sw_if_index = ~0;
+  int is_add = 1;
+  ip4_main_t * im = &ip4_main;
+  ip_lookup_main_t * lm = &im->lookup_main;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) 
+    {
+      if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+        ;
+      else if (unformat (input, "del"))
+        is_add = 0;
+      else
+        break;
+    }
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0, "unknown interface `%U'",
+                              format_unformat_error, input);
+
+  lm->tx_sw_if_has_ip_output_features =
+    clib_bitmap_set (lm->tx_sw_if_has_ip_output_features, sw_if_index, is_add);
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (set_interface_output_feature, static) = {
+  .path = "set interface output feature",
+  .function = set_interface_output_feature_command_fn,
+  .short_help = "set interface output feature <intfc>",
+};
+#endif /* TEST_CODE */