fib: midchain adjacency optimisations
[vpp.git] / src / vnet / adj / adj_midchain.c
index e8087f0..4741ec9 100644 (file)
 #include <vnet/adj/adj_midchain.h>
 #include <vnet/ethernet/arp_packet.h>
 #include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/load_balance.h>
 #include <vnet/fib/fib_walk.h>
-
-/**
- * The two midchain tx feature node indices
- */
-static u32 adj_midchain_tx_feature_node[VNET_LINK_NUM];
-static u32 adj_midchain_tx_no_count_feature_node[VNET_LINK_NUM];
+#include <vnet/fib/fib_entry.h>
 
 /**
  * @brief Trace data for packets traversing the midchain tx node
@@ -45,163 +41,158 @@ adj_midchain_tx_inline (vlib_main_t * vm,
                        vlib_frame_t * frame,
                        int interface_count)
 {
-    u32 * from, * to_next, n_left_from, n_left_to_next;
-    u32 next_index;
+    vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+    u16 nexts[VLIB_FRAME_SIZE], *next;
+    u32 * from, n_left, thread_index;
     vnet_main_t *vnm = vnet_get_main ();
     vnet_interface_main_t *im = &vnm->interface_main;
-    u32 cpu_index = vm->cpu_index;
 
-    /* Vector of buffer / pkt indices we're supposed to process */
+    thread_index = vm->thread_index;
+    n_left = frame->n_vectors;
     from = vlib_frame_vector_args (frame);
 
-    /* Number of buffers / pkts */
-    n_left_from = frame->n_vectors;
+    vlib_get_buffers (vm, from, bufs, n_left);
 
-    /* Speculatively send the first buffer to the last disposition we used */
-    next_index = node->cached_next_index;
+    next = nexts;
+    b = bufs;
 
-    while (n_left_from > 0)
+    while (n_left > 8)
     {
-       /* set up to enqueue to our disposition with index = next_index */
-       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+        u32 adj_index0, adj_index1, adj_index2, adj_index3;
+        const ip_adjacency_t *adj0, *adj1, *adj2, *adj3;
+        const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
 
+        /* Prefetch next iteration. */
+        {
+            vlib_prefetch_buffer_header (b[4], LOAD);
+            vlib_prefetch_buffer_header (b[5], LOAD);
+            vlib_prefetch_buffer_header (b[6], LOAD);
+            vlib_prefetch_buffer_header (b[7], LOAD);
+        }
 
-       while (n_left_from >= 4 && n_left_to_next > 2)
-       {
-           u32 bi0, adj_index0, next0;
-           const ip_adjacency_t * adj0;
-           const dpo_id_t *dpo0;
-           vlib_buffer_t * b0;
-           u32 bi1, adj_index1, next1;
-           const ip_adjacency_t * adj1;
-           const dpo_id_t *dpo1;
-           vlib_buffer_t * b1;
-
-           /* Prefetch next iteration. */
-           {
-               vlib_buffer_t * p2, * p3;
-
-               p2 = vlib_get_buffer (vm, from[2]);
-               p3 = vlib_get_buffer (vm, from[3]);
-
-               vlib_prefetch_buffer_header (p2, LOAD);
-               vlib_prefetch_buffer_header (p3, LOAD);
-
-               CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
-               CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
-           }
-
-           bi0 = from[0];
-           to_next[0] = bi0;
-           bi1 = from[1];
-           to_next[1] = bi1;
-
-           from += 2;
-           to_next += 2;
-           n_left_from -= 2;
-           n_left_to_next -= 2;
-
-           b0 = vlib_get_buffer(vm, bi0);
-           b1 = vlib_get_buffer(vm, bi1);
-
-           /* Follow the DPO on which the midchain is stacked */
-           adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
-           adj_index1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
-
-           adj0 = adj_get(adj_index0);
-           adj1 = adj_get(adj_index1);
-
-           dpo0 = &adj0->sub_type.midchain.next_dpo;
-           dpo1 = &adj1->sub_type.midchain.next_dpo;
-
-           next0 = dpo0->dpoi_next_node;
-           next1 = dpo1->dpoi_next_node;
-
-           vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
-           vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
-
-           if (interface_count)
-           {
-               vlib_increment_combined_counter (im->combined_sw_if_counters
-                                                + VNET_INTERFACE_COUNTER_TX,
-                                                cpu_index,
-                                                adj0->rewrite_header.sw_if_index,
-                                                1,
-                                                vlib_buffer_length_in_chain (vm, b0));
-               vlib_increment_combined_counter (im->combined_sw_if_counters
-                                                + VNET_INTERFACE_COUNTER_TX,
-                                                cpu_index,
-                                                adj1->rewrite_header.sw_if_index,
-                                                1,
-                                                vlib_buffer_length_in_chain (vm, b1));
-           }
-
-           if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
-           {
-               adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
-                                                             b0, sizeof (*tr));
-               tr->ai = adj_index0;
-           }
-           if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
-           {
-               adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
-                                                             b1, sizeof (*tr));
-               tr->ai = adj_index1;
-           }
-
-           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
-                                            to_next, n_left_to_next,
-                                            bi0, bi1,
-                                            next0, next1);
-       }
-       while (n_left_from > 0 && n_left_to_next > 0)
-       {
-           u32 bi0, adj_index0, next0;
-           const ip_adjacency_t * adj0;
-           const dpo_id_t *dpo0;
-           vlib_buffer_t * b0;
-
-           bi0 = from[0];
-           to_next[0] = bi0;
-           from += 1;
-           to_next += 1;
-           n_left_from -= 1;
-           n_left_to_next -= 1;
-
-           b0 = vlib_get_buffer(vm, bi0);
-
-           /* Follow the DPO on which the midchain is stacked */
-           adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
-           adj0 = adj_get(adj_index0);
-           dpo0 = &adj0->sub_type.midchain.next_dpo;
-           next0 = dpo0->dpoi_next_node;
-           vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
-
-           if (interface_count)
-           {
-               vlib_increment_combined_counter (im->combined_sw_if_counters
-                                                + VNET_INTERFACE_COUNTER_TX,
-                                                cpu_index,
-                                                adj0->rewrite_header.sw_if_index,
-                                                1,
-                                                vlib_buffer_length_in_chain (vm, b0));
-           }
-
-           if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
-           {
-               adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
-                                                             b0, sizeof (*tr));
-               tr->ai = adj_index0;
-           }
-
-           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-                                            to_next, n_left_to_next,
-                                            bi0, next0);
-       }
+        /* Follow the DPO on which the midchain is stacked */
+        adj_index0 = vnet_buffer(b[0])->ip.adj_index[VLIB_TX];
+        adj_index1 = vnet_buffer(b[1])->ip.adj_index[VLIB_TX];
+        adj_index2 = vnet_buffer(b[2])->ip.adj_index[VLIB_TX];
+        adj_index3 = vnet_buffer(b[3])->ip.adj_index[VLIB_TX];
+
+        adj0 = adj_get(adj_index0);
+        adj1 = adj_get(adj_index1);
+        adj2 = adj_get(adj_index2);
+        adj3 = adj_get(adj_index3);
+
+        dpo0 = &adj0->sub_type.midchain.next_dpo;
+        dpo1 = &adj1->sub_type.midchain.next_dpo;
+        dpo2 = &adj2->sub_type.midchain.next_dpo;
+        dpo3 = &adj3->sub_type.midchain.next_dpo;
+
+        next[0] = dpo0->dpoi_next_node;
+        next[1] = dpo1->dpoi_next_node;
+        next[2] = dpo2->dpoi_next_node;
+        next[3] = dpo3->dpoi_next_node;
+
+        vnet_buffer(b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+        vnet_buffer(b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+        vnet_buffer(b[2])->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
+        vnet_buffer(b[3])->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
+
+        if (interface_count)
+        {
+            vlib_increment_combined_counter (im->combined_sw_if_counters
+                                             + VNET_INTERFACE_COUNTER_TX,
+                                             thread_index,
+                                             adj0->rewrite_header.sw_if_index,
+                                             1,
+                                             vlib_buffer_length_in_chain (vm, b[0]));
+            vlib_increment_combined_counter (im->combined_sw_if_counters
+                                             + VNET_INTERFACE_COUNTER_TX,
+                                             thread_index,
+                                             adj1->rewrite_header.sw_if_index,
+                                             1,
+                                             vlib_buffer_length_in_chain (vm, b[1]));
+            vlib_increment_combined_counter (im->combined_sw_if_counters
+                                             + VNET_INTERFACE_COUNTER_TX,
+                                             thread_index,
+                                             adj2->rewrite_header.sw_if_index,
+                                             1,
+                                             vlib_buffer_length_in_chain (vm, b[2]));
+            vlib_increment_combined_counter (im->combined_sw_if_counters
+                                             + VNET_INTERFACE_COUNTER_TX,
+                                             thread_index,
+                                             adj3->rewrite_header.sw_if_index,
+                                             1,
+                                             vlib_buffer_length_in_chain (vm, b[3]));
+        }
+
+        if (PREDICT_FALSE(node->flags & VLIB_NODE_FLAG_TRACE))
+        {
+            if (PREDICT_FALSE(b[0]->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+                                                              b[0], sizeof (*tr));
+                tr->ai = adj_index0;
+            }
+            if (PREDICT_FALSE(b[1]->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+                                                              b[1], sizeof (*tr));
+                tr->ai = adj_index1;
+            }
+            if (PREDICT_FALSE(b[2]->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+                                                              b[2], sizeof (*tr));
+                tr->ai = adj_index2;
+            }
+            if (PREDICT_FALSE(b[3]->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+                                                              b[3], sizeof (*tr));
+                tr->ai = adj_index3;
+            }
+        }
+        n_left -= 4;
+        b += 4;
+        next += 4;
+    }
+
+    while (n_left)
+    {
+        const ip_adjacency_t * adj0;
+        const dpo_id_t *dpo0;
+        u32 adj_index0;
+
+        /* Follow the DPO on which the midchain is stacked */
+        adj_index0 = vnet_buffer(b[0])->ip.adj_index[VLIB_TX];
+        adj0 = adj_get(adj_index0);
+        dpo0 = &adj0->sub_type.midchain.next_dpo;
+        next[0] = dpo0->dpoi_next_node;
+        vnet_buffer(b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+        if (interface_count)
+        {
+            vlib_increment_combined_counter (im->combined_sw_if_counters
+                                             + VNET_INTERFACE_COUNTER_TX,
+                                             thread_index,
+                                             adj0->rewrite_header.sw_if_index,
+                                             1,
+                                             vlib_buffer_length_in_chain (vm, b[0]));
+        }
+
+        if (PREDICT_FALSE(b[0]->flags & VLIB_BUFFER_IS_TRACED))
+        {
+            adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
+                                                          b[0], sizeof (*tr));
+            tr->ai = adj_index0;
+        }
 
-       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+        n_left -= 1;
+        b += 1;
+        next += 1;
     }
 
+    vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+
     return frame->n_vectors;
 }
 
@@ -227,7 +218,7 @@ adj_midchain_tx (vlib_main_t * vm,
     return (adj_midchain_tx_inline(vm, node, frame, 1));
 }
 
-VLIB_REGISTER_NODE (adj_midchain_tx_node, static) = {
+VLIB_REGISTER_NODE (adj_midchain_tx_node) = {
     .function = adj_midchain_tx,
     .name = "adj-midchain-tx",
     .vector_size = sizeof (u32),
@@ -248,79 +239,44 @@ adj_midchain_tx_no_count (vlib_main_t * vm,
     return (adj_midchain_tx_inline(vm, node, frame, 0));
 }
 
-VLIB_REGISTER_NODE (adj_midchain_tx_no_count_node, static) = {
+VLIB_REGISTER_NODE (adj_midchain_tx_no_count_node) = {
     .function = adj_midchain_tx_no_count,
     .name = "adj-midchain-tx-no-count",
     .vector_size = sizeof (u32),
 
     .format_trace = format_adj_midchain_tx_trace,
-
-    .n_next_nodes = 1,
-    .next_nodes = {
-       [0] = "error-drop",
-    },
+    .sibling_of = "adj-midchain-tx",
 };
 
-VNET_FEATURE_INIT (adj_midchain_tx_ip4, static) = {
-    .arc_name = "ip4-output",
-    .node_name = "adj-midchain-tx",
-    .runs_before = VNET_FEATURES ("interface-output"),
-    .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_IP4],
-};
-VNET_FEATURE_INIT (adj_midchain_tx_no_count_ip4, static) = {
-    .arc_name = "ip4-output",
-    .node_name = "adj-midchain-tx-no-count",
-    .runs_before = VNET_FEATURES ("interface-output"),
-    .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_IP4],
-};
-VNET_FEATURE_INIT (adj_midchain_tx_ip6, static) = {
-    .arc_name = "ip6-output",
-    .node_name = "adj-midchain-tx",
-    .runs_before = VNET_FEATURES ("interface-output"),
-    .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_IP6],
-};
-VNET_FEATURE_INIT (adj_midchain_tx_no_count_ip6, static) = {
-    .arc_name = "ip6-output",
-    .node_name = "adj-midchain-tx-no-count",
-    .runs_before = VNET_FEATURES ("interface-output"),
-    .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_IP6],
-};
-VNET_FEATURE_INIT (adj_midchain_tx_mpls, static) = {
-    .arc_name = "mpls-output",
-    .node_name = "adj-midchain-tx",
-    .runs_before = VNET_FEATURES ("interface-output"),
-    .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_MPLS],
-};
-VNET_FEATURE_INIT (adj_midchain_tx_no_count_mpls, static) = {
-    .arc_name = "mpls-output",
-    .node_name = "adj-midchain-tx-no-count",
-    .runs_before = VNET_FEATURES ("interface-output"),
-    .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_MPLS],
-};
-VNET_FEATURE_INIT (adj_midchain_tx_ethernet, static) = {
-    .arc_name = "ethernet-output",
-    .node_name = "adj-midchain-tx",
-    .runs_before = VNET_FEATURES ("error-drop"),
-    .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_ETHERNET],
-};
-VNET_FEATURE_INIT (adj_midchain_tx_no_count_ethernet, static) = {
-    .arc_name = "ethernet-output",
-    .node_name = "adj-midchain-tx-no-count",
-    .runs_before = VNET_FEATURES ("error-drop"),
-    .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_ETHERNET],
-};
-VNET_FEATURE_INIT (adj_midchain_tx_nsh, static) = {
-    .arc_name = "nsh-output",
-    .node_name = "adj-midchain-tx",
-    .runs_before = VNET_FEATURES ("error-drop"),
-    .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_NSH],
-};
-VNET_FEATURE_INIT (adj_midchain_tx_no_count_nsh, static) = {
-    .arc_name = "nsh-output",
-    .node_name = "adj-midchain-tx-no-count",
-    .runs_before = VNET_FEATURES ("error-drop"),
-    .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_NSH],
-};
+#ifndef CLIB_MARCH_VARIANT
+
+u8 /* 1 when adjacency 'ai' is a midchain (unicast or mcast), 0 otherwise */
+adj_is_midchain (adj_index_t ai)
+{
+    ip_adjacency_t *adj;
+
+    adj = adj_get(ai);
+    /* classify on lookup_next_index; no default case, the non-midchain types are listed explicitly */
+    switch (adj->lookup_next_index)
+    {
+    case IP_LOOKUP_NEXT_MIDCHAIN:
+    case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+        return (1);
+    case IP_LOOKUP_NEXT_ARP:
+    case IP_LOOKUP_NEXT_GLEAN:
+    case IP_LOOKUP_NEXT_BCAST:
+    case IP_LOOKUP_NEXT_MCAST:
+    case IP_LOOKUP_NEXT_DROP:
+    case IP_LOOKUP_NEXT_PUNT:
+    case IP_LOOKUP_NEXT_LOCAL:
+    case IP_LOOKUP_NEXT_REWRITE:
+    case IP_LOOKUP_NEXT_ICMP_ERROR:
+    case IP_LOOKUP_N_NEXT:
+        return (0);
+    }
+    /* reached only when lookup_next_index matches none of the cases above */
+    return (0);
+}
 
 static inline u32
 adj_get_midchain_node (vnet_link_t link)
@@ -346,7 +302,7 @@ adj_get_midchain_node (vnet_link_t link)
 static u8
 adj_midchain_get_feature_arc_index_for_link_type (const ip_adjacency_t *adj)
 {
-  u8 arc = (u8) ~0;
+    u8 arc = (u8) ~0;
     switch (adj->ia_link)
     {
     case VNET_LINK_IP4:
@@ -393,49 +349,67 @@ adj_nbr_midchain_get_tx_node (ip_adjacency_t *adj)
 }
 
 /**
- * adj_nbr_midchain_update_rewrite
+ * adj_midchain_teardown
  *
- * Update the adjacency's rewrite string. A NULL string implies the
- * rewrite is reset (i.e. when ARP/ND etnry is gone).
- * NB: the adj being updated may be handling traffic in the DP.
+ * Tear down a mid-chain adj: reset its stacked DPO and restore interface-output as the feature arc end node
  */
 void
-adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
-                                adj_midchain_fixup_t fixup,
-                                adj_flags_t flags,
-                                u8 *rewrite)
+adj_midchain_teardown (ip_adjacency_t *adj)
 {
-    u32 feature_index, tx_node;
+    vlib_main_t *vm = vlib_get_main();
+
+    dpo_reset(&adj->sub_type.midchain.next_dpo);
+
+    vlib_worker_thread_barrier_sync(vm);
+    vnet_feature_modify_end_node(
+        adj_midchain_get_feature_arc_index_for_link_type (adj),
+        adj->rewrite_header.sw_if_index,
+        vlib_get_node_by_name (vlib_get_main(),
+                               (u8*) "interface-output")->index);
+    vlib_worker_thread_barrier_release(vm);
+}
+
+/**
+ * adj_midchain_setup
+ *
+ * Setup the adj as a mid-chain
+ */
+void
+adj_midchain_setup (adj_index_t adj_index,
+                    adj_midchain_fixup_t fixup,
+                    const void *data,
+                    adj_flags_t flags)
+{
+    vlib_main_t *vm = vlib_get_main();
     ip_adjacency_t *adj;
-    u8 arc_index;
+    u32 tx_node;
 
     ASSERT(ADJ_INDEX_INVALID != adj_index);
 
     adj = adj_get(adj_index);
 
-    /*
-     * one time only update. since we don't support chainging the tunnel
-     * src,dst, this is all we need.
-     */
-    ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_ARP);
-    /*
-     * tunnels can always provide a rewrite.
-     */
-    ASSERT(NULL != rewrite);
-
     adj->sub_type.midchain.fixup_func = fixup;
+    adj->sub_type.midchain.fixup_data = data;
+    adj->sub_type.midchain.fei = FIB_NODE_INDEX_INVALID;
     adj->ia_flags |= flags;
 
-    arc_index = adj_midchain_get_feature_arc_index_for_link_type (adj);
-    feature_index = (flags & ADJ_FLAG_MIDCHAIN_NO_COUNT) ?
-                    adj_midchain_tx_no_count_feature_node[adj->ia_link] :
-                    adj_midchain_tx_feature_node[adj->ia_link];
+    if (flags & ADJ_FLAG_MIDCHAIN_FIXUP_IP4O4_HDR)
+    {
+        adj->rewrite_header.flags |= VNET_REWRITE_FIXUP_IP4_O_4;
+    }
+    else
+    {
+        adj->rewrite_header.flags &= ~VNET_REWRITE_FIXUP_IP4_O_4;
+    }
 
     tx_node = adj_nbr_midchain_get_tx_node(adj);
 
-    vnet_feature_enable_disable_with_index (arc_index, feature_index,
-                                           adj->rewrite_header.sw_if_index,
-                                           1 /* enable */, 0, 0);
+    vlib_worker_thread_barrier_sync(vm);
+    vnet_feature_modify_end_node(
+        adj_midchain_get_feature_arc_index_for_link_type (adj),
+        adj->rewrite_header.sw_if_index,
+        tx_node);
+    vlib_worker_thread_barrier_release(vm);
 
     /*
      * stack the midchain on the drop so it's ready to forward in the adj-midchain-tx.
@@ -445,19 +419,102 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
      * need to get to the stacked child's node.
      */
     dpo_stack_from_node(tx_node,
-                       &adj->sub_type.midchain.next_dpo,
-                       drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
+                        &adj->sub_type.midchain.next_dpo,
+                        drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
+}
+
+/**
+ * adj_nbr_midchain_update_rewrite
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewrite is reset (i.e. when ARP/ND entry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
+                                adj_midchain_fixup_t fixup,
+                                 const void *fixup_data,
+                                adj_flags_t flags,
+                                u8 *rewrite)
+{
+    ip_adjacency_t *adj;
+
+    ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+    adj = adj_get(adj_index);
 
     /*
-     * update the rewirte with the workers paused.
+     * one time only update. since we don't support changing the tunnel
+     * src,dst, this is all we need.
+     */
+    if (adj->lookup_next_index != IP_LOOKUP_NEXT_MIDCHAIN &&
+        adj->lookup_next_index != IP_LOOKUP_NEXT_MCAST_MIDCHAIN)
+    {
+        adj_midchain_setup(adj_index, fixup, fixup_data, flags);
+    }
+
+    /*
+     * update the rewrite with the workers paused.
      */
     adj_nbr_update_rewrite_internal(adj,
                                    IP_LOOKUP_NEXT_MIDCHAIN,
                                    adj_get_midchain_node(adj->ia_link),
-                                   tx_node,
+                                   adj_nbr_midchain_get_tx_node(adj),
                                    rewrite);
 }
 
+void /* Point the midchain adj 'adj_index' at 'next_node' instead of its current next node */
+adj_nbr_midchain_update_next_node (adj_index_t adj_index,
+                                   u32 next_node)
+{
+    ip_adjacency_t *adj;
+    vlib_main_t * vm;
+
+    ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+    adj = adj_get(adj_index);
+    vm = vlib_get_main();
+    /* both updates below are made between the worker barrier sync and release */
+    vlib_worker_thread_barrier_sync(vm);
+    /* add next_node as a next of the adj's own node; the returned value becomes the rewrite's next_index */
+    adj->rewrite_header.next_index = vlib_node_add_next(vlib_get_main(),
+                                                        adj->ia_node_index,
+                                                        next_node);
+    /* and make next_node the end node of the link type's feature arc on the adj's interface */
+    vnet_feature_modify_end_node(
+        adj_midchain_get_feature_arc_index_for_link_type (adj),
+        adj->rewrite_header.sw_if_index,
+        next_node);
+
+    vlib_worker_thread_barrier_release(vm);
+}
+
+void /* Point the midchain adj 'adj_index' back at the node from adj_nbr_midchain_get_tx_node() */
+adj_nbr_midchain_reset_next_node(adj_index_t adj_index)
+{
+    ip_adjacency_t *adj;
+    vlib_main_t * vm;
+
+    ASSERT(ADJ_INDEX_INVALID != adj_index);
+
+    adj = adj_get(adj_index);
+    vm = vlib_get_main();
+    /* both updates below are made between the worker barrier sync and release */
+    vlib_worker_thread_barrier_sync(vm);
+    /* re-add the midchain tx node as a next of the adj's own node; the returned value becomes next_index */
+    adj->rewrite_header.next_index =
+        vlib_node_add_next(vlib_get_main(),
+                           adj->ia_node_index,
+                           adj_nbr_midchain_get_tx_node(adj));
+    /* and make that same tx node the end node of the link type's feature arc on the adj's interface */
+    vnet_feature_modify_end_node(
+        adj_midchain_get_feature_arc_index_for_link_type (adj),
+        adj->rewrite_header.sw_if_index,
+        adj_nbr_midchain_get_tx_node(adj));
+
+    vlib_worker_thread_barrier_release(vm);
+}
+
 /**
  * adj_nbr_midchain_unstack
  *
@@ -466,23 +523,103 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
 void
 adj_nbr_midchain_unstack (adj_index_t adj_index)
 {
+    fib_node_index_t *entry_indicies, tmp;
     ip_adjacency_t *adj;
 
     ASSERT(ADJ_INDEX_INVALID != adj_index);
+    adj = adj_get (adj_index);
 
-    adj = adj_get(adj_index);
+    /*
+     * check to see if this unstacking breaks a recursion loop
+     */
+    entry_indicies = NULL;
+    tmp = adj->sub_type.midchain.fei;
+    adj->sub_type.midchain.fei = FIB_NODE_INDEX_INVALID;
+
+    if (FIB_NODE_INDEX_INVALID != tmp)
+    {
+        fib_entry_recursive_loop_detect(tmp, &entry_indicies);
+        vec_free(entry_indicies);
+    }
 
     /*
      * stack on the drop
      */
     dpo_stack(DPO_ADJACENCY_MIDCHAIN,
-             vnet_link_to_dpo_proto(adj->ia_link),
-             &adj->sub_type.midchain.next_dpo,
-             drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
-
+              vnet_link_to_dpo_proto(adj->ia_link),
+              &adj->sub_type.midchain.next_dpo,
+              drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
     CLIB_MEMORY_BARRIER();
 }
 
+void /* Stack midchain adj 'ai' on the forwarding contributed by FIB entry 'fei' for chain type 'fct' */
+adj_nbr_midchain_stack_on_fib_entry (adj_index_t ai,
+                                     fib_node_index_t fei,
+                                     fib_forward_chain_type_t fct)
+{
+    fib_node_index_t *entry_indicies;
+    dpo_id_t tmp = DPO_INVALID;
+    ip_adjacency_t *adj;
+
+    adj = adj_get (ai);
+
+    /*
+     * record the entry on the adj first, then check whether stacking on it forms a recursion loop
+     */
+    entry_indicies = NULL;
+    adj->sub_type.midchain.fei = fei;
+
+    if (fib_entry_recursive_loop_detect(adj->sub_type.midchain.fei, &entry_indicies))
+    {
+        /*
+         * loop formed, stack on the drop for this chain type's DPO protocol.
+         */
+        dpo_copy(&tmp, drop_dpo_get(fib_forw_chain_type_to_dpo_proto(fct)));
+    }
+    else
+    {
+        fib_entry_contribute_forwarding (fei, fct, &tmp);
+
+        if ((adj->ia_flags & ADJ_FLAG_MIDCHAIN_IP_STACK) &&
+            (DPO_LOAD_BALANCE == tmp.dpoi_type))
+        {
+            /*
+             * do the hash now and stack on the choice.
+             * If the choice is an incomplete adj then we will need a poke when
+             * it becomes complete. This happens since the adj update walk propagates
+             * as far as recursive paths.
+             */
+            const dpo_id_t *choice;
+            load_balance_t *lb;
+            int hash;
+
+            lb = load_balance_get (tmp.dpoi_index);
+            /* the flow hash is computed over the adj's rewrite, read as an IPv4 or IPv6 header */
+            if (FIB_FORW_CHAIN_TYPE_UNICAST_IP4 == fct)
+            {
+                hash = ip4_compute_flow_hash ((ip4_header_t *) adj_get_rewrite (ai),
+                                              lb->lb_hash_config);
+            }
+            else if (FIB_FORW_CHAIN_TYPE_UNICAST_IP6 == fct)
+            {
+                hash = ip6_compute_flow_hash ((ip6_header_t *) adj_get_rewrite (ai),
+                                              lb->lb_hash_config);
+            }
+            else
+            {
+                hash = 0;
+                ASSERT(0);
+            }
+            /* mask the hash into a bucket index and stack on that bucket's DPO only */
+            choice = load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1);
+            dpo_copy (&tmp, choice);
+        }
+    }
+    adj_nbr_midchain_stack (ai, &tmp);
+    dpo_reset(&tmp);
+    vec_free(entry_indicies);
+}
+
 /**
  * adj_nbr_midchain_stack
  */
@@ -496,13 +633,41 @@ adj_nbr_midchain_stack (adj_index_t adj_index,
 
     adj = adj_get(adj_index);
 
-    ASSERT(IP_LOOKUP_NEXT_MIDCHAIN == adj->lookup_next_index);
+    ASSERT((IP_LOOKUP_NEXT_MIDCHAIN == adj->lookup_next_index) ||
+           (IP_LOOKUP_NEXT_MCAST_MIDCHAIN == adj->lookup_next_index));
 
     dpo_stack_from_node(adj_nbr_midchain_get_tx_node(adj),
                        &adj->sub_type.midchain.next_dpo,
                        next);
 }
 
+int /* 1 if the FIB entry this midchain is stacked on is already in *entry_indicies, else 0 */
+adj_ndr_midchain_recursive_loop_detect (adj_index_t ai,
+                                        fib_node_index_t **entry_indicies)
+{
+    fib_node_index_t *entry_index, *entries;
+    ip_adjacency_t * adj;
+    /* the vector is only read here; ADJ_FLAG_MIDCHAIN_LOOPED on the adj is set or cleared to match the result */
+    adj = adj_get(ai);
+    entries = *entry_indicies;
+
+    vec_foreach(entry_index, entries)
+    {
+        if (*entry_index == adj->sub_type.midchain.fei)
+        {
+            /*
+             * The entry this midchain links to is already in the set
+             * of visited entries, this is a loop: flag the adj as looped
+             */
+            adj->ia_flags |= ADJ_FLAG_MIDCHAIN_LOOPED;
+            return (1);
+        }
+    }
+    /* no match (including an empty vector): clear any stale looped flag */
+    adj->ia_flags &= ~ADJ_FLAG_MIDCHAIN_LOOPED;
+    return (0);
+}
+
 u8*
 format_adj_midchain (u8* s, va_list *ap)
 {
@@ -511,15 +676,25 @@ format_adj_midchain (u8* s, va_list *ap)
     ip_adjacency_t * adj = adj_get(index);
 
     s = format (s, "%U", format_vnet_link, adj->ia_link);
-    s = format (s, " via %U ",
-               format_ip46_address, &adj->sub_type.nbr.next_hop);
+    if (adj->rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)
+        s = format(s, " [features]");
+    s = format (s, " via %U",
+               format_ip46_address, &adj->sub_type.nbr.next_hop,
+               adj_proto_to_46(adj->ia_nh_proto));
     s = format (s, " %U",
                format_vnet_rewrite,
                &adj->rewrite_header, sizeof (adj->rewrite_data), indent);
-    s = format (s, "\n%Ustacked-on:\n%U%U",
-               format_white_space, indent,
-               format_white_space, indent+2,
-               format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2);
+    s = format (s, "\n%Ustacked-on",
+                format_white_space, indent);
+
+    if (FIB_NODE_INDEX_INVALID != adj->sub_type.midchain.fei)
+    {
+        s = format (s, " entry:%d", adj->sub_type.midchain.fei);
+
+    }
+    s = format (s, ":\n%U%U",
+                format_white_space, indent+2,
+                format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2);
 
     return (s);
 }
@@ -539,6 +714,7 @@ const static dpo_vft_t adj_midchain_dpo_vft = {
     .dv_lock = adj_dpo_lock,
     .dv_unlock = adj_dpo_unlock,
     .dv_format = format_adj_midchain,
+    .dv_get_urpf = adj_dpo_get_urpf,
 };
 
 /**
@@ -588,3 +764,5 @@ adj_midchain_module_init (void)
 {
     dpo_register(DPO_ADJACENCY_MIDCHAIN, &adj_midchain_dpo_vft, midchain_nodes);
 }
+
+#endif