ip: add support for buffer offload metadata in ip midchain
[vpp.git] / src / vnet / adj / adj_midchain.c
index 050c25a..8e6a940 100644 (file)
 #include <vnet/adj/adj_l2.h>
 #include <vnet/adj/adj_nsh.h>
 #include <vnet/adj/adj_midchain.h>
-#include <vnet/ethernet/arp_packet.h>
 #include <vnet/dpo/drop_dpo.h>
 #include <vnet/dpo/load_balance.h>
 #include <vnet/fib/fib_walk.h>
 #include <vnet/fib/fib_entry.h>
-
-/**
- * @brief Trace data for packets traversing the midchain tx node
- */
-typedef struct adj_midchain_tx_trace_t_
-{
-    /**
-     * @brief the midchain adj we are traversing
-     */
-    adj_index_t ai;
-} adj_midchain_tx_trace_t;
-
-always_inline uword
-adj_midchain_tx_inline (vlib_main_t * vm,
-                       vlib_node_runtime_t * node,
-                       vlib_frame_t * frame,
-                       int interface_count)
-{
-    vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
-    u16 nexts[VLIB_FRAME_SIZE], *next;
-    u32 * from, n_left, thread_index;
-    vnet_main_t *vnm = vnet_get_main ();
-    vnet_interface_main_t *im = &vnm->interface_main;
-
-    thread_index = vm->thread_index;
-    n_left = frame->n_vectors;
-    from = vlib_frame_vector_args (frame);
-
-    vlib_get_buffers (vm, from, bufs, n_left);
-
-    next = nexts;
-    b = bufs;
-
-    while (n_left > 8)
-    {
-        u32 adj_index0, adj_index1, adj_index2, adj_index3;
-        const ip_adjacency_t *adj0, *adj1, *adj2, *adj3;
-        const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
-
-        /* Prefetch next iteration. */
-        {
-            vlib_prefetch_buffer_header (b[4], LOAD);
-            vlib_prefetch_buffer_header (b[5], LOAD);
-            vlib_prefetch_buffer_header (b[6], LOAD);
-            vlib_prefetch_buffer_header (b[7], LOAD);
-        }
-
-        /* Follow the DPO on which the midchain is stacked */
-        adj_index0 = vnet_buffer(b[0])->ip.adj_index[VLIB_TX];
-        adj_index1 = vnet_buffer(b[1])->ip.adj_index[VLIB_TX];
-        adj_index2 = vnet_buffer(b[2])->ip.adj_index[VLIB_TX];
-        adj_index3 = vnet_buffer(b[3])->ip.adj_index[VLIB_TX];
-
-        adj0 = adj_get(adj_index0);
-        adj1 = adj_get(adj_index1);
-        adj2 = adj_get(adj_index2);
-        adj3 = adj_get(adj_index3);
-
-        dpo0 = &adj0->sub_type.midchain.next_dpo;
-        dpo1 = &adj1->sub_type.midchain.next_dpo;
-        dpo2 = &adj2->sub_type.midchain.next_dpo;
-        dpo3 = &adj3->sub_type.midchain.next_dpo;
-
-        next[0] = dpo0->dpoi_next_node;
-        next[1] = dpo1->dpoi_next_node;
-        next[2] = dpo2->dpoi_next_node;
-        next[3] = dpo3->dpoi_next_node;
-
-        vnet_buffer(b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
-        vnet_buffer(b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
-        vnet_buffer(b[2])->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
-        vnet_buffer(b[3])->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
-
-        if (interface_count)
-        {
-            vlib_increment_combined_counter (im->combined_sw_if_counters
-                                             + VNET_INTERFACE_COUNTER_TX,
-                                             thread_index,
-                                             adj0->rewrite_header.sw_if_index,
-                                             1,
-                                             vlib_buffer_length_in_chain (vm, b[0]));
-            vlib_increment_combined_counter (im->combined_sw_if_counters
-                                             + VNET_INTERFACE_COUNTER_TX,
-                                             thread_index,
-                                             adj1->rewrite_header.sw_if_index,
-                                             1,
-                                             vlib_buffer_length_in_chain (vm, b[1]));
-            vlib_increment_combined_counter (im->combined_sw_if_counters
-                                             + VNET_INTERFACE_COUNTER_TX,
-                                             thread_index,
-                                             adj2->rewrite_header.sw_if_index,
-                                             1,
-                                             vlib_buffer_length_in_chain (vm, b[2]));
-            vlib_increment_combined_counter (im->combined_sw_if_counters
-                                             + VNET_INTERFACE_COUNTER_TX,
-                                             thread_index,
-                                             adj3->rewrite_header.sw_if_index,
-                                             1,
-                                             vlib_buffer_length_in_chain (vm, b[3]));
-        }
-
-        if (PREDICT_FALSE(node->flags & VLIB_NODE_FLAG_TRACE))
-        {
-            if (PREDICT_FALSE(b[0]->flags & VLIB_BUFFER_IS_TRACED))
-            {
-                adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
-                                                              b[0], sizeof (*tr));
-                tr->ai = adj_index0;
-            }
-            if (PREDICT_FALSE(b[1]->flags & VLIB_BUFFER_IS_TRACED))
-            {
-                adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
-                                                              b[1], sizeof (*tr));
-                tr->ai = adj_index1;
-            }
-            if (PREDICT_FALSE(b[2]->flags & VLIB_BUFFER_IS_TRACED))
-            {
-                adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
-                                                              b[2], sizeof (*tr));
-                tr->ai = adj_index2;
-            }
-            if (PREDICT_FALSE(b[3]->flags & VLIB_BUFFER_IS_TRACED))
-            {
-                adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
-                                                              b[3], sizeof (*tr));
-                tr->ai = adj_index3;
-            }
-        }
-        n_left -= 4;
-        b += 4;
-        next += 4;
-    }
-
-    while (n_left)
-    {
-        const ip_adjacency_t * adj0;
-        const dpo_id_t *dpo0;
-        u32 adj_index0;
-
-        /* Follow the DPO on which the midchain is stacked */
-        adj_index0 = vnet_buffer(b[0])->ip.adj_index[VLIB_TX];
-        adj0 = adj_get(adj_index0);
-        dpo0 = &adj0->sub_type.midchain.next_dpo;
-        next[0] = dpo0->dpoi_next_node;
-        vnet_buffer(b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
-
-        if (interface_count)
-        {
-            vlib_increment_combined_counter (im->combined_sw_if_counters
-                                             + VNET_INTERFACE_COUNTER_TX,
-                                             thread_index,
-                                             adj0->rewrite_header.sw_if_index,
-                                             1,
-                                             vlib_buffer_length_in_chain (vm, b[0]));
-        }
-
-        if (PREDICT_FALSE(b[0]->flags & VLIB_BUFFER_IS_TRACED))
-        {
-            adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node,
-                                                          b[0], sizeof (*tr));
-            tr->ai = adj_index0;
-        }
-
-        n_left -= 1;
-        b += 1;
-        next += 1;
-    }
-
-    vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
-
-    return frame->n_vectors;
-}
-
-static u8 *
-format_adj_midchain_tx_trace (u8 * s, va_list * args)
-{
-    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
-    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-    adj_midchain_tx_trace_t *tr = va_arg (*args, adj_midchain_tx_trace_t*);
-
-    s = format(s, "adj-midchain:[%d]:%U", tr->ai,
-              format_ip_adjacency, tr->ai,
-              FORMAT_IP_ADJACENCY_NONE);
-
-    return (s);
-}
-
-static uword
-adj_midchain_tx (vlib_main_t * vm,
-                vlib_node_runtime_t * node,
-                vlib_frame_t * frame)
-{
-    return (adj_midchain_tx_inline(vm, node, frame, 1));
-}
-
-VLIB_REGISTER_NODE (adj_midchain_tx_node) = {
-    .function = adj_midchain_tx,
-    .name = "adj-midchain-tx",
-    .vector_size = sizeof (u32),
-
-    .format_trace = format_adj_midchain_tx_trace,
-
-    .n_next_nodes = 1,
-    .next_nodes = {
-       [0] = "error-drop",
-    },
-};
-
-static uword
-adj_midchain_tx_no_count (vlib_main_t * vm,
-                         vlib_node_runtime_t * node,
-                         vlib_frame_t * frame)
-{
-    return (adj_midchain_tx_inline(vm, node, frame, 0));
-}
-
-VLIB_REGISTER_NODE (adj_midchain_tx_no_count_node) = {
-    .function = adj_midchain_tx_no_count,
-    .name = "adj-midchain-tx-no-count",
-    .vector_size = sizeof (u32),
-
-    .format_trace = format_adj_midchain_tx_trace,
-    .sibling_of = "adj-midchain-tx",
-};
-
-#ifndef CLIB_MARCH_VARIANT
+#include <vnet/ip/ip4_inlines.h>
+#include <vnet/ip/ip6_inlines.h>
 
 u8
 adj_is_midchain (adj_index_t ai)
@@ -300,52 +75,37 @@ adj_get_midchain_node (vnet_link_t link)
 }
 
 static u8
-adj_midchain_get_feature_arc_index_for_link_type (const ip_adjacency_t *adj)
+adj_midchain_get_feature_arc_index (const ip_adjacency_t *adj)
 {
-    u8 arc = (u8) ~0;
     switch (adj->ia_link)
     {
     case VNET_LINK_IP4:
-       {
-           arc = ip4_main.lookup_main.output_feature_arc_index;
-           break;
-       }
+        return ip4_main.lookup_main.output_feature_arc_index;
     case VNET_LINK_IP6:
-       {
-           arc = ip6_main.lookup_main.output_feature_arc_index;
-           break;
-       }
+        return ip6_main.lookup_main.output_feature_arc_index;
     case VNET_LINK_MPLS:
-       {
-           arc = mpls_main.output_feature_arc_index;
-           break;
-       }
+        return mpls_main.output_feature_arc_index;
     case VNET_LINK_ETHERNET:
-       {
-           arc = ethernet_main.output_feature_arc_index;
-           break;
-       }
+        return ethernet_main.output_feature_arc_index;
     case VNET_LINK_NSH:
-        {
-          arc = nsh_main_dummy.output_feature_arc_index;
-          break;
-        }
     case VNET_LINK_ARP:
-       ASSERT(0);
        break;
     }
-
-    ASSERT (arc != (u8) ~0);
-
-    return (arc);
+    ASSERT (0);
+    return (0);
 }
 
 static u32
 adj_nbr_midchain_get_tx_node (ip_adjacency_t *adj)
 {
-    return ((adj->ia_flags & ADJ_FLAG_MIDCHAIN_NO_COUNT) ?
-            adj_midchain_tx_no_count_node.index :
-            adj_midchain_tx_node.index);
+    return (adj_midchain_tx.index);
+}
+
+static u32
+adj_nbr_midchain_get_next_node (ip_adjacency_t *adj)
+{
+    return (vnet_feature_get_end_node(adj_midchain_get_feature_arc_index(adj),
+                                      adj->rewrite_header.sw_if_index));
 }
 
 /**
@@ -356,17 +116,7 @@ adj_nbr_midchain_get_tx_node (ip_adjacency_t *adj)
 void
 adj_midchain_teardown (ip_adjacency_t *adj)
 {
-    vlib_main_t *vm = vlib_get_main();
-
     dpo_reset(&adj->sub_type.midchain.next_dpo);
-
-    vlib_worker_thread_barrier_sync(vm);
-    adj->ia_cfg_index = vnet_feature_modify_end_node(
-        adj_midchain_get_feature_arc_index_for_link_type (adj),
-        adj->rewrite_header.sw_if_index,
-        vlib_get_node_by_name (vlib_get_main(),
-                               (u8*) "interface-output")->index);
-    vlib_worker_thread_barrier_release(vm);
 }
 
 /**
@@ -380,9 +130,7 @@ adj_midchain_setup (adj_index_t adj_index,
                     const void *data,
                     adj_flags_t flags)
 {
-    vlib_main_t *vm = vlib_get_main();
     ip_adjacency_t *adj;
-    u32 tx_node;
 
     ASSERT(ADJ_INDEX_INVALID != adj_index);
 
@@ -401,15 +149,10 @@ adj_midchain_setup (adj_index_t adj_index,
     {
         adj->rewrite_header.flags &= ~VNET_REWRITE_FIXUP_IP4_O_4;
     }
-
-    tx_node = adj_nbr_midchain_get_tx_node(adj);
-
-    vlib_worker_thread_barrier_sync(vm);
-    adj->ia_cfg_index = vnet_feature_modify_end_node(
-        adj_midchain_get_feature_arc_index_for_link_type (adj),
-        adj->rewrite_header.sw_if_index,
-        tx_node);
-    vlib_worker_thread_barrier_release(vm);
+    if (!(flags & ADJ_FLAG_MIDCHAIN_FIXUP_FLOW_HASH))
+    {
+        adj->rewrite_header.flags &= ~VNET_REWRITE_FIXUP_FLOW_HASH;
+    }
 
     /*
      * stack the midchain on the drop so it's ready to forward in the adj-midchain-tx.
@@ -418,7 +161,7 @@ adj_midchain_setup (adj_index_t adj_index,
      * node are any output features, then the midchain-tx.  from there we
      * need to get to the stacked child's node.
      */
-    dpo_stack_from_node(tx_node,
+    dpo_stack_from_node(adj_nbr_midchain_get_tx_node(adj),
                         &adj->sub_type.midchain.next_dpo,
                         drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
 }
@@ -459,7 +202,7 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
     adj_nbr_update_rewrite_internal(adj,
                                    IP_LOOKUP_NEXT_MIDCHAIN,
                                    adj_get_midchain_node(adj->ia_link),
-                                   adj_nbr_midchain_get_tx_node(adj),
+                                   adj_nbr_midchain_get_next_node(adj),
                                    rewrite);
 }
 
@@ -481,11 +224,6 @@ adj_nbr_midchain_update_next_node (adj_index_t adj_index,
                                                         adj->ia_node_index,
                                                         next_node);
 
-    adj->ia_cfg_index = vnet_feature_modify_end_node(
-        adj_midchain_get_feature_arc_index_for_link_type (adj),
-        adj->rewrite_header.sw_if_index,
-        next_node);
-
     vlib_worker_thread_barrier_release(vm);
 }
 
@@ -505,12 +243,7 @@ adj_nbr_midchain_reset_next_node (adj_index_t adj_index)
     adj->rewrite_header.next_index =
         vlib_node_add_next(vlib_get_main(),
                            adj->ia_node_index,
-                           adj_nbr_midchain_get_tx_node(adj));
-
-    adj->ia_cfg_index = vnet_feature_modify_end_node(
-        adj_midchain_get_feature_arc_index_for_link_type (adj),
-        adj->rewrite_header.sw_if_index,
-        adj_nbr_midchain_get_tx_node(adj));
+                           adj_nbr_midchain_get_next_node(adj));
 
     vlib_worker_thread_barrier_release(vm);
 }
@@ -580,39 +313,62 @@ adj_nbr_midchain_stack_on_fib_entry (adj_index_t ai,
     {
         fib_entry_contribute_forwarding (fei, fct, &tmp);
 
-        if ((adj->ia_flags & ADJ_FLAG_MIDCHAIN_IP_STACK) &&
-            (DPO_LOAD_BALANCE == tmp.dpoi_type))
+        if (DPO_LOAD_BALANCE == tmp.dpoi_type)
         {
-            /*
-             * do that hash now and stack on the choice.
-             * If the choice is an incomplete adj then we will need a poke when
-             * it becomes complete. This happens since the adj update walk propagates
-             * as far a recursive paths.
-             */
-            const dpo_id_t *choice;
             load_balance_t *lb;
-            int hash;
 
             lb = load_balance_get (tmp.dpoi_index);
 
-            if (FIB_FORW_CHAIN_TYPE_UNICAST_IP4 == fct)
+            if ((adj->ia_flags & ADJ_FLAG_MIDCHAIN_IP_STACK) ||
+                lb->lb_n_buckets == 1)
             {
-                hash = ip4_compute_flow_hash ((ip4_header_t *) adj_get_rewrite (ai),
-                                              lb->lb_hash_config);
+                /*
+                 * do that hash now and stack on the choice.
+                 * If the choice is an incomplete adj then we will need a poke when
+                 * it becomes complete. This happens since the adj update walk propagates
+                 * as far as recursive paths.
+                 */
+                const dpo_id_t *choice;
+                int hash;
+
+                if (FIB_FORW_CHAIN_TYPE_UNICAST_IP4 == fct)
+                {
+                    hash = ip4_compute_flow_hash ((ip4_header_t *) adj_get_rewrite (ai),
+                                                  lb->lb_hash_config);
+                }
+                else if (FIB_FORW_CHAIN_TYPE_UNICAST_IP6 == fct)
+                {
+                    hash = ip6_compute_flow_hash ((ip6_header_t *) adj_get_rewrite (ai),
+                                                  lb->lb_hash_config);
+                }
+                else
+                {
+                    hash = 0;
+                    ASSERT(0);
+                }
+
+                choice = load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1);
+                dpo_copy (&tmp, choice);
             }
-            else if (FIB_FORW_CHAIN_TYPE_UNICAST_IP6 == fct)
+            else if (lb->lb_n_buckets > 1)
             {
-                hash = ip6_compute_flow_hash ((ip6_header_t *) adj_get_rewrite (ai),
-                                              lb->lb_hash_config);
+                /*
+                 * the client has chosen not to use the stacking to select a
+                 * bucket, and there is more than one bucket. there's no
+                 * value in using the midchain's fixed rewrite string to select
+                 * the path, so force a flow hash on the inner.
+                 */
+                adj->rewrite_header.flags |= VNET_REWRITE_FIXUP_FLOW_HASH;
             }
-            else
+
+            if (adj->ia_flags & ADJ_FLAG_MIDCHAIN_FIXUP_FLOW_HASH)
             {
-                hash = 0;
-                ASSERT(0);
+                /*
+                 * The client, for reasons unbeknownst to adj, wants to force
+                 * a flow hash on the inner, we will oblige.
+                 */
+                adj->rewrite_header.flags |= VNET_REWRITE_FIXUP_FLOW_HASH;
             }
-
-            choice = load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1);
-            dpo_copy (&tmp, choice);
         }
     }
     adj_nbr_midchain_stack (ai, &tmp);
@@ -715,6 +471,7 @@ const static dpo_vft_t adj_midchain_dpo_vft = {
     .dv_unlock = adj_dpo_unlock,
     .dv_format = format_adj_midchain,
     .dv_get_urpf = adj_dpo_get_urpf,
+    .dv_get_mtu = adj_dpo_get_mtu,
 };
 
 /**
@@ -764,5 +521,3 @@ adj_midchain_module_init (void)
 {
     dpo_register(DPO_ADJACENCY_MIDCHAIN, &adj_midchain_dpo_vft, midchain_nodes);
 }
-
-#endif