MPLS Nodes Dual Loop 05/4205/2
author: Neale Ranns <[email protected]>
Sat, 10 Dec 2016 21:08:09 +0000 (21:08 +0000)
committer: Dave Barach <[email protected]>
Sun, 11 Dec 2016 14:14:30 +0000 (14:14 +0000)
Change-Id: Ic54d4cb9dec8e91446b9b4d2b40ed69a14bd4355
Signed-off-by: Neale Ranns <[email protected]>
vnet/vnet/dpo/mpls_label_dpo.c
vnet/vnet/dpo/mpls_label_dpo.h
vnet/vnet/mpls/mpls_output.c
vnet/vnet/mpls/node.c

index 606b7ba..bbdc966 100644 (file)
@@ -54,6 +54,7 @@ mpls_label_dpo_create (mpls_label_t *label_stack,
 
     mld = mpls_label_dpo_alloc();
     mld->mld_n_labels = vec_len(label_stack);
+    mld->mld_n_hdr_bytes = mld->mld_n_labels * sizeof(mld->mld_hdr[0]);
     mld->mld_payload_proto = payload_proto;
 
     /*
@@ -179,6 +180,163 @@ mpls_label_imposition_inline (vlib_main_t * vm,
 
         vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
 
+        while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+            mpls_unicast_header_t *hdr0, *hdr1;
+            mpls_label_dpo_t *mld0, *mld1;
+            u32 bi0, mldi0, bi1, mldi1;
+            vlib_buffer_t * b0, *b1;
+            u32 next0, next1;
+            u8 ttl0, ttl1;
+
+            bi0 = to_next[0] = from[0];
+            bi1 = to_next[1] = from[1];
+
+            /* Prefetch next iteration. */
+            {
+                vlib_buffer_t * p2, * p3;
+
+                p2 = vlib_get_buffer (vm, from[2]);
+                p3 = vlib_get_buffer (vm, from[3]);
+
+                vlib_prefetch_buffer_header (p2, STORE);
+                vlib_prefetch_buffer_header (p3, STORE);
+
+                CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE);
+                CLIB_PREFETCH (p3->data, sizeof (hdr0[0]), STORE);
+            }
+
+            from += 2;
+            to_next += 2;
+            n_left_from -= 2;
+            n_left_to_next -= 2;
+
+            b0 = vlib_get_buffer (vm, bi0);
+            b1 = vlib_get_buffer (vm, bi1);
+
+            /* dst lookup was done by ip4 lookup */
+            mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+            mldi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+            mld0 = mpls_label_dpo_get(mldi0);
+            mld1 = mpls_label_dpo_get(mldi1);
+
+            if (payload_is_ip4)
+            {
+                /*
+                 * decrement the TTL on ingress to the LSP
+                 */
+                ip4_header_t * ip0 = vlib_buffer_get_current(b0);
+                ip4_header_t * ip1 = vlib_buffer_get_current(b1);
+                u32 checksum0;
+                u32 checksum1;
+
+                checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
+                checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
+
+                checksum0 += checksum0 >= 0xffff;
+                checksum1 += checksum1 >= 0xffff;
+
+                ip0->checksum = checksum0;
+                ip1->checksum = checksum1;
+
+                ip0->ttl -= 1;
+                ip1->ttl -= 1;
+
+                ttl1 = ip1->ttl;
+                ttl0 = ip0->ttl;
+            }
+            else if (payload_is_ip6)
+            {
+                /*
+                 * decrement the TTL on ingress to the LSP
+                 */
+                ip6_header_t * ip0 = vlib_buffer_get_current(b0);
+                ip6_header_t * ip1 = vlib_buffer_get_current(b1);
+
+
+                ip0->hop_limit -= 1;
+                ip1->hop_limit -= 1;
+
+                ttl0 = ip0->hop_limit;
+                ttl1 = ip1->hop_limit;
+            }
+            else
+            {
+                /*
+                 * else, the packet to be encapped is an MPLS packet
+                 */
+                if (PREDICT_TRUE(vnet_buffer(b0)->mpls.first))
+                {
+                    /*
+                     * The first label to be imposed on the packet. this is a label swap.
+                     * in which case we stashed the TTL and EXP bits in the
+                     * packet in the lookup node
+                     */
+                    ASSERT(0 != vnet_buffer (b0)->mpls.ttl);
+
+                    ttl0 = vnet_buffer(b0)->mpls.ttl - 1;
+                }
+                else
+                {
+                    /*
+                     * not the first label. implying we are recursing down a chain of
+                     * output labels.
+                     * Each layer is considered a new LSP - hence the TTL is reset.
+                     */
+                    ttl0 = 255;
+                }
+                if (PREDICT_TRUE(vnet_buffer(b1)->mpls.first))
+                {
+                    ASSERT(0 != vnet_buffer (b1)->mpls.ttl);
+                    ttl1 = vnet_buffer(b1)->mpls.ttl - 1;
+                }
+                else
+                {
+                    ttl1 = 255;
+                }
+            }
+            vnet_buffer(b0)->mpls.first = 0;
+            vnet_buffer(b1)->mpls.first = 0;
+
+            /* Paint the MPLS header */
+            vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes));
+            vlib_buffer_advance(b1, -(mld1->mld_n_hdr_bytes));
+
+            hdr0 = vlib_buffer_get_current(b0);
+            hdr1 = vlib_buffer_get_current(b1);
+
+            clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes);
+            clib_memcpy(hdr1, mld1->mld_hdr, mld1->mld_n_hdr_bytes);
+
+            /* fixup the TTL for the inner most label */
+            hdr0 = hdr0 + (mld0->mld_n_labels - 1);
+            hdr1 = hdr1 + (mld1->mld_n_labels - 1);
+            ((char*)hdr0)[3] = ttl0;
+            ((char*)hdr1)[3] = ttl1;
+
+            next0 = mld0->mld_dpo.dpoi_next_node;
+            next1 = mld1->mld_dpo.dpoi_next_node;
+            vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index;
+            vnet_buffer(b1)->ip.adj_index[VLIB_TX] = mld1->mld_dpo.dpoi_index;
+
+            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                mpls_label_imposition_trace_t *tr =
+                    vlib_add_trace (vm, node, b0, sizeof (*tr));
+                tr->hdr = *hdr0;
+            }
+            if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                mpls_label_imposition_trace_t *tr =
+                    vlib_add_trace (vm, node, b1, sizeof (*tr));
+                tr->hdr = *hdr1;
+            }
+
+            vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+                                            n_left_to_next,
+                                            bi0, bi1, next0, next1);
+        }
+
         while (n_left_from > 0 && n_left_to_next > 0)
         {
             mpls_unicast_header_t *hdr0;
@@ -255,11 +413,9 @@ mpls_label_imposition_inline (vlib_main_t * vm,
             vnet_buffer(b0)->mpls.first = 0;
 
             /* Paint the MPLS header */
-            vlib_buffer_advance(b0, -(sizeof(*hdr0) * mld0->mld_n_labels));
+            vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes));
             hdr0 = vlib_buffer_get_current(b0);
-
-            clib_memcpy(hdr0, mld0->mld_hdr,
-                        sizeof(*hdr0) * mld0->mld_n_labels);
+            clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes);
 
             /* fixup the TTL for the inner most label */
             hdr0 = hdr0 + (mld0->mld_n_labels - 1);
@@ -268,7 +424,7 @@ mpls_label_imposition_inline (vlib_main_t * vm,
             next0 = mld0->mld_dpo.dpoi_next_node;
             vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index;
 
-            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) 
+            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
             {
                 mpls_label_imposition_trace_t *tr =
                     vlib_add_trace (vm, node, b0, sizeof (*tr));
index 6580c47..89bcb09 100644 (file)
@@ -45,6 +45,11 @@ typedef struct mpls_label_dpo_t
      */
     u16 mld_n_labels;
 
+    /**
+     * Cached amount of header bytes to paint
+     */
+    u16 mld_n_hdr_bytes;
+
     /**
      * Number of locks/users of the label
      */
index 91514d6..8292a0c 100644 (file)
@@ -58,6 +58,7 @@ mpls_output_inline (vlib_main_t * vm,
 {
   u32 n_left_from, next_index, * from, * to_next, cpu_index;
   vlib_node_runtime_t * error_node;
+  u32 n_left_to_next;
 
   cpu_index = os_get_cpu_number();
   error_node = vlib_node_get_runtime (vm, mpls_output_node.index);
@@ -67,11 +68,146 @@ mpls_output_inline (vlib_main_t * vm,
 
   while (n_left_from > 0)
     {
-      u32 n_left_to_next;
-
       vlib_get_next_frame (vm, node, next_index,
                            to_next, n_left_to_next);
 
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          ip_adjacency_t * adj0;
+          mpls_unicast_header_t *hdr0;
+          vlib_buffer_t * p0;
+          u32 pi0, rw_len0, adj_index0, next0, error0;
+
+          ip_adjacency_t * adj1;
+          mpls_unicast_header_t *hdr1;
+          vlib_buffer_t * p1;
+          u32 pi1, rw_len1, adj_index1, next1, error1;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t * p2, * p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, STORE);
+            vlib_prefetch_buffer_header (p3, STORE);
+
+            CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE);
+            CLIB_PREFETCH (p3->data, sizeof (hdr1[0]), STORE);
+          }
+
+          pi0 = to_next[0] = from[0];
+          pi1 = to_next[1] = from[1];
+
+          from += 2;
+          n_left_from -= 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+
+          p0 = vlib_get_buffer (vm, pi0);
+          p1 = vlib_get_buffer (vm, pi1);
+
+          adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+          adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+          /* We should never rewrite a pkt using the MISS adjacency */
+          ASSERT(adj_index0);
+          ASSERT(adj_index1);
+
+          adj0 = adj_get(adj_index0);
+          adj1 = adj_get(adj_index1);
+          hdr0 = vlib_buffer_get_current (p0);
+          hdr1 = vlib_buffer_get_current (p1);
+
+          /* Guess we are only writing on simple Ethernet header. */
+          vnet_rewrite_two_headers (adj0[0], adj1[0], hdr0, hdr1,
+                                   sizeof (ethernet_header_t));
+
+          /* Update packet buffer attributes/set output interface. */
+          rw_len0 = adj0[0].rewrite_header.data_bytes;
+          rw_len1 = adj1[0].rewrite_header.data_bytes;
+
+          if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
+              vlib_increment_combined_counter
+                  (&adjacency_counters,
+                   cpu_index, adj_index0,
+                   /* packet increment */ 0,
+                   /* byte increment */ rw_len0-sizeof(ethernet_header_t));
+          if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
+              vlib_increment_combined_counter
+                  (&adjacency_counters,
+                   cpu_index, adj_index1,
+                   /* packet increment */ 0,
+                   /* byte increment */ rw_len1-sizeof(ethernet_header_t));
+
+          /* Check MTU of outgoing interface. */
+          if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <=
+                           adj0[0].rewrite_header.max_l3_packet_bytes))
+            {
+              p0->current_data -= rw_len0;
+              p0->current_length += rw_len0;
+
+              vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+                  adj0[0].rewrite_header.sw_if_index;
+              next0 = adj0[0].rewrite_header.next_index;
+              error0 = IP4_ERROR_NONE;
+
+              if (is_midchain)
+                {
+                  adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
+                }
+            }
+          else
+            {
+              error0 = IP4_ERROR_MTU_EXCEEDED;
+              next0 = MPLS_OUTPUT_NEXT_DROP;
+            }
+          if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p1) <=
+                           adj1[0].rewrite_header.max_l3_packet_bytes))
+            {
+              p1->current_data -= rw_len1;
+              p1->current_length += rw_len1;
+
+              vnet_buffer (p1)->sw_if_index[VLIB_TX] =
+                  adj1[0].rewrite_header.sw_if_index;
+              next1 = adj1[0].rewrite_header.next_index;
+              error1 = IP4_ERROR_NONE;
+
+              if (is_midchain)
+                {
+                  adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
+                }
+            }
+          else
+            {
+              error1 = IP4_ERROR_MTU_EXCEEDED;
+              next1 = MPLS_OUTPUT_NEXT_DROP;
+            }
+
+          p0->error = error_node->errors[error0];
+          p1->error = error_node->errors[error1];
+
+          if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              mpls_output_trace_t *tr = vlib_add_trace (vm, node,
+                                                        p0, sizeof (*tr));
+              tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
+              tr->flow_hash = vnet_buffer(p0)->ip.flow_hash;
+            }
+          if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              mpls_output_trace_t *tr = vlib_add_trace (vm, node,
+                                                        p1, sizeof (*tr));
+              tr->adj_index = vnet_buffer(p1)->ip.adj_index[VLIB_TX];
+              tr->flow_hash = vnet_buffer(p1)->ip.flow_hash;
+            }
+
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           pi0, pi1, next0, next1);
+        }
+
       while (n_left_from > 0 && n_left_to_next > 0)
         {
          ip_adjacency_t * adj0;
@@ -106,16 +242,8 @@ mpls_output_inline (vlib_main_t * vm,
                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
           
           /* Check MTU of outgoing interface. */
-          error0 = (vlib_buffer_length_in_chain (vm, p0) 
-                    > adj0[0].rewrite_header.max_l3_packet_bytes
-                    ? IP4_ERROR_MTU_EXCEEDED
-                    : IP4_ERROR_NONE);
-
-         p0->error = error_node->errors[error0];
-
-          /* Don't adjust the buffer for ttl issue; icmp-error node wants
-           * to see the IP headerr */
-          if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
+          if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <=
+                           adj0[0].rewrite_header.max_l3_packet_bytes))
             {
               p0->current_data -= rw_len0;
               p0->current_length += rw_len0;
@@ -123,6 +251,7 @@ mpls_output_inline (vlib_main_t * vm,
               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
                   adj0[0].rewrite_header.sw_if_index;
               next0 = adj0[0].rewrite_header.next_index;
+              error0 = IP4_ERROR_NONE;
 
              if (is_midchain)
                {
@@ -131,8 +260,10 @@ mpls_output_inline (vlib_main_t * vm,
             }
           else
             {
+              error0 = IP4_ERROR_MTU_EXCEEDED;
               next0 = MPLS_OUTPUT_NEXT_DROP;
             }
+         p0->error = error_node->errors[error0];
 
          from += 1;
          n_left_from -= 1;
index 2b0461f..1810091 100644 (file)
@@ -90,6 +90,93 @@ mpls_input_inline (vlib_main_t * vm,
       vlib_get_next_frame (vm, node, next_index,
                           to_next, n_left_to_next);
 
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          u32 label0, bi0, next0, sw_if_index0;
+          u32 label1, bi1, next1, sw_if_index1;
+          mpls_unicast_header_t *h0, *h1;
+          vlib_buffer_t *b0, *b1;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t * p2, * p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, STORE);
+            vlib_prefetch_buffer_header (p3, STORE);
+
+            CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE);
+            CLIB_PREFETCH (p3->data, sizeof (h1[0]), STORE);
+          }
+
+
+          bi0 = to_next[0] = from[0];
+          bi1 = to_next[1] = from[1];
+
+          from += 2;
+          to_next += 2;
+          n_left_from -= 2;
+          n_left_to_next -= 2;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          b1 = vlib_get_buffer (vm, bi1);
+
+          h0 = vlib_buffer_get_current (b0);
+          h1 = vlib_buffer_get_current (b1);
+
+          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+          sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+          label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl);
+          label1 = clib_net_to_host_u32 (h1->label_exp_s_ttl);
+
+          /* TTL expired? */
+          if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label0) == 0))
+           {
+              next0 = MPLS_INPUT_NEXT_DROP;
+              b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
+            }
+          else
+            {
+              next0 = MPLS_INPUT_NEXT_LOOKUP;
+              vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0);
+              vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+            }
+
+          if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label1) == 0))
+           {
+              next1 = MPLS_INPUT_NEXT_DROP;
+              b1->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
+            }
+          else
+            {
+              next1 = MPLS_INPUT_NEXT_LOOKUP;
+              vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index1, &next1, b1);
+              vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1);
+            }
+
+          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              mpls_input_trace_t *tr = vlib_add_trace (vm, node,
+                                                       b0, sizeof (*tr));
+              tr->next_index = next0;
+              tr->label_host_byte_order = label0;
+            }
+          if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              mpls_input_trace_t *tr = vlib_add_trace (vm, node,
+                                                       b1, sizeof (*tr));
+              tr->next_index = next1;
+              tr->label_host_byte_order = label1;
+            }
+
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, bi1, next0, next1);
+        }
+
       while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;