mpls: support fragmentation of mpls output packet 53/22553/2
author Rajesh Goel <rajegoel@cisco.com>
Sun, 6 Oct 2019 07:47:36 +0000 (13:17 +0530)
committer Ole Trøan <otroan@employees.org>
Wed, 9 Oct 2019 09:20:51 +0000 (09:20 +0000)
Type: fix

Signed-off-by: Rajesh Goel <rajegoel@cisco.com>
Change-Id: Ie4372c5cf58ab215cdec5ce56f8a994daaba2844

src/vnet/buffer.h
src/vnet/dpo/mpls_label_dpo.c
src/vnet/ip/ip_frag.c
src/vnet/ip/ip_frag.h
src/vnet/mpls/error.def
src/vnet/mpls/mpls_output.c
test/test_mpls.py

index 708e399..d160ae8 100644 (file)
@@ -246,8 +246,12 @@ typedef struct
       u8 ttl;
       u8 exp;
       u8 first;
+      u8 pyld_proto:3;         /* dpo_proto_t */
+      u8 rsvd:5;
       /* Rewrite length */
       u32 save_rewrite_length;
+      /* Save the mpls header length including all label stack */
+      u8 mpls_hdr_length;
       /*
        * BIER - the number of bytes in the header.
        *  the len field in the header is not authoritative. It's the
index 1074a95..9d147f9 100644 (file)
@@ -484,6 +484,12 @@ mpls_label_imposition_inline (vlib_main_t * vm,
                         exp2 = ip_dscp_to_mpls_exp(ip2->tos);
                         exp3 = ip_dscp_to_mpls_exp(ip3->tos);
                     }
+
+                    /* save the payload proto information in mpls opaque */
+                    vnet_buffer(b0)->mpls.pyld_proto = DPO_PROTO_IP4;
+                    vnet_buffer(b1)->mpls.pyld_proto = DPO_PROTO_IP4;
+                    vnet_buffer(b2)->mpls.pyld_proto = DPO_PROTO_IP4;
+                    vnet_buffer(b3)->mpls.pyld_proto = DPO_PROTO_IP4;
                 }
                 else if (DPO_PROTO_IP6 == dproto)
                 {
@@ -518,6 +524,12 @@ mpls_label_imposition_inline (vlib_main_t * vm,
                         exp3 = ip_dscp_to_mpls_exp(
                             ip6_traffic_class_network_order(ip3));
                     }
+
+                    /* save the payload proto information in mpls opaque */
+                    vnet_buffer(b0)->mpls.pyld_proto = DPO_PROTO_IP6;
+                    vnet_buffer(b1)->mpls.pyld_proto = DPO_PROTO_IP6;
+                    vnet_buffer(b2)->mpls.pyld_proto = DPO_PROTO_IP6;
+                    vnet_buffer(b3)->mpls.pyld_proto = DPO_PROTO_IP6;
                 }
                 else
                 {
@@ -787,6 +799,9 @@ mpls_label_imposition_inline (vlib_main_t * vm,
                         ttl0 = ip0->ttl;
                         exp0 = ip_dscp_to_mpls_exp(ip0->tos);
                     }
+
+                    /* save the payload proto information in mpls opaque */
+                    vnet_buffer(b0)->mpls.pyld_proto = DPO_PROTO_IP4;
                 }
                 else if (DPO_PROTO_IP6 == dproto)
                 {
@@ -805,6 +820,9 @@ mpls_label_imposition_inline (vlib_main_t * vm,
                         exp0 = ip_dscp_to_mpls_exp(
                             ip6_traffic_class_network_order(ip0));
                     }
+
+                    /* save the payload proto information in mpls opaque */
+                    vnet_buffer(b0)->mpls.pyld_proto = DPO_PROTO_IP6;
                 }
                 else
                 {
index fd5bc6f..230722c 100644 (file)
 
 #include <vnet/ip/ip.h>
 
+/*
+ * Re-attach the MPLS label stack to a freshly built fragment.
+ *
+ * Does nothing unless from_b carries IP_FRAG_FLAG_MPLS_HEADER.
+ * Otherwise the label stack, whose length was saved in the mpls opaque
+ * of from_b, is copied from just in front of from_b's current position
+ * to just in front of to_b's current position, and to_b is rewound so
+ * that its current position is the first MPLS header. The fragment can
+ * then be handed back to the mpls-output node.
+ *
+ * @param to_b   fragment buffer; current position is its IP header
+ * @param from_b original buffer; current position is its IP header and
+ *               the label stack lies immediately before it
+ *
+ * NOTE(review): assumes to_b has at least mpls_hdr_length bytes of
+ * headroom in front of its current position - confirm against the
+ * buffer pre-data size.
+ */
+static inline void
+copy_mpls_hdr (vlib_buffer_t * to_b, vlib_buffer_t * from_b)
+{
+  u8 mpls_hdr_length;
+  u8 *from_mpls_hdr;
+  u8 *to_mpls_hdr;
+
+  if (!(vnet_buffer (from_b)->ip_frag.flags & IP_FRAG_FLAG_MPLS_HEADER))
+    return;
+
+  mpls_hdr_length = vnet_buffer (from_b)->mpls.mpls_hdr_length;
+
+  /*
+   * Address both label stacks relative to the current position, so the
+   * copy lands exactly where the rewind below will point, whatever
+   * to_b->current_data happens to be.
+   */
+  from_mpls_hdr = (u8 *) vlib_buffer_get_current (from_b) - mpls_hdr_length;
+  to_mpls_hdr = (u8 *) vlib_buffer_get_current (to_b) - mpls_hdr_length;
+
+  clib_memcpy_fast (to_mpls_hdr, from_mpls_hdr, mpls_hdr_length);
+
+  /* Rewind by the same length that was copied */
+  vlib_buffer_advance (to_b, -mpls_hdr_length);
+}
 
 typedef struct
 {
@@ -38,8 +57,8 @@ format_ip_frag_trace (u8 * s, va_list * args)
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *);
-  s = format (s, "IPv%s mtu: %u fragments: %u",
-             t->ipv6 ? "6" : "4", t->mtu, t->n_fragments);
+  s = format (s, "IPv%s mtu: %u fragments: %u next: %d",
+             t->ipv6 ? "6" : "4", t->mtu, t->n_fragments, t->next);
   return s;
 }
 
@@ -68,6 +87,14 @@ frag_set_sw_if_index (vlib_buffer_t * to, vlib_buffer_t * from)
       vnet_buffer2 (to)->qos = vnet_buffer2 (from)->qos;
       to->flags |= VNET_BUFFER_F_QOS_DATA_VALID;
     }
+
+  /* Copy mpls opaque data */
+  if ((vnet_buffer (from)->ip_frag.flags) & IP_FRAG_FLAG_MPLS_HEADER)
+    {
+      vnet_buffer (to)->mpls.pyld_proto = vnet_buffer (from)->mpls.pyld_proto;
+      vnet_buffer (to)->mpls.mpls_hdr_length =
+       vnet_buffer (from)->mpls.mpls_hdr_length;
+    }
 }
 
 static vlib_buffer_t *
@@ -232,6 +259,10 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
            clib_host_to_net_u16 (to_b->current_length -
                                  sizeof (*encap_header6));
        }
+
+      /* Copy mpls header if present */
+      copy_mpls_hdr (to_b, org_from_b);
+
       rem -= len;
       fo += len;
     }
@@ -492,6 +523,9 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
       to_frag_hdr->next_hdr = ip6->protocol;
       to_frag_hdr->rsv = 0;
 
+      /* Copy mpls header if present */
+      copy_mpls_hdr (to_b, org_from_b);
+
       rem -= len;
       fo += len;
     }
@@ -519,6 +553,7 @@ VLIB_REGISTER_NODE (ip4_frag_node) = {
     [IP4_FRAG_NEXT_IP4_REWRITE] = "ip4-rewrite",
     [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
     [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+    [IP4_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output",
     [IP4_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
     [IP4_FRAG_NEXT_DROP] = "ip4-drop"
   },
@@ -541,6 +576,7 @@ VLIB_REGISTER_NODE (ip6_frag_node) = {
     [IP6_FRAG_NEXT_IP6_REWRITE] = "ip6-rewrite",
     [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
     [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+    [IP6_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output",
     [IP6_FRAG_NEXT_DROP] = "ip6-drop"
   },
 };
index 06eeee8..b66db41 100644 (file)
@@ -39,6 +39,7 @@
 
 #define IP_FRAG_FLAG_IP4_HEADER 0x01   //Encapsulating IPv4 header
 #define IP_FRAG_FLAG_IP6_HEADER 0x02   //Encapsulating IPv6 header
+#define IP_FRAG_FLAG_MPLS_HEADER 0x04  //Encapsulating MPLS header
 
 #define IP4_FRAG_NODE_NAME "ip4-frag"
 #define IP6_FRAG_NODE_NAME "ip6-frag"
@@ -51,6 +52,7 @@ typedef enum
   IP4_FRAG_NEXT_IP4_REWRITE,
   IP4_FRAG_NEXT_IP4_LOOKUP,
   IP4_FRAG_NEXT_IP6_LOOKUP,
+  IP4_FRAG_NEXT_MPLS_OUTPUT,
   IP4_FRAG_NEXT_ICMP_ERROR,
   IP4_FRAG_NEXT_DROP,
   IP4_FRAG_N_NEXT
@@ -61,6 +63,7 @@ typedef enum
   IP6_FRAG_NEXT_IP4_LOOKUP,
   IP6_FRAG_NEXT_IP6_LOOKUP,
   IP6_FRAG_NEXT_IP6_REWRITE,
+  IP6_FRAG_NEXT_MPLS_OUTPUT,
   IP6_FRAG_NEXT_DROP,
   IP6_FRAG_N_NEXT
 } ip6_frag_next_t;
index 34a4652..9941b18 100644 (file)
@@ -20,6 +20,7 @@ mpls_error (UNKNOWN_PROTOCOL, "unknown protocol")
 mpls_error (UNSUPPORTED_VERSION, "unsupported version")
 mpls_error (PKTS_DECAP, "MPLS input packets decapsulated")
 mpls_error (PKTS_ENCAP, "MPLS output packets encapsulated")
+mpls_error (PKTS_NEED_FRAG, "MPLS output packets needs fragmentation")
 mpls_error (NO_LABEL, "MPLS no label for fib/dst")
 mpls_error (TTL_EXPIRED, "MPLS ttl expired")
 mpls_error (S_NOT_SET, "MPLS s-bit not set")
index 68577e7..5ede22a 100644 (file)
@@ -19,6 +19,7 @@
 #include <vnet/pg/pg.h>
 #include <vnet/ip/ip.h>
 #include <vnet/mpls/mpls.h>
+#include <vnet/ip/ip_frag.h>
 
 typedef struct {
   /* Adjacency taken. */
@@ -26,8 +27,15 @@ typedef struct {
   u32 flow_hash;
 } mpls_output_trace_t;
 
+/* Selects which flavour of the shared mpls output routine is running */
+typedef enum
+{
+  MPLS_OUTPUT_MODE = 0,		/* used by the mpls-output node function */
+  MPLS_OUTPUT_MIDCHAIN_MODE	/* used by the mpls-midchain node function */
+} mpls_output_mode_t;
+
 #define foreach_mpls_output_next               \
-_(DROP, "error-drop")
+_(DROP, "error-drop")                           \
+_(IP4_FRAG, "ip4-frag")                         \
+_(IP6_FRAG, "ip6-frag")
 
 typedef enum {
 #define _(s,n) MPLS_OUTPUT_NEXT_##s,
@@ -50,11 +58,36 @@ format_mpls_output_trace (u8 * s, va_list * args)
   return s;
 }
 
+/*
+ * Prepare an MPLS packet that exceeds the adjacency MTU for IP
+ * fragmentation.
+ *
+ * Saves the length of the whole label stack (distance from the current
+ * position to the l3 header offset) in the mpls opaque, advances the
+ * buffer to the payload IP header, and fills in the ip_frag opaque so
+ * that the fragmentation node sends each fragment back to mpls-output.
+ * IP_FRAG_FLAG_MPLS_HEADER is set so the fragmentation code knows the
+ * mpls opaque must be retained on the fragments.
+ *
+ * @param p0   buffer whose current position is the first MPLS header
+ * @param adj0 adjacency supplying max_l3_packet_bytes as the MTU
+ * @return MPLS_OUTPUT_NEXT_IP4_FRAG when the saved payload protocol is
+ *         DPO_PROTO_IP4, MPLS_OUTPUT_NEXT_IP6_FRAG otherwise
+ *
+ * NOTE(review): any payload protocol other than DPO_PROTO_IP4 is treated
+ * as IPv6, and l3_hdr_offset is assumed to point at the payload IP
+ * header - confirm both against the label imposition path.
+ */
+static inline u32
+set_mpls_fragmentation(vlib_buffer_t * p0, ip_adjacency_t * adj0)
+{
+  const int is_ip4 = (vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4);
+  u8 frag_next_index, frag_flags;
+  u32 next0;
+
+  /*
+   * ip4-frag and ip6-frag number their next nodes with separate enums;
+   * use the one that belongs to the node the packet is being sent to.
+   */
+  if (is_ip4)
+    {
+      frag_next_index = IP4_FRAG_NEXT_MPLS_OUTPUT;
+      frag_flags = IP_FRAG_FLAG_IP4_HEADER;
+      next0 = MPLS_OUTPUT_NEXT_IP4_FRAG;
+    }
+  else
+    {
+      frag_next_index = IP6_FRAG_NEXT_MPLS_OUTPUT;
+      frag_flags = IP_FRAG_FLAG_IP6_HEADER;
+      next0 = MPLS_OUTPUT_NEXT_IP6_FRAG;
+    }
+
+  /* Remember the size of the label stack, then step over it to the IP header */
+  vnet_buffer (p0)->mpls.mpls_hdr_length =
+    vnet_buffer (p0)->l3_hdr_offset - p0->current_data;
+  vlib_buffer_advance (p0, vnet_buffer (p0)->mpls.mpls_hdr_length);
+
+  /* IP fragmentation */
+  ip_frag_set_vnet_buffer (p0, adj0[0].rewrite_header.max_l3_packet_bytes,
+                           frag_next_index, frag_flags);
+
+  /* Tell ip_frag to retain certain mpls parameters after fragmentation of mpls packet */
+  vnet_buffer (p0)->ip_frag.flags |= IP_FRAG_FLAG_MPLS_HEADER;
+
+  return next0;
+}
+
 static inline uword
 mpls_output_inline (vlib_main_t * vm,
                     vlib_node_runtime_t * node,
                     vlib_frame_t * from_frame,
-                   int is_midchain)
+                    mpls_output_mode_t mode)
 {
   u32 n_left_from, next_index, * from, * to_next, thread_index;
   vlib_node_runtime_t * error_node;
@@ -162,8 +195,11 @@ mpls_output_inline (vlib_main_t * vm,
             }
           else
             {
-              error0 = IP4_ERROR_MTU_EXCEEDED;
-              next0 = MPLS_OUTPUT_NEXT_DROP;
+             error0 = IP4_ERROR_MTU_EXCEEDED;
+             next0 = set_mpls_fragmentation (p0, adj0);
+              vlib_node_increment_counter (vm, mpls_output_node.index,
+                                           MPLS_ERROR_PKTS_NEED_FRAG,
+                                           1);
             }
           if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p1) <=
                            adj1[0].rewrite_header.max_l3_packet_bytes))
@@ -182,10 +218,13 @@ mpls_output_inline (vlib_main_t * vm,
             }
           else
             {
-              error1 = IP4_ERROR_MTU_EXCEEDED;
-              next1 = MPLS_OUTPUT_NEXT_DROP;
+             error1 = IP4_ERROR_MTU_EXCEEDED;
+             next1 = set_mpls_fragmentation (p1, adj1);
+              vlib_node_increment_counter (vm, mpls_output_node.index,
+                                           MPLS_ERROR_PKTS_NEED_FRAG,
+                                           1);
             }
-          if (is_midchain)
+          if (mode == MPLS_OUTPUT_MIDCHAIN_MODE)
           {
              adj0->sub_type.midchain.fixup_func
                 (vm, adj0, p0,
@@ -221,7 +260,7 @@ mpls_output_inline (vlib_main_t * vm,
       while (n_left_from > 0 && n_left_to_next > 0)
         {
          ip_adjacency_t * adj0;
-          mpls_unicast_header_t *hdr0;
+         mpls_unicast_header_t *hdr0;
          vlib_buffer_t * p0;
          u32 pi0, adj_index0, next0, error0;
           word rw_len0;
@@ -233,7 +272,7 @@ mpls_output_inline (vlib_main_t * vm,
          adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
 
          adj0 = adj_get(adj_index0);
-         hdr0 = vlib_buffer_get_current (p0);
+         hdr0 = vlib_buffer_get_current (p0);
 
          /* Guess we are only writing on simple Ethernet header. */
           vnet_rewrite_one_header (adj0[0], hdr0, 
@@ -268,10 +307,13 @@ mpls_output_inline (vlib_main_t * vm,
             }
           else
             {
-              error0 = IP4_ERROR_MTU_EXCEEDED;
-              next0 = MPLS_OUTPUT_NEXT_DROP;
+             error0 = IP4_ERROR_MTU_EXCEEDED;
+             next0 = set_mpls_fragmentation (p0, adj0);
+              vlib_node_increment_counter (vm, mpls_output_node.index,
+                                           MPLS_ERROR_PKTS_NEED_FRAG,
+                                           1);
             }
-          if (is_midchain)
+          if (mode == MPLS_OUTPUT_MIDCHAIN_MODE)
           {
              adj0->sub_type.midchain.fixup_func
                 (vm, adj0, p0,
@@ -317,7 +359,7 @@ VLIB_NODE_FN (mpls_output_node) (vlib_main_t * vm,
              vlib_node_runtime_t * node,
              vlib_frame_t * from_frame)
 {
-    return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 0));
+    return (mpls_output_inline(vm, node, from_frame, MPLS_OUTPUT_MODE));
 }
 
 VLIB_REGISTER_NODE (mpls_output_node) = {
@@ -341,7 +383,7 @@ VLIB_NODE_FN (mpls_midchain_node) (vlib_main_t * vm,
                vlib_node_runtime_t * node,
                vlib_frame_t * from_frame)
 {
-    return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 1));
+    return (mpls_output_inline(vm, node, from_frame, MPLS_OUTPUT_MIDCHAIN_MODE));
 }
 
 VLIB_REGISTER_NODE (mpls_midchain_node) = {
index 7388cf4..5b30548 100644 (file)
@@ -379,6 +379,30 @@ class TestMPLS(VppTestCase):
         except:
             raise
 
+    def verify_capture_fragmented_labelled_ip4(self, src_if, capture, sent,
+                                               mpls_labels, ip_ttl=None):
+        try:
+            capture = verify_filter(capture, sent)
+
+            for i in range(len(capture)):
+                tx = sent[0]
+                rx = capture[i]
+                tx_ip = tx[IP]
+                rx_ip = rx[IP]
+
+                verify_mpls_stack(self, rx, mpls_labels)
+
+                self.assertEqual(rx_ip.src, tx_ip.src)
+                self.assertEqual(rx_ip.dst, tx_ip.dst)
+                if not ip_ttl:
+                    # IP processing post pop has decremented the TTL
+                    self.assertEqual(rx_ip.ttl + 1, tx_ip.ttl)
+                else:
+                    self.assertEqual(rx_ip.ttl, ip_ttl)
+
+        except:
+            raise
+
     def test_swap(self):
         """ MPLS label swap tests """
 
@@ -851,6 +875,38 @@ class TestMPLS(VppTestCase):
         route_10_0_0_2.remove_vpp_config()
         route_10_0_0_1.remove_vpp_config()
 
+    def test_imposition_fragmentation(self):
+        """ MPLS label imposition fragmentation test """
+
+        #
+        # Add a ipv4 non-recursive route with a single out label
+        #
+        route_10_0_0_1 = VppIpRoute(self, "10.0.0.1", 32,
+                                    [VppRoutePath(self.pg0.remote_ip4,
+                                                  self.pg0.sw_if_index,
+                                                  labels=[VppMplsLabel(32)])])
+        route_10_0_0_1.add_vpp_config()
+
+        #
+        # a stream that matches the route for 10.0.0.1
+        # PG0 is in the default table
+        #
+        tx = self.create_stream_ip4(self.pg0, "10.0.0.1")
+        for i in range(0, 257):
+            self.extend_packet(tx[i], 10000)
+
+        #
+        # 5 fragments per packet (257*5=1285)
+        #
+        rx = self.send_and_expect(self.pg0, tx, self.pg0, 1285)
+        self.verify_capture_fragmented_labelled_ip4(self.pg0, rx, tx,
+                                                    [VppMplsLabel(32)])
+
+        #
+        # cleanup
+        #
+        route_10_0_0_1.remove_vpp_config()
+
     def test_tunnel_pipe(self):
         """ MPLS Tunnel Tests - Pipe """