IPIP and IPv6 fragmentation 84/14984/6
author: Ole Troan <ot@cisco.com>
Wed, 19 Sep 2018 10:38:51 +0000 (12:38 +0200)
committer: Neale Ranns <nranns@cisco.com>
Thu, 27 Sep 2018 08:47:40 +0000 (08:47 +0000)
- Fix error where the ICMPv6 error code didn't reset VLIB_TX to -1,
  leading to a crash for ICMP errors generated on tunnelled packets
- Missed setting VNET_BUFFER_F_LOCALLY_ORIGINATED, so
  IP in IPv6 packets never got fragmented.
- Add support for fragmentation of buffer chains.
- Remove support for inner fragmentation in frag code itself.

Change-Id: If9a97301b7e35ca97ffa5c0fada2b9e7e7dbfb27
Signed-off-by: Ole Troan <ot@cisco.com>
17 files changed:
src/plugins/map/ip4_map.c
src/plugins/map/ip4_map_t.c
src/plugins/map/ip6_map.c
src/plugins/map/ip6_map_t.c
src/vnet/buffer.h
src/vnet/ip/icmp6.c
src/vnet/ip/ip4_forward.c
src/vnet/ip/ip6_forward.c
src/vnet/ip/ip_frag.c
src/vnet/ip/ip_frag.h
src/vnet/ipip/ipip.c
src/vnet/ipip/node.c
src/vpp-api/python/vpp_papi/vpp_stats.py
test/patches/scapy-2.4/defragment.patch [new file with mode: 0644]
test/test_ipip.py
test/test_mpls.py
test/test_mtu.py

index 715cdef..44392e8 100644 (file)
@@ -167,7 +167,8 @@ ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error)
 
   if (mm->frag_inner)
     {
-      ip_frag_set_vnet_buffer (b, sizeof (ip6_header_t), mtu,
+      // TODO: Fix inner fragmentation after removed inner support from ip-frag.
+      ip_frag_set_vnet_buffer (b, /*sizeof (ip6_header_t), */ mtu,
                               IP4_FRAG_NEXT_IP6_LOOKUP,
                               IP_FRAG_FLAG_IP6_HEADER);
       return (IP4_MAP_NEXT_IP4_FRAGMENT);
@@ -183,7 +184,7 @@ ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error)
          *error = MAP_ERROR_DF_SET;
          return (IP4_MAP_NEXT_ICMP_ERROR);
        }
-      ip_frag_set_vnet_buffer (b, 0, mtu, IP6_FRAG_NEXT_IP6_LOOKUP,
+      ip_frag_set_vnet_buffer (b, mtu, IP6_FRAG_NEXT_IP6_LOOKUP,
                               IP_FRAG_FLAG_IP6_HEADER);
       return (IP4_MAP_NEXT_IP6_FRAGMENT);
     }
@@ -621,7 +622,7 @@ ip4_map_reass (vlib_main_t * vm,
               && (clib_net_to_host_u16 (ip60->payload_length) +
                   sizeof (*ip60) > d0->mtu)))
            {
-             vnet_buffer (p0)->ip_frag.header_offset = sizeof (*ip60);
+             // TODO: vnet_buffer (p0)->ip_frag.header_offset = sizeof (*ip60);
              vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP;
              vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
              vnet_buffer (p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER;
index 0a9903a..c6b0912 100644 (file)
@@ -237,7 +237,6 @@ ip4_map_t_icmp (vlib_main_t * vm,
 
          if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
            {
-             vnet_buffer (p0)->ip_frag.header_offset = 0;
              vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
              vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP;
              next0 = IP4_MAPT_ICMP_NEXT_IP6_FRAG;
@@ -322,7 +321,6 @@ ip4_map_t_fragmented (vlib_main_t * vm,
            {
              if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
                {
-                 vnet_buffer (p0)->ip_frag.header_offset = 0;
                  vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
                  vnet_buffer (p0)->ip_frag.next_index =
                    IP6_FRAG_NEXT_IP6_LOOKUP;
@@ -391,7 +389,6 @@ ip4_map_t_tcp_udp (vlib_main_t * vm,
              if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
                {
                  //Send to fragmentation node if necessary
-                 vnet_buffer (p0)->ip_frag.header_offset = 0;
                  vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
                  vnet_buffer (p0)->ip_frag.next_index =
                    IP6_FRAG_NEXT_IP6_LOOKUP;
@@ -409,7 +406,6 @@ ip4_map_t_tcp_udp (vlib_main_t * vm,
              if (vnet_buffer (p1)->map_t.mtu < p1->current_length)
                {
                  //Send to fragmentation node if necessary
-                 vnet_buffer (p1)->ip_frag.header_offset = 0;
                  vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu;
                  vnet_buffer (p1)->ip_frag.next_index =
                    IP6_FRAG_NEXT_IP6_LOOKUP;
@@ -453,7 +449,6 @@ ip4_map_t_tcp_udp (vlib_main_t * vm,
              if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
                {
                  //Send to fragmentation node if necessary
-                 vnet_buffer (p0)->ip_frag.header_offset = 0;
                  vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
                  vnet_buffer (p0)->ip_frag.next_index =
                    IP6_FRAG_NEXT_IP6_LOOKUP;
index 01b2d87..5b80209 100644 (file)
@@ -308,7 +308,6 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
                      (d0->mtu
                       && (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
                    {
-                     vnet_buffer (p0)->ip_frag.header_offset = 0;
                      vnet_buffer (p0)->ip_frag.flags = 0;
                      vnet_buffer (p0)->ip_frag.next_index =
                        IP4_FRAG_NEXT_IP4_LOOKUP;
@@ -341,7 +340,6 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
                      (d1->mtu
                       && (clib_host_to_net_u16 (ip41->length) > d1->mtu)))
                    {
-                     vnet_buffer (p1)->ip_frag.header_offset = 0;
                      vnet_buffer (p1)->ip_frag.flags = 0;
                      vnet_buffer (p1)->ip_frag.next_index =
                        IP4_FRAG_NEXT_IP4_LOOKUP;
@@ -495,7 +493,6 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
                      (d0->mtu
                       && (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
                    {
-                     vnet_buffer (p0)->ip_frag.header_offset = 0;
                      vnet_buffer (p0)->ip_frag.flags = 0;
                      vnet_buffer (p0)->ip_frag.next_index =
                        IP4_FRAG_NEXT_IP4_LOOKUP;
@@ -935,7 +932,6 @@ ip6_map_ip4_reass (vlib_main_t * vm,
              (d0->mtu && (clib_host_to_net_u16 (ip40->length) > d0->mtu)
               && error0 == MAP_ERROR_NONE && !cached))
            {
-             vnet_buffer (p0)->ip_frag.header_offset = 0;
              vnet_buffer (p0)->ip_frag.flags = 0;
              vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
              vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
index 01ed810..aa4fa4d 100644 (file)
@@ -264,7 +264,6 @@ ip6_map_t_icmp (vlib_main_t * vm,
            {
              //Send to fragmentation node if necessary
              vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
-             vnet_buffer (p0)->ip_frag.header_offset = 0;
              vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
              next0 = IP6_MAPT_ICMP_NEXT_IP4_FRAG;
            }
@@ -348,7 +347,6 @@ ip6_map_t_fragmented (vlib_main_t * vm,
                {
                  //Send to fragmentation node if necessary
                  vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
-                 vnet_buffer (p0)->ip_frag.header_offset = 0;
                  vnet_buffer (p0)->ip_frag.next_index =
                    IP4_FRAG_NEXT_IP4_LOOKUP;
                  next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG;
@@ -366,7 +364,6 @@ ip6_map_t_fragmented (vlib_main_t * vm,
                {
                  //Send to fragmentation node if necessary
                  vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu;
-                 vnet_buffer (p1)->ip_frag.header_offset = 0;
                  vnet_buffer (p1)->ip_frag.next_index =
                    IP4_FRAG_NEXT_IP4_LOOKUP;
                  next1 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG;
@@ -405,7 +402,6 @@ ip6_map_t_fragmented (vlib_main_t * vm,
                {
                  //Send to fragmentation node if necessary
                  vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
-                 vnet_buffer (p0)->ip_frag.header_offset = 0;
                  vnet_buffer (p0)->ip_frag.next_index =
                    IP4_FRAG_NEXT_IP4_LOOKUP;
                  next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG;
@@ -466,7 +462,6 @@ ip6_map_t_tcp_udp (vlib_main_t * vm,
                {
                  //Send to fragmentation node if necessary
                  vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
-                 vnet_buffer (p0)->ip_frag.header_offset = 0;
                  vnet_buffer (p0)->ip_frag.next_index =
                    IP4_FRAG_NEXT_IP4_LOOKUP;
                  next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG;
@@ -484,7 +479,6 @@ ip6_map_t_tcp_udp (vlib_main_t * vm,
                {
                  //Send to fragmentation node if necessary
                  vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu;
-                 vnet_buffer (p1)->ip_frag.header_offset = 0;
                  vnet_buffer (p1)->ip_frag.next_index =
                    IP4_FRAG_NEXT_IP4_LOOKUP;
                  next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG;
@@ -523,7 +517,6 @@ ip6_map_t_tcp_udp (vlib_main_t * vm,
                {
                  //Send to fragmentation node if necessary
                  vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
-                 vnet_buffer (p0)->ip_frag.header_offset = 0;
                  vnet_buffer (p0)->ip_frag.next_index =
                    IP4_FRAG_NEXT_IP4_LOOKUP;
                  next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG;
index 2a6feb5..8071081 100644 (file)
@@ -277,7 +277,6 @@ typedef struct
     struct
     {
       u32 pad[2];              /* do not overlay w/ ip.adj_index[0,1] */
-      u16 header_offset;
       u16 mtu;
       u8 next_index;
       u8 flags;                        //See ip_frag.h
index 6ebdef4..4252ac7 100644 (file)
@@ -524,14 +524,24 @@ ip6_icmp_error (vlib_main_t * vm,
                {
                  b = vlib_get_buffer (vm, b->next_buffer);
                  b->current_length = 0;
+                 // XXX: Buffer leak???
                }
            }
 
          /* Add IP header and ICMPv6 header including a 4 byte data field */
-         vlib_buffer_advance (p0,
-                              -sizeof (ip6_header_t) -
-                              sizeof (icmp46_header_t) - 4);
+         int headroom = sizeof (ip6_header_t) + sizeof (icmp46_header_t) + 4;
 
+         /* Verify that we're not falling off the edge */
+         if (p0->current_data - headroom < -VLIB_BUFFER_PRE_DATA_SIZE)
+           {
+             next0 = IP6_ICMP_ERROR_NEXT_DROP;
+             error0 = ICMP6_ERROR_DROP;
+             goto error;
+           }
+
+         vlib_buffer_advance (p0, -headroom);
+         vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0;
+         p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
          p0->current_length =
            p0->current_length > 1280 ? 1280 : p0->current_length;
 
@@ -561,6 +571,7 @@ ip6_icmp_error (vlib_main_t * vm,
            {
              next0 = IP6_ICMP_ERROR_NEXT_DROP;
              error0 = ICMP6_ERROR_DROP;
+             goto error;
            }
 
          /* Fill icmp header fields */
@@ -573,11 +584,11 @@ ip6_icmp_error (vlib_main_t * vm,
            ip6_tcp_udp_icmp_compute_checksum (vm, p0, out_ip0,
                                               &bogus_length);
 
-
-
          /* Update error status */
          if (error0 == ICMP6_ERROR_NONE)
            error0 = icmp6_icmp_type_to_error (icmp0->type);
+
+       error:
          vlib_error_count (vm, node->node_index, error0, 1);
 
          /* Verify speculative enqueue, maybe switch current next frame */
@@ -602,7 +613,7 @@ VLIB_REGISTER_NODE (ip6_icmp_error_node) = {
 
   .n_next_nodes = IP6_ICMP_ERROR_N_NEXT,
   .next_nodes = {
-    [IP6_ICMP_ERROR_NEXT_DROP] = "ip6-drop",
+    [IP6_ICMP_ERROR_NEXT_DROP] = "error-drop",
     [IP6_ICMP_ERROR_NEXT_LOOKUP] = "ip6-lookup",
   },
 
index d5cf011..d5e2cd5 100644 (file)
@@ -2097,7 +2097,7 @@ ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
       else
        {
          /* IP fragmentation */
-         ip_frag_set_vnet_buffer (b, 0, adj_packet_bytes,
+         ip_frag_set_vnet_buffer (b, adj_packet_bytes,
                                   IP4_FRAG_NEXT_IP4_REWRITE, 0);
          *next = IP4_REWRITE_NEXT_FRAGMENT;
        }
index 5abbba5..e05792f 100644 (file)
@@ -1566,9 +1566,10 @@ ip6_mtu_check (vlib_buffer_t * b, u16 packet_bytes,
       if (is_locally_generated)
        {
          /* IP fragmentation */
-         ip_frag_set_vnet_buffer (b, 0, adj_packet_bytes,
+         ip_frag_set_vnet_buffer (b, adj_packet_bytes,
                                   IP6_FRAG_NEXT_IP6_REWRITE, 0);
          *next = IP6_REWRITE_NEXT_FRAGMENT;
+         *error = IP6_ERROR_MTU_EXCEEDED;
        }
       else
        {
index eb9bb4a..628d9d6 100644 (file)
@@ -27,7 +27,6 @@
 typedef struct
 {
   u8 ipv6;
-  u16 header_offset;
   u16 mtu;
   u8 next;
   u16 n_fragments;
@@ -39,13 +38,48 @@ format_ip_frag_trace (u8 * s, va_list * args)
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *);
-  s = format (s, "IPv%s offset: %u mtu: %u fragments: %u",
-             t->ipv6 ? "6" : "4", t->header_offset, t->mtu, t->n_fragments);
+  s = format (s, "IPv%s mtu: %u fragments: %u",
+             t->ipv6 ? "6" : "4", t->mtu, t->n_fragments);
   return s;
 }
 
 static u32 running_fragment_id;
 
+static void
+frag_set_sw_if_index (vlib_buffer_t * to, vlib_buffer_t * from)
+{
+  vnet_buffer (to)->sw_if_index[VLIB_RX] =
+    vnet_buffer (from)->sw_if_index[VLIB_RX];
+  vnet_buffer (to)->sw_if_index[VLIB_TX] =
+    vnet_buffer (from)->sw_if_index[VLIB_TX];
+
+  /* Copy adj_index in case DPO based node is sending for the
+   * fragmentation, the packet would be sent back to the proper
+   * DPO next node and Index
+   */
+  vnet_buffer (to)->ip.adj_index[VLIB_RX] =
+    vnet_buffer (from)->ip.adj_index[VLIB_RX];
+  vnet_buffer (to)->ip.adj_index[VLIB_TX] =
+    vnet_buffer (from)->ip.adj_index[VLIB_TX];
+}
+
+static vlib_buffer_t *
+frag_buffer_alloc (vlib_buffer_t * org_b, u32 * bi)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  if (vlib_buffer_alloc (vm, bi, 1) != 1)
+    return 0;
+
+  vlib_buffer_t *b = vlib_get_buffer (vm, *bi);
+  vlib_buffer_free_list_t *fl =
+    vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+  vlib_buffer_init_for_free_list (b, fl);
+  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
+  vlib_buffer_copy_trace_flag (vm, org_b, *bi);
+
+  return b;
+}
+
 /*
  * Limitation: Does follow buffer chains in the packet to fragment,
  * but does not generate buffer chains. I.e. a fragment is always
@@ -58,23 +92,19 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
 {
   vlib_buffer_t *from_b;
   ip4_header_t *ip4;
-  u16 mtu, len, max, rem, offset, ip_frag_id, ip_frag_offset;
+  u16 mtu, len, max, rem, ip_frag_id, ip_frag_offset;
   u8 *org_from_packet, more;
 
   from_b = vlib_get_buffer (vm, from_bi);
-  offset = vnet_buffer (from_b)->ip_frag.header_offset;
   mtu = vnet_buffer (from_b)->ip_frag.mtu;
   org_from_packet = vlib_buffer_get_current (from_b);
-  ip4 = (ip4_header_t *) vlib_buffer_get_current (from_b) + offset;
+  ip4 = (ip4_header_t *) vlib_buffer_get_current (from_b);
 
   rem = clib_net_to_host_u16 (ip4->length) - sizeof (ip4_header_t);
-  max =
-    (mtu - sizeof (ip4_header_t) -
-     vnet_buffer (from_b)->ip_frag.header_offset) & ~0x7;
+  max = (mtu - sizeof (ip4_header_t)) & ~0x7;
 
   if (rem >
-      (vlib_buffer_length_in_chain (vm, from_b) - offset -
-       sizeof (ip4_header_t)))
+      (vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip4_header_t)))
     {
       *error = IP_FRAG_ERROR_MALFORMED;
       return;
@@ -110,9 +140,9 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
 
   u8 *from_data = (void *) (ip4 + 1);
   vlib_buffer_t *org_from_b = from_b;
-  u16 ptr = 0, fo = 0;
-  u16 left_in_from_buffer =
-    from_b->current_length - offset - sizeof (ip4_header_t);
+  u16 fo = 0;
+  u16 left_in_from_buffer = from_b->current_length - sizeof (ip4_header_t);
+  u16 ptr = 0;
 
   /* Do the actual fragmentation */
   while (rem)
@@ -122,40 +152,28 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
       ip4_header_t *to_ip4;
       u8 *to_data;
 
-      len = (rem > (mtu - sizeof (ip4_header_t) - offset) ? max : rem);
+      len = (rem > (mtu - sizeof (ip4_header_t)) ? max : rem);
       if (len != rem)          /* Last fragment does not need to divisible by 8 */
        len &= ~0x7;
-      if (!vlib_buffer_alloc (vm, &to_bi, 1))
+      if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0)
        {
          *error = IP_FRAG_ERROR_MEMORY;
          return;
        }
       vec_add1 (*buffer, to_bi);
-      to_b = vlib_get_buffer (vm, to_bi);
-      vnet_buffer (to_b)->sw_if_index[VLIB_RX] =
-       vnet_buffer (org_from_b)->sw_if_index[VLIB_RX];
-      vnet_buffer (to_b)->sw_if_index[VLIB_TX] =
-       vnet_buffer (org_from_b)->sw_if_index[VLIB_TX];
-      /* Copy adj_index in case DPO based node is sending for the
-       * fragmentation, the packet would be sent back to the proper
-       * DPO next node and Index
-       */
-      vnet_buffer (to_b)->ip.adj_index[VLIB_RX] =
-       vnet_buffer (org_from_b)->ip.adj_index[VLIB_RX];
-      vnet_buffer (to_b)->ip.adj_index[VLIB_TX] =
-       vnet_buffer (org_from_b)->ip.adj_index[VLIB_TX];
-
-      /* Copy offset and ip4 header */
-      clib_memcpy (to_b->data, org_from_packet,
-                  offset + sizeof (ip4_header_t));
-      to_ip4 = vlib_buffer_get_current (to_b) + offset;
+      frag_set_sw_if_index (to_b, org_from_b);
+
+      /* Copy ip4 header */
+      clib_memcpy (to_b->data, org_from_packet, sizeof (ip4_header_t));
+      to_ip4 = vlib_buffer_get_current (to_b);
       to_data = (void *) (to_ip4 + 1);
 
       /* Spin through from buffers filling up the to buffer */
-      u16 to_ptr = 0;
-      u16 bytes_to_copy, left_in_to_buffer = len;
+      u16 left_in_to_buffer = len, to_ptr = 0;
       while (1)
        {
+         u16 bytes_to_copy;
+
          /* Figure out how many bytes we can safely copy */
          bytes_to_copy = left_in_to_buffer <= left_in_from_buffer ?
            left_in_to_buffer : left_in_from_buffer;
@@ -166,7 +184,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
          if (left_in_to_buffer == 0)
            break;
 
-         ASSERT (left_in_from_buffer == 0);
+         ASSERT (left_in_from_buffer <= 0);
          /* Move buffer */
          if (!(from_b->flags & VLIB_BUFFER_NEXT_PRESENT))
            {
@@ -180,7 +198,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
          to_ptr += bytes_to_copy;
        }
 
-      to_b->current_length = offset + len + sizeof (ip4_header_t);
+      to_b->current_length = len + sizeof (ip4_header_t);
 
       to_ip4->fragment_id = ip_frag_id;
       to_ip4->flags_and_fragment_offset =
@@ -214,21 +232,20 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
 }
 
 void
-ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 offset, u16 mtu,
-                        u8 next_index, u8 flags)
+ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 mtu, u8 next_index, u8 flags)
 {
-  vnet_buffer (b)->ip_frag.header_offset = offset;
   vnet_buffer (b)->ip_frag.mtu = mtu;
   vnet_buffer (b)->ip_frag.next_index = next_index;
   vnet_buffer (b)->ip_frag.flags = flags;
 }
 
-static uword
-ip4_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+
+static inline uword
+frag_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+                 vlib_frame_t * frame, u32 node_index, bool is_ip6)
 {
   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
-  vlib_node_runtime_t *error_node =
-    vlib_node_get_runtime (vm, ip4_frag_node.index);
+  vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, node_index);
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
@@ -244,58 +261,68 @@ ip4_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
          u32 pi0, *frag_from, frag_left;
          vlib_buffer_t *p0;
          ip_frag_error_t error0;
-         ip4_frag_next_t next0;
+         int next0;
 
-         //Note: The packet is not enqueued now.
-         //It is instead put in a vector where other fragments
-         //will be put as well.
+         /*
+          * Note: The packet is not enqueued now. It is instead put
+          * in a vector where other fragments will be put as well.
+          */
          pi0 = from[0];
          from += 1;
          n_left_from -= 1;
          error0 = IP_FRAG_ERROR_NONE;
 
          p0 = vlib_get_buffer (vm, pi0);
-         ip4_frag_do_fragment (vm, pi0, &buffer, &error0);
+         if (is_ip6)
+           ip6_frag_do_fragment (vm, pi0, &buffer, &error0);
+         else
+           ip4_frag_do_fragment (vm, pi0, &buffer, &error0);
 
          if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
            {
              ip_frag_trace_t *tr =
                vlib_add_trace (vm, node, p0, sizeof (*tr));
-             tr->header_offset = vnet_buffer (p0)->ip_frag.header_offset;
              tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
-             tr->ipv6 = 0;
+             tr->ipv6 = is_ip6 ? 1 : 0;
              tr->n_fragments = vec_len (buffer);
              tr->next = vnet_buffer (p0)->ip_frag.next_index;
            }
 
-         if (error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
+         if (!is_ip6 && error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
            {
              icmp4_error_set_vnet_buffer (p0, ICMP4_destination_unreachable,
                                           ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
                                           vnet_buffer (p0)->ip_frag.mtu);
-             vlib_buffer_advance (p0,
-                                  vnet_buffer (p0)->ip_frag.header_offset);
              next0 = IP4_FRAG_NEXT_ICMP_ERROR;
            }
          else
            {
-              /* *INDENT-OFF* */
-              next0 = (error0 == IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
-                ip_frag.next_index : IP4_FRAG_NEXT_DROP;
-              /* *INDENT-ON* */
+             if (is_ip6)
+               next0 =
+                 (error0 ==
+                  IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
+                 ip_frag.next_index : IP6_FRAG_NEXT_DROP;
+             else
+               next0 =
+                 (error0 ==
+                  IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
+                 ip_frag.next_index : IP4_FRAG_NEXT_DROP;
            }
 
          if (error0 == IP_FRAG_ERROR_NONE)
            {
              /* Free original buffer chain */
-             vlib_buffer_free_one (vm, pi0);
              frag_sent += vec_len (buffer);
              small_packets += (vec_len (buffer) == 1);
+             vlib_buffer_free_one (vm, pi0);   /* Free original packet */
            }
          else
-           vlib_error_count (vm, ip4_frag_node.index, error0, 1);
+           {
+             vlib_error_count (vm, node_index, error0, 1);
+             vec_add1 (buffer, pi0);   /* Get rid of the original buffer */
+           }
 
-         //Send fragments that were added in the frame
+         /* Send fragments that were added in the frame */
          frag_from = buffer;
          frag_left = vec_len (buffer);
 
@@ -325,262 +352,143 @@ ip4_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
     }
   vec_free (buffer);
 
-  vlib_node_increment_counter (vm, ip4_frag_node.index,
+  vlib_node_increment_counter (vm, node_index,
                               IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
-  vlib_node_increment_counter (vm, ip4_frag_node.index,
+  vlib_node_increment_counter (vm, node_index,
                               IP_FRAG_ERROR_SMALL_PACKET, small_packets);
 
   return frame->n_vectors;
 }
 
 
+
+static uword
+ip4_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  return frag_node_inline (vm, node, frame, ip4_frag_node.index,
+                          0 /* is_ip6 */ );
+}
+
+static uword
+ip6_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  return frag_node_inline (vm, node, frame, ip6_frag_node.index,
+                          1 /* is_ip6 */ );
+}
+
+/*
+ * Fragments the packet given in from_bi. Fragments are returned in the buffer vector.
+ * Caller must ensure the original packet is freed.
+ */
 void
-ip6_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
+ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
                      ip_frag_error_t * error)
 {
-  vlib_buffer_t *p;
-  ip6_header_t *ip6_hdr;
-  ip6_frag_hdr_t *frag_hdr;
-  u8 *payload, *next_header;
-
-  p = vlib_get_buffer (vm, pi);
-
-  //Parsing the IPv6 headers
-  ip6_hdr =
-    vlib_buffer_get_current (p) + vnet_buffer (p)->ip_frag.header_offset;
-  payload = (u8 *) (ip6_hdr + 1);
-  next_header = &ip6_hdr->protocol;
-  if (*next_header == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
-    {
-      next_header = payload;
-      payload += payload[1] * 8;
-    }
+  vlib_buffer_t *from_b;
+  ip6_header_t *ip6;
+  u16 mtu, len, max, rem, ip_frag_id;
 
-  if (*next_header == IP_PROTOCOL_IP6_DESTINATION_OPTIONS)
-    {
-      next_header = payload;
-      payload += payload[1] * 8;
-    }
+  from_b = vlib_get_buffer (vm, from_bi);
+  mtu = vnet_buffer (from_b)->ip_frag.mtu;
+  ip6 = (ip6_header_t *) vlib_buffer_get_current (from_b);
 
-  if (*next_header == IP_PROTOCOL_IPV6_ROUTE)
-    {
-      next_header = payload;
-      payload += payload[1] * 8;
-    }
+  rem = clib_net_to_host_u16 (ip6->payload_length);
+  max = (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) & ~0x7;        // TODO: Is max correct??
 
-  if (PREDICT_FALSE
-      (payload >= (u8 *) vlib_buffer_get_current (p) + p->current_length))
+  if (rem >
+      (vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip6_header_t)))
     {
-      //A malicious packet could set an extension header with a too big size
-      //and make us modify another vlib_buffer
       *error = IP_FRAG_ERROR_MALFORMED;
       return;
     }
 
-  if (p->flags & VLIB_BUFFER_NEXT_PRESENT)
+  /* TODO: Look through header chain for fragmentation header */
+  if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
     {
       *error = IP_FRAG_ERROR_MALFORMED;
       return;
     }
 
-  u8 has_more;
-  u16 initial_offset;
-  if (*next_header == IP_PROTOCOL_IPV6_FRAGMENTATION)
-    {
-      //The fragmentation header is already there
-      frag_hdr = (ip6_frag_hdr_t *) payload;
-      has_more = ip6_frag_hdr_more (frag_hdr);
-      initial_offset = ip6_frag_hdr_offset (frag_hdr);
-    }
-  else
-    {
-      //Insert a fragmentation header in the packet
-      u8 nh = *next_header;
-      *next_header = IP_PROTOCOL_IPV6_FRAGMENTATION;
-      vlib_buffer_advance (p, -sizeof (*frag_hdr));
-      u8 *start = vlib_buffer_get_current (p);
-      memmove (start, start + sizeof (*frag_hdr),
-              payload - (start + sizeof (*frag_hdr)));
-      frag_hdr = (ip6_frag_hdr_t *) (payload - sizeof (*frag_hdr));
-      frag_hdr->identification = ++running_fragment_id;
-      frag_hdr->next_hdr = nh;
-      frag_hdr->rsv = 0;
-      has_more = 0;
-      initial_offset = 0;
-    }
-  payload = (u8 *) (frag_hdr + 1);
-
-  u16 headers_len = payload - (u8 *) vlib_buffer_get_current (p);
-  u16 max_payload = vnet_buffer (p)->ip_frag.mtu - headers_len;
-  u16 rem = p->current_length - headers_len;
+  u8 *from_data = (void *) (ip6 + 1);
+  vlib_buffer_t *org_from_b = from_b;
+  u16 fo = 0;
+  u16 left_in_from_buffer = from_b->current_length - sizeof (ip6_header_t);
   u16 ptr = 0;
 
-  if (max_payload < 8)
-    {
-      *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
-      return;
-    }
+  ip_frag_id = ++running_fragment_id;  // Fix
 
+  /* Do the actual fragmentation */
   while (rem)
     {
-      u32 bi;
-      vlib_buffer_t *b;
-      u16 len = (rem > max_payload) ? (max_payload & ~0x7) : rem;
-      rem -= len;
-
-      if (ptr != 0)
-       {
-         if (!vlib_buffer_alloc (vm, &bi, 1))
-           {
-             *error = IP_FRAG_ERROR_MEMORY;
-             return;
-           }
-         b = vlib_get_buffer (vm, bi);
-         vnet_buffer (b)->sw_if_index[VLIB_RX] =
-           vnet_buffer (p)->sw_if_index[VLIB_RX];
-         vnet_buffer (b)->sw_if_index[VLIB_TX] =
-           vnet_buffer (p)->sw_if_index[VLIB_TX];
-
-         /* Copy Adj_index in case DPO based node is sending for the fragmentation,
-            the packet would be sent back to the proper DPO next node and Index */
-         vnet_buffer (b)->ip.adj_index[VLIB_RX] =
-           vnet_buffer (p)->ip.adj_index[VLIB_RX];
-         vnet_buffer (b)->ip.adj_index[VLIB_TX] =
-           vnet_buffer (p)->ip.adj_index[VLIB_TX];
-
-         clib_memcpy (vlib_buffer_get_current (b),
-                      vlib_buffer_get_current (p), headers_len);
-         clib_memcpy (vlib_buffer_get_current (b) + headers_len,
-                      payload + ptr, len);
-         frag_hdr =
-           vlib_buffer_get_current (b) + headers_len - sizeof (*frag_hdr);
-       }
-      else
-       {
-         bi = pi;
-         b = vlib_get_buffer (vm, bi);
-         //frag_hdr already set here
-       }
+      u32 to_bi;
+      vlib_buffer_t *to_b;
+      ip6_header_t *to_ip6;
+      ip6_frag_hdr_t *to_frag_hdr;
+      u8 *to_data;
 
-      ip6_hdr =
-       vlib_buffer_get_current (b) + vnet_buffer (p)->ip_frag.header_offset;
-      frag_hdr->fragment_offset_and_more =
-       ip6_frag_hdr_offset_and_more (initial_offset + (ptr >> 3),
-                                     (rem || has_more));
-      b->current_length = headers_len + len;
-      ip6_hdr->payload_length =
-       clib_host_to_net_u16 (b->current_length -
-                             vnet_buffer (p)->ip_frag.header_offset -
-                             sizeof (*ip6_hdr));
-
-      if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
-       {
-         //Encapsulating ipv4 header
-         ip4_header_t *encap_header4 =
-           (ip4_header_t *) vlib_buffer_get_current (b);
-         encap_header4->length = clib_host_to_net_u16 (b->current_length);
-         encap_header4->checksum = ip4_header_checksum (encap_header4);
-       }
-      else if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
+      len =
+       (rem >
+        (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) ? max : rem);
+      if (len != rem)          /* Last fragment does not need to divisible by 8 */
+       len &= ~0x7;
+      if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0)
        {
-         //Encapsulating ipv6 header
-         ip6_header_t *encap_header6 =
-           (ip6_header_t *) vlib_buffer_get_current (b);
-         encap_header6->payload_length =
-           clib_host_to_net_u16 (b->current_length -
-                                 sizeof (*encap_header6));
+         *error = IP_FRAG_ERROR_MEMORY;
+         return;
        }
+      vec_add1 (*buffer, to_bi);
+      frag_set_sw_if_index (to_b, org_from_b);
 
-      vec_add1 (*buffer, bi);
-
-      ptr += len;
-    }
-}
-
-static uword
-ip6_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
-{
-  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
-  vlib_node_runtime_t *error_node =
-    vlib_node_get_runtime (vm, ip6_frag_node.index);
-  from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;
-  next_index = node->cached_next_index;
-  u32 frag_sent = 0, small_packets = 0;
-  u32 *buffer = 0;
-
-  while (n_left_from > 0)
-    {
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+      /* Copy ip6 header */
+      clib_memcpy (to_b->data, ip6, sizeof (ip6_header_t));
+      to_ip6 = vlib_buffer_get_current (to_b);
+      to_frag_hdr = (ip6_frag_hdr_t *) (to_ip6 + 1);
+      to_data = (void *) (to_frag_hdr + 1);
 
-      while (n_left_from > 0 && n_left_to_next > 0)
+      /* Spin through from buffers filling up the to buffer */
+      u16 left_in_to_buffer = len, to_ptr = 0;
+      while (1)
        {
-         u32 pi0, *frag_from, frag_left;
-         vlib_buffer_t *p0;
-         ip_frag_error_t error0;
-         ip6_frag_next_t next0;
-
-         pi0 = from[0];
-         from += 1;
-         n_left_from -= 1;
-         error0 = IP_FRAG_ERROR_NONE;
+         u16 bytes_to_copy;
 
-         p0 = vlib_get_buffer (vm, pi0);
-         ip6_frag_do_fragment (vm, pi0, &buffer, &error0);
+         /* Figure out how many bytes we can safely copy */
+         bytes_to_copy = left_in_to_buffer <= left_in_from_buffer ?
+           left_in_to_buffer : left_in_from_buffer;
+         clib_memcpy (to_data + to_ptr, from_data + ptr, bytes_to_copy);
+         left_in_to_buffer -= bytes_to_copy;
+         ptr += bytes_to_copy;
+         left_in_from_buffer -= bytes_to_copy;
+         if (left_in_to_buffer == 0)
+           break;
 
-         if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+         ASSERT (left_in_from_buffer <= 0);
+         /* Move buffer */
+         if (!(from_b->flags & VLIB_BUFFER_NEXT_PRESENT))
            {
-             ip_frag_trace_t *tr =
-               vlib_add_trace (vm, node, p0, sizeof (*tr));
-             tr->header_offset = vnet_buffer (p0)->ip_frag.header_offset;
-             tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
-             tr->ipv6 = 1;
-             tr->n_fragments = vec_len (buffer);
-             tr->next = vnet_buffer (p0)->ip_frag.next_index;
+             *error = IP_FRAG_ERROR_MALFORMED;
+             return;
            }
+         from_b = vlib_get_buffer (vm, from_b->next_buffer);
+         from_data = (u8 *) vlib_buffer_get_current (from_b);
+         ptr = 0;
+         left_in_from_buffer = from_b->current_length;
+         to_ptr += bytes_to_copy;
+       }
 
-          /* *INDENT-OFF* */
-         next0 = (error0 == IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
-           ip_frag.next_index : IP6_FRAG_NEXT_DROP;
-          /* *INDENT-ON* */
-
-         frag_sent += vec_len (buffer);
-         small_packets += (vec_len (buffer) == 1);
-
-         //Send fragments that were added in the frame
-         frag_from = buffer;
-         frag_left = vec_len (buffer);
-         while (frag_left > 0)
-           {
-             while (frag_left > 0 && n_left_to_next > 0)
-               {
-                 u32 i;
-                 i = to_next[0] = frag_from[0];
-                 frag_from += 1;
-                 frag_left -= 1;
-                 to_next += 1;
-                 n_left_to_next -= 1;
+      to_b->current_length =
+       len + sizeof (ip6_header_t) + sizeof (ip6_frag_hdr_t);
+      to_ip6->payload_length =
+       clib_host_to_net_u16 (len + sizeof (ip6_frag_hdr_t));
+      to_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+      to_frag_hdr->fragment_offset_and_more =
+       ip6_frag_hdr_offset_and_more ((fo >> 3), len != rem);
+      to_frag_hdr->identification = ip_frag_id;
+      to_frag_hdr->next_hdr = ip6->protocol;
+      to_frag_hdr->rsv = 0;
 
-                 vlib_get_buffer (vm, i)->error = error_node->errors[error0];
-                 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-                                                  to_next, n_left_to_next, i,
-                                                  next0);
-               }
-             vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-             vlib_get_next_frame (vm, node, next_index, to_next,
-                                  n_left_to_next);
-           }
-         vec_reset_length (buffer);
-       }
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+      rem -= len;
+      fo += len;
     }
-  vec_free (buffer);
-  vlib_node_increment_counter (vm, ip6_frag_node.index,
-                              IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
-  vlib_node_increment_counter (vm, ip6_frag_node.index,
-                              IP_FRAG_ERROR_SMALL_PACKET, small_packets);
-
-  return frame->n_vectors;
 }
 
 static char *ip4_frag_error_strings[] = {
index ef5eb4c..06eeee8 100644 (file)
@@ -84,7 +84,7 @@ typedef enum
     IP_FRAG_N_ERROR,
 } ip_frag_error_t;
 
-void ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 offset, u16 mtu,
+void ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 mtu,
                              u8 next_index, u8 flags);
 void
 ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
index c49be09..718463d 100644 (file)
@@ -138,6 +138,10 @@ ipip6_fixup (vlib_main_t * vm, ip_adjacency_t * adj, vlib_buffer_t * b,
   ip6_header_t *ip6;
   const ipip_tunnel_t *t = data;
 
+  /* Must set locally originated otherwise we're not allowed to
+     fragment the packet later */
+  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+
   ip6 = vlib_buffer_get_current (b);
   ip6->payload_length =
     clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) -
index 60d6223..6171d3e 100644 (file)
@@ -99,6 +99,14 @@ ipip_input (vlib_main_t * vm, vlib_node_runtime_t * node,
          if (is_ipv6)
            {
              ip60 = vlib_buffer_get_current (b0);
+             /* Check for outer fragmentation */
+             if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+               {
+                 next0 = IPIP_INPUT_NEXT_DROP;
+                 b0->error = node->errors[IPIP_ERROR_FRAGMENTED_PACKET];
+                 goto drop;
+               }
+
              vlib_buffer_advance (b0, sizeof (*ip60));
              ip_set (&src0, &ip60->src_address, false);
              ip_set (&dst0, &ip60->dst_address, false);
index 456312b..8c1aaf2 100644 (file)
@@ -134,7 +134,8 @@ class VPPStats:
         for i in range(rv_len):
             n = ffi.string(rv[i].name).decode()
             e = stat_entry_to_python(self.api, rv[i])
-            stats[n] = e
+            if e != None:
+                stats[n] = e
         return stats
 
     def get_counter(self, name):
diff --git a/test/patches/scapy-2.4/defragment.patch b/test/patches/scapy-2.4/defragment.patch
new file mode 100644 (file)
index 0000000..4cc3b5f
--- /dev/null
@@ -0,0 +1,12 @@
+diff --git a/scapy/layers/inet6.py b/scapy/layers/inet6.py
+index b58978f4..c4a3084d 100644
+--- a/scapy/layers/inet6.py
++++ b/scapy/layers/inet6.py
+@@ -1073,6 +1073,7 @@ def defragment6(packets):
+     q = res[0]
+     nh = q[IPv6ExtHdrFragment].nh
+     q[IPv6ExtHdrFragment].underlayer.nh = nh
++    q[IPv6ExtHdrFragment].underlayer.plen = len(fragmentable)
+     del q[IPv6ExtHdrFragment].underlayer.payload
+     q /= conf.raw_layer(load=fragmentable)
+
index efaaf52..b7a0179 100644 (file)
@@ -2,8 +2,8 @@
 """IP{4,6} over IP{v,6} tunnel functional tests"""
 
 import unittest
-from scapy.layers.inet6 import IPv6, Ether, IP, UDP
-from scapy.all import fragment, RandShort
+from scapy.layers.inet6 import IPv6, Ether, IP, UDP, IPv6ExtHdrFragment
+from scapy.all import fragment, fragment6, RandShort, defragment6
 from framework import VppTestCase, VppTestRunner
 from vpp_ip import DpoProto
 from vpp_ip_route import VppIpRoute, VppRoutePath, VppIpTable
@@ -17,6 +17,7 @@ IPIP tests.
 """
 
 
+# Replace with defragment from scapy.
 def reassemble(listoffragments):
     buffer = StringIO.StringIO()
     first = listoffragments[0]
@@ -61,7 +62,7 @@ class TestIPIP(VppTestCase):
     def validate(self, rx, expected):
         self.assertEqual(rx, expected.__class__(str(expected)))
 
-    def generate_frags(self, payload_length, fragment_size):
+    def generate_ip4_frags(self, payload_length, fragment_size):
         p_ether = Ether(src=self.pg1.remote_mac, dst=self.pg1.local_mac)
         p_payload = UDP(sport=1234, dport=1234) / self.payload(payload_length)
         p_ip4 = IP(src="1.2.3.4", dst=self.pg0.remote_ip4)
@@ -171,7 +172,7 @@ class TestIPIP(VppTestCase):
             enable_ip4=1)
 
         # Send lots of fragments, verify reassembled packet
-        frags, p4_reply = self.generate_frags(3131, 1400)
+        frags, p4_reply = self.generate_ip4_frags(3131, 1400)
         f = []
         for i in range(0, 1000):
             f.extend(frags)
@@ -190,7 +191,7 @@ class TestIPIP(VppTestCase):
         f = []
         r = []
         for i in range(1, 90):
-            frags, p4_reply = self.generate_frags(i * 100, 1000)
+            frags, p4_reply = self.generate_ip4_frags(i * 100, 1000)
             f.extend(frags)
             r.extend(p4_reply)
         self.pg_enable_capture()
@@ -208,7 +209,7 @@ class TestIPIP(VppTestCase):
         # to reassemble and then refragment
         #
         self.vapi.sw_interface_set_mtu(self.pg0.sw_if_index, [576, 0, 0, 0])
-        frags, p4_reply = self.generate_frags(3123, 1200)
+        frags, p4_reply = self.generate_ip4_frags(3123, 1200)
         self.pg_enable_capture()
         self.pg1.add_stream(frags)
         self.pg_start()
@@ -219,7 +220,7 @@ class TestIPIP(VppTestCase):
         self.validate(reass_pkt, p4_reply)
 
         self.vapi.sw_interface_set_mtu(self.pg0.sw_if_index, [1600, 0, 0, 0])
-        frags, p4_reply = self.generate_frags(3123, 1200)
+        frags, p4_reply = self.generate_ip4_frags(3123, 1200)
         self.pg_enable_capture()
         self.pg1.add_stream(frags)
         self.pg_start()
@@ -229,20 +230,66 @@ class TestIPIP(VppTestCase):
         p4_reply.id = 512
         self.validate(reass_pkt, p4_reply)
 
-    def test_ipip6(self):
-        """ ip{v4,v6} over ip6 test """
-        p_ether = Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac)
-        p_ip6 = IPv6(src="1::1", dst="DEAD::1", tc=42, nh='UDP')
-        p_ip4 = IP(src="1.2.3.4", dst="130.67.0.1", tos=42)
-        p_payload = UDP(sport=1234, dport=1234)
+    def test_ipip_create(self):
+        """ ipip create / delete interface test """
+        rv = self.vapi.ipip_add_tunnel(
+            src_address=inet_pton(AF_INET, '1.2.3.4'),
+            dst_address=inet_pton(AF_INET, '2.3.4.5'), is_ipv6=0)
+        sw_if_index = rv.sw_if_index
+        self.vapi.ipip_del_tunnel(sw_if_index)
+
+    def test_ipip_vrf_create(self):
+        """ ipip create / delete interface VRF test """
+
+        t = VppIpTable(self, 20)
+        t.add_vpp_config()
+        rv = self.vapi.ipip_add_tunnel(
+            src_address=inet_pton(AF_INET, '1.2.3.4'),
+            dst_address=inet_pton(AF_INET, '2.3.4.5'), is_ipv6=0,
+            table_id=20)
+        sw_if_index = rv.sw_if_index
+        self.vapi.ipip_del_tunnel(sw_if_index)
+
+    def payload(self, len):
+        return 'x' * len
 
+
+class TestIPIP6(VppTestCase):
+    """ IPIP6 Test Case """
+
+    @classmethod
+    def setUpClass(cls):
+        super(TestIPIP6, cls).setUpClass()
+        cls.create_pg_interfaces(range(2))
+        cls.interfaces = list(cls.pg_interfaces)
+
+    def setUp(self):
+        for i in self.interfaces:
+            i.admin_up()
+            i.config_ip4()
+            i.config_ip6()
+            i.disable_ipv6_ra()
+            i.resolve_arp()
+            i.resolve_ndp()
+        self.setup_tunnel()
+
+    def tearDown(self):
+        if not self.vpp_dead:
+            self.destroy_tunnel()
+            for i in self.pg_interfaces:
+                i.unconfig_ip4()
+                i.unconfig_ip6()
+                i.admin_down()
+            super(TestIPIP6, self).tearDown()
+
+    def setup_tunnel(self):
         # IPv6 transport
         rv = self.vapi.ipip_add_tunnel(
             src_address=self.pg0.local_ip6n,
             dst_address=self.pg1.remote_ip6n, tc_tos=255)
 
         sw_if_index = rv.sw_if_index
-
+        self.tunnel_if_index = sw_if_index
         self.vapi.sw_interface_set_flags(sw_if_index, 1)
         self.vapi.sw_interface_set_unnumbered(
             ip_sw_if_index=self.pg0.sw_if_index, sw_if_index=sw_if_index)
@@ -262,12 +309,57 @@ class TestIPIP(VppTestCase):
                           proto=DpoProto.DPO_PROTO_IP6)], is_ip6=1)
         ip6_via_tunnel.add_vpp_config()
 
-        # Encapsulation
+        self.tunnel_ip6_via_tunnel = ip6_via_tunnel
+        self.tunnel_ip4_via_tunnel = ip4_via_tunnel
+
+    def destroy_tunnel(self):
+        # IPv6 transport
+        self.tunnel_ip4_via_tunnel.remove_vpp_config()
+        self.tunnel_ip6_via_tunnel.remove_vpp_config()
+
+        rv = self.vapi.ipip_del_tunnel(sw_if_index=self.tunnel_if_index)
+
+    def validate(self, rx, expected):
+        self.assertEqual(rx, expected.__class__(str(expected)))
+
+    def generate_ip6_frags(self, payload_length, fragment_size):
+        p_ether = Ether(src=self.pg1.remote_mac, dst=self.pg1.local_mac)
+        p_payload = UDP(sport=1234, dport=1234) / self.payload(payload_length)
+        p_ip6 = IPv6(src="1::1", dst=self.pg0.remote_ip6)
+        outer_ip6 = (p_ether / IPv6(src=self.pg1.remote_ip6,
+                                    dst=self.pg0.local_ip6) /
+                     IPv6ExtHdrFragment() / p_ip6 / p_payload)
+        frags = fragment6(outer_ip6, fragment_size)
+        p6_reply = (p_ip6 / p_payload)
+        p6_reply.hlim -= 1
+        return frags, p6_reply
+
+    def generate_ip6_hairpin_frags(self, payload_length, fragment_size):
+        p_ether = Ether(src=self.pg1.remote_mac, dst=self.pg1.local_mac)
+        p_payload = UDP(sport=1234, dport=1234) / self.payload(payload_length)
+        p_ip6 = IPv6(src="1::1", dst="dead::1")
+        outer_ip6 = (p_ether / IPv6(src=self.pg1.remote_ip6,
+                                    dst=self.pg0.local_ip6) /
+                     IPv6ExtHdrFragment() / p_ip6 / p_payload)
+        frags = fragment6(outer_ip6, fragment_size)
+        p_ip6.hlim -= 1
+        p6_reply = (IPv6(src=self.pg0.local_ip6, dst=self.pg1.remote_ip6,
+                         hlim=63) / p_ip6 / p_payload)
+
+        return frags, p6_reply
+
+    def test_encap(self):
+        """ ip{v4,v6} over ip6 test encap """
+        p_ether = Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac)
+        p_ip6 = IPv6(src="1::1", dst="DEAD::1", tc=42, nh='UDP')
+        p_ip4 = IP(src="1.2.3.4", dst="130.67.0.1", tos=42)
+        p_payload = UDP(sport=1234, dport=1234)
 
+        # Encapsulation
         # IPv6 in to IPv6 tunnel
         p6 = (p_ether / p_ip6 / p_payload)
         p6_reply = (IPv6(src=self.pg0.local_ip6, dst=self.pg1.remote_ip6,
-                         hlim=63, tc=42) /
+                         hlim=64, tc=42) /
                     p_ip6 / p_payload)
         p6_reply[1].hlim -= 1
         rx = self.send_and_expect(self.pg0, p6*11, self.pg1)
@@ -277,19 +369,24 @@ class TestIPIP(VppTestCase):
         # IPv4 in to IPv6 tunnel
         p4 = (p_ether / p_ip4 / p_payload)
         p4_reply = (IPv6(src=self.pg0.local_ip6,
-                         dst=self.pg1.remote_ip6, hlim=63, tc=42) /
+                         dst=self.pg1.remote_ip6, hlim=64, tc=42) /
                     p_ip4 / p_payload)
         p4_reply[1].ttl -= 1
         rx = self.send_and_expect(self.pg0, p4*11, self.pg1)
         for p in rx:
             self.validate(p[1], p4_reply)
 
-        # Decapsulation
+    def test_decap(self):
+        """ ip{v4,v6} over ip6 test decap """
 
         p_ether = Ether(src=self.pg1.remote_mac, dst=self.pg1.local_mac)
+        p_ip6 = IPv6(src="1::1", dst="DEAD::1", tc=42, nh='UDP')
+        p_ip4 = IP(src="1.2.3.4", dst=self.pg0.remote_ip4)
+        p_payload = UDP(sport=1234, dport=1234)
 
+        # Decapsulation
         # IPv6 tunnel to IPv4
-        p_ip4 = IP(src="1.2.3.4", dst=self.pg0.remote_ip4)
+
         p4 = (p_ether / IPv6(src=self.pg1.remote_ip6,
                              dst=self.pg0.local_ip6) / p_ip4 / p_payload)
         p4_reply = (p_ip4 / p_payload)
@@ -308,6 +405,96 @@ class TestIPIP(VppTestCase):
         for p in rx:
             self.validate(p[1], p6_reply)
 
+    def test_frag(self):
+        """ ip{v4,v6} over ip6 test frag """
+
+        p_ether = Ether(src=self.pg1.remote_mac, dst=self.pg1.local_mac)
+        p_ip6 = IPv6(src="1::1", dst="DEAD::1", tc=42, nh='UDP')
+        p_ip4 = IP(src="1.2.3.4", dst=self.pg0.remote_ip4)
+        p_payload = UDP(sport=1234, dport=1234)
+
+        #
+        # Fragmentation / Reassembly and Re-fragmentation
+        #
+        rv = self.vapi.ip_reassembly_enable_disable(
+            sw_if_index=self.pg1.sw_if_index,
+            enable_ip6=1)
+
+        # Send lots of fragments, verify reassembled packet
+        before_cnt = self.statistics.get_counter(
+            '/err/ipip6-input/packets decapsulated')
+        frags, p6_reply = self.generate_ip6_frags(3131, 1400)
+        f = []
+        for i in range(0, 1000):
+            f.extend(frags)
+        self.pg1.add_stream(f)
+        self.pg_enable_capture()
+        self.pg_start()
+        rx = self.pg0.get_capture(1000)
+
+        for p in rx:
+            self.validate(p[1], p6_reply)
+
+        cnt = self.statistics.get_counter(
+            '/err/ipip6-input/packets decapsulated')
+        self.assertEqual(cnt, before_cnt + 1000)
+
+        f = []
+        r = []
+        # TODO: Check out why reassembly of atomic fragments doesn't work
+        for i in range(10, 90):
+            frags, p6_reply = self.generate_ip6_frags(i * 100, 1000)
+            f.extend(frags)
+            r.extend(p6_reply)
+        self.pg_enable_capture()
+        self.pg1.add_stream(f)
+        self.pg_start()
+        rx = self.pg0.get_capture(80)
+        i = 0
+        for p in rx:
+            self.validate(p[1], r[i])
+            i += 1
+
+        # Simple fragmentation
+        p_ether = Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac)
+        self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index, [1280, 0, 0, 0])
+
+        # IPv6 in to IPv6 tunnel
+        p_payload = UDP(sport=1234, dport=1234) / self.payload(1300)
+
+        p6 = (p_ether / p_ip6 / p_payload)
+        p6_reply = (IPv6(src=self.pg0.local_ip6, dst=self.pg1.remote_ip6,
+                         hlim=63, tc=42) /
+                    p_ip6 / p_payload)
+        p6_reply[1].hlim -= 1
+        self.pg_enable_capture()
+        self.pg0.add_stream(p6)
+        self.pg_start()
+        rx = self.pg1.get_capture(2)
+
+        # Scapy defragment doesn't deal well with multiple layers
+        # of the same type / Ethernet header first
+        f = [p[1] for p in rx]
+        reass_pkt = defragment6(f)
+        self.validate(reass_pkt, p6_reply)
+
+        # Now try with re-fragmentation
+        #
+        # Send large fragments to tunnel head-end, for the tunnel head end
+        # to reassemble and then refragment out the tunnel again.
+        # Hair-pinning
+        #
+        self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index, [1280, 0, 0, 0])
+        frags, p6_reply = self.generate_ip6_hairpin_frags(8000, 1200)
+        self.pg_enable_capture()
+        self.pg1.add_stream(frags)
+        self.pg_start()
+        rx = self.pg1.get_capture(7)
+        f = [p[1] for p in rx]
+        reass_pkt = defragment6(f)
+        p6_reply.id = 256
+        self.validate(reass_pkt, p6_reply)
+
     def test_ipip_create(self):
         """ ipip create / delete interface test """
         rv = self.vapi.ipip_add_tunnel(
index 2595b03..ad2620b 100644 (file)
@@ -360,7 +360,7 @@ class TestMPLS(VppTestCase):
                 # ICMP sourced from the interface's address
                 self.assertEqual(rx_ip.src, src_if.local_ip6)
                 # hop-limit reset to 255 for IMCP packet
-                self.assertEqual(rx_ip.hlim, 254)
+                self.assertEqual(rx_ip.hlim, 255)
 
                 icmp = rx[ICMPv6TimeExceeded]
 
index a70d513..e9b4ffe 100644 (file)
@@ -188,16 +188,16 @@ class TestMTU(VppTestCase):
         p_icmp6 = ICMPv6PacketTooBig(mtu=1280, cksum=0x4c7a)
         icmp6_reply = (IPv6(src=self.pg0.local_ip6,
                             dst=self.pg0.remote_ip6,
-                            hlim=254, plen=1240) /
+                            hlim=255, plen=1240) /
                        p_icmp6 / p_ip6 / p_payload)
         icmp6_reply[2].hlim -= 1
         n = icmp6_reply.__class__(str(icmp6_reply))
         s = str(icmp6_reply)
-        icmp6_reply = s[0:1280]
+        icmp6_reply_str = s[0:1280]
 
         rx = self.send_and_expect(self.pg0, p6*9, self.pg0)
         for p in rx:
-            self.validate_bytes(str(p[1]), icmp6_reply)
+            self.validate_bytes(str(p[1]), icmp6_reply_str)
 
         # Reset MTU
         self.vapi.sw_interface_set_mtu(self.pg1.sw_if_index,