ip: functional interface to ip fragmentation
[vpp.git] / src / plugins / map / ip4_map.c
index ad94907..f2a0090 100644 (file)
@@ -26,8 +26,6 @@ enum ip4_map_next_e
 #ifdef MAP_SKIP_IP6_LOOKUP
   IP4_MAP_NEXT_IP6_REWRITE,
 #endif
-  IP4_MAP_NEXT_IP4_FRAGMENT,
-  IP4_MAP_NEXT_IP6_FRAGMENT,
   IP4_MAP_NEXT_ICMP_ERROR,
   IP4_MAP_NEXT_DROP,
   IP4_MAP_N_NEXT,
@@ -117,17 +115,26 @@ ip4_map_decrement_ttl (ip4_header_t * ip, u8 * error)
 }
 
 static u32
-ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error)
+ip4_map_fragment (vlib_main_t * vm, u32 bi, u16 mtu, bool df, u32 ** buffers,
+                 u8 * error)
 {
   map_main_t *mm = &map_main;
+  vlib_buffer_t *b = vlib_get_buffer (vm, bi);
 
   if (mm->frag_inner)
     {
-      // TODO: Fix inner fragmentation after removed inner support from ip-frag.
-      ip_frag_set_vnet_buffer (b, /*sizeof (ip6_header_t), */ mtu,
-                              IP4_FRAG_NEXT_IP6_LOOKUP,
-                              IP_FRAG_FLAG_IP6_HEADER);
-      return (IP4_MAP_NEXT_IP4_FRAGMENT);
+      /* IPv4 fragmented packets inside of IPv6 */
+      ip4_frag_do_fragment (vm, bi, mtu, sizeof (ip6_header_t), buffers);
+
+      /* Fixup */
+      u32 *i;
+      vec_foreach (i, *buffers)
+      {
+       vlib_buffer_t *p = vlib_get_buffer (vm, *i);
+       ip6_header_t *ip6 = vlib_buffer_get_current (p);
+       ip6->payload_length =
+         clib_host_to_net_u16 (p->current_length - sizeof (ip6_header_t));
+      }
     }
   else
     {
@@ -140,10 +147,11 @@ ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error)
          *error = MAP_ERROR_DF_SET;
          return (IP4_MAP_NEXT_ICMP_ERROR);
        }
-      ip_frag_set_vnet_buffer (b, mtu, IP6_FRAG_NEXT_IP6_LOOKUP,
-                              IP_FRAG_FLAG_IP6_HEADER);
-      return (IP4_MAP_NEXT_IP6_FRAGMENT);
+
+      /* Create IPv6 fragments here */
+      ip6_frag_do_fragment (vm, bi, mtu, 0, buffers);
     }
+  return (IP4_MAP_NEXT_IP6_LOOKUP);
 }
 
 /*
@@ -165,189 +173,6 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
   while (n_left_from > 0)
     {
       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      /* Dual loop */
-      while (n_left_from >= 4 && n_left_to_next >= 2)
-       {
-         u32 pi0, pi1;
-         vlib_buffer_t *p0, *p1;
-         map_domain_t *d0, *d1;
-         u8 error0 = MAP_ERROR_NONE, error1 = MAP_ERROR_NONE;
-         ip4_header_t *ip40, *ip41;
-         u16 port0 = 0, port1 = 0;
-         ip6_header_t *ip6h0, *ip6h1;
-         u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
-         u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP, next1 =
-           IP4_MAP_NEXT_IP6_LOOKUP;
-
-         /* Prefetch next iteration. */
-         {
-           vlib_buffer_t *p2, *p3;
-
-           p2 = vlib_get_buffer (vm, from[2]);
-           p3 = vlib_get_buffer (vm, from[3]);
-
-           vlib_prefetch_buffer_header (p2, STORE);
-           vlib_prefetch_buffer_header (p3, STORE);
-           /* IPv4 + 8 = 28. possibly plus -40 */
-           CLIB_PREFETCH (p2->data - 40, 68, STORE);
-           CLIB_PREFETCH (p3->data - 40, 68, STORE);
-         }
-
-         pi0 = to_next[0] = from[0];
-         pi1 = to_next[1] = from[1];
-         from += 2;
-         n_left_from -= 2;
-         to_next += 2;
-         n_left_to_next -= 2;
-
-         p0 = vlib_get_buffer (vm, pi0);
-         p1 = vlib_get_buffer (vm, pi1);
-         ip40 = vlib_buffer_get_current (p0);
-         ip41 = vlib_buffer_get_current (p1);
-         d0 =
-           ip4_map_get_domain (&ip40->dst_address, &map_domain_index0,
-                               &error0);
-         d1 =
-           ip4_map_get_domain (&ip41->dst_address, &map_domain_index1,
-                               &error1);
-
-         /*
-          * Shared IPv4 address
-          */
-         port0 = ip4_map_port_and_security_check (d0, p0, &error0);
-         port1 = ip4_map_port_and_security_check (d1, p1, &error1);
-
-         /* Decrement IPv4 TTL */
-         ip4_map_decrement_ttl (ip40, &error0);
-         ip4_map_decrement_ttl (ip41, &error1);
-         bool df0 =
-           ip40->flags_and_fragment_offset &
-           clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
-         bool df1 =
-           ip41->flags_and_fragment_offset &
-           clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
-
-         /* MAP calc */
-         u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32);
-         u32 da41 = clib_net_to_host_u32 (ip41->dst_address.as_u32);
-         u16 dp40 = clib_net_to_host_u16 (port0);
-         u16 dp41 = clib_net_to_host_u16 (port1);
-         u64 dal60 = map_get_pfx (d0, da40, dp40);
-         u64 dal61 = map_get_pfx (d1, da41, dp41);
-         u64 dar60 = map_get_sfx (d0, da40, dp40);
-         u64 dar61 = map_get_sfx (d1, da41, dp41);
-         if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE)
-           error0 = MAP_ERROR_NO_BINDING;
-         if (dal61 == 0 && dar61 == 0 && error1 == MAP_ERROR_NONE)
-           error1 = MAP_ERROR_NO_BINDING;
-
-         /* construct ipv6 header */
-         vlib_buffer_advance (p0, -sizeof (ip6_header_t));
-         vlib_buffer_advance (p1, -sizeof (ip6_header_t));
-         ip6h0 = vlib_buffer_get_current (p0);
-         ip6h1 = vlib_buffer_get_current (p1);
-         vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
-         vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
-
-         ip6h0->ip_version_traffic_class_and_flow_label =
-           ip4_map_vtcfl (ip40, p0);
-         ip6h1->ip_version_traffic_class_and_flow_label =
-           ip4_map_vtcfl (ip41, p1);
-         ip6h0->payload_length = ip40->length;
-         ip6h1->payload_length = ip41->length;
-         ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
-         ip6h1->protocol = IP_PROTOCOL_IP_IN_IP;
-         ip6h0->hop_limit = 0x40;
-         ip6h1->hop_limit = 0x40;
-         ip6h0->src_address = d0->ip6_src;
-         ip6h1->src_address = d1->ip6_src;
-         ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60);
-         ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60);
-         ip6h1->dst_address.as_u64[0] = clib_host_to_net_u64 (dal61);
-         ip6h1->dst_address.as_u64[1] = clib_host_to_net_u64 (dar61);
-
-         /*
-          * Determine next node. Can be one of:
-          * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
-          */
-         if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
-           {
-             if (PREDICT_FALSE
-                 (d0->mtu
-                  && (clib_net_to_host_u16 (ip6h0->payload_length) +
-                      sizeof (*ip6h0) > d0->mtu)))
-               {
-                 next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0);
-               }
-             else
-               {
-                 next0 =
-                   ip4_map_ip6_lookup_bypass (p0,
-                                              ip40) ?
-                   IP4_MAP_NEXT_IP6_REWRITE : next0;
-                 vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-                                                  thread_index,
-                                                  map_domain_index0, 1,
-                                                  clib_net_to_host_u16
-                                                  (ip6h0->payload_length) +
-                                                  40);
-               }
-           }
-         else
-           {
-             next0 = IP4_MAP_NEXT_DROP;
-           }
-
-         /*
-          * Determine next node. Can be one of:
-          * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
-          */
-         if (PREDICT_TRUE (error1 == MAP_ERROR_NONE))
-           {
-             if (PREDICT_FALSE
-                 (d1->mtu
-                  && (clib_net_to_host_u16 (ip6h1->payload_length) +
-                      sizeof (*ip6h1) > d1->mtu)))
-               {
-                 next1 = ip4_map_fragment (p1, d1->mtu, df1, &error1);
-               }
-             else
-               {
-                 next1 =
-                   ip4_map_ip6_lookup_bypass (p1,
-                                              ip41) ?
-                   IP4_MAP_NEXT_IP6_REWRITE : next1;
-                 vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-                                                  thread_index,
-                                                  map_domain_index1, 1,
-                                                  clib_net_to_host_u16
-                                                  (ip6h1->payload_length) +
-                                                  40);
-               }
-           }
-         else
-           {
-             next1 = IP4_MAP_NEXT_DROP;
-           }
-
-         if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
-           {
-             map_add_trace (vm, node, p0, map_domain_index0, port0);
-           }
-         if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
-           {
-             map_add_trace (vm, node, p1, map_domain_index1, port0);
-           }
-
-         p0->error = error_node->errors[error0];
-         p1->error = error_node->errors[error1];
-
-         vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
-                                          n_left_to_next, pi0, pi1, next0,
-                                          next1);
-       }
-
       while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 pi0;
@@ -359,12 +184,13 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
          ip6_header_t *ip6h0;
          u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP;
          u32 map_domain_index0 = ~0;
+         u32 *buffer0 = 0;
+         bool free_original_buffer0 = false;
+         u32 *frag_from0, frag_left0;
 
          pi0 = to_next[0] = from[0];
          from += 1;
          n_left_from -= 1;
-         to_next += 1;
-         n_left_to_next -= 1;
 
          p0 = vlib_get_buffer (vm, pi0);
          ip40 = vlib_buffer_get_current (p0);
@@ -413,7 +239,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 
          /*
           * Determine next node. Can be one of:
-          * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
+          * ip6-lookup, ip6-rewrite, error-drop
           */
          if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
            {
@@ -422,7 +248,14 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
                   && (clib_net_to_host_u16 (ip6h0->payload_length) +
                       sizeof (*ip6h0) > d0->mtu)))
                {
-                 next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0);
+                 next0 =
+                   ip4_map_fragment (vm, pi0, d0->mtu, df0, &buffer0,
+                                     &error0);
+
+                 if (error0 == MAP_ERROR_NONE)
+                   {
+                     free_original_buffer0 = true;
+                   }
                }
              else
                {
@@ -450,8 +283,41 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 
          p0->error = error_node->errors[error0];
        exit:
-         vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-                                          n_left_to_next, pi0, next0);
+         /* Send fragments that were added in the frame */
+         if (free_original_buffer0)
+           {
+             vlib_buffer_free_one (vm, pi0);   /* Free original packet */
+           }
+         else
+           {
+             vec_add1 (buffer0, pi0);
+           }
+
+         frag_from0 = buffer0;
+         frag_left0 = vec_len (buffer0);
+
+         while (frag_left0 > 0)
+           {
+             while (frag_left0 > 0 && n_left_to_next > 0)
+               {
+                 u32 i0;
+                 i0 = to_next[0] = frag_from0[0];
+                 frag_from0 += 1;
+                 frag_left0 -= 1;
+                 to_next += 1;
+                 n_left_to_next -= 1;
+
+                 vlib_get_buffer (vm, i0)->error =
+                   error_node->errors[error0];
+                 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                                  to_next, n_left_to_next,
+                                                  i0, next0);
+               }
+             vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+             vlib_get_next_frame (vm, node, next_index, to_next,
+                                  n_left_to_next);
+           }
+         vec_reset_length (buffer0);
        }
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     }
@@ -491,8 +357,6 @@ VLIB_REGISTER_NODE(ip4_map_node) = {
 #ifdef MAP_SKIP_IP6_LOOKUP
     [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-load-balance",
 #endif
-    [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
-    [IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag",
     [IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error",
     [IP4_MAP_NEXT_DROP] = "error-drop",
   },