map: use SVR for MAP-E 75/22175/10
author Klement Sekera <ksekera@cisco.com>
Wed, 21 Aug 2019 10:53:14 +0000 (10:53 +0000)
committer Ole Trøan <otroan@employees.org>
Thu, 26 Sep 2019 16:34:02 +0000 (16:34 +0000)
This change is part of an effort to unify reassembly code. By removing
shallow virtual reassembly functionality in MAP and using the common
vnet-provided shallow virtual reassembly, code size and complexity are
reduced.

Type: refactor
Change-Id: I431f47d4db97154fecaeaecd6719cfc3b83cfc4a
Signed-off-by: Klement Sekera <ksekera@cisco.com>
src/plugins/map/ip4_map.c
src/plugins/map/ip6_map.c
src/plugins/map/map.h
src/plugins/map/map_api.c
src/plugins/map/test/test_map.py

index 64da602..2466f53 100644 (file)
@@ -20,8 +20,6 @@
 #include <vnet/ip/ip_frag.h>
 #include <vnet/ip/ip4_to_ip6.h>
 
-vlib_node_registration_t ip4_map_reass_node;
-
 enum ip4_map_next_e
 {
   IP4_MAP_NEXT_IP6_LOOKUP,
@@ -30,79 +28,38 @@ enum ip4_map_next_e
 #endif
   IP4_MAP_NEXT_IP4_FRAGMENT,
   IP4_MAP_NEXT_IP6_FRAGMENT,
-  IP4_MAP_NEXT_REASS,
   IP4_MAP_NEXT_ICMP_ERROR,
   IP4_MAP_NEXT_DROP,
   IP4_MAP_N_NEXT,
 };
 
-enum ip4_map_reass_next_t
-{
-  IP4_MAP_REASS_NEXT_IP6_LOOKUP,
-  IP4_MAP_REASS_NEXT_IP4_FRAGMENT,
-  IP4_MAP_REASS_NEXT_DROP,
-  IP4_MAP_REASS_N_NEXT,
-};
-
-typedef struct
+static_always_inline u16
+ip4_map_port_and_security_check (map_domain_t * d, vlib_buffer_t * b0,
+                                u8 * error)
 {
-  u32 map_domain_index;
   u16 port;
-  u8 cached;
-} map_ip4_map_reass_trace_t;
+  if (d->psid_length > 0)
+    {
+      ip4_header_t *ip = vlib_buffer_get_current (b0);
 
-u8 *
-format_ip4_map_reass_trace (u8 * s, va_list * args)
-{
-  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
-  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-  map_ip4_map_reass_trace_t *t = va_arg (*args, map_ip4_map_reass_trace_t *);
-  return format (s, "MAP domain index: %d L4 port: %u Status: %s",
-                t->map_domain_index, t->port,
-                t->cached ? "cached" : "forwarded");
-}
+      if (PREDICT_FALSE
+         ((ip->ip_version_and_header_length != 0x45)
+          || clib_host_to_net_u16 (ip->length) < 28))
+       {
+         return 0;
+       }
 
-static_always_inline u16
-ip4_map_port_and_security_check (map_domain_t * d, ip4_header_t * ip,
-                                u32 * next, u8 * error)
-{
-  u16 port = 0;
+      port = vnet_buffer (b0)->ip.reass.l4_dst_port;
 
-  if (d->psid_length > 0)
-    {
-      if (ip4_get_fragment_offset (ip) == 0)
+      /* Verify that port is not among the well-known ports */
+      if ((d->psid_offset > 0)
+         && (clib_net_to_host_u16 (port) < (0x1 << (16 - d->psid_offset))))
        {
-         if (PREDICT_FALSE
-             ((ip->ip_version_and_header_length != 0x45)
-              || clib_host_to_net_u16 (ip->length) < 28))
-           {
-             return 0;
-           }
-         port = ip4_get_port (ip, 0);
-         if (port)
-           {
-             /* Verify that port is not among the well-known ports */
-             if ((d->psid_offset > 0)
-                 && (clib_net_to_host_u16 (port) <
-                     (0x1 << (16 - d->psid_offset))))
-               {
-                 *error = MAP_ERROR_ENCAP_SEC_CHECK;
-               }
-             else
-               {
-                 if (ip4_get_fragment_more (ip))
-                   *next = IP4_MAP_NEXT_REASS;
-                 return (port);
-               }
-           }
-         else
-           {
-             *error = MAP_ERROR_BAD_PROTOCOL;
-           }
+         *error = MAP_ERROR_ENCAP_SEC_CHECK;
        }
       else
        {
-         *next = IP4_MAP_NEXT_REASS;
+         return port;
        }
     }
   return (0);
@@ -258,8 +215,8 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
          /*
           * Shared IPv4 address
           */
-         port0 = ip4_map_port_and_security_check (d0, ip40, &next0, &error0);
-         port1 = ip4_map_port_and_security_check (d1, ip41, &next1, &error1);
+         port0 = ip4_map_port_and_security_check (d0, p0, &error0);
+         port1 = ip4_map_port_and_security_check (d1, p1, &error1);
 
          /* Decrement IPv4 TTL */
          ip4_map_decrement_ttl (ip40, &error0);
@@ -280,11 +237,9 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
          u64 dal61 = map_get_pfx (d1, da41, dp41);
          u64 dar60 = map_get_sfx (d0, da40, dp40);
          u64 dar61 = map_get_sfx (d1, da41, dp41);
-         if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE
-             && next0 != IP4_MAP_NEXT_REASS)
+         if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE)
            error0 = MAP_ERROR_NO_BINDING;
-         if (dal61 == 0 && dar61 == 0 && error1 == MAP_ERROR_NONE
-             && next1 != IP4_MAP_NEXT_REASS)
+         if (dal61 == 0 && dar61 == 0 && error1 == MAP_ERROR_NONE)
            error1 = MAP_ERROR_NO_BINDING;
 
          /* construct ipv6 header */
@@ -314,7 +269,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 
          /*
           * Determine next node. Can be one of:
-          * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop
+          * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
           */
          if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
            {
@@ -346,7 +301,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 
          /*
           * Determine next node. Can be one of:
-          * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop
+          * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
           */
          if (PREDICT_TRUE (error1 == MAP_ERROR_NONE))
            {
@@ -430,7 +385,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
          /*
           * Shared IPv4 address
           */
-         port0 = ip4_map_port_and_security_check (d0, ip40, &next0, &error0);
+         port0 = ip4_map_port_and_security_check (d0, p0, &error0);
 
          /* Decrement IPv4 TTL */
          ip4_map_decrement_ttl (ip40, &error0);
@@ -443,8 +398,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
          u16 dp40 = clib_net_to_host_u16 (port0);
          u64 dal60 = map_get_pfx (d0, da40, dp40);
          u64 dar60 = map_get_sfx (d0, da40, dp40);
-         if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE
-             && next0 != IP4_MAP_NEXT_REASS)
+         if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE)
            error0 = MAP_ERROR_NO_BINDING;
 
          /* construct ipv6 header */
@@ -463,7 +417,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 
          /*
           * Determine next node. Can be one of:
-          * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop
+          * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
           */
          if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
            {
@@ -511,195 +465,6 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
   return frame->n_vectors;
 }
 
-/*
- * ip4_map_reass
- */
-static uword
-ip4_map_reass (vlib_main_t * vm,
-              vlib_node_runtime_t * node, vlib_frame_t * frame)
-{
-  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
-  vlib_node_runtime_t *error_node =
-    vlib_node_get_runtime (vm, ip4_map_reass_node.index);
-  from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;
-  next_index = node->cached_next_index;
-  map_main_t *mm = &map_main;
-  vlib_combined_counter_main_t *cm = mm->domain_counters;
-  u32 thread_index = vm->thread_index;
-  u32 *fragments_to_drop = NULL;
-  u32 *fragments_to_loopback = NULL;
-
-  while (n_left_from > 0)
-    {
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      while (n_left_from > 0 && n_left_to_next > 0)
-       {
-         u32 pi0;
-         vlib_buffer_t *p0;
-         map_domain_t *d0;
-         u8 error0 = MAP_ERROR_NONE;
-         ip4_header_t *ip40;
-         i32 port0 = 0;
-         ip6_header_t *ip60;
-         u32 next0 = IP4_MAP_REASS_NEXT_IP6_LOOKUP;
-         u32 map_domain_index0 = ~0;
-         u8 cached = 0;
-
-         pi0 = to_next[0] = from[0];
-         from += 1;
-         n_left_from -= 1;
-         to_next += 1;
-         n_left_to_next -= 1;
-
-         p0 = vlib_get_buffer (vm, pi0);
-         ip60 = vlib_buffer_get_current (p0);
-         ip40 = (ip4_header_t *) (ip60 + 1);
-         d0 =
-           ip4_map_get_domain (&ip40->dst_address, &map_domain_index0,
-                               &error0);
-
-         map_ip4_reass_lock ();
-         map_ip4_reass_t *r = map_ip4_reass_get (ip40->src_address.as_u32,
-                                                 ip40->dst_address.as_u32,
-                                                 ip40->fragment_id,
-                                                 ip40->protocol,
-                                                 &fragments_to_drop);
-         if (PREDICT_FALSE (!r))
-           {
-             // Could not create a caching entry
-             error0 = MAP_ERROR_FRAGMENT_MEMORY;
-           }
-         else if (PREDICT_TRUE (ip4_get_fragment_offset (ip40)))
-           {
-             if (r->port >= 0)
-               {
-                 // We know the port already
-                 port0 = r->port;
-               }
-             else if (map_ip4_reass_add_fragment (r, pi0))
-               {
-                 // Not enough space for caching
-                 error0 = MAP_ERROR_FRAGMENT_MEMORY;
-                 map_ip4_reass_free (r, &fragments_to_drop);
-               }
-             else
-               {
-                 cached = 1;
-               }
-           }
-         else if ((port0 = ip4_get_port (ip40, 0)) == 0)
-           {
-             // Could not find port. We'll free the reassembly.
-             error0 = MAP_ERROR_BAD_PROTOCOL;
-             port0 = 0;
-             map_ip4_reass_free (r, &fragments_to_drop);
-           }
-         else
-           {
-             r->port = port0;
-             map_ip4_reass_get_fragments (r, &fragments_to_loopback);
-           }
-
-#ifdef MAP_IP4_REASS_COUNT_BYTES
-         if (!cached && r)
-           {
-             r->forwarded += clib_host_to_net_u16 (ip40->length) - 20;
-             if (!ip4_get_fragment_more (ip40))
-               r->expected_total =
-                 ip4_get_fragment_offset (ip40) * 8 +
-                 clib_host_to_net_u16 (ip40->length) - 20;
-             if (r->forwarded >= r->expected_total)
-               map_ip4_reass_free (r, &fragments_to_drop);
-           }
-#endif
-
-         map_ip4_reass_unlock ();
-
-         // NOTE: Most operations have already been performed by ip4_map
-         // All we need is the right destination address
-         ip60->dst_address.as_u64[0] =
-           map_get_pfx_net (d0, ip40->dst_address.as_u32, port0);
-         ip60->dst_address.as_u64[1] =
-           map_get_sfx_net (d0, ip40->dst_address.as_u32, port0);
-
-         if (PREDICT_FALSE
-             (d0->mtu
-              && (clib_net_to_host_u16 (ip60->payload_length) +
-                  sizeof (*ip60) > d0->mtu)))
-           {
-             // TODO: vnet_buffer (p0)->ip_frag.header_offset = sizeof (*ip60);
-             vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP;
-             vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
-             vnet_buffer (p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER;
-             next0 = IP4_MAP_REASS_NEXT_IP4_FRAGMENT;
-           }
-
-         if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
-           {
-             map_ip4_map_reass_trace_t *tr =
-               vlib_add_trace (vm, node, p0, sizeof (*tr));
-             tr->map_domain_index = map_domain_index0;
-             tr->port = port0;
-             tr->cached = cached;
-           }
-
-         if (cached)
-           {
-             //Dequeue the packet
-             n_left_to_next++;
-             to_next--;
-           }
-         else
-           {
-             if (error0 == MAP_ERROR_NONE)
-               vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-                                                thread_index,
-                                                map_domain_index0, 1,
-                                                clib_net_to_host_u16
-                                                (ip60->payload_length) + 40);
-             next0 =
-               (error0 == MAP_ERROR_NONE) ? next0 : IP4_MAP_REASS_NEXT_DROP;
-             p0->error = error_node->errors[error0];
-             vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-                                              n_left_to_next, pi0, next0);
-           }
-
-         //Loopback when we reach the end of the inpu vector
-         if (n_left_from == 0 && vec_len (fragments_to_loopback))
-           {
-             from = vlib_frame_vector_args (frame);
-             u32 len = vec_len (fragments_to_loopback);
-             if (len <= VLIB_FRAME_SIZE)
-               {
-                 clib_memcpy_fast (from, fragments_to_loopback,
-                                   sizeof (u32) * len);
-                 n_left_from = len;
-                 vec_reset_length (fragments_to_loopback);
-               }
-             else
-               {
-                 clib_memcpy_fast (from, fragments_to_loopback +
-                                   (len - VLIB_FRAME_SIZE),
-                                   sizeof (u32) * VLIB_FRAME_SIZE);
-                 n_left_from = VLIB_FRAME_SIZE;
-                 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
-               }
-           }
-       }
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
-
-  map_send_all_to_node (vm, fragments_to_drop, node,
-                       &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
-                       IP4_MAP_REASS_NEXT_DROP);
-
-  vec_free (fragments_to_drop);
-  vec_free (fragments_to_loopback);
-  return frame->n_vectors;
-}
-
 static char *map_error_strings[] = {
 #define _(sym,string) string,
   foreach_map_error
@@ -712,8 +477,8 @@ VNET_FEATURE_INIT (ip4_map_feature, static) =
 {
   .arc_name = "ip4-unicast",
   .node_name = "ip4-map",
-  .runs_before =
-  VNET_FEATURES ("ip4-flow-classify"),
+  .runs_before = VNET_FEATURES ("ip4-flow-classify"),
+  .runs_after = VNET_FEATURES("ip4-sv-reassembly-feature"),
 };
 
 VLIB_REGISTER_NODE(ip4_map_node) = {
@@ -734,33 +499,12 @@ VLIB_REGISTER_NODE(ip4_map_node) = {
 #endif
     [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
     [IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag",
-    [IP4_MAP_NEXT_REASS] = "ip4-map-reass",
     [IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error",
     [IP4_MAP_NEXT_DROP] = "error-drop",
   },
 };
 /* *INDENT-ON* */
 
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE(ip4_map_reass_node) = {
-  .function = ip4_map_reass,
-  .name = "ip4-map-reass",
-  .vector_size = sizeof(u32),
-  .format_trace = format_ip4_map_reass_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-
-  .n_errors = MAP_N_ERROR,
-  .error_strings = map_error_strings,
-
-  .n_next_nodes = IP4_MAP_REASS_N_NEXT,
-  .next_nodes = {
-    [IP4_MAP_REASS_NEXT_IP6_LOOKUP] = "ip6-lookup",
-    [IP4_MAP_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag",
-    [IP4_MAP_REASS_NEXT_DROP] = "error-drop",
-  },
-};
-/* *INDENT-ON* */
-
 /*
  * fd.io coding-style-patch-verification: ON
  *
index d9db602..492d5f8 100644 (file)
@@ -17,6 +17,7 @@
 #include <vnet/ip/ip_frag.h>
 #include <vnet/ip/ip4_to_ip6.h>
 #include <vnet/ip/ip6_to_ip4.h>
+#include <vnet/ip/reass/ip4_sv_reass.h>
 
 enum ip6_map_next_e
 {
@@ -41,12 +42,12 @@ enum ip6_map_ip6_reass_next_e
   IP6_MAP_IP6_REASS_N_NEXT,
 };
 
-enum ip6_map_ip4_reass_next_e
+enum ip6_map_post_ip4_reass_next_e
 {
-  IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP,
-  IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT,
-  IP6_MAP_IP4_REASS_NEXT_DROP,
-  IP6_MAP_IP4_REASS_N_NEXT,
+  IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP,
+  IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT,
+  IP6_MAP_POST_IP4_REASS_NEXT_DROP,
+  IP6_MAP_POST_IP4_REASS_N_NEXT,
 };
 
 enum ip6_icmp_relay_next_e
@@ -56,7 +57,7 @@ enum ip6_icmp_relay_next_e
   IP6_ICMP_RELAY_N_NEXT,
 };
 
-vlib_node_registration_t ip6_map_ip4_reass_node;
+vlib_node_registration_t ip6_map_post_ip4_reass_node;
 vlib_node_registration_t ip6_map_ip6_reass_node;
 static vlib_node_registration_t ip6_map_icmp_relay_node;
 
@@ -68,14 +69,14 @@ typedef struct
 } map_ip6_map_ip4_reass_trace_t;
 
 u8 *
-format_ip6_map_ip4_reass_trace (u8 * s, va_list * args)
+format_ip6_map_post_ip4_reass_trace (u8 * s, va_list * args)
 {
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   map_ip6_map_ip4_reass_trace_t *t =
     va_arg (*args, map_ip6_map_ip4_reass_trace_t *);
   return format (s, "MAP domain index: %d L4 port: %u Status: %s",
-                t->map_domain_index, t->port,
+                t->map_domain_index, clib_net_to_host_u16 (t->port),
                 t->cached ? "cached" : "forwarded");
 }
 
@@ -117,8 +118,9 @@ ip6_map_sec_check (map_domain_t * d, u16 port, ip4_header_t * ip4,
 }
 
 static_always_inline void
-ip6_map_security_check (map_domain_t * d, ip4_header_t * ip4,
-                       ip6_header_t * ip6, u32 * next, u8 * error)
+ip6_map_security_check (map_domain_t * d, vlib_buffer_t * b0,
+                       ip4_header_t * ip4, ip6_header_t * ip6, u32 * next,
+                       u8 * error)
 {
   map_main_t *mm = &map_main;
   if (d->ea_bits_len || d->rules)
@@ -143,7 +145,12 @@ ip6_map_security_check (map_domain_t * d, ip4_header_t * ip4,
            }
          else
            {
-             *next = mm->sec_check_frag ? IP6_MAP_NEXT_IP4_REASS : *next;
+             if (mm->sec_check_frag)
+               {
+                 vnet_buffer (b0)->ip.reass.next_index =
+                   map_main.ip4_sv_reass_custom_next_index;
+                 *next = IP6_MAP_NEXT_IP4_REASS;
+               }
            }
        }
     }
@@ -297,7 +304,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
          if (d0)
            {
              /* MAP inbound security check */
-             ip6_map_security_check (d0, ip40, ip60, &next0, &error0);
+             ip6_map_security_check (d0, p0, ip40, ip60, &next0, &error0);
 
              if (PREDICT_TRUE (error0 == MAP_ERROR_NONE &&
                                next0 == IP6_MAP_NEXT_IP4_LOOKUP))
@@ -329,7 +336,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
          if (d1)
            {
              /* MAP inbound security check */
-             ip6_map_security_check (d1, ip41, ip61, &next1, &error1);
+             ip6_map_security_check (d1, p1, ip41, ip61, &next1, &error1);
 
              if (PREDICT_TRUE (error1 == MAP_ERROR_NONE &&
                                next1 == IP6_MAP_NEXT_IP4_LOOKUP))
@@ -484,7 +491,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
          if (d0)
            {
              /* MAP inbound security check */
-             ip6_map_security_check (d0, ip40, ip60, &next0, &error0);
+             ip6_map_security_check (d0, p0, ip40, ip60, &next0, &error0);
 
              if (PREDICT_TRUE (error0 == MAP_ERROR_NONE &&
                                next0 == IP6_MAP_NEXT_IP4_LOOKUP))
@@ -657,15 +664,6 @@ map_ip6_drop_pi (u32 pi)
   vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP6_REASS_NEXT_DROP, pi);
 }
 
-void
-map_ip4_drop_pi (u32 pi)
-{
-  vlib_main_t *vm = vlib_get_main ();
-  vlib_node_runtime_t *n =
-    vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index);
-  vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP4_REASS_NEXT_DROP, pi);
-}
-
 /*
  * ip6_reass
  * TODO: We should count the number of successfully
@@ -811,20 +809,18 @@ ip6_map_ip6_reass (vlib_main_t * vm,
 }
 
 /*
- * ip6_map_ip4_reass
+ * ip6_map_post_ip4_reass
  */
 static uword
-ip6_map_ip4_reass (vlib_main_t * vm,
-                  vlib_node_runtime_t * node, vlib_frame_t * frame)
+ip6_map_post_ip4_reass (vlib_main_t * vm,
+                       vlib_node_runtime_t * node, vlib_frame_t * frame)
 {
   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
   vlib_node_runtime_t *error_node =
-    vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index);
+    vlib_node_get_runtime (vm, ip6_map_post_ip4_reass_node.index);
   map_main_t *mm = &map_main;
   vlib_combined_counter_main_t *cm = mm->domain_counters;
   u32 thread_index = vm->thread_index;
-  u32 *fragments_to_drop = NULL;
-  u32 *fragments_to_loopback = NULL;
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -844,8 +840,7 @@ ip6_map_ip4_reass (vlib_main_t * vm,
          ip6_header_t *ip60;
          i32 port0 = 0;
          u32 map_domain_index0 = ~0;
-         u32 next0 = IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP;
-         u8 cached = 0;
+         u32 next0 = IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP;
 
          pi0 = to_next[0] = from[0];
          from += 1;
@@ -861,65 +856,7 @@ ip6_map_ip4_reass (vlib_main_t * vm,
            ip4_map_get_domain ((ip4_address_t *) & ip40->src_address.as_u32,
                                &map_domain_index0, &error0);
 
-         map_ip4_reass_lock ();
-         //This node only deals with fragmented ip4
-         map_ip4_reass_t *r = map_ip4_reass_get (ip40->src_address.as_u32,
-                                                 ip40->dst_address.as_u32,
-                                                 ip40->fragment_id,
-                                                 ip40->protocol,
-                                                 &fragments_to_drop);
-         if (PREDICT_FALSE (!r))
-           {
-             // Could not create a caching entry
-             error0 = MAP_ERROR_FRAGMENT_MEMORY;
-           }
-         else if (PREDICT_TRUE (ip4_get_fragment_offset (ip40)))
-           {
-             // This is a fragment
-             if (r->port >= 0)
-               {
-                 // We know the port already
-                 port0 = r->port;
-               }
-             else if (map_ip4_reass_add_fragment (r, pi0))
-               {
-                 // Not enough space for caching
-                 error0 = MAP_ERROR_FRAGMENT_MEMORY;
-                 map_ip4_reass_free (r, &fragments_to_drop);
-               }
-             else
-               {
-                 cached = 1;
-               }
-           }
-         else if ((port0 = ip4_get_port (ip40, 1)) == 0)
-           {
-             // Could not find port from first fragment. Stop reassembling.
-             error0 = MAP_ERROR_BAD_PROTOCOL;
-             port0 = 0;
-             map_ip4_reass_free (r, &fragments_to_drop);
-           }
-         else
-           {
-             // Found port. Remember it and loopback saved fragments
-             r->port = port0;
-             map_ip4_reass_get_fragments (r, &fragments_to_loopback);
-           }
-
-#ifdef MAP_IP4_REASS_COUNT_BYTES
-         if (!cached && r)
-           {
-             r->forwarded += clib_host_to_net_u16 (ip40->length) - 20;
-             if (!ip4_get_fragment_more (ip40))
-               r->expected_total =
-                 ip4_get_fragment_offset (ip40) * 8 +
-                 clib_host_to_net_u16 (ip40->length) - 20;
-             if (r->forwarded >= r->expected_total)
-               map_ip4_reass_free (r, &fragments_to_drop);
-           }
-#endif
-
-         map_ip4_reass_unlock ();
+         port0 = vnet_buffer (p0)->ip.reass.l4_src_port;
 
          if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
            error0 =
@@ -929,12 +866,12 @@ ip6_map_ip4_reass (vlib_main_t * vm,
 
          if (PREDICT_FALSE
              (d0->mtu && (clib_host_to_net_u16 (ip40->length) > d0->mtu)
-              && error0 == MAP_ERROR_NONE && !cached))
+              && error0 == MAP_ERROR_NONE))
            {
              vnet_buffer (p0)->ip_frag.flags = 0;
              vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
              vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
-             next0 = IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT;
+             next0 = IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT;
            }
 
          if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
@@ -943,61 +880,24 @@ ip6_map_ip4_reass (vlib_main_t * vm,
                vlib_add_trace (vm, node, p0, sizeof (*tr));
              tr->map_domain_index = map_domain_index0;
              tr->port = port0;
-             tr->cached = cached;
            }
 
-         if (cached)
-           {
-             //Dequeue the packet
-             n_left_to_next++;
-             to_next--;
-           }
-         else
-           {
-             if (error0 == MAP_ERROR_NONE)
-               vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
-                                                thread_index,
-                                                map_domain_index0, 1,
-                                                clib_net_to_host_u16
-                                                (ip40->length));
-             next0 =
-               (error0 ==
-                MAP_ERROR_NONE) ? next0 : IP6_MAP_IP4_REASS_NEXT_DROP;
-             p0->error = error_node->errors[error0];
-             vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
-                                              n_left_to_next, pi0, next0);
-           }
+         if (error0 == MAP_ERROR_NONE)
+           vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
+                                            thread_index,
+                                            map_domain_index0, 1,
+                                            clib_net_to_host_u16
+                                            (ip40->length));
+         next0 =
+           (error0 ==
+            MAP_ERROR_NONE) ? next0 : IP6_MAP_POST_IP4_REASS_NEXT_DROP;
+         p0->error = error_node->errors[error0];
+         vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                          n_left_to_next, pi0, next0);
 
-         //Loopback when we reach the end of the inpu vector
-         if (n_left_from == 0 && vec_len (fragments_to_loopback))
-           {
-             from = vlib_frame_vector_args (frame);
-             u32 len = vec_len (fragments_to_loopback);
-             if (len <= VLIB_FRAME_SIZE)
-               {
-                 clib_memcpy_fast (from, fragments_to_loopback,
-                                   sizeof (u32) * len);
-                 n_left_from = len;
-                 vec_reset_length (fragments_to_loopback);
-               }
-             else
-               {
-                 clib_memcpy_fast (from, fragments_to_loopback +
-                                   (len - VLIB_FRAME_SIZE),
-                                   sizeof (u32) * VLIB_FRAME_SIZE);
-                 n_left_from = VLIB_FRAME_SIZE;
-                 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
-               }
-           }
        }
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     }
-  map_send_all_to_node (vm, fragments_to_drop, node,
-                       &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
-                       IP6_MAP_IP4_REASS_NEXT_DROP);
-
-  vec_free (fragments_to_drop);
-  vec_free (fragments_to_loopback);
   return frame->n_vectors;
 }
 
@@ -1195,7 +1095,7 @@ VLIB_REGISTER_NODE(ip6_map_node) = {
     [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-load-balance",
 #endif
     [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass",
-    [IP6_MAP_NEXT_IP4_REASS] = "ip6-map-ip4-reass",
+    [IP6_MAP_NEXT_IP4_REASS] = "ip4-sv-reassembly-custom-next",
     [IP6_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
     [IP6_MAP_NEXT_IP6_ICMP_RELAY] = "ip6-map-icmp-relay",
     [IP6_MAP_NEXT_IP6_LOCAL] = "ip6-local",
@@ -1223,19 +1123,19 @@ VLIB_REGISTER_NODE(ip6_map_ip6_reass_node) = {
 /* *INDENT-ON* */
 
 /* *INDENT-OFF* */
-VLIB_REGISTER_NODE(ip6_map_ip4_reass_node) = {
-  .function = ip6_map_ip4_reass,
-  .name = "ip6-map-ip4-reass",
+VLIB_REGISTER_NODE(ip6_map_post_ip4_reass_node) = {
+  .function = ip6_map_post_ip4_reass,
+  .name = "ip6-map-post-ip4-reass",
   .vector_size = sizeof(u32),
-  .format_trace = format_ip6_map_ip4_reass_trace,
+  .format_trace = format_ip6_map_post_ip4_reass_trace,
   .type = VLIB_NODE_TYPE_INTERNAL,
   .n_errors = MAP_N_ERROR,
   .error_strings = map_error_strings,
-  .n_next_nodes = IP6_MAP_IP4_REASS_N_NEXT,
+  .n_next_nodes = IP6_MAP_POST_IP4_REASS_N_NEXT,
   .next_nodes = {
-    [IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup",
-    [IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag",
-    [IP6_MAP_IP4_REASS_NEXT_DROP] = "error-drop",
+    [IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup",
+    [IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag",
+    [IP6_MAP_POST_IP4_REASS_NEXT_DROP] = "error-drop",
   },
 };
 /* *INDENT-ON* */
@@ -1257,6 +1157,19 @@ VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = {
 };
 /* *INDENT-ON* */
 
+clib_error_t *
+ip6_map_init (vlib_main_t * vm)
+{
+  map_main.ip4_sv_reass_custom_next_index =
+    ip4_sv_reass_custom_register_next_node
+    (ip6_map_post_ip4_reass_node.index);
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (ip6_map_init) =
+{
+.runs_after = VLIB_INITS ("map_init"),};
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
index a65a16a..a4aced5 100644 (file)
@@ -342,6 +342,8 @@ typedef struct {
   lpm_t *ip4_prefix_tbl;
   lpm_t *ip6_prefix_tbl;
   lpm_t *ip6_src_prefix_tbl;
+
+  uword ip4_sv_reass_custom_next_index;
 } map_main_t;
 
 /*
index 159c9d7..c29a2a1 100644 (file)
@@ -22,6 +22,8 @@
 #include <map/map.api_enum.h>
 #include <map/map.api_types.h>
 #include <vnet/ip/ip.h>
+#include <vnet/ip/reass/ip4_sv_reass.h>
+#include <vnet/ip/reass/ip6_sv_reass.h>
 #include <vnet/fib/fib_table.h>
 #include <vlibmemory/api.h>
 
@@ -614,6 +616,7 @@ map_if_enable_disable (bool is_enable, u32 sw_if_index, bool is_translation)
 
   if (is_translation == false)
     {
+      ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, is_enable);
       vnet_feature_enable_disable ("ip4-unicast", "ip4-map", sw_if_index,
                                   is_enable ? 1 : 0, 0, 0);
       vnet_feature_enable_disable ("ip6-unicast", "ip6-map", sw_if_index,
index f1388b3..cf1e6f8 100644 (file)
@@ -7,6 +7,7 @@ from ipaddress import IPv6Network, IPv4Network
 from framework import VppTestCase, VppTestRunner
 from vpp_ip import DpoProto
 from vpp_ip_route import VppIpRoute, VppRoutePath
+from util import fragment_rfc791
 
 import scapy.compat
 from scapy.layers.l2 import Ether, Raw
@@ -49,22 +50,25 @@ class TestMAP(VppTestCase):
             i.unconfig_ip6()
             i.admin_down()
 
-    def send_and_assert_encapped(self, tx, ip6_src, ip6_dst, dmac=None):
+    def send_and_assert_encapped(self, packets, ip6_src, ip6_dst, dmac=None):
         if not dmac:
             dmac = self.pg1.remote_mac
 
-        self.pg0.add_stream(tx)
+        self.pg0.add_stream(packets)
 
         self.pg_enable_capture(self.pg_interfaces)
         self.pg_start()
 
-        rx = self.pg1.get_capture(1)
-        rx = rx[0]
+        capture = self.pg1.get_capture(len(packets))
+        for rx, tx in zip(capture, packets):
+            self.assertEqual(rx[Ether].dst, dmac)
+            self.assertEqual(rx[IP].src, tx[IP].src)
+            self.assertEqual(rx[IPv6].src, ip6_src)
+            self.assertEqual(rx[IPv6].dst, ip6_dst)
 
-        self.assertEqual(rx[Ether].dst, dmac)
-        self.assertEqual(rx[IP].src, tx[IP].src)
-        self.assertEqual(rx[IPv6].src, ip6_src)
-        self.assertEqual(rx[IPv6].dst, ip6_dst)
+    def send_and_assert_encapped_one(self, packet, ip6_src, ip6_dst,
+                                     dmac=None):
+        return self.send_and_assert_encapped([packet], ip6_src, ip6_dst, dmac)
 
     def test_api_map_domain_dump(self):
         map_dst = '2001::/64'
@@ -75,7 +79,6 @@ class TestMAP(VppTestCase):
                                          ip6_prefix=map_dst,
                                          ip6_src=map_src,
                                          tag=tag).index
-
         rv = self.vapi.map_domain_dump()
 
         # restore the state early so as to not impact subsequent tests.
@@ -101,7 +104,7 @@ class TestMAP(VppTestCase):
         # Add a route to the MAP-BR
         #
         map_br_pfx = "2001::"
-        map_br_pfx_len = 64
+        map_br_pfx_len = 32
         map_route = VppIpRoute(self,
                                map_br_pfx,
                                map_br_pfx_len,
@@ -112,15 +115,21 @@ class TestMAP(VppTestCase):
         #
         # Add a domain that maps from pg0 to pg1
         #
-        map_dst = '2001::/64'
+        map_dst = '2001::/32'
         map_src = '3000::1/128'
         client_pfx = '192.168.0.0/16'
+        map_translated_addr = '2001:0:101:7000:0:c0a8:101:7'
         tag = 'MAP-E tag.'
         self.vapi.map_add_domain(ip4_prefix=client_pfx,
                                  ip6_prefix=map_dst,
                                  ip6_src=map_src,
+                                 ea_bits_len=20,
+                                 psid_offset=4,
+                                 psid_length=4,
                                  tag=tag)
 
+        self.vapi.map_param_set_security_check(enable=1, fragments=1)
+
         # Enable MAP on interface.
         self.vapi.map_if_enable_disable(is_enable=1,
                                         sw_if_index=self.pg0.sw_if_index,
@@ -137,6 +146,8 @@ class TestMAP(VppTestCase):
         for p in rx:
             self.validate(p[1], v4_reply)
 
+        self.logger.debug(self.vapi.cli("show trace"))  # log trace output
+
         #
         # Fire in a v4 packet that will be encapped to the BR
         #
@@ -145,7 +156,23 @@ class TestMAP(VppTestCase):
               UDP(sport=20000, dport=10000) /
               Raw('\xa5' * 100))
 
-        self.send_and_assert_encapped(v4, "3000::1", "2001::c0a8:0:0")
+        self.send_and_assert_encapped_one(v4, "3000::1", map_translated_addr)
+
+        self.logger.debug(self.vapi.cli("show trace"))  # log trace output
+        #
+        # Verify reordered fragments are able to pass as well
+        #
+        v4 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
+              IP(id=1, src=self.pg0.remote_ip4, dst='192.168.1.1') /
+              UDP(sport=20000, dport=10000) /
+              Raw('\xa5' * 1000))
+
+        frags = fragment_rfc791(v4, 400)  # split the whole frame's IP layer
+        frags.reverse()  # last fragment is sent first
+
+        self.send_and_assert_encapped(frags, "3000::1", map_translated_addr)
+
+        self.logger.debug(self.vapi.cli("show trace"))  # log trace output
 
         # Enable MAP on interface.
         self.vapi.map_if_enable_disable(is_enable=1,
@@ -165,12 +192,12 @@ class TestMAP(VppTestCase):
 
         #
         # Fire in a V6 encapped packet.
-        #  expect a decapped packet on the inside ip4 link
+        # expect a decapped packet on the inside ip4 link
         #
         p = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) /
-             IPv6(dst='3000::1', src="2001::1") /
+             IPv6(dst='3000::1', src=map_translated_addr) /
              IP(dst=self.pg0.remote_ip4, src='192.168.1.1') /
-             UDP(sport=20000, dport=10000) /
+             UDP(sport=10000, dport=20000) /
              Raw('\xa5' * 100))
 
         self.pg1.add_stream(p)
@@ -185,6 +212,33 @@ class TestMAP(VppTestCase):
         self.assertEqual(rx[IP].src, p[IP].src)
         self.assertEqual(rx[IP].dst, p[IP].dst)
 
+        #
+        # Verify encapped reordered fragments pass as well
+        #
+        p = (IP(id=1, dst=self.pg0.remote_ip4, src='192.168.1.1') /
+             UDP(sport=10000, dport=20000) /
+             Raw('\xa5' * 1500))
+        frags = fragment_rfc791(p, 400)  # fragment the inner IPv4 packet
+        frags.reverse()  # last fragment is sent first
+
+        stream = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) /
+                  IPv6(dst='3000::1', src=map_translated_addr) /
+                  x for x in frags)  # generator: one Ether/IPv6 per fragment
+
+        self.pg1.add_stream(stream)
+
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+
+        rx = self.pg0.get_capture(len(frags))  # ask one rx per fragment
+
+        for r in rx:
+            self.assertFalse(r.haslayer(IPv6))  # no IPv6 layer may remain
+            self.assertEqual(r[IP].src, p[IP].src)
+            self.assertEqual(r[IP].dst, p[IP].dst)
+
+        return  # NOTE(review): code below is now unreachable - confirm
+
         #
         # Pre-resolve. No API for this!!
         #
@@ -202,9 +256,9 @@ class TestMAP(VppTestCase):
                                                  self.pg1.sw_if_index)])
         pre_res_route.add_vpp_config()
 
-        self.send_and_assert_encapped(v4, "3000::1",
-                                      "2001::c0a8:0:0",
-                                      dmac=self.pg1.remote_hosts[2].mac)
+        self.send_and_assert_encapped_one(v4, "3000::1",
+                                          "2001::c0a8:0:0",
+                                          dmac=self.pg1.remote_hosts[2].mac)
 
         #
         # change the route to the pre-solved next-hop
@@ -213,9 +267,9 @@ class TestMAP(VppTestCase):
                                            self.pg1.sw_if_index)])
         pre_res_route.add_vpp_config()
 
-        self.send_and_assert_encapped(v4, "3000::1",
-                                      "2001::c0a8:0:0",
-                                      dmac=self.pg1.remote_hosts[3].mac)
+        self.send_and_assert_encapped_one(v4, "3000::1",
+                                          "2001::c0a8:0:0",
+                                          dmac=self.pg1.remote_hosts[3].mac)
 
         #
         # cleanup. The test infra's object registry will ensure