SNAT: IP fragmentation (VPP-890)
[vpp.git] / src / plugins / nat / nat.c
index 887fab7..7e651e5 100644 (file)
@@ -23,6 +23,8 @@
 #include <nat/nat_ipfix_logging.h>
 #include <nat/nat_det.h>
 #include <nat/nat64.h>
+#include <nat/dslite.h>
+#include <nat/nat_reass.h>
 #include <vnet/fib/fib_table.h>
 #include <vnet/fib/ip4_fib.h>
 
@@ -42,6 +44,11 @@ VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
   .node_name = "nat44-out2in",
   .runs_before = VNET_FEATURES ("ip4-lookup"),
 };
+/* Offer the "nat44-classify" node as a feature on the "ip4-unicast" arc,
+ * ordered before "ip4-lookup". */
+VNET_FEATURE_INIT (ip4_nat_classify, static) = {
+  .arc_name = "ip4-unicast",
+  .node_name = "nat44-classify",
+  .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
 VNET_FEATURE_INIT (ip4_snat_det_in2out, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat44-det-in2out",
@@ -52,6 +59,11 @@ VNET_FEATURE_INIT (ip4_snat_det_out2in, static) = {
   .node_name = "nat44-det-out2in",
   .runs_before = VNET_FEATURES ("ip4-lookup"),
 };
+/* Offer the "nat44-det-classify" node as a feature on the "ip4-unicast" arc,
+ * ordered before "ip4-lookup". */
+VNET_FEATURE_INIT (ip4_nat_det_classify, static) = {
+  .arc_name = "ip4-unicast",
+  .node_name = "nat44-det-classify",
+  .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
 VNET_FEATURE_INIT (ip4_snat_in2out_worker_handoff, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat44-in2out-worker-handoff",
@@ -62,6 +74,11 @@ VNET_FEATURE_INIT (ip4_snat_out2in_worker_handoff, static) = {
   .node_name = "nat44-out2in-worker-handoff",
   .runs_before = VNET_FEATURES ("ip4-lookup"),
 };
+/* Offer the "nat44-handoff-classify" node as a feature on the "ip4-unicast"
+ * arc, ordered before "ip4-lookup". */
+VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
+  .arc_name = "ip4-unicast",
+  .node_name = "nat44-handoff-classify",
+  .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
 VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
   .arc_name = "ip4-unicast",
   .node_name = "nat44-in2out-fast",
@@ -111,6 +128,159 @@ VLIB_PLUGIN_REGISTER () = {
 };
 /* *INDENT-ON* */
 
+/* Forward declarations of the three classify node registrations that are
+ * defined further down with VLIB_REGISTER_NODE. */
+vlib_node_registration_t nat44_classify_node;
+vlib_node_registration_t nat44_det_classify_node;
+vlib_node_registration_t nat44_handoff_classify_node;
+
+/* Next-node slots shared by all NAT44 classify nodes; each node registration
+ * maps these slots to its own in2out/out2in node names. */
+typedef enum {
+  NAT44_CLASSIFY_NEXT_IN2OUT,	/* default classification of a packet */
+  NAT44_CLASSIFY_NEXT_OUT2IN,	/* dst matched a NAT address or static mapping */
+  NAT44_CLASSIFY_N_NEXT,	/* number of slots, used as .n_next_nodes */
+} nat44_classify_next_t;
+
+/**
+ * @brief Common dispatch loop shared by the NAT44 classify nodes.
+ *
+ * Every buffer starts out classified as in2out. It is switched to out2in
+ * when its IPv4 destination address equals one of sm->addresses, or when an
+ * address-only key (port 0, protocol 0, sm->outside_fib_index) built from
+ * that destination is found in sm->static_mapping_by_external.
+ *
+ * The buffer's current data is read as an ip4_header_t.
+ *
+ * @param vm    vlib main.
+ * @param node  runtime of the calling node; supplies the cached next index.
+ * @param frame frame holding the buffer indices to classify.
+ *
+ * @returns frame->n_vectors.
+ */
+static inline uword
+nat44_classify_node_fn_inline (vlib_main_t * vm,
+                               vlib_node_runtime_t * node,
+                               vlib_frame_t * frame)
+{
+  u32 n_left_from, * from, * to_next;
+  nat44_classify_next_t next_index;
+  snat_main_t *sm = &snat_main;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index,
+                           to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0;
+          vlib_buffer_t *b0;
+          u32 next0 = NAT44_CLASSIFY_NEXT_IN2OUT;
+          ip4_header_t *ip0;
+          snat_address_t *ap;
+          snat_session_key_t m_key0;
+          clib_bihash_kv_8_8_t kv0, value0;
+
+          /* speculatively enqueue b0 to the current next frame */
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          ip0 = vlib_buffer_get_current (b0);
+
+          /* Destination is one of the NAT pool addresses -> out2in */
+          vec_foreach (ap, sm->addresses)
+            {
+              if (ip0->dst_address.as_u32 == ap->addr.as_u32)
+                {
+                  next0 = NAT44_CLASSIFY_NEXT_OUT2IN;
+                  break;
+                }
+            }
+
+          /* The static-mapping lookup can only ever select out2in, so it is
+           * skipped when the address scan above already made that choice. */
+          if (next0 == NAT44_CLASSIFY_NEXT_IN2OUT &&
+              PREDICT_FALSE (pool_elts (sm->static_mappings)))
+            {
+              /* address-only key: port and protocol are zeroed */
+              m_key0.addr = ip0->dst_address;
+              m_key0.port = 0;
+              m_key0.protocol = 0;
+              m_key0.fib_index = sm->outside_fib_index;
+              kv0.key = m_key0.as_u64;
+              if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external,
+                                           &kv0, &value0))
+                {
+                  next0 = NAT44_CLASSIFY_NEXT_OUT2IN;
+                }
+            }
+
+          /* verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/**
+ * @brief Node function of "nat44-classify".
+ *
+ * Thin wrapper that forwards to nat44_classify_node_fn_inline().
+ *
+ * @returns the value returned by nat44_classify_node_fn_inline().
+ */
+static uword
+nat44_classify_node_fn (vlib_main_t * vm,
+                        vlib_node_runtime_t * node,
+                        vlib_frame_t * frame)
+{
+  return nat44_classify_node_fn_inline (vm, node, frame);
+}
+
+/* Internal graph node "nat44-classify": packets classified as in2out are sent
+ * to "nat44-in2out", packets classified as out2in to "nat44-out2in". */
+VLIB_REGISTER_NODE (nat44_classify_node) = {
+  .function = nat44_classify_node_fn,
+  .name = "nat44-classify",
+  .vector_size = sizeof (u32),
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_next_nodes = NAT44_CLASSIFY_N_NEXT,
+  .next_nodes = {
+    [NAT44_CLASSIFY_NEXT_IN2OUT] = "nat44-in2out",
+    [NAT44_CLASSIFY_NEXT_OUT2IN] = "nat44-out2in",
+  },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (nat44_classify_node,
+                              nat44_classify_node_fn);
+
+/**
+ * @brief Node function of "nat44-det-classify".
+ *
+ * Thin wrapper that forwards to nat44_classify_node_fn_inline().
+ *
+ * @returns the value returned by nat44_classify_node_fn_inline().
+ */
+static uword
+nat44_det_classify_node_fn (vlib_main_t * vm,
+                            vlib_node_runtime_t * node,
+                            vlib_frame_t * frame)
+{
+  return nat44_classify_node_fn_inline (vm, node, frame);
+}
+
+/* Internal graph node "nat44-det-classify": packets classified as in2out are
+ * sent to "nat44-det-in2out", packets classified as out2in to
+ * "nat44-det-out2in". */
+VLIB_REGISTER_NODE (nat44_det_classify_node) = {
+  .function = nat44_det_classify_node_fn,
+  .name = "nat44-det-classify",
+  .vector_size = sizeof (u32),
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_next_nodes = NAT44_CLASSIFY_N_NEXT,
+  .next_nodes = {
+    [NAT44_CLASSIFY_NEXT_IN2OUT] = "nat44-det-in2out",
+    [NAT44_CLASSIFY_NEXT_OUT2IN] = "nat44-det-out2in",
+  },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (nat44_det_classify_node,
+                              nat44_det_classify_node_fn);
+
+/**
+ * @brief Node function of "nat44-handoff-classify".
+ *
+ * Thin wrapper that forwards to nat44_classify_node_fn_inline().
+ *
+ * @returns the value returned by nat44_classify_node_fn_inline().
+ */
+static uword
+nat44_handoff_classify_node_fn (vlib_main_t * vm,
+                                vlib_node_runtime_t * node,
+                                vlib_frame_t * frame)
+{
+  return nat44_classify_node_fn_inline (vm, node, frame);
+}
+
+/* Internal graph node "nat44-handoff-classify": packets classified as in2out
+ * are sent to "nat44-in2out-worker-handoff", packets classified as out2in to
+ * "nat44-out2in-worker-handoff". */
+VLIB_REGISTER_NODE (nat44_handoff_classify_node) = {
+  .function = nat44_handoff_classify_node_fn,
+  .name = "nat44-handoff-classify",
+  .vector_size = sizeof (u32),
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_next_nodes = NAT44_CLASSIFY_N_NEXT,
+  .next_nodes = {
+    [NAT44_CLASSIFY_NEXT_IN2OUT] = "nat44-in2out-worker-handoff",
+    [NAT44_CLASSIFY_NEXT_OUT2IN] = "nat44-out2in-worker-handoff",
+  },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (nat44_handoff_classify_node,
+                              nat44_handoff_classify_node_fn);
+
 /**
  * @brief Add/del NAT address to FIB.
  *
@@ -190,7 +360,7 @@ void snat_add_address (snat_main_t *sm, ip4_address_t *addr, u32 vrf_id)
   /* Add external address to FIB */
   pool_foreach (i, sm->interfaces,
   ({
-    if (i->is_inside)
+    if (nat_interface_is_inside(i))
       continue;
 
     snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
@@ -198,7 +368,7 @@ void snat_add_address (snat_main_t *sm, ip4_address_t *addr, u32 vrf_id)
   }));
   pool_foreach (i, sm->output_feature_interfaces,
   ({
-    if (i->is_inside)
+    if (nat_interface_is_inside(i))
       continue;
 
     snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1);
@@ -569,7 +739,7 @@ delete:
   /* Add/delete external address to FIB */
   pool_foreach (interface, sm->interfaces,
   ({
-    if (interface->is_inside)
+    if (nat_interface_is_inside(interface))
       continue;
 
     snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
@@ -577,7 +747,7 @@ delete:
   }));
   pool_foreach (interface, sm->output_feature_interfaces,
   ({
-    if (interface->is_inside)
+    if (nat_interface_is_inside(interface))
       continue;
 
     snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add);
@@ -930,7 +1100,7 @@ int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm)
   /* Delete external address from FIB */
   pool_foreach (interface, sm->interfaces,
   ({
-    if (interface->is_inside)
+    if (nat_interface_is_inside(interface))
       continue;
 
     snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
@@ -938,7 +1108,7 @@ int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm)
   }));
   pool_foreach (interface, sm->output_feature_interfaces,
   ({
-    if (interface->is_inside)
+    if (nat_interface_is_inside(interface))
       continue;
 
     snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0);
@@ -952,7 +1122,7 @@ int snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
 {
   snat_main_t *sm = &snat_main;
   snat_interface_t *i;
-  const char * feature_name;
+  const char * feature_name, *del_feature_name;
   snat_address_t * ap;
   snat_static_mapping_t * m;
   snat_det_map_t * dm;
@@ -969,9 +1139,6 @@ int snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
         feature_name = is_inside ?  "nat44-in2out" : "nat44-out2in";
     }
 
-  vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index,
-                              !is_del, 0, 0);
-
   if (sm->fq_in2out_index == ~0 && !sm->deterministic && sm->num_workers > 1)
     sm->fq_in2out_index = vlib_frame_queue_main_init (sm->in2out_node_index, 0);
 
@@ -983,9 +1150,63 @@ int snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
     if (i->sw_if_index == sw_if_index)
       {
         if (is_del)
-          pool_put (sm->interfaces, i);
+          {
+            if (nat_interface_is_inside(i) && nat_interface_is_outside(i))
+              {
+                if (is_inside)
+                  i->flags &= ~NAT_INTERFACE_FLAG_IS_INSIDE;
+                else
+                  i->flags &= ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
+
+                if (sm->num_workers > 1 && !sm->deterministic)
+                  del_feature_name = "nat44-handoff-classify";
+                else if (sm->deterministic)
+                  del_feature_name = "nat44-det-classify";
+                else
+                  del_feature_name = "nat44-classify";
+
+                vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
+                                             sw_if_index, 0, 0, 0);
+                vnet_feature_enable_disable ("ip4-unicast", feature_name,
+                                             sw_if_index, 1, 0, 0);
+              }
+            else
+              {
+                vnet_feature_enable_disable ("ip4-unicast", feature_name,
+                                             sw_if_index, 0, 0, 0);
+                pool_put (sm->interfaces, i);
+              }
+          }
         else
-          return VNET_API_ERROR_VALUE_EXIST;
+          {
+            if ((nat_interface_is_inside(i) && is_inside) ||
+                (nat_interface_is_outside(i) && !is_inside))
+              return 0;
+
+            if (sm->num_workers > 1 && !sm->deterministic)
+              {
+                del_feature_name = !is_inside ?  "nat44-in2out-worker-handoff" :
+                                                 "nat44-out2in-worker-handoff";
+                feature_name = "nat44-handoff-classify";
+              }
+            else if (sm->deterministic)
+              {
+                del_feature_name = !is_inside ?  "nat44-det-in2out" :
+                                                 "nat44-det-out2in";
+                feature_name = "nat44-det-classify";
+              }
+            else
+              {
+                del_feature_name = !is_inside ?  "nat44-in2out" : "nat44-out2in";
+                feature_name = "nat44-classify";
+              }
+
+            vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
+                                         sw_if_index, 0, 0, 0);
+            vnet_feature_enable_disable ("ip4-unicast", feature_name,
+                                         sw_if_index, 1, 0, 0);
+            goto set_flags;
+          }
 
         goto fib;
       }
@@ -996,7 +1217,14 @@ int snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
 
   pool_get (sm->interfaces, i);
   i->sw_if_index = sw_if_index;
-  i->is_inside = is_inside;
+  i->flags = 0;
+  vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0, 0);
+
+set_flags:
+  if (is_inside)
+    i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
+  else
+    i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
 
   /* Add/delete external addresses to FIB */
 fib:
@@ -1090,7 +1318,11 @@ fq:
 
   pool_get (sm->output_feature_interfaces, i);
   i->sw_if_index = sw_if_index;
-  i->is_inside = is_inside;
+  i->flags = 0;
+  if (is_inside)
+    i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
+  else
+    i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
 
   /* Add/delete external addresses to FIB */
 fib:
@@ -1216,14 +1448,20 @@ static clib_error_t * snat_init (vlib_main_t * vm)
   /* Init IPFIX logging */
   snat_ipfix_logging_init(vm);
 
+  /* Init NAT64 */
   error = nat64_init(vm);
+  if (error)
+    return error;
 
-  return error;
+  dslite_init(vm);
+
+  /* Init virtual fragmentation reassembly */
+  return nat_reass_init(vm);
 }
 
 VLIB_INIT_FUNCTION (snat_init);
 
-void snat_free_outside_address_and_port (snat_main_t * sm,
+void snat_free_outside_address_and_port (snat_address_t * addresses,
                                          u32 thread_index,
                                          snat_session_key_t * k,
                                          u32 address_index)
@@ -1231,9 +1469,9 @@ void snat_free_outside_address_and_port (snat_main_t * sm,
   snat_address_t *a;
   u16 port_host_byte_order = clib_net_to_host_u16 (k->port);
 
-  ASSERT (address_index < vec_len (sm->addresses));
+  ASSERT (address_index < vec_len (addresses));
 
-  a = sm->addresses + address_index;
+  a = addresses + address_index;
 
   switch (k->protocol)
     {
@@ -1342,38 +1580,42 @@ int snat_static_mapping_match (snat_main_t * sm,
 }
 
 static_always_inline u16
-snat_random_port (snat_main_t * sm, u16 min, u16 max)
+snat_random_port (u16 min, u16 max)
 {
+  snat_main_t *sm = &snat_main;
   return min + random_u32 (&sm->random_seed) /
     (random_u32_max() / (max - min + 1) + 1);
 }
 
-int snat_alloc_outside_address_and_port (snat_main_t * sm,
+int snat_alloc_outside_address_and_port (snat_address_t * addresses,
                                          u32 fib_index,
                                          u32 thread_index,
                                          snat_session_key_t * k,
-                                         u32 * address_indexp)
+                                         u32 * address_indexp,
+                                         u8 vrf_mode,
+                                         u16 port_per_thread,
+                                         u32 snat_thread_index)
 {
   int i;
   snat_address_t *a;
   u32 portnum;
 
-  for (i = 0; i < vec_len (sm->addresses); i++)
+  for (i = 0; i < vec_len (addresses); i++)
     {
-      a = sm->addresses + i;
-      if (sm->vrf_mode && a->fib_index != ~0 && a->fib_index != fib_index)
+      a = addresses + i;
+      if (vrf_mode && a->fib_index != ~0 && a->fib_index != fib_index)
         continue;
       switch (k->protocol)
         {
 #define _(N, j, n, s) \
         case SNAT_PROTOCOL_##N: \
-          if (a->busy_##n##_ports_per_thread[thread_index] < sm->port_per_thread) \
+          if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
             { \
               while (1) \
                 { \
-                  portnum = (sm->port_per_thread * \
-                    sm->per_thread_data[thread_index].snat_thread_index) + \
-                    snat_random_port(sm, 1, sm->port_per_thread) + 1024; \
+                  portnum = (port_per_thread * \
+                    snat_thread_index) + \
+                    snat_random_port(1, port_per_thread) + 1024; \
                   if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, portnum)) \
                     continue; \
                   clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, portnum, 1); \
@@ -2486,7 +2728,9 @@ show_snat_command_fn (vlib_main_t * vm,
       ({
         vlib_cli_output (vm, "%U %s", format_vnet_sw_interface_name, vnm,
                          vnet_get_sw_interface (vnm, i->sw_if_index),
-                         i->is_inside ? "in" : "out");
+                         (nat_interface_is_inside(i) &&
+                          nat_interface_is_outside(i)) ? "in out" :
+                         (nat_interface_is_inside(i) ? "in" : "out"));
       }));
 
       pool_foreach (i, sm->output_feature_interfaces,
@@ -2494,7 +2738,9 @@ show_snat_command_fn (vlib_main_t * vm,
         vlib_cli_output (vm, "%U output-feature %s",
                          format_vnet_sw_interface_name, vnm,
                          vnet_get_sw_interface (vnm, i->sw_if_index),
-                         i->is_inside ? "in" : "out");
+                         (nat_interface_is_inside(i) &&
+                          nat_interface_is_outside(i)) ? "in out" :
+                         (nat_interface_is_inside(i) ? "in" : "out"));
       }));
 
       if (vec_len (sm->auto_add_sw_if_indices))
@@ -2648,6 +2894,7 @@ show_snat_command_fn (vlib_main_t * vm,
             }
         }
     }
+
   return 0;
 }
 
@@ -2841,6 +3088,119 @@ VLIB_CLI_COMMAND (snat_add_interface_address_command, static) = {
     .function = snat_add_interface_address_command_fn,
 };
 
+/**
+ * @brief Delete one NAT44 session identified by address, port and protocol.
+ *
+ * The session is searched in the in2out table (is_in != 0) or the out2in
+ * table (is_in == 0) of the selected per-thread data. On a hit both hash
+ * entries of the session are removed, the session counter of the owning
+ * user is decremented when that user is found in user_hash, and the session
+ * is unlinked from the list pool and returned to the session pool.
+ *
+ * NOTE(review): the per-thread data is always picked via worker_in2out_cb,
+ * also when is_in == 0 - confirm this selects the right thread for out2in
+ * keys.
+ *
+ * @param sm      NAT main.
+ * @param addr    IPv4 address of the session key.
+ * @param port    port of the session key, host byte order.
+ * @param proto   protocol of the session key.
+ * @param vrf_id  VRF id, resolved to a FIB index with fib_table_find().
+ * @param is_in   non-zero to match against the in2out table.
+ *
+ * @returns 0 on success, VNET_API_ERROR_NO_SUCH_ENTRY if no session matched.
+ */
+int
+nat44_del_session (snat_main_t *sm, ip4_address_t *addr, u16 port,
+                   snat_protocol_t proto, u32 vrf_id, int is_in)
+{
+  u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
+  snat_main_per_thread_data_t *tsm;
+  clib_bihash_kv_8_8_t kv, value;
+  clib_bihash_8_8_t *lookup_table;
+  snat_session_key_t session_key;
+  snat_user_key_t user_key;
+  snat_session_t *session;
+  snat_user_t *user;
+  ip4_header_t ip;
+  u32 thread_index;
+
+  /* Pick the per-thread data that would own this address */
+  ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
+  thread_index = sm->num_workers;
+  if (sm->num_workers)
+    thread_index = sm->worker_in2out_cb (&ip, fib_index);
+  tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
+
+  /* Build the lookup key; the port is stored in network byte order */
+  session_key.addr.as_u32 = addr->as_u32;
+  session_key.port = clib_host_to_net_u16 (port);
+  session_key.protocol = proto;
+  session_key.fib_index = fib_index;
+  kv.key = session_key.as_u64;
+
+  lookup_table = is_in ? &tsm->in2out : &tsm->out2in;
+  if (clib_bihash_search_8_8 (lookup_table, &kv, &value))
+    return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+  session = pool_elt_at_index (tsm->sessions, value.value);
+
+  /* Drop both directions of the session from the hash tables */
+  kv.key = session->in2out.as_u64;
+  clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 0);
+  kv.key = session->out2in.as_u64;
+  clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 0);
+
+  /* Account for the removal on the owning user, if there is one */
+  user_key.addr = session->in2out.addr;
+  user_key.fib_index = session->in2out.fib_index;
+  kv.key = user_key.as_u64;
+  if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
+    {
+      user = pool_elt_at_index (tsm->users, value.value);
+      user->nsessions--;
+    }
+
+  clib_dlist_remove (tsm->list_pool, session->per_user_index);
+  pool_put (tsm->sessions, session);
+
+  return 0;
+}
+
+/**
+ * @brief CLI handler for "nat44 del session".
+ *
+ * Parses "<addr>:<port> <protocol>", the direction keyword "in" or "out"
+ * and an optional "vrf <id>", then calls nat44_del_session(). Without a
+ * direction keyword the session is treated as an outside one and the VRF
+ * defaults to sm->outside_vrf_id; "in" switches the default VRF to
+ * sm->inside_vrf_id.
+ *
+ * @param vm     vlib main (unused).
+ * @param input  CLI input.
+ * @param cmd    CLI command registration (unused).
+ *
+ * @returns 0 on success or when no input line is available, otherwise a
+ *          clib error describing the failure.
+ */
+static clib_error_t *
+nat44_del_session_command_fn (vlib_main_t * vm,
+                              unformat_input_t * input,
+                              vlib_cli_command_t * cmd)
+{
+  snat_main_t *sm = &snat_main;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  int is_in = 0;
+  int have_session = 0;
+  clib_error_t *error = 0;
+  ip4_address_t addr;
+  u32 port = 0, vrf_id = sm->outside_vrf_id;
+  snat_protocol_t proto = 0;
+  int rv;
+
+  addr.as_u32 = 0;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%U:%u %U", unformat_ip4_address, &addr, &port,
+          unformat_snat_protocol, &proto))
+        have_session = 1;
+      else if (unformat (line_input, "in"))
+        {
+          is_in = 1;
+          vrf_id = sm->inside_vrf_id;
+        }
+      /* "out" is advertised by the help text, so it has to be accepted */
+      else if (unformat (line_input, "out"))
+        {
+          is_in = 0;
+          vrf_id = sm->outside_vrf_id;
+        }
+      else if (unformat (line_input, "vrf %u", &vrf_id))
+        ;
+      else
+        {
+          error = clib_error_return (0, "unknown input '%U'",
+                                    format_unformat_error, line_input);
+          goto done;
+        }
+    }
+
+  /* Never hand an unparsed address/protocol to nat44_del_session() */
+  if (!have_session)
+    {
+      error = clib_error_return (0, "expected <addr>:<port> tcp|udp|icmp");
+      goto done;
+    }
+
+  /* The port is parsed as u32 but passed on as u16; reject what would wrap */
+  if (port > 0xffff)
+    {
+      error = clib_error_return (0, "invalid port %u", port);
+      goto done;
+    }
+
+  rv = nat44_del_session(sm, &addr, (u16) port, proto, vrf_id, is_in);
+  if (rv)
+    error = clib_error_return (0, "nat44_del_session returned %d", rv);
+
+done:
+  unformat_free (line_input);
+
+  return error;
+}
+
+/* CLI registration: the "nat44 del session" path is served by
+ * nat44_del_session_command_fn. */
+VLIB_CLI_COMMAND (nat44_del_session_command, static) = {
+    .path = "nat44 del session",
+    .short_help = "nat44 del session in|out <addr>:<port> tcp|udp|icmp [vrf <id>]",
+    .function = nat44_del_session_command_fn,
+};
+
 static clib_error_t *
 snat_det_map_command_fn (vlib_main_t * vm,
                          unformat_input_t * input,