NAT: hairpinning rework (VPP-1003)
[vpp.git] / src / plugins / nat / nat.c
index 876b6aa..8b4f50c 100644 (file)
@@ -95,6 +95,14 @@ VNET_FEATURE_INIT (ip4_snat_hairpin_src, static) = {
   .runs_before = VNET_FEATURES ("interface-output"),
 };
 
+/* Hook up ip4-local features: register "nat44-hairpinning" on the "ip4-local" arc, ordered before "ip4-local-end-of-arc" */
+VNET_FEATURE_INIT (ip4_nat_hairpinning, static) =
+{
+  .arc_name = "ip4-local",
+  .node_name = "nat44-hairpinning",   /* toggled per inside interface by the vnet_feature_enable_disable call in snat_interface_add_del */
+  .runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
+};
+
 
 /* *INDENT-OFF* */
 VLIB_PLUGIN_REGISTER () = {
@@ -344,7 +352,7 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
                       if (e_port > 1024) \
                         { \
                           a->busy_##n##_ports++; \
-                          a->busy_##n##_ports_per_thread[e_port / sm->port_per_thread]++; \
+                          a->busy_##n##_ports_per_thread[(e_port - 1024) / sm->port_per_thread]++; \
                         } \
                       break;
                       foreach_snat_protocol
@@ -390,33 +398,12 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
       kv.value = m - sm->static_mappings;
       clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 1);
 
-      /* Assign worker */
       if (sm->workers)
         {
-          snat_user_key_t w_key0;
-          snat_worker_key_t w_key1;
-
-          w_key0.addr = m->local_addr;
-          w_key0.fib_index = m->fib_index;
-          kv.key = w_key0.as_u64;
-
-          if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value))
-            {
-              kv.value = sm->first_worker_index +
-                sm->workers[sm->next_worker++ % vec_len (sm->workers)];
-
-              clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv, 1);
-            }
-          else
-            {
-              kv.value = value.value;
-            }
-
-          w_key1.addr = m->external_addr;
-          w_key1.port = clib_host_to_net_u16 (m->external_port);
-          w_key1.fib_index = sm->outside_fib_index;
-          kv.key = w_key1.as_u64;
-          clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv, 1);
+          ip4_header_t ip = {
+            .src_address = m->local_addr,
+          };
+          m->worker_index = sm->worker_in2out_cb (&ip, m->fib_index);
         }
     }
   else
@@ -440,7 +427,7 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
                       if (e_port > 1024) \
                         { \
                           a->busy_##n##_ports--; \
-                          a->busy_##n##_ports_per_thread[e_port / sm->port_per_thread]--; \
+                          a->busy_##n##_ports_per_thread[(e_port - 1024) / sm->port_per_thread]--; \
                         } \
                       break;
                       foreach_snat_protocol
@@ -483,8 +470,8 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
           u_key.addr = m->local_addr;
           u_key.fib_index = m->fib_index;
           kv.key = u_key.as_u64;
-          if (!clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value))
-            tsm = vec_elt_at_index (sm->per_thread_data, value.value);
+          if (sm->num_workers)
+            tsm = vec_elt_at_index (sm->per_thread_data, m->worker_index);
           else
             tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
           if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
@@ -612,8 +599,6 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
   snat_address_t *a = 0;
   int i;
   nat44_lb_addr_port_t *local;
-  snat_user_key_t w_key0;
-  snat_worker_key_t w_key1;
   u32 worker_index = 0;
   snat_main_per_thread_data_t *tsm;
 
@@ -659,7 +644,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
                       if (e_port > 1024) \
                         { \
                           a->busy_##n##_ports++; \
-                          a->busy_##n##_ports_per_thread[e_port / sm->port_per_thread]++; \
+                          a->busy_##n##_ports_per_thread[(e_port - 1024) / sm->port_per_thread]++; \
                         } \
                       break;
                       foreach_snat_protocol
@@ -700,27 +685,10 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
       /* Assign worker */
       if (sm->workers)
         {
-          w_key0.addr = locals[0].addr;
-          w_key0.fib_index = fib_index;
-          kv.key = w_key0.as_u64;
-
-          if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value))
-            worker_index = sm->first_worker_index +
-              sm->workers[sm->next_worker++ % vec_len (sm->workers)];
-          else
-            worker_index = value.value;
-
-          w_key1.addr = m->external_addr;
-          w_key1.port = clib_host_to_net_u16 (m->external_port);
-          w_key1.fib_index = sm->outside_fib_index;
-          kv.key = w_key1.as_u64;
-          kv.value = worker_index;
-          if (clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv, 1))
-            {
-              clib_warning ("worker-by-out add key failed");
-              return VNET_API_ERROR_UNSPECIFIED;
-            }
+          worker_index = sm->first_worker_index +
+            sm->workers[sm->next_worker++ % vec_len (sm->workers)];
           tsm = vec_elt_at_index (sm->per_thread_data, worker_index);
+          m->worker_index = worker_index;
         }
       else
         tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
@@ -745,6 +713,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
           locals[i].prefix = (i == 0) ? locals[i].probability :\
             (locals[i - 1].prefix + locals[i].probability);
           vec_add1 (m->locals, locals[i]);
+
           m_key.port = clib_host_to_net_u16 (locals[i].port);
           kv.key = m_key.as_u64;
           kv.value = ~0ULL;
@@ -753,19 +722,6 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
               clib_warning ("in2out key add failed");
               return VNET_API_ERROR_UNSPECIFIED;
             }
-          /* Assign worker */
-          if (sm->workers)
-            {
-              w_key0.addr = locals[i].addr;
-              w_key0.fib_index = fib_index;
-              kv.key = w_key0.as_u64;
-              kv.value = worker_index;
-              if (clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv, 1))
-                {
-                  clib_warning ("worker-by-in key add failed");
-                  return VNET_API_ERROR_UNSPECIFIED;
-                }
-            }
         }
     }
   else
@@ -791,7 +747,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
                       if (e_port > 1024) \
                         { \
                           a->busy_##n##_ports--; \
-                          a->busy_##n##_ports_per_thread[e_port / sm->port_per_thread]--; \
+                          a->busy_##n##_ports_per_thread[(e_port - 1024) / sm->port_per_thread]--; \
                         } \
                       break;
                       foreach_snat_protocol
@@ -805,15 +761,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
             }
         }
 
-      w_key1.addr = m->external_addr;
-      w_key1.port = clib_host_to_net_u16 (m->external_port);
-      w_key1.fib_index = sm->outside_fib_index;
-      kv.key = w_key1.as_u64;
-      if (!clib_bihash_search_8_8 (&sm->worker_by_out, &kv, &value))
-        tsm = vec_elt_at_index (sm->per_thread_data, value.value);
-      else
-        tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
-
+      tsm = vec_elt_at_index (sm->per_thread_data, m->worker_index);
       m_key.addr = m->external_addr;
       m_key.port = m->external_port;
       m_key.protocol = m->proto;
@@ -824,6 +772,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
           clib_warning ("static_mapping_by_external key del failed");
           return VNET_API_ERROR_UNSPECIFIED;
         }
+
       m_key.port = clib_host_to_net_u16 (m->external_port);
       kv.key = m_key.as_u64;
       if (clib_bihash_add_del_8_8(&tsm->out2in, &kv, 0))
@@ -843,6 +792,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
               clib_warning ("static_mapping_by_local key del failed");
               return VNET_API_ERROR_UNSPECIFIED;
             }
+
           m_key.port = clib_host_to_net_u16 (local->port);
           kv.key = m_key.as_u64;
           if (clib_bihash_add_del_8_8(&tsm->in2out, &kv, 0))
@@ -1051,7 +1001,11 @@ int snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
   /* Add/delete external addresses to FIB */
 fib:
   if (is_inside)
-    return 0;
+    {
+      vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning",
+                                   sw_if_index, !is_del, 0, 0);
+      return 0;
+    }
 
   vec_foreach (ap, sm->addresses)
     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
@@ -2040,32 +1994,17 @@ static u32
 snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0)
 {
   snat_main_t *sm = &snat_main;
-  snat_user_key_t key0;
-  clib_bihash_kv_8_8_t kv0, value0;
   u32 next_worker_index = 0;
+  u32 hash;   /* fold of the IPv4 source address; selects the sm->workers[] slot */
 
-  key0.addr = ip0->src_address;
-  key0.fib_index = rx_fib_index0;
+  next_worker_index = sm->first_worker_index;   /* base value; the chosen sm->workers[] entry is added below */
+  hash = ip0->src_address.as_u32 + (ip0->src_address.as_u32 >> 8) +
+         (ip0->src_address.as_u32 >> 16) + (ip0->src_address.as_u32 >>24);   /* rx_fib_index0 no longer takes part in the selection */
 
-  kv0.key = key0.as_u64;
-
-  /* Ever heard of of the "user" before? */
-  if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0))
-    {
-      /* No, assign next available worker (RR) */
-      next_worker_index = sm->first_worker_index;
-      if (vec_len (sm->workers))
-        {
-          next_worker_index +=
-            sm->workers[sm->next_worker++ % _vec_len (sm->workers)];
-        }
-
-      /* add non-traslated packets worker lookup */
-      kv0.value = next_worker_index;
-      clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
-    }
+  if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))   /* NOTE(review): the removed code checked vec_len (sm->workers) before indexing; this version assumes sm->workers is non-empty - confirm for all callers */
+    next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];   /* is_pow2 case: reduce the hash with a mask of (length - 1) */
   else
-    next_worker_index = value0.value;
+    next_worker_index += sm->workers[hash % _vec_len (sm->workers)];   /* otherwise reduce the hash modulo the vector length */
 
   return next_worker_index;
 }
@@ -2074,58 +2013,114 @@ static u32
 snat_get_worker_out2in_cb (ip4_header_t * ip0, u32 rx_fib_index0)
 {
   snat_main_t *sm = &snat_main;
-  snat_worker_key_t key0;
-  clib_bihash_kv_8_8_t kv0, value0;
-  udp_header_t * udp0;
-  u32 next_worker_index = 0;
-
-  udp0 = ip4_next_header (ip0);
-
-  key0.addr = ip0->dst_address;
-  key0.port = udp0->dst_port;
-  key0.fib_index = rx_fib_index0;
+  udp_header_t *udp;
+  u16 port;
+  snat_session_key_t m_key;
+  clib_bihash_kv_8_8_t kv, value;
+  snat_static_mapping_t *m;
+  nat_ed_ses_key_t key;
+  clib_bihash_kv_16_8_t s_kv, s_value;
+  snat_main_per_thread_data_t *tsm;
+  snat_session_t *s;
+  int i;
+  u32 proto;
 
-  if (PREDICT_FALSE(ip0->protocol == IP_PROTOCOL_ICMP))
+  /* first try static mappings without port */
+  if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
     {
-      icmp46_header_t * icmp0 = (icmp46_header_t *) udp0;
-      icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
-      key0.port = echo0->identifier;
+      m_key.addr = ip0->dst_address;
+      m_key.port = 0;
+      m_key.protocol = 0;
+      m_key.fib_index = rx_fib_index0;
+      kv.key = m_key.as_u64;
+      if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+        {
+          m = pool_elt_at_index (sm->static_mappings, value.value);
+          return m->worker_index;
+        }
     }
 
-  kv0.key = key0.as_u64;
+  proto = ip_proto_to_snat_proto (ip0->protocol);
+  udp = ip4_next_header (ip0);
+  port = udp->dst_port;
 
-  /* Ever heard of of the "user" before? */
-  if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
+  /* unknown protocol */
+  if (PREDICT_FALSE (proto == ~0))
     {
-      key0.port = 0;
-      kv0.key = key0.as_u64;
+      key.l_addr = ip0->dst_address;
+      key.r_addr = ip0->src_address;
+      key.fib_index = rx_fib_index0;
+      key.proto = ip0->protocol;
+      key.rsvd = 0;
+      key.l_port = 0;
+      s_kv.key[0] = key.as_u64[0];
+      s_kv.key[1] = key.as_u64[1];
 
-      if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
+      if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
         {
-          /* No, assign next available worker (RR) */
-          next_worker_index = sm->first_worker_index;
-          if (vec_len (sm->workers))
+          for (i = 0; i < _vec_len (sm->per_thread_data); i++)
             {
-              next_worker_index +=
-                sm->workers[sm->next_worker++ % _vec_len (sm->workers)];
+              tsm = vec_elt_at_index (sm->per_thread_data, i);
+              if (!pool_is_free_index(tsm->sessions, s_value.value))
+                {
+                  s = pool_elt_at_index (tsm->sessions, s_value.value);
+                  if (s->out2in.addr.as_u32 == ip0->dst_address.as_u32 &&
+                      s->out2in.port == ip0->protocol &&
+                      snat_is_unk_proto_session (s))
+                    return i;
+                }
             }
-        }
+         }
+
+      /* if no session use current thread */
+      return vlib_get_thread_index ();
+    }
+
+  if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_ICMP))
+    {
+      icmp46_header_t * icmp = (icmp46_header_t *) udp;
+      icmp_echo_header_t *echo = (icmp_echo_header_t *)(icmp + 1);
+      if (!icmp_is_error_message (icmp))
+        port = echo->identifier;
       else
         {
-          /* Static mapping without port */
-          next_worker_index = value0.value;
+          ip4_header_t *inner_ip = (ip4_header_t *)(echo + 1);
+          proto = ip_proto_to_snat_proto (inner_ip->protocol);
+          void *l4_header = ip4_next_header (inner_ip);
+          switch (proto)
+            {
+            case SNAT_PROTOCOL_ICMP:
+              icmp = (icmp46_header_t*)l4_header;
+              echo = (icmp_echo_header_t *)(icmp + 1);
+              port = echo->identifier;
+              break;
+            case SNAT_PROTOCOL_UDP:
+            case SNAT_PROTOCOL_TCP:
+              port = ((tcp_udp_header_t*)l4_header)->src_port;
+              break;
+            default:
+              return vlib_get_thread_index ();
+            }
         }
+    }
 
-      /* Add to translated packets worker lookup */
-      key0.port = udp0->dst_port;
-      kv0.key = key0.as_u64;
-      kv0.value = next_worker_index;
-      clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
+  /* try static mappings with port */
+  if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
+    {
+      m_key.addr = ip0->dst_address;
+      m_key.port = clib_net_to_host_u16 (port);
+      m_key.protocol = proto;
+      m_key.fib_index = rx_fib_index0;
+      kv.key = m_key.as_u64;
+      if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+        {
+          m = pool_elt_at_index (sm->static_mappings, value.value);
+          return m->worker_index;
+        }
     }
-  else
-    next_worker_index = value0.value;
 
-  return next_worker_index;
+  /* worker by outside port */
+  return (u32) ((clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread);
 }
 
 static clib_error_t *
@@ -2183,6 +2178,8 @@ snat_config (vlib_main_t * vm, unformat_input_t * input)
   /* for show commands, etc. */
   sm->translation_buckets = translation_buckets;
   sm->translation_memory_size = translation_memory_size;
+  /* do not exceed load factor 10 */
+  sm->max_translations = 10 * translation_buckets;
   sm->user_buckets = user_buckets;
   sm->user_memory_size = user_memory_size;
   sm->max_translations_per_user = max_translations_per_user;
@@ -2230,12 +2227,6 @@ snat_config (vlib_main_t * vm, unformat_input_t * input)
                                     user_memory_size);
             }
 
-          clib_bihash_init_8_8 (&sm->worker_by_in, "worker-by-in", user_buckets,
-                                user_memory_size);
-
-          clib_bihash_init_8_8 (&sm->worker_by_out, "worker-by-out",
-                                translation_buckets, translation_memory_size);
-
           clib_bihash_init_16_8 (&sm->in2out_ed, "in2out-ed",
                                  translation_buckets, translation_memory_size);
 
@@ -2616,10 +2607,6 @@ show_snat_command_fn (vlib_main_t * vm,
                                verbose - 1);
               vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->out2in_ed,
                                verbose - 1);
-              vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_in,
-                               verbose - 1);
-              vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_out,
-                               verbose - 1);
               vec_foreach_index (j, sm->per_thread_data)
                 {
                   tsm = vec_elt_at_index (sm->per_thread_data, j);
@@ -3121,7 +3108,7 @@ snat_det_close_session_out_fn (vlib_main_t *vm,
   snat_main_t *sm = &snat_main;
   unformat_input_t _line_input, *line_input = &_line_input;
   ip4_address_t out_addr, ext_addr, in_addr;
-  u16 out_port, ext_port;
+  u32 out_port, ext_port;
   snat_det_map_t * dm;
   snat_det_session_t * ses;
   snat_det_out_key_t key;
@@ -3152,10 +3139,10 @@ snat_det_close_session_out_fn (vlib_main_t *vm,
     vlib_cli_output (vm, "no match");
   else
     {
-      snat_det_reverse(dm, &ext_addr, out_port, &in_addr);
+      snat_det_reverse(dm, &ext_addr, (u16)out_port, &in_addr);
       key.ext_host_addr = out_addr;
-      key.ext_host_port = ntohs(ext_port);
-      key.out_port = ntohs(out_port);
+      key.ext_host_port = ntohs((u16)ext_port);
+      key.out_port = ntohs((u16)out_port);
       ses = snat_det_get_ses_by_out(dm, &out_addr, key.as_u64);
       if (!ses)
         vlib_cli_output (vm, "no match");
@@ -3192,7 +3179,7 @@ snat_det_close_session_in_fn (vlib_main_t *vm,
   snat_main_t *sm = &snat_main;
   unformat_input_t _line_input, *line_input = &_line_input;
   ip4_address_t in_addr, ext_addr;
-  u16 in_port, ext_port;
+  u32 in_port, ext_port;
   snat_det_map_t * dm;
   snat_det_session_t * ses;
   snat_det_out_key_t key;
@@ -3224,8 +3211,8 @@ snat_det_close_session_in_fn (vlib_main_t *vm,
   else
     {
       key.ext_host_addr = ext_addr;
-      key.ext_host_port = ntohs (ext_port);
-      ses = snat_det_find_ses_by_in (dm, &in_addr, ntohs(in_port), key);
+      key.ext_host_port = ntohs ((u16)ext_port);
+      ses = snat_det_find_ses_by_in (dm, &in_addr, ntohs((u16)in_port), key);
       if (!ses)
         vlib_cli_output (vm, "no match");
       else