NAT: hairpinning rework (VPP-1003)
[vpp.git] / src / plugins / nat / nat.c
index 8aecac6..8b4f50c 100644 (file)
@@ -95,6 +95,14 @@ VNET_FEATURE_INIT (ip4_snat_hairpin_src, static) = {
   .runs_before = VNET_FEATURES ("interface-output"),
 };
 
+/* Hook up ip4-local features: "nat44-hairpinning" on the "ip4-local" arc, before "ip4-local-end-of-arc" */
+VNET_FEATURE_INIT (ip4_nat_hairpinning, static) =
+{
+  .arc_name = "ip4-local",
+  .node_name = "nat44-hairpinning",
+  .runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
+};
+
 
 /* *INDENT-OFF* */
 VLIB_PLUGIN_REGISTER () = {
@@ -152,6 +160,7 @@ void snat_add_address (snat_main_t *sm, ip4_address_t *addr, u32 vrf_id)
 {
   snat_address_t * ap;
   snat_interface_t *i;
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
 
   if (vrf_id != ~0)
     sm->vrf_mode = 1;
@@ -172,7 +181,9 @@ void snat_add_address (snat_main_t *sm, ip4_address_t *addr, u32 vrf_id)
   else
     ap->fib_index = ~0;
 #define _(N, i, n, s) \
-  clib_bitmap_alloc (ap->busy_##n##_port_bitmap, 65535);
+  clib_bitmap_alloc (ap->busy_##n##_port_bitmap, 65535); \
+  ap->busy_##n##_ports = 0; \
+  vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
   foreach_snat_protocol
 #undef _
 
@@ -339,7 +350,10 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
                         return VNET_API_ERROR_INVALID_VALUE; \
                       clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 1); \
                       if (e_port > 1024) \
-                        a->busy_##n##_ports++; \
+                        { \
+                          a->busy_##n##_ports++; \
+                          a->busy_##n##_ports_per_thread[(e_port - 1024) / sm->port_per_thread]++; \
+                        } \
                       break;
                       foreach_snat_protocol
 #undef _
@@ -384,33 +398,12 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
       kv.value = m - sm->static_mappings;
       clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 1);
 
-      /* Assign worker */
       if (sm->workers)
         {
-          snat_user_key_t w_key0;
-          snat_worker_key_t w_key1;
-
-          w_key0.addr = m->local_addr;
-          w_key0.fib_index = m->fib_index;
-          kv.key = w_key0.as_u64;
-
-          if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value))
-            {
-              kv.value = sm->first_worker_index +
-                sm->workers[sm->next_worker++ % vec_len (sm->workers)];
-
-              clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv, 1);
-            }
-          else
-            {
-              kv.value = value.value;
-            }
-
-          w_key1.addr = m->external_addr;
-          w_key1.port = clib_host_to_net_u16 (m->external_port);
-          w_key1.fib_index = sm->outside_fib_index;
-          kv.key = w_key1.as_u64;
-          clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv, 1);
+          ip4_header_t ip = {
+            .src_address = m->local_addr,
+          };
+          m->worker_index = sm->worker_in2out_cb (&ip, m->fib_index);
         }
     }
   else
@@ -432,7 +425,10 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
                     case SNAT_PROTOCOL_##N: \
                       clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 0); \
                       if (e_port > 1024) \
-                        a->busy_##n##_ports--; \
+                        { \
+                          a->busy_##n##_ports--; \
+                          a->busy_##n##_ports_per_thread[(e_port - 1024) / sm->port_per_thread]--; \
+                        } \
                       break;
                       foreach_snat_protocol
 #undef _
@@ -474,13 +470,13 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
           u_key.addr = m->local_addr;
           u_key.fib_index = m->fib_index;
           kv.key = u_key.as_u64;
-          if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
+          if (sm->num_workers)
+            tsm = vec_elt_at_index (sm->per_thread_data, m->worker_index);
+          else
+            tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
+          if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
             {
               user_index = value.value;
-              if (!clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value))
-                tsm = vec_elt_at_index (sm->per_thread_data, value.value);
-              else
-                tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
               u = pool_elt_at_index (tsm->users, user_index);
               if (u->nstaticsessions)
                 {
@@ -539,10 +535,10 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
                                                           s->in2out.fib_index);
 
                       value.key = s->in2out.as_u64;
-                      if (clib_bihash_add_del_8_8 (&sm->in2out, &value, 0))
+                      if (clib_bihash_add_del_8_8 (&tsm->in2out, &value, 0))
                         clib_warning ("in2out key del failed");
                       value.key = s->out2in.as_u64;
-                      if (clib_bihash_add_del_8_8 (&sm->out2in, &value, 0))
+                      if (clib_bihash_add_del_8_8 (&tsm->out2in, &value, 0))
                         clib_warning ("out2in key del failed");
 delete:
                       pool_put (tsm->sessions, s);
@@ -557,7 +553,7 @@ delete:
                   if (addr_only)
                     {
                       pool_put (tsm->users, u);
-                      clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 0);
+                      clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 0);
                     }
                 }
             }
@@ -603,9 +599,8 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
   snat_address_t *a = 0;
   int i;
   nat44_lb_addr_port_t *local;
-  snat_user_key_t w_key0;
-  snat_worker_key_t w_key1;
   u32 worker_index = 0;
+  snat_main_per_thread_data_t *tsm;
 
   m_key.addr = e_addr;
   m_key.port = e_port;
@@ -647,7 +642,10 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
                         return VNET_API_ERROR_INVALID_VALUE; \
                       clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 1); \
                       if (e_port > 1024) \
-                        a->busy_##n##_ports++; \
+                        { \
+                          a->busy_##n##_ports++; \
+                          a->busy_##n##_ports_per_thread[(e_port - 1024) / sm->port_per_thread]++; \
+                        } \
                       break;
                       foreach_snat_protocol
 #undef _
@@ -683,42 +681,28 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
           clib_warning ("static_mapping_by_external key add failed");
           return VNET_API_ERROR_UNSPECIFIED;
         }
+
+      /* Assign worker */
+      if (sm->workers)
+        {
+          worker_index = sm->first_worker_index +
+            sm->workers[sm->next_worker++ % vec_len (sm->workers)];
+          tsm = vec_elt_at_index (sm->per_thread_data, worker_index);
+          m->worker_index = worker_index;
+        }
+      else
+        tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
+
       m_key.port = clib_host_to_net_u16 (m->external_port);
       kv.key = m_key.as_u64;
       kv.value = ~0ULL;
-      if (clib_bihash_add_del_8_8(&sm->out2in, &kv, 1))
+      if (clib_bihash_add_del_8_8(&tsm->out2in, &kv, 1))
         {
           clib_warning ("static_mapping_by_local key add failed");
           return VNET_API_ERROR_UNSPECIFIED;
         }
 
       m_key.fib_index = m->fib_index;
-
-      /* Assign worker */
-      if (sm->workers)
-        {
-          w_key0.addr = locals[0].addr;
-          w_key0.fib_index = fib_index;
-          kv.key = w_key0.as_u64;
-
-          if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value))
-            worker_index = sm->first_worker_index +
-              sm->workers[sm->next_worker++ % vec_len (sm->workers)];
-          else
-            worker_index = value.value;
-
-          w_key1.addr = m->external_addr;
-          w_key1.port = clib_host_to_net_u16 (m->external_port);
-          w_key1.fib_index = sm->outside_fib_index;
-          kv.key = w_key1.as_u64;
-          kv.value = worker_index;
-          if (clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv, 1))
-            {
-              clib_warning ("worker-by-out add key failed");
-              return VNET_API_ERROR_UNSPECIFIED;
-            }
-        }
-
       for (i = 0; i < vec_len (locals); i++)
         {
           m_key.addr = locals[i].addr;
@@ -726,29 +710,18 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
           kv.key = m_key.as_u64;
           kv.value = m - sm->static_mappings;
           clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 1);
-          locals[i].prefix = locals[i - 1].prefix + locals[i].probability;
+          locals[i].prefix = (i == 0) ? locals[i].probability :\
+            (locals[i - 1].prefix + locals[i].probability);
           vec_add1 (m->locals, locals[i]);
+
           m_key.port = clib_host_to_net_u16 (locals[i].port);
           kv.key = m_key.as_u64;
           kv.value = ~0ULL;
-          if (clib_bihash_add_del_8_8(&sm->in2out, &kv, 1))
+          if (clib_bihash_add_del_8_8(&tsm->in2out, &kv, 1))
             {
               clib_warning ("in2out key add failed");
               return VNET_API_ERROR_UNSPECIFIED;
             }
-          /* Assign worker */
-          if (sm->workers)
-            {
-              w_key0.addr = locals[i].addr;
-              w_key0.fib_index = fib_index;
-              kv.key = w_key0.as_u64;
-              kv.value = worker_index;
-              if (clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv, 1))
-                {
-                  clib_warning ("worker-by-in key add failed");
-                  return VNET_API_ERROR_UNSPECIFIED;
-                }
-            }
         }
     }
   else
@@ -772,7 +745,10 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
                     case SNAT_PROTOCOL_##N: \
                       clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 0); \
                       if (e_port > 1024) \
-                        a->busy_##n##_ports--; \
+                        { \
+                          a->busy_##n##_ports--; \
+                          a->busy_##n##_ports_per_thread[(e_port - 1024) / sm->port_per_thread]--; \
+                        } \
                       break;
                       foreach_snat_protocol
 #undef _
@@ -785,6 +761,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
             }
         }
 
+      tsm = vec_elt_at_index (sm->per_thread_data, m->worker_index);
       m_key.addr = m->external_addr;
       m_key.port = m->external_port;
       m_key.protocol = m->proto;
@@ -795,9 +772,10 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
           clib_warning ("static_mapping_by_external key del failed");
           return VNET_API_ERROR_UNSPECIFIED;
         }
+
       m_key.port = clib_host_to_net_u16 (m->external_port);
       kv.key = m_key.as_u64;
-      if (clib_bihash_add_del_8_8(&sm->out2in, &kv, 0))
+      if (clib_bihash_add_del_8_8(&tsm->out2in, &kv, 0))
         {
           clib_warning ("outi2in key del failed");
           return VNET_API_ERROR_UNSPECIFIED;
@@ -814,14 +792,16 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
               clib_warning ("static_mapping_by_local key del failed");
               return VNET_API_ERROR_UNSPECIFIED;
             }
+
           m_key.port = clib_host_to_net_u16 (local->port);
           kv.key = m_key.as_u64;
-          if (clib_bihash_add_del_8_8(&sm->in2out, &kv, 0))
+          if (clib_bihash_add_del_8_8(&tsm->in2out, &kv, 0))
             {
               clib_warning ("in2out key del failed");
               return VNET_API_ERROR_UNSPECIFIED;
             }
         }
+      vec_free(m->locals);
 
       pool_put (sm->static_mappings, m);
     }
@@ -921,16 +901,16 @@ int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm)
                                                         ses->out2in.port,
                                                         ses->in2out.fib_index);
                     kv.key = ses->in2out.as_u64;
-                    clib_bihash_add_del_8_8 (&sm->in2out, &kv, 0);
+                    clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 0);
                     kv.key = ses->out2in.as_u64;
-                    clib_bihash_add_del_8_8 (&sm->out2in, &kv, 0);
+                    clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 0);
                   }
                 vec_add1 (ses_to_be_removed, ses - tsm->sessions);
                 clib_dlist_remove (tsm->list_pool, ses->per_user_index);
                 user_key.addr = ses->in2out.addr;
                 user_key.fib_index = ses->in2out.fib_index;
                 kv.key = user_key.as_u64;
-                if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
+                if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
                   {
                     u = pool_elt_at_index (tsm->users, value.value);
                     u->nsessions--;
@@ -1021,7 +1001,11 @@ int snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
   /* Add/delete external addresses to FIB */
 fib:
   if (is_inside)
-    return 0;
+    {
+      vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning",
+                                   sw_if_index, !is_del, 0, 0);
+      return 0;
+    }
 
   vec_foreach (ap, sm->addresses)
     snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del);
@@ -1240,6 +1224,7 @@ static clib_error_t * snat_init (vlib_main_t * vm)
 VLIB_INIT_FUNCTION (snat_init);
 
 void snat_free_outside_address_and_port (snat_main_t * sm,
+                                         u32 thread_index,
                                          snat_session_key_t * k,
                                          u32 address_index)
 {
@@ -1259,6 +1244,7 @@ void snat_free_outside_address_and_port (snat_main_t * sm,
       clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \
         port_host_byte_order, 0); \
       a->busy_##n##_ports--; \
+      a->busy_##n##_ports_per_thread[thread_index]--; \
       break;
       foreach_snat_protocol
 #undef _
@@ -1322,7 +1308,7 @@ int snat_static_mapping_match (snat_main_t * sm,
           rand = 1 + (random_u32 (&sm->random_seed) % m->locals[hi].prefix);
           while (lo < hi)
             {
-              mid = ((hi - 1) >> 1) + lo;
+              mid = ((hi - lo) >> 1) + lo;
               (rand > m->locals[mid].prefix) ? (lo = mid + 1) : (hi = mid);
             }
           if (!(m->locals[lo].prefix >= rand))
@@ -1381,16 +1367,17 @@ int snat_alloc_outside_address_and_port (snat_main_t * sm,
         {
 #define _(N, j, n, s) \
         case SNAT_PROTOCOL_##N: \
-          if (a->busy_##n##_ports < (sm->port_per_thread * sm->num_snat_thread)) \
+          if (a->busy_##n##_ports_per_thread[thread_index] < sm->port_per_thread) \
             { \
               while (1) \
                 { \
                   portnum = (sm->port_per_thread * \
                     sm->per_thread_data[thread_index].snat_thread_index) + \
-                    snat_random_port(sm, 0, sm->port_per_thread) + 1024; \
+                    snat_random_port(sm, 1, sm->port_per_thread) + 1024; \
                   if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, portnum)) \
                     continue; \
                   clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, portnum, 1); \
+                  a->busy_##n##_ports_per_thread[thread_index]++; \
                   a->busy_##n##_ports++; \
                   k->addr = a->addr; \
                   k->port = clib_host_to_net_u16(portnum); \
@@ -2007,32 +1994,17 @@ static u32
 snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0)
 {
   snat_main_t *sm = &snat_main;
-  snat_user_key_t key0;
-  clib_bihash_kv_8_8_t kv0, value0;
   u32 next_worker_index = 0;
+  u32 hash; /* folded from the IPv4 source address only; rx_fib_index0 is not used by the new code */
 
-  key0.addr = ip0->src_address;
-  key0.fib_index = rx_fib_index0;
-
-  kv0.key = key0.as_u64;
-
-  /* Ever heard of of the "user" before? */
-  if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0))
-    {
-      /* No, assign next available worker (RR) */
-      next_worker_index = sm->first_worker_index;
-      if (vec_len (sm->workers))
-        {
-          next_worker_index +=
-            sm->workers[sm->next_worker++ % _vec_len (sm->workers)];
-        }
+  next_worker_index = sm->first_worker_index; /* base index; an sm->workers[] entry is added below */
+  hash = ip0->src_address.as_u32 + (ip0->src_address.as_u32 >> 8) +
+         (ip0->src_address.as_u32 >> 16) + (ip0->src_address.as_u32 >>24);
 
-      /* add non-traslated packets worker lookup */
-      kv0.value = next_worker_index;
-      clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
-    }
+  if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers)))) /* power-of-two worker count: mask instead of modulo */
+    next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
   else
-    next_worker_index = value0.value;
+    next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
 
   return next_worker_index;
 }
@@ -2041,58 +2013,114 @@ static u32
 snat_get_worker_out2in_cb (ip4_header_t * ip0, u32 rx_fib_index0)
 {
   snat_main_t *sm = &snat_main;
-  snat_worker_key_t key0;
-  clib_bihash_kv_8_8_t kv0, value0;
-  udp_header_t * udp0;
-  u32 next_worker_index = 0;
-
-  udp0 = ip4_next_header (ip0);
-
-  key0.addr = ip0->dst_address;
-  key0.port = udp0->dst_port;
-  key0.fib_index = rx_fib_index0;
+  udp_header_t *udp;
+  u16 port;
+  snat_session_key_t m_key;
+  clib_bihash_kv_8_8_t kv, value;
+  snat_static_mapping_t *m;
+  nat_ed_ses_key_t key;
+  clib_bihash_kv_16_8_t s_kv, s_value;
+  snat_main_per_thread_data_t *tsm;
+  snat_session_t *s;
+  int i;
+  u32 proto;
 
-  if (PREDICT_FALSE(ip0->protocol == IP_PROTOCOL_ICMP))
+  /* Pick the worker for an out2in packet. First try static mappings without port (key: dst address, port 0, protocol 0, rx fib) */
+  if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
     {
-      icmp46_header_t * icmp0 = (icmp46_header_t *) udp0;
-      icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
-      key0.port = echo0->identifier;
+      m_key.addr = ip0->dst_address;
+      m_key.port = 0;
+      m_key.protocol = 0;
+      m_key.fib_index = rx_fib_index0;
+      kv.key = m_key.as_u64;
+      if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+        {
+          m = pool_elt_at_index (sm->static_mappings, value.value);
+          return m->worker_index;
+        }
     }
 
-  kv0.key = key0.as_u64;
+  proto = ip_proto_to_snat_proto (ip0->protocol);
+  udp = ip4_next_header (ip0);
+  port = udp->dst_port;
 
-  /* Ever heard of of the "user" before? */
-  if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
+  /* unknown protocol: search sm->out2in_ed, then scan each thread's session pool for the matching session */
+  if (PREDICT_FALSE (proto == ~0))
     {
-      key0.port = 0;
-      kv0.key = key0.as_u64;
+      key.l_addr = ip0->dst_address;
+      key.r_addr = ip0->src_address;
+      key.fib_index = rx_fib_index0;
+      key.proto = ip0->protocol;
+      key.rsvd = 0;
+      key.l_port = 0;
+      s_kv.key[0] = key.as_u64[0];
+      s_kv.key[1] = key.as_u64[1];
 
-      if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
+      if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
         {
-          /* No, assign next available worker (RR) */
-          next_worker_index = sm->first_worker_index;
-          if (vec_len (sm->workers))
+          for (i = 0; i < _vec_len (sm->per_thread_data); i++)
             {
-              next_worker_index +=
-                sm->workers[sm->next_worker++ % _vec_len (sm->workers)];
+              tsm = vec_elt_at_index (sm->per_thread_data, i);
+              if (!pool_is_free_index(tsm->sessions, s_value.value))
+                {
+                  s = pool_elt_at_index (tsm->sessions, s_value.value);
+                  if (s->out2in.addr.as_u32 == ip0->dst_address.as_u32 &&
+                      s->out2in.port == ip0->protocol &&
+                      snat_is_unk_proto_session (s))
+                    return i;
+                }
             }
-        }
+         }
+
+      /* if no session use current thread */
+      return vlib_get_thread_index ();
+    }
+
+  if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_ICMP))
+    {
+      icmp46_header_t * icmp = (icmp46_header_t *) udp;
+      icmp_echo_header_t *echo = (icmp_echo_header_t *)(icmp + 1);
+      if (!icmp_is_error_message (icmp))
+        port = echo->identifier;
       else
         {
-          /* Static mapping without port */
-          next_worker_index = value0.value;
+          ip4_header_t *inner_ip = (ip4_header_t *)(echo + 1); /* ICMP error: inspect the embedded IP header */
+          proto = ip_proto_to_snat_proto (inner_ip->protocol);
+          void *l4_header = ip4_next_header (inner_ip);
+          switch (proto)
+            {
+            case SNAT_PROTOCOL_ICMP:
+              icmp = (icmp46_header_t*)l4_header;
+              echo = (icmp_echo_header_t *)(icmp + 1);
+              port = echo->identifier;
+              break;
+            case SNAT_PROTOCOL_UDP:
+            case SNAT_PROTOCOL_TCP:
+              port = ((tcp_udp_header_t*)l4_header)->src_port; /* source port of the embedded packet */
+              break;
+            default:
+              return vlib_get_thread_index ();
+            }
         }
+    }
 
-      /* Add to translated packets worker lookup */
-      key0.port = udp0->dst_port;
-      kv0.key = key0.as_u64;
-      kv0.value = next_worker_index;
-      clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
+  /* try static mappings with port (port converted to host byte order for the key) */
+  if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
+    {
+      m_key.addr = ip0->dst_address;
+      m_key.port = clib_net_to_host_u16 (port);
+      m_key.protocol = proto;
+      m_key.fib_index = rx_fib_index0;
+      kv.key = m_key.as_u64;
+      if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+        {
+          m = pool_elt_at_index (sm->static_mappings, value.value);
+          return m->worker_index;
+        }
     }
-  else
-    next_worker_index = value0.value;
 
-  return next_worker_index;
+  /* worker by outside port. NOTE(review): unlike the in2out callback, this is not offset by sm->first_worker_index or mapped through sm->workers, and ports below 1024 are not guarded - confirm intended */
+  return (u32) ((clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread);
 }
 
 static clib_error_t *
@@ -2110,6 +2138,7 @@ snat_config (vlib_main_t * vm, unformat_input_t * input)
   u32 static_mapping_memory_size = 64<<20;
   u8 static_mapping_only = 0;
   u8 static_mapping_connection_tracking = 0;
+  snat_main_per_thread_data_t *tsm;
 
   sm->deterministic = 0;
 
@@ -2149,6 +2178,8 @@ snat_config (vlib_main_t * vm, unformat_input_t * input)
   /* for show commands, etc. */
   sm->translation_buckets = translation_buckets;
   sm->translation_memory_size = translation_memory_size;
+  /* do not exceed load factor 10 */
+  sm->max_translations = 10 * translation_buckets;
   sm->user_buckets = user_buckets;
   sm->user_memory_size = user_memory_size;
   sm->max_translations_per_user = max_translations_per_user;
@@ -2184,20 +2215,17 @@ snat_config (vlib_main_t * vm, unformat_input_t * input)
           sm->icmp_match_in2out_cb = icmp_match_in2out_slow;
           sm->icmp_match_out2in_cb = icmp_match_out2in_slow;
 
-          clib_bihash_init_8_8 (&sm->worker_by_in, "worker-by-in", user_buckets,
-                                user_memory_size);
-
-          clib_bihash_init_8_8 (&sm->worker_by_out, "worker-by-out", user_buckets,
-                                user_memory_size);
-
-          clib_bihash_init_8_8 (&sm->in2out, "in2out", translation_buckets,
-                                translation_memory_size);
+          vec_foreach (tsm, sm->per_thread_data)
+            {
+              clib_bihash_init_8_8 (&tsm->in2out, "in2out", translation_buckets,
+                                    translation_memory_size);
 
-          clib_bihash_init_8_8 (&sm->out2in, "out2in", translation_buckets,
-                                translation_memory_size);
+              clib_bihash_init_8_8 (&tsm->out2in, "out2in", translation_buckets,
+                                    translation_memory_size);
 
-          clib_bihash_init_8_8 (&sm->user_hash, "users", user_buckets,
-                                user_memory_size);
+              clib_bihash_init_8_8 (&tsm->user_hash, "users", user_buckets,
+                                    user_memory_size);
+            }
 
           clib_bihash_init_16_8 (&sm->in2out_ed, "in2out-ed",
                                  translation_buckets, translation_memory_size);
@@ -2575,18 +2603,10 @@ show_snat_command_fn (vlib_main_t * vm,
 
           if (verbose > 0)
             {
-              vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->in2out,
-                               verbose - 1);
-              vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->out2in,
-                               verbose - 1);
               vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->in2out_ed,
                                verbose - 1);
               vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->out2in_ed,
                                verbose - 1);
-              vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_in,
-                               verbose - 1);
-              vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_out,
-                               verbose - 1);
               vec_foreach_index (j, sm->per_thread_data)
                 {
                   tsm = vec_elt_at_index (sm->per_thread_data, j);
@@ -2597,6 +2617,10 @@ show_snat_command_fn (vlib_main_t * vm,
                   vlib_worker_thread_t *w = vlib_worker_threads + j;
                   vlib_cli_output (vm, "Thread %d (%s at lcore %u):", j, w->name,
                                    w->lcore_id);
+                  vlib_cli_output (vm, "  %U", format_bihash_8_8, &tsm->in2out,
+                                   verbose - 1);
+                  vlib_cli_output (vm, "  %U", format_bihash_8_8, &tsm->out2in,
+                                   verbose - 1);
                   vlib_cli_output (vm, "  %d list pool elements",
                                    pool_elts (tsm->list_pool));
 
@@ -3084,7 +3108,7 @@ snat_det_close_session_out_fn (vlib_main_t *vm,
   snat_main_t *sm = &snat_main;
   unformat_input_t _line_input, *line_input = &_line_input;
   ip4_address_t out_addr, ext_addr, in_addr;
-  u16 out_port, ext_port;
+  u32 out_port, ext_port;
   snat_det_map_t * dm;
   snat_det_session_t * ses;
   snat_det_out_key_t key;
@@ -3115,10 +3139,10 @@ snat_det_close_session_out_fn (vlib_main_t *vm,
     vlib_cli_output (vm, "no match");
   else
     {
-      snat_det_reverse(dm, &ext_addr, out_port, &in_addr);
+      snat_det_reverse(dm, &ext_addr, (u16)out_port, &in_addr);
       key.ext_host_addr = out_addr;
-      key.ext_host_port = ntohs(ext_port);
-      key.out_port = ntohs(out_port);
+      key.ext_host_port = ntohs((u16)ext_port);
+      key.out_port = ntohs((u16)out_port);
       ses = snat_det_get_ses_by_out(dm, &out_addr, key.as_u64);
       if (!ses)
         vlib_cli_output (vm, "no match");
@@ -3155,7 +3179,7 @@ snat_det_close_session_in_fn (vlib_main_t *vm,
   snat_main_t *sm = &snat_main;
   unformat_input_t _line_input, *line_input = &_line_input;
   ip4_address_t in_addr, ext_addr;
-  u16 in_port, ext_port;
+  u32 in_port, ext_port;
   snat_det_map_t * dm;
   snat_det_session_t * ses;
   snat_det_out_key_t key;
@@ -3187,8 +3211,8 @@ snat_det_close_session_in_fn (vlib_main_t *vm,
   else
     {
       key.ext_host_addr = ext_addr;
-      key.ext_host_port = ntohs (ext_port);
-      ses = snat_det_find_ses_by_in (dm, &in_addr, ntohs(in_port), key);
+      key.ext_host_port = ntohs ((u16)ext_port);
+      ses = snat_det_find_ses_by_in (dm, &in_addr, ntohs((u16)in_port), key);
       if (!ses)
         vlib_cli_output (vm, "no match");
       else