nat: ED: global session LRU list
[vpp.git] / src / plugins / nat / in2out_ed.c
index ca737d5..807a716 100644 (file)
@@ -107,6 +107,16 @@ nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg)
       if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &ed_kv, 0))
        nat_elog_warn ("out2in_ed key del failed");
 
+      ed_bihash_kv_t bihash_key;
+      clib_memset (&bihash_key, 0, sizeof (bihash_key));
+      bihash_key.k.dst_address = s->ext_host_addr.as_u32;
+      bihash_key.k.dst_port = s->ext_host_port;
+      bihash_key.k.src_address = s->out2in.addr.as_u32;
+      bihash_key.k.src_port = s->out2in.port;
+      bihash_key.k.protocol = s->out2in.protocol;
+      clib_bihash_add_del_16_8 (&sm->ed_ext_ports, &bihash_key.kv,
+                               0 /* is_add */ );
+
       if (snat_is_unk_proto_session (s))
        goto delete;
 
@@ -182,6 +192,96 @@ icmp_in2out_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
   return next0;
 }
 
+/*
+ * Return a pseudo-random port between min and max by scaling one draw of
+ * random_u32 () down to the size of the requested range.  The draw uses
+ * snat_main.random_seed.
+ * NOTE(review): the result stays within [min, max] only if random_u32 ()
+ * never exceeds random_u32_max () and max >= min - confirm with callers.
+ */
+static_always_inline u16
+snat_random_port (u16 min, u16 max)
+{
+  /* number of candidate ports in the requested range */
+  u32 span = max - min + 1;
+  /* how many raw random values are folded onto a single port */
+  u32 bucket = random_u32_max () / span + 1;
+
+  return min + random_u32 (&snat_main.random_seed) / bucket;
+}
+
+/*
+ * Allocate an outside address and port for an endpoint-dependent session.
+ *
+ * Walks 'addresses' and, for every entry whose fib_index equals
+ * 'fib_index', tries up to 'port_per_thread' consecutive port offsets
+ * (wrapping inside [0, port_per_thread - 1]) starting from a random one.
+ * A candidate is taken when the key built from the remote address/port in
+ * 'key', the protocol in 'key1' and the candidate outside address/port is
+ * added to snat_main.ed_ext_ports with return value 0.  If no matching
+ * address yields a port, the last address seen with fib_index == ~0 is
+ * tried the same way.
+ *
+ * On success the per-address busy counters are incremented, key1->addr and
+ * key1->port (network byte order) are filled in and 0 is returned.
+ * Returns 1 when the protocol is not covered by foreach_snat_protocol or
+ * when no port could be reserved; the latter case is also reported through
+ * snat_ipfix_logging_addresses_exhausted ().
+ */
+static int
+nat_alloc_addr_and_port_ed (snat_address_t * addresses, u32 fib_index,
+                           u32 thread_index, nat_ed_ses_key_t * key,
+                           snat_session_key_t * key1, u16 port_per_thread,
+                           u32 snat_thread_index)
+{
+  int i;
+  snat_address_t *a, *ga = 0;
+  u32 portnum;
+
+  /* first port number of the slice used for snat_thread_index */
+  const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
+  ed_bihash_kv_t bihash_key;
+  clib_memset (&bihash_key, 0, sizeof (bihash_key));
+  bihash_key.k.dst_address = key->r_addr.as_u32;
+  bihash_key.k.dst_port = key->r_port;
+  bihash_key.k.protocol = key1->protocol;
+
+  for (i = 0; i < vec_len (addresses); i++)
+    {
+      a = addresses + i;
+      switch (key1->protocol)
+       {
+#define _(N, j, n, s)                                                     \
+  case SNAT_PROTOCOL_##N:                                                 \
+    if (a->fib_index == fib_index)                                        \
+      {                                                                   \
+        bihash_key.k.src_address = a->addr.as_u32;                        \
+        /* start offset in [0, port_per_thread - 1], like the wrap */     \
+        /* below; an offset equal to port_per_thread would land on */     \
+        /* the port_thread_offset of the next thread index */             \
+        u16 port = snat_random_port (0, port_per_thread - 1);             \
+        u16 attempts = port_per_thread;                                   \
+        while (attempts > 0)                                              \
+          {                                                               \
+            --attempts;                                                   \
+            portnum = port_thread_offset + port;                          \
+            bihash_key.k.src_port = clib_host_to_net_u16 (portnum);       \
+            /* NOTE(review): is_add == 2 presumably fails when the */     \
+            /* key already exists - confirm against bihash template */    \
+            int rv = clib_bihash_add_del_16_8 (                           \
+                &snat_main.ed_ext_ports, &bihash_key.kv, 2 /* is_add */); \
+            if (0 == rv)                                                  \
+              {                                                           \
+                ++a->busy_##n##_port_refcounts[portnum];                  \
+                a->busy_##n##_ports_per_thread[thread_index]++;           \
+                a->busy_##n##_ports++;                                    \
+                key1->addr = a->addr;                                     \
+                key1->port = clib_host_to_net_u16 (portnum);              \
+                return 0;                                                 \
+              }                                                           \
+            /* try the next offset, wrapping inside the slice */          \
+            port = (port + 1) % port_per_thread;                          \
+          }                                                               \
+      }                                                                   \
+    else if (a->fib_index == ~0)                                          \
+      {                                                                   \
+        /* remember an address with fib_index ~0 as a fallback */         \
+        ga = a;                                                           \
+      }                                                                   \
+    break;
+
+         foreach_snat_protocol;
+       default:
+         nat_elog_info ("unknown protocol");
+         return 1;
+       }
+    }
+
+  if (ga)
+    {
+      /* fake fib_index to reuse macro */
+      fib_index = ~0;
+      a = ga;
+      switch (key1->protocol)
+       {
+         foreach_snat_protocol;
+       default:
+         nat_elog_info ("unknown protocol");
+         return 1;
+       }
+    }
+
+#undef _
+
+  /* Totally out of translations to use... */
+  snat_ipfix_logging_addresses_exhausted (thread_index, 0);
+  return 1;
+}
+
 static u32
 slow_path_ed (snat_main_t * sm,
              vlib_buffer_t * b,
@@ -234,16 +334,16 @@ slow_path_ed (snat_main_t * sm,
       (sm, key0, &key1, 0, 0, 0, &lb, 0, &identity_nat))
     {
       /* Try to create dynamic translation */
-      if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index,
-                                              thread_index, &key1,
-                                              sm->port_per_thread,
-                                              tsm->snat_thread_index))
+      if (nat_alloc_addr_and_port_ed (sm->addresses, rx_fib_index,
+                                     thread_index, key, &key1,
+                                     sm->port_per_thread,
+                                     tsm->snat_thread_index))
        {
          if (cleared || !nat44_out_of_ports_cleanup (thread_index, now) ||
-             snat_alloc_outside_address_and_port (sm->addresses,
-                                                  rx_fib_index, thread_index,
-                                                  &key1, sm->port_per_thread,
-                                                  tsm->snat_thread_index))
+             nat_alloc_addr_and_port_ed (sm->addresses, rx_fib_index,
+                                         thread_index, key, &key1,
+                                         sm->port_per_thread,
+                                         tsm->snat_thread_index))
            {
              nat_elog_notice ("addresses exhausted");
              b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
@@ -282,6 +382,7 @@ slow_path_ed (snat_main_t * sm,
       if (!is_sm)
        snat_free_outside_address_and_port (sm->addresses,
                                            thread_index, &key1);
+      b->error = node->errors[NAT_IN2OUT_ED_ERROR_CANNOT_CREATE_USER];
       return NAT_NEXT_DROP;
     }
 
@@ -293,6 +394,7 @@ slow_path_ed (snat_main_t * sm,
       if (!is_sm)
        snat_free_outside_address_and_port (sm->addresses,
                                            thread_index, &key1);
+      b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_USER_SESS_EXCEEDED];
       return NAT_NEXT_DROP;
     }
 
@@ -453,7 +555,8 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
        {
          if (ip->protocol == IP_PROTOCOL_TCP)
            {
-             if (nat44_set_tcp_session_state_i2o (sm, s, b, thread_index))
+             if (nat44_set_tcp_session_state_i2o
+                 (sm, now, s, b, thread_index))
                return 1;
            }
          /* Accounting */
@@ -712,6 +815,7 @@ nat44_ed_in2out_unknown_proto (snat_main_t * sm,
                                  thread_index);
       if (!u)
        {
+         b->error = node->errors[NAT_IN2OUT_ED_ERROR_CANNOT_CREATE_USER];
          nat_elog_warn ("create NAT user failed");
          return 0;
        }
@@ -780,6 +884,7 @@ nat44_ed_in2out_unknown_proto (snat_main_t * sm,
       s = nat_ed_session_alloc (sm, u, thread_index, now);
       if (!s)
        {
+         b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_USER_SESS_EXCEEDED];
          nat44_delete_user_with_no_session (sm, u, thread_index);
          nat_elog_warn ("create NAT session failed");
          return 0;
@@ -949,20 +1054,34 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm,
            }
          s0 = pool_elt_at_index (tsm->sessions, value0.value);
 
+         if (s0->tcp_close_timestamp)
+           {
+             if (now >= s0->tcp_close_timestamp)
+               {
+                 // session is closed, go slow path
+                 next0 = def_slow;
+               }
+             else
+               {
+                 // session in transitory timeout, drop
+                 b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
+                 next0 = NAT_NEXT_DROP;
+               }
+             goto trace0;
+           }
+
          // drop if session expired
          u64 sess_timeout_time;
          sess_timeout_time = s0->last_heard +
            (f64) nat44_session_get_timeout (sm, s0);
          if (now >= sess_timeout_time)
            {
-             // delete session
              nat_free_session_data (sm, s0, thread_index, 0);
              nat44_delete_session (sm, s0, thread_index);
-
-             next0 = NAT_NEXT_DROP;
+             // session is closed, go slow path
+             next0 = def_slow;
              goto trace0;
            }
-         //
 
          b0->flags |= VNET_BUFFER_F_IS_NATED;
 
@@ -1012,7 +1131,8 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm,
                  tcp0->checksum = ip_csum_fold (sum0);
                }
              tcp_packets++;
-             if (nat44_set_tcp_session_state_i2o (sm, s0, b0, thread_index))
+             if (nat44_set_tcp_session_state_i2o
+                 (sm, now, s0, b0, thread_index))
                goto trace0;
            }
          else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
@@ -1211,9 +1331,20 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm,
                      vnet_buffer (b0)->ip.reass.l4_src_port,
                      vnet_buffer (b0)->ip.reass.l4_dst_port);
 
-         if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
+         if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0))
            {
+             s0 = pool_elt_at_index (tsm->sessions, value0.value);
+
+             if (s0->tcp_close_timestamp && now >= s0->tcp_close_timestamp)
+               {
+                 nat_free_session_data (sm, s0, thread_index, 0);
+                 nat44_delete_session (sm, s0, thread_index);
+                 s0 = NULL;
+               }
+           }
 
+         if (!s0)
+           {
              if (is_output_feature)
                {
                  if (PREDICT_FALSE
@@ -1255,11 +1386,6 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm,
                goto trace0;
 
            }
-         else
-           {
-             s0 = pool_elt_at_index (tsm->sessions, value0.value);
-           }
-
 
          b0->flags |= VNET_BUFFER_F_IS_NATED;
 
@@ -1309,7 +1435,8 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm,
                  tcp0->checksum = ip_csum_fold (sum0);
                }
              tcp_packets++;
-             if (nat44_set_tcp_session_state_i2o (sm, s0, b0, thread_index))
+             if (nat44_set_tcp_session_state_i2o
+                 (sm, now, s0, b0, thread_index))
                goto trace0;
            }
          else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment