nat: TCP state tracking based on RFC 7857/RFC 6146
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_in2out.c
index 73dacce..99db601 100644 (file)
@@ -49,6 +49,7 @@ typedef struct
   u8 is_slow_path;
   u8 translation_via_i2of;
   u8 lookup_skipped;
+  u8 tcp_state;
 } nat_in2out_ed_trace_t;
 
 static u8 *
@@ -78,7 +79,7 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args)
     {
       if (t->lookup_skipped)
        {
-         s = format (s, "\n lookup skipped - cached session index used");
+         s = format (s, "\n  lookup skipped - cached session index used");
        }
       else
        {
@@ -86,6 +87,11 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args)
                      &t->search_key);
        }
     }
+  if (IP_PROTOCOL_TCP == t->i2of.match.proto)
+    {
+      s = format (s, "\n  TCP state: %U", format_nat44_ed_tcp_state,
+                 t->tcp_state);
+    }
 
   return s;
 }
@@ -201,50 +207,129 @@ nat_ed_alloc_addr_and_port_with_snat_address (
 }
 
 static int
-nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto,
+nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index,
+                           u32 tx_sw_if_index, u32 nat_proto,
                            u32 thread_index, ip4_address_t s_addr,
-                           u32 snat_thread_index, snat_session_t *s,
-                           ip4_address_t *outside_addr, u16 *outside_port)
+                           ip4_address_t d_addr, u32 snat_thread_index,
+                           snat_session_t *s, ip4_address_t *outside_addr,
+                           u16 *outside_port)
 {
-  int i;
-  snat_address_t *a, *ga = 0;
-
   if (vec_len (sm->addresses) > 0)
     {
       u32 s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
+      snat_address_t *a, *ja = 0, *ra = 0, *ba = 0;
+      int i;
 
-      for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
+      // output feature
+      if (tx_sw_if_index != ~0)
        {
-         a = sm->addresses + i;
-         if (a->fib_index == rx_fib_index)
+         for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
            {
-             return nat_ed_alloc_addr_and_port_with_snat_address (
-               sm, nat_proto, thread_index, a, sm->port_per_thread,
-               snat_thread_index, s, outside_addr, outside_port);
+             a = sm->addresses + i;
+             if (a->fib_index == rx_fib_index)
+               {
+                 if (a->sw_if_index == tx_sw_if_index)
+                   {
+                     if ((a->addr_len != ~0) &&
+                         (a->net.as_u32 ==
+                          (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
+
+                       {
+                         return nat_ed_alloc_addr_and_port_with_snat_address (
+                           sm, nat_proto, thread_index, a,
+                           sm->port_per_thread, snat_thread_index, s,
+                           outside_addr, outside_port);
+                       }
+                     ra = a;
+                   }
+                 ja = a;
+               }
+             else if (a->fib_index == ~0)
+               {
+                 ba = a;
+               }
            }
-         else if (a->fib_index == ~0)
+         for (i = 0; i < s_addr_offset; ++i)
            {
-             ga = a;
+             a = sm->addresses + i;
+             if (a->fib_index == rx_fib_index)
+               {
+                 if (a->sw_if_index == tx_sw_if_index)
+                   {
+                     if ((a->addr_len != ~0) &&
+                         (a->net.as_u32 ==
+                          (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
+
+                       {
+                         return nat_ed_alloc_addr_and_port_with_snat_address (
+                           sm, nat_proto, thread_index, a,
+                           sm->port_per_thread, snat_thread_index, s,
+                           outside_addr, outside_port);
+                       }
+                     ra = a;
+                   }
+                 ja = a;
+               }
+             else if (a->fib_index == ~0)
+               {
+                 ba = a;
+               }
            }
-       }
-
-      for (i = 0; i < s_addr_offset; ++i)
-       {
-         a = sm->addresses + i;
-         if (a->fib_index == rx_fib_index)
+         if (ra)
            {
              return nat_ed_alloc_addr_and_port_with_snat_address (
-               sm, nat_proto, thread_index, a, sm->port_per_thread,
+               sm, nat_proto, thread_index, ra, sm->port_per_thread,
                snat_thread_index, s, outside_addr, outside_port);
            }
-         else if (a->fib_index == ~0)
+       }
+      else
+       {
+         // first try nat pool addresses to sw interface addresses mappings
+         for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
            {
-             ga = a;
+             a = sm->addresses + i;
+             if (a->fib_index == rx_fib_index)
+               {
+                 if ((a->addr_len != ~0) &&
+                     (a->net.as_u32 ==
+                      (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
+                   {
+                     return nat_ed_alloc_addr_and_port_with_snat_address (
+                       sm, nat_proto, thread_index, a, sm->port_per_thread,
+                       snat_thread_index, s, outside_addr, outside_port);
+                   }
+                 ja = a;
+               }
+             else if (a->fib_index == ~0)
+               {
+                 ba = a;
+               }
+           }
+         for (i = 0; i < s_addr_offset; ++i)
+           {
+             a = sm->addresses + i;
+             if (a->fib_index == rx_fib_index)
+               {
+                 if ((a->addr_len != ~0) &&
+                     (a->net.as_u32 ==
+                      (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
+                   {
+                     return nat_ed_alloc_addr_and_port_with_snat_address (
+                       sm, nat_proto, thread_index, a, sm->port_per_thread,
+                       snat_thread_index, s, outside_addr, outside_port);
+                   }
+                 ja = a;
+               }
+             else if (a->fib_index == ~0)
+               {
+                 ba = a;
+               }
            }
        }
 
-      if (ga)
+      if (ja || ba)
        {
+         a = ja ? ja : ba;
          return nat_ed_alloc_addr_and_port_with_snat_address (
            sm, nat_proto, thread_index, a, sm->port_per_thread,
            snat_thread_index, s, outside_addr, outside_port);
@@ -266,7 +351,6 @@ nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
     .fp_addr = {.ip4.as_u32 = addr.as_u32,}
     ,
   };
-  // TODO: multiple vrfs none can resolve addr
   vec_foreach (outside_fib, sm->outside_fibs)
     {
       fei = fib_table_lookup (outside_fib->fib_index, &pfx);
@@ -307,7 +391,7 @@ nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
 static u32
 slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
              ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
-             u16 r_port, u8 proto, u32 rx_fib_index,
+             u16 r_port, u8 proto, u32 rx_fib_index, u32 tx_sw_if_index,
              snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
              u32 thread_index, f64 now)
 {
@@ -415,9 +499,9 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
        }
       nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
 
-      if (nat_ed_alloc_addr_and_port (sm, rx_fib_index, proto, thread_index,
-                                     l_addr, tsm->snat_thread_index, s,
-                                     &outside_addr, &outside_port))
+      if (nat_ed_alloc_addr_and_port (
+           sm, rx_fib_index, tx_sw_if_index, proto, thread_index, l_addr,
+           r_addr, tsm->snat_thread_index, s, &outside_addr, &outside_port))
        {
          nat_elog_notice (sm, "addresses exhausted");
          b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
@@ -597,7 +681,9 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
        {
          if (ip->protocol == IP_PROTOCOL_TCP)
            {
-             nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index);
+             nat44_set_tcp_session_state_i2o (
+               sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags,
+               thread_index);
            }
          /* Accounting */
          nat44_session_update_counters (s, now,
@@ -619,7 +705,7 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
                                       ip4_header_t *ip, u16 src_port,
                                       u16 dst_port, u32 thread_index,
                                       u32 rx_sw_if_index, u32 tx_sw_if_index,
-                                      f64 now, int is_multi_worker)
+                                      int is_multi_worker)
 {
   clib_bihash_kv_16_8_t kv, value;
   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
@@ -637,12 +723,6 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
       s =
        pool_elt_at_index (tsm->sessions,
                           ed_value_get_session_index (&value));
-      if (nat44_is_ses_closed (s)
-         && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp))
-       {
-         nat44_ed_free_session_data (sm, s, thread_index, 0);
-         nat_ed_session_delete (sm, s, thread_index, 1);
-       }
       return 1;
     }
 
@@ -682,11 +762,11 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
 
       /* hairpinning */
       pool_foreach (i, sm->output_feature_interfaces)
-       {
-        if ((nat44_ed_is_interface_inside (i)) &&
-            (rx_sw_if_index == i->sw_if_index))
-          return 0;
-      }
+       {
+         if ((nat44_ed_is_interface_inside (i)) &&
+             (rx_sw_if_index == i->sw_if_index))
+           return 0;
+       }
       return 1;
     }
 
@@ -696,9 +776,10 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
 static inline u32
 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
                          icmp46_header_t *icmp, u32 sw_if_index,
-                         u32 rx_fib_index, vlib_node_runtime_t *node,
-                         u32 next, f64 now, u32 thread_index,
-                         snat_session_t **s_p, int is_multi_worker)
+                         u32 tx_sw_if_index, u32 rx_fib_index,
+                         vlib_node_runtime_t *node, u32 next, f64 now,
+                         u32 thread_index, snat_session_t **s_p,
+                         int is_multi_worker)
 {
   vlib_main_t *vm = vlib_get_main ();
   u16 checksum;
@@ -717,11 +798,11 @@ icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
       return NAT_NEXT_DROP;
     }
 
-  if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
+  if (tx_sw_if_index != ~0)
     {
       if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
            sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
-           vnet_buffer (b)->sw_if_index[VLIB_TX], now, is_multi_worker)))
+           tx_sw_if_index, is_multi_worker)))
        {
          return next;
        }
@@ -742,9 +823,10 @@ icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
       return NAT_NEXT_DROP;
     }
 
-  next = slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address,
-                      lookup_sport, lookup_dport, ip->protocol, rx_fib_index,
-                      &s, node, next, thread_index, vlib_time_now (vm));
+  next =
+    slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address, lookup_sport,
+                 lookup_dport, ip->protocol, rx_fib_index, tx_sw_if_index, &s,
+                 node, next, thread_index, vlib_time_now (vm));
 
   if (NAT_NEXT_DROP == next)
     goto out;
@@ -1093,22 +1175,6 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
          goto trace0;
        }
 
-      if (s0->tcp_closed_timestamp)
-       {
-         if (now >= s0->tcp_closed_timestamp)
-           {
-             // session is closed, go slow path, freed in slow path
-             next[0] = def_slow;
-           }
-         else
-           {
-             // session in transitory timeout, drop
-             b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
-             next[0] = NAT_NEXT_DROP;
-           }
-         goto trace0;
-       }
-
       // drop if session expired
       u64 sess_timeout_time;
       sess_timeout_time =
@@ -1159,7 +1225,9 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
        case IP_PROTOCOL_TCP:
          vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
                                         thread_index, cntr_sw_if_index0, 1);
-         nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
+         nat44_set_tcp_session_state_i2o (
+           sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
+           thread_index);
          break;
        case IP_PROTOCOL_UDP:
          vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
@@ -1202,6 +1270,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
              clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
              clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
              t->translation_via_i2of = (&s0->i2o == f);
+             t->tcp_state = s0->tcp_state;
            }
          else
            {
@@ -1316,8 +1385,9 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
       if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
        {
          next[0] = icmp_in2out_ed_slow_path (
-           sm, b0, ip0, icmp0, rx_sw_if_index0, rx_fib_index0, node, next[0],
-           now, thread_index, &s0, is_multi_worker);
+           sm, b0, ip0, icmp0, rx_sw_if_index0, tx_sw_if_index0,
+           rx_fib_index0, node, next[0], now, thread_index, &s0,
+           is_multi_worker);
          if (NAT_NEXT_DROP != next[0] && s0 &&
              NAT_ED_TRNSL_ERR_SUCCESS !=
                (translation_error = nat_6t_flow_buf_translate_i2o (
@@ -1345,13 +1415,6 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
          s0 =
            pool_elt_at_index (tsm->sessions,
                               ed_value_get_session_index (&value0));
-
-         if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
-           {
-             nat44_ed_free_session_data (sm, s0, thread_index, 0);
-             nat_ed_session_delete (sm, s0, thread_index, 1);
-             s0 = NULL;
-           }
        }
 
       if (!s0)
@@ -1361,7 +1424,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
              if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
                    sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
                    vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
-                   rx_sw_if_index0, tx_sw_if_index0, now, is_multi_worker)))
+                   rx_sw_if_index0, tx_sw_if_index0, is_multi_worker)))
                goto trace0;
 
              /*
@@ -1383,11 +1446,12 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
                goto trace0;
            }
 
-         next[0] = slow_path_ed (
-           vm, sm, b0, ip0->src_address, ip0->dst_address,
-           vnet_buffer (b0)->ip.reass.l4_src_port,
-           vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->protocol,
-           rx_fib_index0, &s0, node, next[0], thread_index, now);
+         next[0] =
+           slow_path_ed (vm, sm, b0, ip0->src_address, ip0->dst_address,
+                         vnet_buffer (b0)->ip.reass.l4_src_port,
+                         vnet_buffer (b0)->ip.reass.l4_dst_port,
+                         ip0->protocol, rx_fib_index0, tx_sw_if_index0, &s0,
+                         node, next[0], thread_index, now);
 
          if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
            goto trace0;
@@ -1414,7 +1478,9 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
        {
          vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
                                         thread_index, cntr_sw_if_index0, 1);
-         nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
+         nat44_set_tcp_session_state_i2o (
+           sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
+           thread_index);
        }
       else
        {
@@ -1447,6 +1513,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
              clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
              clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
              t->translation_via_i2of = 1;
+             t->tcp_state = s0->tcp_state;
            }
 
          else