nat: tweak rfc7857 tcp connection tracking
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_inlines.h
index cb41896..b99d152 100644 (file)
@@ -12,6 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 /**
  * @brief The NAT inline functions
  */
@@ -24,6 +25,7 @@
 #include <vnet/fib/ip4_fib.h>
 
 #include <nat/lib/log.h>
+#include <nat/lib/ipfix_logging.h>
 #include <nat/nat44-ed/nat44_ed.h>
 
 always_inline void
@@ -123,13 +125,13 @@ nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0,
                                    u16 *lookup_dport, u8 *lookup_protocol)
 {
   icmp46_header_t *icmp0;
-  icmp_echo_header_t *echo0, *inner_echo0 = 0;
+  nat_icmp_echo_header_t *echo0, *inner_echo0 = 0;
   ip4_header_t *inner_ip0 = 0;
   void *l4_header = 0;
   icmp46_header_t *inner_icmp0;
 
   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
-  echo0 = (icmp_echo_header_t *) (icmp0 + 1);
+  echo0 = (nat_icmp_echo_header_t *) (icmp0 + 1);
 
   // avoid warning about unused variables in caller by setting to bogus values
   *lookup_sport = 0;
@@ -155,14 +157,14 @@ nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0,
        {
        case IP_PROTOCOL_ICMP:
          inner_icmp0 = (icmp46_header_t *) l4_header;
-         inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1);
+         inner_echo0 = (nat_icmp_echo_header_t *) (inner_icmp0 + 1);
          *lookup_sport = inner_echo0->identifier;
          *lookup_dport = inner_echo0->identifier;
          break;
        case IP_PROTOCOL_UDP:
        case IP_PROTOCOL_TCP:
-         *lookup_sport = ((tcp_udp_header_t *) l4_header)->dst_port;
-         *lookup_dport = ((tcp_udp_header_t *) l4_header)->src_port;
+         *lookup_sport = ((nat_tcp_udp_header_t *) l4_header)->dst_port;
+         *lookup_dport = ((nat_tcp_udp_header_t *) l4_header)->src_port;
          break;
        default:
          return NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL;
@@ -171,6 +173,12 @@ nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0,
   return 0;
 }
 
+always_inline int
+nat44_ed_tcp_is_established (nat44_ed_tcp_state_e state)
+{
+  return state == NAT44_ED_TCP_STATE_ESTABLISHED ? 1 : 0;
+}
+
 always_inline u32
 nat44_session_get_timeout (snat_main_t *sm, snat_session_t *s)
 {
@@ -184,10 +192,10 @@ nat44_session_get_timeout (snat_main_t *sm, snat_session_t *s)
       return sm->timeouts.udp;
     case IP_PROTOCOL_TCP:
       {
-       if (s->state)
-         return sm->timeouts.tcp.transitory;
-       else
+       if (nat44_ed_tcp_is_established (s->tcp_state))
          return sm->timeouts.tcp.established;
+       else
+         return sm->timeouts.tcp.transitory;
       }
     default:
       return sm->timeouts.udp;
@@ -340,8 +348,7 @@ nat_lru_free_one_with_head (snat_main_t *sm, int thread_index, f64 now,
 
       sess_timeout_time =
        s->last_heard + (f64) nat44_session_get_timeout (sm, s);
-      if (now >= sess_timeout_time ||
-         (s->tcp_closed_timestamp && now >= s->tcp_closed_timestamp))
+      if (now >= sess_timeout_time)
        {
          nat44_ed_free_session_data (sm, s, thread_index, 0);
          nat_ed_session_delete (sm, s, thread_index, 0);
@@ -701,100 +708,120 @@ is_interface_addr (snat_main_t *sm, vlib_node_runtime_t *node,
 }
 
 always_inline void
-nat44_set_tcp_session_state_i2o (snat_main_t *sm, f64 now, snat_session_t *ses,
-                                vlib_buffer_t *b, u32 thread_index)
+nat44_ed_session_reopen (u32 thread_index, snat_session_t *s)
+{
+  nat_syslog_nat44_sdel (0, s->in2out.fib_index, &s->in2out.addr,
+                        s->in2out.port, &s->ext_host_nat_addr,
+                        s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
+                        &s->ext_host_addr, s->ext_host_port, s->proto,
+                        nat44_ed_is_twice_nat_session (s));
+
+  nat_ipfix_logging_nat44_ses_delete (
+    thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
+    s->in2out.port, s->out2in.port, s->in2out.fib_index);
+  nat_ipfix_logging_nat44_ses_create (
+    thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
+    s->in2out.port, s->out2in.port, s->in2out.fib_index);
+
+  nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
+                        s->in2out.port, &s->ext_host_nat_addr,
+                        s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
+                        &s->ext_host_addr, s->ext_host_port, s->proto, 0);
+  s->total_pkts = 0;
+  s->total_bytes = 0;
+}
+
+/*
+ * "Some rise by SYN, and some by virtue FIN" - William Shakespeare
+ * TCP state tracking is patterned after RFC 7857 (and RFC 6146, which RFC 7857
+ * references). In contrast to the state machine in RFC 7857, we only
+ * transition to ESTABLISHED after seeing a full 3-way handshake (SYNs and ACKs
+ * in both directions). RFC 7857, as a means of protecting against spurious
+ * RSTs closing a session, goes back to ESTABLISHED if a data packet is
+ * received after the RST. This state machine leaves the session in transitory
+ * state once an RST is seen. Our implementation also goes beyond the RFC by
+ * supporting creation of a new session while the old session is in transitory
+ * timeout after seeing FIN packets from both sides.
+ */
+always_inline void
+nat44_set_tcp_session_state (snat_main_t *sm, f64 now, snat_session_t *ses,
+                            u8 tcp_flags, u32 thread_index,
+                            nat44_ed_dir_e dir)
 {
   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
-  u8 tcp_flags = vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags;
-  u32 tcp_ack_number = vnet_buffer (b)->ip.reass.tcp_ack_number;
-  u32 tcp_seq_number = vnet_buffer (b)->ip.reass.tcp_seq_number;
-  if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST))
-    ses->state = NAT44_SES_RST;
-  if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST))
-    ses->state = 0;
-  if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) &&
-      (ses->state & NAT44_SES_O2I_SYN))
-    ses->state = 0;
-  if (tcp_flags & TCP_FLAG_SYN)
-    ses->state |= NAT44_SES_I2O_SYN;
-  if (tcp_flags & TCP_FLAG_FIN)
-    {
-      ses->i2o_fin_seq = clib_net_to_host_u32 (tcp_seq_number);
-      ses->state |= NAT44_SES_I2O_FIN;
-    }
-  if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_O2I_FIN))
+  u8 old_flags = ses->tcp_flags[dir];
+  ses->tcp_flags[dir] |=
+    tcp_flags & (TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK);
+  if (old_flags == ses->tcp_flags[dir])
+    return;
+
+  u8 old_state = ses->tcp_state;
+
+  switch (old_state)
     {
-      if (clib_net_to_host_u32 (tcp_ack_number) > ses->o2i_fin_seq)
+    case NAT44_ED_TCP_STATE_CLOSED:
+      // ESTABLISHED when both SYN and ACK have been seen from both sides
+      if ((ses->tcp_flags[NAT44_ED_DIR_I2O] &
+          ses->tcp_flags[NAT44_ED_DIR_O2I]) == (TCP_FLAG_SYN | TCP_FLAG_ACK))
        {
-         ses->state |= NAT44_SES_O2I_FIN_ACK;
-         if (nat44_is_ses_closed (ses))
-           { // if session is now closed, save the timestamp
-             ses->tcp_closed_timestamp = now + sm->timeouts.tcp.transitory;
-             ses->last_lru_update = now;
-           }
+         ses->tcp_state = NAT44_ED_TCP_STATE_ESTABLISHED;
+         ses->lru_head_index = tsm->tcp_estab_lru_head_index;
        }
+      break;
+    case NAT44_ED_TCP_STATE_ESTABLISHED:
+      // CLOSING when a FIN is seen from either side or session has been RST
+      if ((ses->tcp_flags[dir] & TCP_FLAG_FIN) ||
+         (ses->tcp_flags[dir] & TCP_FLAG_RST))
+       {
+         ses->tcp_state = NAT44_ED_TCP_STATE_CLOSING;
+         ses->tcp_flags[NAT44_ED_DIR_I2O] = 0;
+         ses->tcp_flags[NAT44_ED_DIR_O2I] = 0;
+         // need to update last heard otherwise session might get
+         // immediately timed out if it has been idle longer than
+         // transitory timeout
+         ses->last_heard = now;
+         ses->lru_head_index = tsm->tcp_trans_lru_head_index;
+       }
+      break;
+    case NAT44_ED_TCP_STATE_CLOSING:
+      // Allow a transitory session to reopen
+      if ((ses->tcp_flags[NAT44_ED_DIR_I2O] &
+          ses->tcp_flags[NAT44_ED_DIR_O2I]) == (TCP_FLAG_SYN | TCP_FLAG_ACK))
+       {
+         nat44_ed_session_reopen (thread_index, ses);
+         ses->tcp_state = NAT44_ED_TCP_STATE_ESTABLISHED;
+         ses->lru_head_index = tsm->tcp_estab_lru_head_index;
+       }
+      break;
     }
-
-  // move the session to proper LRU
-  if (ses->state)
-    {
-      ses->lru_head_index = tsm->tcp_trans_lru_head_index;
-    }
-  else
-    {
-      ses->lru_head_index = tsm->tcp_estab_lru_head_index;
-    }
+  if (old_state == ses->tcp_state)
+    return;
+  ses->last_lru_update = now;
   clib_dlist_remove (tsm->lru_pool, ses->lru_index);
   clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index);
 }
 
+always_inline void
+nat44_set_tcp_session_state_i2o (snat_main_t *sm, f64 now, snat_session_t *ses,
+                                u8 tcp_flags, u32 thread_index)
+{
+  return nat44_set_tcp_session_state (sm, now, ses, tcp_flags, thread_index,
+                                     NAT44_ED_DIR_I2O);
+}
+
 always_inline void
 nat44_set_tcp_session_state_o2i (snat_main_t *sm, f64 now, snat_session_t *ses,
-                                u8 tcp_flags, u32 tcp_ack_number,
-                                u32 tcp_seq_number, u32 thread_index)
+                                u8 tcp_flags, u32 thread_index)
 {
-  snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
-  if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST))
-    ses->state = NAT44_SES_RST;
-  if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST))
-    ses->state = 0;
-  if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) &&
-      (ses->state & NAT44_SES_O2I_SYN))
-    ses->state = 0;
-  if (tcp_flags & TCP_FLAG_SYN)
-    ses->state |= NAT44_SES_O2I_SYN;
-  if (tcp_flags & TCP_FLAG_FIN)
-    {
-      ses->o2i_fin_seq = clib_net_to_host_u32 (tcp_seq_number);
-      ses->state |= NAT44_SES_O2I_FIN;
-    }
-  if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_FIN))
-    {
-      if (clib_net_to_host_u32 (tcp_ack_number) > ses->i2o_fin_seq)
-       ses->state |= NAT44_SES_I2O_FIN_ACK;
-      if (nat44_is_ses_closed (ses))
-       { // if session is now closed, save the timestamp
-         ses->tcp_closed_timestamp = now + sm->timeouts.tcp.transitory;
-         ses->last_lru_update = now;
-       }
-    }
-  // move the session to proper LRU
-  if (ses->state)
-    {
-      ses->lru_head_index = tsm->tcp_trans_lru_head_index;
-    }
-  else
-    {
-      ses->lru_head_index = tsm->tcp_estab_lru_head_index;
-    }
-  clib_dlist_remove (tsm->lru_pool, ses->lru_index);
-  clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index);
+  return nat44_set_tcp_session_state (sm, now, ses, tcp_flags, thread_index,
+                                     NAT44_ED_DIR_O2I);
 }
 
 always_inline void
 nat44_session_update_counters (snat_session_t *s, f64 now, uword bytes,
                               u32 thread_index)
 {
+  // regardless of TCP state, reset the timer if a data packet is seen.
   s->last_heard = now;
   s->total_pkts++;
   s->total_bytes += bytes;