nat: tweak rfc7857 tcp connection tracking 92/35692/7
author Ole Troan <ot@cisco.com>
Thu, 17 Mar 2022 10:58:38 +0000 (11:58 +0100)
committer Ole Trøan <otroan@employees.org>
Thu, 21 Apr 2022 10:35:58 +0000 (10:35 +0000)
The RFC7857 state machine introduced in 56c492a is a trade-off.
It tries to retain sessions as much as possible and also offers
some protection against spurious RST by re-establishing sessions if data
is received after the RST. From experience in the wild, this algorithm is
a little too liberal, as it leaves too many spurious established sessions
in the session table.

E.g. an observed pattern is:
client      server
         <- FIN, ACK
ACK      ->
ACK      ->
RST, ACK ->

With the current state machine this would leave the session in established state.

These proposed changes do:
 - require 3-way handshake to establish session.
   (current requires only to see SYNs from both sides)
 - RST moves the session to transitory; there is no recovery even if data is sent afterwards
 - Only a single FIN is needed to move to transitory

Fixes: 56c492aa0502751de2dd9d890096a82c5f04776d
Type: fix
Signed-off-by: Ole Troan <ot@cisco.com>
Change-Id: I92e593e00b2efe48d04997642d85bd59e0eaa2ea
Signed-off-by: Ole Troan <ot@cisco.com>
src/plugins/nat/nat44-ed/nat44_ed.c
src/plugins/nat/nat44-ed/nat44_ed.h
src/plugins/nat/nat44-ed/nat44_ed_format.c
src/plugins/nat/nat44-ed/nat44_ed_inlines.h
test/test_nat44_ed.py

index f2d2c71..9c79753 100644 (file)
@@ -2456,8 +2456,6 @@ nat44_plugin_enable (nat44_config_t c)
 
   nat44_ed_db_init (sm->max_translations_per_thread, sm->translation_buckets);
 
-  nat44_ed_init_tcp_state_stable (sm);
-
   nat_affinity_enable ();
 
   nat_reset_timeouts (&sm->timeouts);
index 05503a4..5b5b2ec 100644 (file)
@@ -123,14 +123,10 @@ typedef enum
 
 typedef enum
 {
-  NAT44_ED_TCP_FLAG_NONE = 0,
-  NAT44_ED_TCP_FLAG_FIN,
+  NAT44_ED_TCP_FLAG_FIN = 0,
   NAT44_ED_TCP_FLAG_SYN,
-  NAT44_ED_TCP_FLAG_SYNFIN,
   NAT44_ED_TCP_FLAG_RST,
-  NAT44_ED_TCP_FLAG_FINRST,
-  NAT44_ED_TCP_FLAG_SYNRST,
-  NAT44_ED_TCP_FLAG_SYNFINRST,
+  NAT44_ED_TCP_FLAG_ACK,
   NAT44_ED_TCP_N_FLAG,
 } nat44_ed_tcp_flag_e;
 
@@ -145,15 +141,8 @@ typedef enum
 typedef enum
 {
   NAT44_ED_TCP_STATE_CLOSED = 0,
-  NAT44_ED_TCP_STATE_SYN_I2O,
-  NAT44_ED_TCP_STATE_SYN_O2I,
   NAT44_ED_TCP_STATE_ESTABLISHED,
-  NAT44_ED_TCP_STATE_FIN_I2O,
-  NAT44_ED_TCP_STATE_FIN_O2I,
-  NAT44_ED_TCP_STATE_RST_TRANS,
-  NAT44_ED_TCP_STATE_FIN_TRANS,
-  NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O,
-  NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I,
+  NAT44_ED_TCP_STATE_CLOSING,
   NAT44_ED_TCP_N_STATE,
 } nat44_ed_tcp_state_e;
 
@@ -336,6 +325,7 @@ typedef CLIB_PACKED(struct
   u16 ext_host_nat_port;
 
   /* TCP session state */
+  u8 tcp_flags[NAT44_ED_N_DIR];
   nat44_ed_tcp_state_e tcp_state;
 
   /* per vrf sessions index */
index f643d1c..ee3e925 100644 (file)
@@ -303,34 +303,11 @@ format_nat44_ed_tcp_state (u8 *s, va_list *args)
     case NAT44_ED_TCP_STATE_CLOSED:
       s = format (s, "closed");
       break;
-    case NAT44_ED_TCP_STATE_SYN_I2O:
-      s = format (s, "SYN seen in in2out direction");
-      break;
-    case NAT44_ED_TCP_STATE_SYN_O2I:
-      s = format (s, "SYN seen in out2in direction");
-      break;
     case NAT44_ED_TCP_STATE_ESTABLISHED:
-      s = format (s, "SYN seen in both directions/established");
-      break;
-    case NAT44_ED_TCP_STATE_FIN_I2O:
-      s = format (s, "FIN seen in in2out direction");
-      break;
-    case NAT44_ED_TCP_STATE_FIN_O2I:
-      s = format (s, "FIN seen in out2in direction");
-      break;
-    case NAT44_ED_TCP_STATE_RST_TRANS:
-      s = format (s, "RST seen/transitory timeout");
-      break;
-    case NAT44_ED_TCP_STATE_FIN_TRANS:
-      s = format (s, "FIN seen in both directions/transitory timeout");
-      break;
-    case NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I:
-      s = format (s, "FIN seen in both directions/transitory timeout/session "
-                    "reopening in out2in direction");
+      s = format (s, "established");
       break;
-    case NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O:
-      s = format (s, "FIN seen in both directions/transitory timeout/session "
-                    "reopening in in2out direction");
+    case NAT44_ED_TCP_STATE_CLOSING:
+      s = format (s, "closing");
       break;
     case NAT44_ED_TCP_N_STATE:
       s = format (s, "BUG! unexpected N_STATE! BUG!");
index f50ecf7..b99d152 100644 (file)
@@ -176,21 +176,7 @@ nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0,
 always_inline int
 nat44_ed_tcp_is_established (nat44_ed_tcp_state_e state)
 {
-  static int lookup[] = {
-    [NAT44_ED_TCP_STATE_CLOSED] = 0,
-    [NAT44_ED_TCP_STATE_SYN_I2O] = 0,
-    [NAT44_ED_TCP_STATE_SYN_O2I] = 0,
-    [NAT44_ED_TCP_STATE_ESTABLISHED] = 1,
-    [NAT44_ED_TCP_STATE_FIN_I2O] = 1,
-    [NAT44_ED_TCP_STATE_FIN_O2I] = 1,
-    [NAT44_ED_TCP_STATE_RST_TRANS] = 0,
-    [NAT44_ED_TCP_STATE_FIN_TRANS] = 0,
-    [NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O] = 0,
-    [NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I] = 0,
-    [NAT44_ED_TCP_N_STATE] = 0,
-  };
-  ASSERT (state <= ARRAY_LEN (lookup));
-  return lookup[state];
+  return state == NAT44_ED_TCP_STATE_ESTABLISHED ? 1 : 0;
 }
 
 always_inline u32
@@ -745,251 +731,74 @@ nat44_ed_session_reopen (u32 thread_index, snat_session_t *s)
   s->total_bytes = 0;
 }
 
-always_inline void
-nat44_ed_init_tcp_state_stable (snat_main_t *sm)
-{
-  /* first make sure whole table is initialised in a way where state
-   * is not changed, then define special cases */
-  nat44_ed_tcp_state_e s;
-  for (s = 0; s < NAT44_ED_TCP_N_STATE; ++s)
-    {
-      int i;
-      for (i = 0; i < NAT44_ED_N_DIR; ++i)
-       {
-         int j = 0;
-         for (j = 0; j < NAT44_ED_TCP_N_FLAG; ++j)
-           {
-             sm->tcp_state_change_table[s][i][j] = s;
-           }
-       }
-    }
-
-  /* CLOSED and any kind of SYN -> HALF-OPEN */
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_SYN] =
-    NAT44_ED_TCP_STATE_SYN_I2O;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_SYN] =
-    NAT44_ED_TCP_STATE_SYN_O2I;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_SYNFIN] =
-    NAT44_ED_TCP_STATE_SYN_I2O;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_SYNFIN] =
-    NAT44_ED_TCP_STATE_SYN_O2I;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_SYNFINRST] =
-    NAT44_ED_TCP_STATE_SYN_I2O;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_SYNFINRST] =
-    NAT44_ED_TCP_STATE_SYN_O2I;
-
-  /* HALF-OPEN and any kind of SYN in right direction -> ESTABLISHED */
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_I2O][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_SYN] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_O2I][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_SYN] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_I2O][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_SYNFIN] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_O2I][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_SYNFIN] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_I2O][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_SYNFINRST] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_O2I][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_SYNFINRST] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-
-  /* ESTABLISHED and any kind of RST -> RST_TRANS */
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_RST] =
-    NAT44_ED_TCP_STATE_RST_TRANS;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_RST] =
-    NAT44_ED_TCP_STATE_RST_TRANS;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_SYNRST] =
-    NAT44_ED_TCP_STATE_RST_TRANS;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_SYNRST] =
-    NAT44_ED_TCP_STATE_RST_TRANS;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_FINRST] =
-    NAT44_ED_TCP_STATE_RST_TRANS;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_FINRST] =
-    NAT44_ED_TCP_STATE_RST_TRANS;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_SYNFINRST] =
-    NAT44_ED_TCP_STATE_RST_TRANS;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_SYNFINRST] =
-    NAT44_ED_TCP_STATE_RST_TRANS;
-
-  /* ESTABLISHED and any kind of FIN without RST -> HALF-CLOSED */
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_FIN] =
-    NAT44_ED_TCP_STATE_FIN_I2O;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_FIN] =
-    NAT44_ED_TCP_STATE_FIN_O2I;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_SYNFIN] =
-    NAT44_ED_TCP_STATE_FIN_I2O;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_SYNFIN] =
-    NAT44_ED_TCP_STATE_FIN_O2I;
-
-  /* HALF-CLOSED and any kind of FIN -> FIN_TRANS */
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_I2O][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_FIN] =
-    NAT44_ED_TCP_STATE_FIN_TRANS;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_O2I][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_FIN] =
-    NAT44_ED_TCP_STATE_FIN_TRANS;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_I2O][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_SYNFIN] =
-    NAT44_ED_TCP_STATE_FIN_TRANS;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_O2I][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_SYNFIN] =
-    NAT44_ED_TCP_STATE_FIN_TRANS;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_I2O][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_FINRST] =
-    NAT44_ED_TCP_STATE_FIN_TRANS;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_O2I][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_FINRST] =
-    NAT44_ED_TCP_STATE_FIN_TRANS;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_I2O][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_SYNFINRST] =
-    NAT44_ED_TCP_STATE_FIN_TRANS;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_O2I][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_SYNFINRST] =
-    NAT44_ED_TCP_STATE_FIN_TRANS;
-
-  /* RST_TRANS and anything non-RST -> ESTABLISHED */
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_NONE] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_NONE] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_SYN] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_SYN] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_FIN] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_FIN] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_SYNFIN] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_SYNFIN] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-
-  /* FIN_TRANS and any kind of SYN -> HALF-REOPEN */
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_SYN] =
-    NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_SYN] =
-    NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_SYNRST] =
-    NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_SYNRST] =
-    NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_SYNFIN] =
-    NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_SYNFIN] =
-    NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_I2O]
-                           [NAT44_ED_TCP_FLAG_SYNFINRST] =
-    NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_O2I]
-                           [NAT44_ED_TCP_FLAG_SYNFINRST] =
-    NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I;
-
-  /* HALF-REOPEN and any kind of SYN in right direction -> ESTABLISHED */
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O]
-                           [NAT44_ED_DIR_O2I][NAT44_ED_TCP_FLAG_SYN] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I]
-                           [NAT44_ED_DIR_I2O][NAT44_ED_TCP_FLAG_SYN] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O]
-                           [NAT44_ED_DIR_O2I][NAT44_ED_TCP_FLAG_SYNRST] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I]
-                           [NAT44_ED_DIR_I2O][NAT44_ED_TCP_FLAG_SYNRST] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O]
-                           [NAT44_ED_DIR_O2I][NAT44_ED_TCP_FLAG_SYNFIN] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I]
-                           [NAT44_ED_DIR_I2O][NAT44_ED_TCP_FLAG_SYNFIN] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O]
-                           [NAT44_ED_DIR_O2I][NAT44_ED_TCP_FLAG_SYNFINRST] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-  sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I]
-                           [NAT44_ED_DIR_I2O][NAT44_ED_TCP_FLAG_SYNFINRST] =
-    NAT44_ED_TCP_STATE_ESTABLISHED;
-}
-
-/* TCP state tracking according to RFC 7857 (and RFC 6146, which is referenced
- * by RFC 7857). Our implementation also goes beyond by supporting creation of
- * a new session while old session is in transitory timeout after seeing FIN
- * packets from both sides. */
+/*
+ * "Some rise by SYN, and some by virtue FIN" - William Shakespeare
+ * TCP state tracking patterned after RFC 7857 (and RFC 6146, which is
+ * referenced by RFC 7857). In contrast to the state machine in RFC7857 we only
+ * transition to ESTABLISHED state after seeing a full 3-way handshake (SYNs
+ * and ACKs in both directions). RFC7857 as a means of protecting against
+ * spurious RSTs closing a session, goes back to ESTABLISHED if a data packet
+ * is received after the RST. This state machine will leave the state in
+ * transitory if RST is seen. Our implementation also goes beyond by supporting
+ * creation of a new session while old session is in transitory timeout after
+ * seeing FIN packets from both sides.
+ */
 always_inline void
 nat44_set_tcp_session_state (snat_main_t *sm, f64 now, snat_session_t *ses,
                             u8 tcp_flags, u32 thread_index,
                             nat44_ed_dir_e dir)
 {
   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
-  nat44_ed_tcp_flag_e flags =
-    tcp_flags & (TCP_FLAG_SYN | TCP_FLAG_FIN | TCP_FLAG_RST);
+  u8 old_flags = ses->tcp_flags[dir];
+  ses->tcp_flags[dir] |=
+    tcp_flags & (TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK);
+  if (old_flags == ses->tcp_flags[dir])
+    return;
 
   u8 old_state = ses->tcp_state;
-  ses->tcp_state = sm->tcp_state_change_table[ses->tcp_state][dir][flags];
 
-  if (old_state != ses->tcp_state)
+  switch (old_state)
     {
-      if (nat44_ed_tcp_is_established (ses->tcp_state))
+    case NAT44_ED_TCP_STATE_CLOSED:
+      // ESTABLISHED when a SYN and ACK is seen from both sides
+      if ((ses->tcp_flags[NAT44_ED_DIR_I2O] &
+          ses->tcp_flags[NAT44_ED_DIR_O2I]) == (TCP_FLAG_SYN | TCP_FLAG_ACK))
        {
-         if (NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O == old_state ||
-             NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I == old_state)
-           {
-             nat44_ed_session_reopen (thread_index, ses);
-           }
+         ses->tcp_state = NAT44_ED_TCP_STATE_ESTABLISHED;
          ses->lru_head_index = tsm->tcp_estab_lru_head_index;
        }
-      else
+      break;
+    case NAT44_ED_TCP_STATE_ESTABLISHED:
+      // CLOSING when a FIN is seen from either side or session has been RST
+      if ((ses->tcp_flags[dir] & TCP_FLAG_FIN) ||
+         (ses->tcp_flags[dir] & TCP_FLAG_RST))
        {
-         if (NAT44_ED_TCP_STATE_ESTABLISHED == old_state)
-           { // need to update last heard otherwise session might get
-             // immediately timed out if it has been idle longer than
-             // transitory timeout
-             ses->last_heard = now;
-           }
+         ses->tcp_state = NAT44_ED_TCP_STATE_CLOSING;
+         ses->tcp_flags[NAT44_ED_DIR_I2O] = 0;
+         ses->tcp_flags[NAT44_ED_DIR_O2I] = 0;
+         // need to update last heard otherwise session might get
+         // immediately timed out if it has been idle longer than
+         // transitory timeout
+         ses->last_heard = now;
          ses->lru_head_index = tsm->tcp_trans_lru_head_index;
        }
-      ses->last_lru_update = now;
-      clib_dlist_remove (tsm->lru_pool, ses->lru_index);
-      clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index);
+      break;
+    case NAT44_ED_TCP_STATE_CLOSING:
+      // Allow a transitory session to reopen
+      if ((ses->tcp_flags[NAT44_ED_DIR_I2O] &
+          ses->tcp_flags[NAT44_ED_DIR_O2I]) == (TCP_FLAG_SYN | TCP_FLAG_ACK))
+       {
+         nat44_ed_session_reopen (thread_index, ses);
+         ses->tcp_state = NAT44_ED_TCP_STATE_ESTABLISHED;
+         ses->lru_head_index = tsm->tcp_estab_lru_head_index;
+       }
+      break;
     }
+  if (old_state == ses->tcp_state)
+    return;
+  ses->last_lru_update = now;
+  clib_dlist_remove (tsm->lru_pool, ses->lru_index);
+  clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index);
 }
 
 always_inline void
@@ -1012,13 +821,8 @@ always_inline void
 nat44_session_update_counters (snat_session_t *s, f64 now, uword bytes,
                               u32 thread_index)
 {
-  if (NAT44_ED_TCP_STATE_RST_TRANS != s->tcp_state &&
-      NAT44_ED_TCP_STATE_FIN_TRANS != s->tcp_state &&
-      NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O != s->tcp_state &&
-      NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I != s->tcp_state)
-    {
-      s->last_heard = now;
-    }
+  // regardless of TCP state, reset the timer if data packet is seen.
+  s->last_heard = now;
   s->total_pkts++;
   s->total_bytes += bytes;
 }
index f172dab..21eebb2 100644 (file)
@@ -2609,8 +2609,8 @@ class TestNAT44EDMW(TestNAT44ED):
                               self.tcp_external_port)
 
         # Wait at least the transitory time, the session is in established
-        # state anyway. RST followed by a data packet should keep it
-        # established.
+        # state anyway. RST followed by a data packet should move it to
+        # transitory state.
         self.virtual_sleep(6)
         p = (Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) /
              IP(src=self.pg0.remote_ip4, dst=self.pg1.remote_ip4) /
@@ -2624,15 +2624,6 @@ class TestNAT44EDMW(TestNAT44ED):
                  flags="P"))
         self.send_and_expect(self.pg0, p, self.pg1)
 
-        # State is established, session should be still open after 6 seconds
-        self.virtual_sleep(6)
-
-        p = (Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) /
-             IP(src=self.pg0.remote_ip4, dst=self.pg1.remote_ip4) /
-             TCP(sport=self.tcp_port_in, dport=self.tcp_external_port,
-                 flags="R"))
-        self.send_and_expect(self.pg0, p, self.pg1)
-
         # State is transitory, session should be closed after 6 seconds
         self.virtual_sleep(6)
 
@@ -3135,9 +3126,16 @@ class TestNAT44EDMW(TestNAT44ED):
         # SYN out2in
         p = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) /
              IP(src=self.pg1.remote_ip4, dst=self.nat_addr) /
-             TCP(sport=self.tcp_external_port, dport=self.tcp_port_out))
+             TCP(sport=self.tcp_external_port, dport=self.tcp_port_out,
+                 flags='SA'))
         self.send_and_expect(self.pg1, p, self.pg0)
 
+        p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
+             IP(src=self.pg0.remote_ip4, dst=self.pg1.remote_ip4) /
+             TCP(sport=self.tcp_port_in, dport=self.tcp_external_port,
+                 flags="A"))
+        self.send_and_expect(self.pg0, p, self.pg1)
+
         # FIN in2out
         p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
              IP(src=self.pg0.remote_ip4, dst=self.pg1.remote_ip4) /
@@ -3152,17 +3150,8 @@ class TestNAT44EDMW(TestNAT44ED):
                  flags="F"))
         self.send_and_expect(self.pg1, p, self.pg0)
 
-        # SYN in2out
-        p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
-             IP(src=self.pg0.remote_ip4, dst=self.pg1.remote_ip4) /
-             TCP(sport=self.tcp_port_in, dport=self.tcp_external_port))
-        self.send_and_expect(self.pg0, p, self.pg1)
-
-        # SYN out2in
-        p = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) /
-             IP(src=self.pg1.remote_ip4, dst=self.nat_addr) /
-             TCP(sport=self.tcp_external_port, dport=self.tcp_port_out))
-        self.send_and_expect(self.pg1, p, self.pg0)
+        self.init_tcp_session(self.pg0, self.pg1, self.tcp_port_in,
+                              self.tcp_external_port)
 
         # 2 records should be produced - first one del & add
         capture = self.pg3.get_capture(2)
@@ -3746,9 +3735,16 @@ class TestNAT44EDMW(TestNAT44ED):
         p = (Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) /
              IP(src=self.pg0.remote_ip4, dst=self.pg1.remote_ip4) /
              TCP(sport=in_port, dport=ext_port,
-                 flags="S", seq=101, ack=301))
+                 flags="SA", seq=101, ack=301))
         self.send_and_expect(self.pg0, p, self.pg1)
 
+        # send ACK packet out -> in
+        p = (Ether(src=self.pg1.remote_mac, dst=self.pg1.local_mac) /
+             IP(src=self.pg1.remote_ip4, dst=self.nat_addr) /
+             TCP(sport=ext_port, dport=out_port,
+                 flags="A", seq=300, ack=101))
+        self.send_and_expect(self.pg1, p, self.pg0)
+
         self.virtual_sleep(3)
         # send ACK packet in -> out - should be forwarded and session alive
         p = (Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) /