nat: sessions get expired when fib table removed 78/28178/7
authorFilip Varga <fivarga@cisco.com>
Tue, 4 Aug 2020 16:06:06 +0000 (18:06 +0200)
committerOle Trøan <otroan@employees.org>
Mon, 17 Aug 2020 07:53:18 +0000 (07:53 +0000)
fib table removal would leave lingering sessions in vpp
this patch is aimed at solving this issue by grouping
sessions by source and destionation fib. if one of the
fibs gets removed this grouping is tagged as expired
and session won't be passed to non existing fib table

Ticket: VPPSUPP-93
Type: improvement

Change-Id: I45b1205a8b58d91f174e6feb862554ec2f6cffad
Signed-off-by: Filip Varga <fivarga@cisco.com>
src/plugins/nat/in2out_ed.c
src/plugins/nat/nat.c
src/plugins/nat/nat.h
src/plugins/nat/nat44/ed_inlines.h
src/plugins/nat/nat44_cli.c
src/plugins/nat/out2in_ed.c
src/plugins/nat/test/test_nat.py

index 0c65a50..a1f5e5b 100644 (file)
@@ -488,6 +488,8 @@ slow_path_ed (snat_main_t * sm,
               &s->ext_host_nat_addr, s->ext_host_nat_port,
               s->nat_proto, s->in2out.fib_index, s->flags, thread_index, 0);
 
+  per_vrf_sessions_register_session (s, thread_index);
+
   return next;
 }
 
@@ -886,6 +888,8 @@ nat44_ed_in2out_unknown_proto (snat_main_t * sm,
                  s - tsm->sessions);
       if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
        nat_elog_notice ("out2in key add failed");
+
+      per_vrf_sessions_register_session (s, thread_index);
     }
 
   /* Update IP checksum */
@@ -1024,11 +1028,20 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm,
        pool_elt_at_index (tsm->sessions,
                           ed_value_get_session_index (&value0));
 
+      if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
+       {
+         // session is closed, go slow path
+         nat_free_session_data (sm, s0, thread_index, 0);
+         nat_ed_session_delete (sm, s0, thread_index, 1);
+         next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
+         goto trace0;
+       }
+
       if (s0->tcp_closed_timestamp)
        {
          if (now >= s0->tcp_closed_timestamp)
            {
-             // session is closed, go slow path
+             // session is closed, go slow path, freed in slow path
              next[0] = def_slow;
            }
          else
index ea1add6..66a5243 100644 (file)
@@ -189,6 +189,11 @@ nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index,
   snat_main_per_thread_data_t *tsm =
     vec_elt_at_index (sm->per_thread_data, thread_index);
 
+  if (is_ed_session (s))
+    {
+      per_vrf_sessions_unregister_session (s, thread_index);
+    }
+
   if (is_fwd_bypass_session (s))
     {
       if (snat_is_unk_proto_session (s))
@@ -1814,6 +1819,65 @@ nat_validate_counters (snat_main_t * sm, u32 sw_if_index)
   vlib_zero_simple_counter (&sm->counters.hairpinning, sw_if_index);
 }
 
+void
+expire_per_vrf_sessions (u32 fib_index)
+{
+  per_vrf_sessions_t *per_vrf_sessions;
+  snat_main_per_thread_data_t *tsm;
+  snat_main_t *sm = &snat_main;
+
+  /* *INDENT-OFF* */
+  vec_foreach (tsm, sm->per_thread_data)
+    {
+      vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
+        {
+          if ((per_vrf_sessions->rx_fib_index == fib_index) ||
+              (per_vrf_sessions->tx_fib_index == fib_index))
+            {
+              per_vrf_sessions->expired = 1;
+            }
+        }
+    }
+  /* *INDENT-ON* */
+}
+
+void
+update_per_vrf_sessions_vec (u32 fib_index, int is_del)
+{
+  snat_main_t *sm = &snat_main;
+  nat_fib_t *fib;
+
+  // we don't care if it is outside/inside fib
+  // we just care about their ref_count
+  // if it reaches 0 sessions should expire
+  // because the fib isn't valid for NAT anymore
+
+  vec_foreach (fib, sm->fibs)
+  {
+    if (fib->fib_index == fib_index)
+      {
+       if (is_del)
+         {
+           fib->ref_count--;
+           if (!fib->ref_count)
+             {
+               vec_del1 (sm->fibs, fib - sm->fibs);
+               expire_per_vrf_sessions (fib_index);
+             }
+           return;
+         }
+       else
+         fib->ref_count++;
+      }
+  }
+  if (!is_del)
+    {
+      vec_add2 (sm->fibs, fib, 1);
+      fib->ref_count = 1;
+      fib->fib_index = fib_index;
+    }
+}
+
 int
 snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
 {
@@ -1861,6 +1925,9 @@ snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
     sm->fq_out2in_index =
       vlib_frame_queue_main_init (sm->out2in_node_index, NAT_FQ_NELTS);
 
+  if (sm->endpoint_dependent)
+    update_per_vrf_sessions_vec (fib_index, is_del);
+
   if (!is_inside)
     {
       /* *INDENT-OFF* */
@@ -1887,6 +1954,7 @@ snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
          outside_fib->fib_index = fib_index;
        }
     }
+
 feature_set:
   /* *INDENT-OFF* */
   pool_foreach (i, sm->interfaces,
@@ -2073,7 +2141,6 @@ snat_interface_add_del_output_feature (u32 sw_if_index,
   u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
                                                       sw_if_index);
 
-
   if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
     return VNET_API_ERROR_UNSUPPORTED;
 
@@ -2085,6 +2152,9 @@ snat_interface_add_del_output_feature (u32 sw_if_index,
   }));
   /* *INDENT-ON* */
 
+  if (sm->endpoint_dependent)
+    update_per_vrf_sessions_vec (fib_index, is_del);
+
   if (!is_inside)
     {
       /* *INDENT-OFF* */
@@ -2439,6 +2509,29 @@ test_key_calc_split ()
   ASSERT (fib_index == fib_index2);
 }
 
+static clib_error_t *
+nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add)
+{
+  snat_main_t *sm = &snat_main;
+  u32 fib_index;
+
+  if (sm->endpoint_dependent)
+    {
+      // TODO: consider removing all NAT interfaces
+
+      if (!is_add)
+       {
+         fib_index = ip4_fib_index_from_table_id (table_id);
+         if (fib_index != ~0)
+           expire_per_vrf_sessions (fib_index);
+       }
+    }
+  return 0;
+}
+
+VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del);
+
+
 static clib_error_t *
 snat_init (vlib_main_t * vm)
 {
@@ -3853,6 +3946,7 @@ nat44_db_init (snat_main_per_thread_data_t * tsm)
                             sm->translation_memory_size);
       clib_bihash_set_kvp_format_fn_16_8 (&tsm->in2out_ed,
                                          format_ed_session_kvp);
+
     }
   else
     {
@@ -3884,6 +3978,7 @@ nat44_db_free (snat_main_per_thread_data_t * tsm)
   if (sm->endpoint_dependent)
     {
       clib_bihash_free_16_8 (&tsm->in2out_ed);
+      vec_free (tsm->per_vrf_sessions_vec);
     }
   else
     {
index ddcf4c9..8bec46a 100644 (file)
@@ -198,6 +198,20 @@ typedef enum
 #define NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT 4
 #define NAT_STATIC_MAPPING_FLAG_LB           8
 
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct
+{
+  // number of sessions in this vrf
+  u32 ses_count;
+
+  u32 rx_fib_index;
+  u32 tx_fib_index;
+
+  // is this vrf expired
+  u8 expired;
+}) per_vrf_sessions_t;
+/* *INDENT-ON* */
+
 /* *INDENT-OFF* */
 typedef CLIB_PACKED(struct
 {
@@ -258,10 +272,13 @@ typedef CLIB_PACKED(struct
 
   /* user index */
   u32 user_index;
+
+  /* per vrf sessions index */
+  u32 per_vrf_sessions_index;
+
 }) snat_session_t;
 /* *INDENT-ON* */
 
-
 typedef struct
 {
   ip4_address_t addr;
@@ -285,6 +302,12 @@ typedef struct
 /* *INDENT-ON* */
 } snat_address_t;
 
+typedef struct
+{
+  u32 fib_index;
+  u32 ref_count;
+} nat_fib_t;
+
 typedef struct
 {
   u32 fib_index;
@@ -414,6 +437,8 @@ typedef struct
   /* real thread index */
   u32 thread_index;
 
+  per_vrf_sessions_t *per_vrf_sessions_vec;
+
 } snat_main_per_thread_data_t;
 
 struct snat_main_s;
@@ -501,6 +526,9 @@ typedef struct snat_main_s
   u16 start_port;
   u16 end_port;
 
+  /* vector of fibs */
+  nat_fib_t *fibs;
+
   /* vector of outside fibs */
   nat_outside_fib_t *outside_fibs;
 
@@ -1350,6 +1378,8 @@ int snat_alloc_outside_address_and_port (snat_address_t * addresses,
                                         u16 port_per_thread,
                                         u32 snat_thread_index);
 
+void expire_per_vrf_sessions (u32 fib_index);
+
 /**
  * @brief Match NAT44 static mapping.
  *
index 37212f3..1b4df4d 100644 (file)
@@ -141,4 +141,115 @@ nat_ed_session_alloc (snat_main_t * sm, u32 thread_index, f64 now, u8 proto)
   return s;
 }
 
+// slow path
+static_always_inline void
+per_vrf_sessions_cleanup (u32 thread_index)
+{
+  snat_main_t *sm = &snat_main;
+  snat_main_per_thread_data_t *tsm =
+    vec_elt_at_index (sm->per_thread_data, thread_index);
+  per_vrf_sessions_t *per_vrf_sessions;
+  u32 *to_free = 0, *i;
+
+  vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
+  {
+    if (per_vrf_sessions->expired)
+      {
+       if (per_vrf_sessions->ses_count == 0)
+         {
+           vec_add1 (to_free, per_vrf_sessions - tsm->per_vrf_sessions_vec);
+         }
+      }
+  }
+
+  if (vec_len (to_free))
+    {
+      vec_foreach (i, to_free)
+      {
+       vec_del1 (tsm->per_vrf_sessions_vec, *i);
+      }
+    }
+
+  vec_free (to_free);
+}
+
+// slow path
+static_always_inline void
+per_vrf_sessions_register_session (snat_session_t * s, u32 thread_index)
+{
+  snat_main_t *sm = &snat_main;
+  snat_main_per_thread_data_t *tsm =
+    vec_elt_at_index (sm->per_thread_data, thread_index);
+  per_vrf_sessions_t *per_vrf_sessions;
+
+  per_vrf_sessions_cleanup (thread_index);
+
+  // s->per_vrf_sessions_index == ~0 ... reuse of old session
+
+  vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
+  {
+    // ignore already expired registrations
+    if (per_vrf_sessions->expired)
+      continue;
+
+    if ((s->in2out.fib_index == per_vrf_sessions->rx_fib_index) &&
+       (s->out2in.fib_index == per_vrf_sessions->tx_fib_index))
+      {
+       goto done;
+      }
+    if ((s->in2out.fib_index == per_vrf_sessions->tx_fib_index) &&
+       (s->out2in.fib_index == per_vrf_sessions->rx_fib_index))
+      {
+       goto done;
+      }
+  }
+
+  // create a new registration
+  vec_add2 (tsm->per_vrf_sessions_vec, per_vrf_sessions, 1);
+  clib_memset (per_vrf_sessions, 0, sizeof (*per_vrf_sessions));
+
+  per_vrf_sessions->rx_fib_index = s->in2out.fib_index;
+  per_vrf_sessions->tx_fib_index = s->out2in.fib_index;
+
+done:
+  s->per_vrf_sessions_index = per_vrf_sessions - tsm->per_vrf_sessions_vec;
+  per_vrf_sessions->ses_count++;
+}
+
+// fast path
+static_always_inline void
+per_vrf_sessions_unregister_session (snat_session_t * s, u32 thread_index)
+{
+  snat_main_t *sm = &snat_main;
+  snat_main_per_thread_data_t *tsm;
+  per_vrf_sessions_t *per_vrf_sessions;
+
+  ASSERT (s->per_vrf_sessions_index != ~0);
+  
+  tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
+  per_vrf_sessions = vec_elt_at_index (tsm->per_vrf_sessions_vec,
+                                       s->per_vrf_sessions_index);
+
+  ASSERT (per_vrf_sessions->ses_count != 0);
+
+  per_vrf_sessions->ses_count--;
+  s->per_vrf_sessions_index = ~0;
+}
+
+// fast path
+static_always_inline u8
+per_vrf_sessions_is_expired (snat_session_t * s, u32 thread_index)
+{
+  snat_main_t *sm = &snat_main;
+  snat_main_per_thread_data_t *tsm;
+  per_vrf_sessions_t *per_vrf_sessions;
+
+  ASSERT (s->per_vrf_sessions_index != ~0);
+
+  tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
+  per_vrf_sessions = vec_elt_at_index (tsm->per_vrf_sessions_vec,
+                                       s->per_vrf_sessions_index);
+  return per_vrf_sessions->expired;
+}
+
 #endif
index 64529af..ad2e9b7 100644 (file)
@@ -1842,8 +1842,79 @@ nat_show_timeouts_command_fn (vlib_main_t * vm,
   return 0;
 }
 
+static clib_error_t *
+nat44_debug_fib_expire_command_fn (vlib_main_t * vm,
+                                  unformat_input_t * input,
+                                  vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = 0;
+  u32 fib = ~0;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%u", &fib))
+       ;
+      else
+       {
+         error = clib_error_return (0, "unknown input '%U'",
+                                    format_unformat_error, line_input);
+         goto done;
+       }
+    }
+  expire_per_vrf_sessions (fib);
+done:
+  unformat_free (line_input);
+  return error;
+}
+
+static clib_error_t *
+nat44_debug_fib_registration_command_fn (vlib_main_t * vm,
+                                        unformat_input_t * input,
+                                        vlib_cli_command_t * cmd)
+{
+  snat_main_t *sm = &snat_main;
+  snat_main_per_thread_data_t *tsm;
+  per_vrf_sessions_t *per_vrf_sessions;
+
+  vlib_cli_output (vm, "VRF registration debug:");
+  vec_foreach (tsm, sm->per_thread_data)
+  {
+    vlib_cli_output (vm, "thread %u:", tsm->thread_index);
+    vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
+    {
+      vlib_cli_output (vm, "rx fib %u tx fib %u ses count %u %s",
+                      per_vrf_sessions->rx_fib_index,
+                      per_vrf_sessions->tx_fib_index,
+                      per_vrf_sessions->ses_count,
+                      per_vrf_sessions->expired ? "expired" : "");
+    }
+  }
+  return 0;
+}
+
 /* *INDENT-OFF* */
 
+/*?
+?*/
+VLIB_CLI_COMMAND (nat44_debug_fib_expire_command, static) = {
+  .path = "debug nat44 fib expire",
+  .short_help = "debug nat44 fib expire <fib-index>",
+  .function = nat44_debug_fib_expire_command_fn,
+};
+
+/*?
+?*/
+VLIB_CLI_COMMAND (nat44_debug_fib_registration_command, static) = {
+  .path = "debug nat44 fib registration",
+  .short_help = "debug nat44 fib registration",
+  .function = nat44_debug_fib_registration_command_fn,
+};
+
 /*?
  * @cliexpar
  * @cliexstart{set snat workers}
index 5690636..9868fe7 100644 (file)
@@ -310,6 +310,8 @@ create_session_for_static_mapping_ed (snat_main_t * sm,
               &s->ext_host_nat_addr, s->ext_host_nat_port,
               s->nat_proto, s->in2out.fib_index, s->flags, thread_index, 0);
 
+  per_vrf_sessions_register_session (s, thread_index);
+
   return s;
 }
 
@@ -407,6 +409,8 @@ create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip,
       kv.value = s - tsm->sessions;
       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &kv, 1))
        nat_elog_notice ("in2out_ed key add failed");
+
+      per_vrf_sessions_register_session (s, thread_index);
     }
 
   if (ip->protocol == IP_PROTOCOL_TCP)
@@ -651,6 +655,8 @@ nat44_ed_out2in_unknown_proto (snat_main_t * sm,
                  ip->protocol, thread_index, s - tsm->sessions);
       if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
        nat_elog_notice ("in2out key add failed");
+
+      per_vrf_sessions_register_session (s, thread_index);
     }
 
   /* Update IP checksum */
@@ -780,8 +786,10 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
            }
        }
 
+      // lookup for session
       if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv0, &value0))
        {
+         // session does not exist go slow path
          next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
          goto trace0;
        }
@@ -791,11 +799,21 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
                           ed_value_get_session_index (&value0));
 
     skip_lookup:
+
+      if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
+       {
+         // session is closed, go slow path
+         nat_free_session_data (sm, s0, thread_index, 0);
+         nat_ed_session_delete (sm, s0, thread_index, 1);
+         next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
+         goto trace0;
+       }
+
       if (s0->tcp_closed_timestamp)
        {
          if (now >= s0->tcp_closed_timestamp)
            {
-             // session is closed, go slow path
+             // session is closed, go slow path, freed in slow path
              next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
            }
          else
@@ -819,7 +837,6 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
          next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
          goto trace0;
        }
-      //
 
       old_addr0 = ip0->dst_address.as_u32;
       new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32;
index 09bf8a2..6b673b0 100644 (file)
@@ -6703,16 +6703,16 @@ class TestNAT44EndpointDependent(MethodHolder):
             is_add=1)
         self.vapi.nat44_interface_add_del_feature(
             sw_if_index=self.pg0.sw_if_index,
-            flags=flags, is_add=1)
+            is_add=1, flags=flags)
         self.vapi.nat44_interface_add_del_output_feature(
-            is_add=1,
-            sw_if_index=self.pg1.sw_if_index)
+            sw_if_index=self.pg1.sw_if_index,
+            is_add=1)
         self.vapi.nat44_interface_add_del_feature(
             sw_if_index=self.pg5.sw_if_index,
             is_add=1)
         self.vapi.nat44_interface_add_del_feature(
             sw_if_index=self.pg5.sw_if_index,
-            flags=flags, is_add=1)
+            is_add=1, flags=flags)
         self.vapi.nat44_interface_add_del_feature(
             sw_if_index=self.pg6.sw_if_index,
             is_add=1)
@@ -7220,6 +7220,7 @@ class TestNAT44EndpointDependent(MethodHolder):
             self.vapi.cli("clear logging")
 
     def show_commands_at_teardown(self):
+        self.logger.info(self.vapi.cli("show errors"))
         self.logger.info(self.vapi.cli("show nat44 addresses"))
         self.logger.info(self.vapi.cli("show nat44 interfaces"))
         self.logger.info(self.vapi.cli("show nat44 static mappings"))
@@ -7227,6 +7228,7 @@ class TestNAT44EndpointDependent(MethodHolder):
         self.logger.info(self.vapi.cli("show nat44 sessions detail"))
         self.logger.info(self.vapi.cli("show nat44 hash tables detail"))
         self.logger.info(self.vapi.cli("show nat timeouts"))
+        self.logger.info(self.vapi.cli("debug nat44 fib registration"))
 
 
 class TestNAT44EndpointDependent3(MethodHolder):