acl-plugin: multicore: session management fixes 70/12770/9
author Andrew Yourtchenko <ayourtch@gmail.com>
Wed, 30 May 2018 20:29:29 +0000 (22:29 +0200)
committer Florin Coras <florin.coras@gmail.com>
Sat, 2 Jun 2018 06:01:42 +0000 (06:01 +0000)
- implement a short purgatory (SESSION_PURGATORY_TIMEOUT_USEC,
  10us in this change) for the session structures by adding a
  special connection list, where all connections about to be
  deleted go.

- add per-list-head timeouts updated upon the list enqueue/dequeue
  for connection idle management

- add an "unused" session list with list ID #0, which should
  never be used unless there is a logic error. Use this ID
  to initialize the sessions.

- improve the maintainability of the session linked list
  structures by using a symbolic bogus index name
  (FA_SESSION_BOGUS_INDEX) instead of ~0

- change the ordering of session creation - first reverse, then
  local - to minimize the potential for two workers competing for
  the same session in the corner case of two packets on
  different workers creating the same logical session

- reduce the maximum session count to keep the memory usage the same

- add extra log/debug/trace to session cleaning logic

- be more aggressive with cleaning up sessions - wind up the
  interrupts from the workers to themselves if there is more
  work to do

Change-Id: I3aa1c91a925a08e83793467cb15bda178c21e426
Signed-off-by: Andrew Yourtchenko <ayourtch@gmail.com>
src/plugins/acl/acl.c
src/plugins/acl/acl.h
src/plugins/acl/dataplane_node.c
src/plugins/acl/fa_node.h
src/plugins/acl/sess_mgmt_node.c
src/plugins/acl/session_inlines.h

index 65785cc..4f63a97 100644 (file)
@@ -3779,13 +3779,22 @@ acl_plugin_show_sessions (acl_main_t * am,
   u16 wk;
   vnet_interface_main_t *im = &am->vnet_main->interface_main;
   vnet_sw_interface_t *swif;
+  u64 now = clib_cpu_time_now ();
+  u64 clocks_per_second = am->vlib_main->clib_time.clocks_per_second;
 
   {
     u64 n_adds = am->fa_session_total_adds;
     u64 n_dels = am->fa_session_total_dels;
+    u64 n_deact = am->fa_session_total_deactivations;
     vlib_cli_output (vm, "Sessions total: add %lu - del %lu = %lu", n_adds,
                     n_dels, n_adds - n_dels);
+    vlib_cli_output (vm, "Sessions active: add %lu - deact %lu = %lu", n_adds,
+                    n_deact, n_adds - n_deact);
+    vlib_cli_output (vm, "Sessions being purged: deact %lu - del %lu = %lu",
+                    n_deact, n_dels, n_deact - n_dels);
   }
+  vlib_cli_output (vm, "now: %lu clocks per second: %lu", now,
+                  clocks_per_second);
   vlib_cli_output (vm, "\n\nPer-thread data:");
   for (wk = 0; wk < vec_len (am->per_worker_data); wk++)
     {
@@ -4140,6 +4149,7 @@ acl_init (vlib_main_t * vm)
   memset (am, 0, sizeof (*am));
   am->vlib_main = vm;
   am->vnet_main = vnet_get_main ();
+  am->log_default = vlib_log_register_class ("acl_plugin", 0);
 
   u8 *name = format (0, "acl_%08x%c", api_version, 0);
 
@@ -4176,30 +4186,33 @@ acl_init (vlib_main_t * vm)
   am->fa_conn_table_max_entries = ACL_FA_CONN_TABLE_DEFAULT_MAX_ENTRIES;
   am->reclassify_sessions = 0;
   vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+  am->fa_min_deleted_sessions_per_interval =
+    ACL_FA_DEFAULT_MIN_DELETED_SESSIONS_PER_INTERVAL;
+  am->fa_max_deleted_sessions_per_interval =
+    ACL_FA_DEFAULT_MAX_DELETED_SESSIONS_PER_INTERVAL;
+  am->fa_cleaner_wait_time_increment =
+    ACL_FA_DEFAULT_CLEANER_WAIT_TIME_INCREMENT;
+
   vec_validate (am->per_worker_data, tm->n_vlib_mains - 1);
   {
     u16 wk;
-    u8 tt;
     for (wk = 0; wk < vec_len (am->per_worker_data); wk++)
       {
        acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk];
-       vec_validate (pw->fa_conn_list_head, ACL_N_TIMEOUTS - 1);
-       vec_validate (pw->fa_conn_list_tail, ACL_N_TIMEOUTS - 1);
-       for (tt = 0; tt < ACL_N_TIMEOUTS; tt++)
-         {
-           pw->fa_conn_list_head[tt] = ~0;
-           pw->fa_conn_list_tail[tt] = ~0;
-         }
+       vec_validate (pw->expired,
+                     ACL_N_TIMEOUTS *
+                     am->fa_max_deleted_sessions_per_interval);
+       _vec_len (pw->expired) = 0;
+       vec_validate_init_empty (pw->fa_conn_list_head, ACL_N_TIMEOUTS - 1,
+                                FA_SESSION_BOGUS_INDEX);
+       vec_validate_init_empty (pw->fa_conn_list_tail, ACL_N_TIMEOUTS - 1,
+                                FA_SESSION_BOGUS_INDEX);
+       vec_validate_init_empty (pw->fa_conn_list_head_expiry_time,
+                                ACL_N_TIMEOUTS - 1, ~0ULL);
       }
   }
 
-  am->fa_min_deleted_sessions_per_interval =
-    ACL_FA_DEFAULT_MIN_DELETED_SESSIONS_PER_INTERVAL;
-  am->fa_max_deleted_sessions_per_interval =
-    ACL_FA_DEFAULT_MAX_DELETED_SESSIONS_PER_INTERVAL;
-  am->fa_cleaner_wait_time_increment =
-    ACL_FA_DEFAULT_CLEANER_WAIT_TIME_INCREMENT;
-
   am->fa_cleaner_cnt_delete_by_sw_index = 0;
   am->fa_cleaner_cnt_delete_by_sw_index_ok = 0;
   am->fa_cleaner_cnt_unknown_event = 0;
index 51c5b0c..0c0a6db 100644 (file)
@@ -38,6 +38,8 @@
 #define TCP_SESSION_IDLE_TIMEOUT_SEC (3600*24)
 #define TCP_SESSION_TRANSIENT_TIMEOUT_SEC 120
 
+#define SESSION_PURGATORY_TIMEOUT_USEC 10
+
 #define ACL_PLUGIN_HASH_LOOKUP_HEAP_SIZE (2 << 25)
 #define ACL_PLUGIN_HASH_LOOKUP_HASH_BUCKETS 65536
 #define ACL_PLUGIN_HASH_LOOKUP_HASH_MEMORY (2 << 25)
@@ -49,9 +51,12 @@ void input_acl_packet_match(u32 sw_if_index, vlib_buffer_t * b0, u32 *nextp, u32
 void output_acl_packet_match(u32 sw_if_index, vlib_buffer_t * b0, u32 *nextp, u32 *acl_match_p, u32 *rule_match_p, u32 *trace_bitmap);
 
 enum acl_timeout_e {
-  ACL_TIMEOUT_UDP_IDLE = 0,
+  ACL_TIMEOUT_UNUSED = 0,
+  ACL_TIMEOUT_UDP_IDLE,
   ACL_TIMEOUT_TCP_IDLE,
   ACL_TIMEOUT_TCP_TRANSIENT,
+  ACL_N_USER_TIMEOUTS,
+  ACL_TIMEOUT_PURGATORY = ACL_N_USER_TIMEOUTS, /* a special-case queue for deletion-in-progress sessions */
   ACL_N_TIMEOUTS
 };
 
@@ -249,6 +254,8 @@ typedef struct {
   /* total session adds/dels */
   u64 fa_session_total_adds;
   u64 fa_session_total_dels;
+  /* how many sessions went into purgatory */
+  u64 fa_session_total_deactivations;
 
   /* L2 datapath glue */
 
@@ -325,8 +332,21 @@ typedef struct {
   /* convenience */
   vlib_main_t * vlib_main;
   vnet_main_t * vnet_main;
+  /* logging */
+  vlib_log_class_t log_default;
 } acl_main_t;
 
+#define acl_log_err(...) \
+  vlib_log(VLIB_LOG_LEVEL_ERR, acl_main.log_default, __VA_ARGS__)
+#define acl_log_warn(...) \
+  vlib_log(VLIB_LOG_LEVEL_WARNING, acl_main.log_default, __VA_ARGS__)
+#define acl_log_notice(...) \
+  vlib_log(VLIB_LOG_LEVEL_NOTICE, acl_main.log_default, __VA_ARGS__)
+#define acl_log_info(...) \
+  vlib_log(VLIB_LOG_LEVEL_INFO, acl_main.log_default, __VA_ARGS__)
+
+
+
 #define foreach_acl_eh                                          \
    _(HOPBYHOP , 0  , "IPv6ExtHdrHopByHop")                      \
    _(ROUTING  , 43 , "IPv6ExtHdrRouting")                       \
index 351cbbd..f1ed4c2 100644 (file)
@@ -153,7 +153,8 @@ acl_fa_node_fn (vlib_main_t * vm,
 
       if (acl_fa_ifc_has_sessions (am, sw_if_index0))
        {
-         if (acl_fa_find_session (am, sw_if_index0, &fa_5tuple, &value_sess))
+         if (acl_fa_find_session (am, sw_if_index0, &fa_5tuple, &value_sess)
+             && (value_sess.value != ~0ULL))
            {
              trace_bitmap |= 0x80000000;
              error0 = ACL_FA_ERROR_ACL_EXIST_SESSION;
@@ -215,10 +216,12 @@ acl_fa_node_fn (vlib_main_t * vm,
                         sw_if_index0);
                      vec_elt (pw->fa_session_epoch_change_by_sw_if_index,
                               sw_if_index0)++;
-                     if (acl_fa_conn_list_delete_session (am, f_sess_id))
+                     if (acl_fa_conn_list_delete_session
+                         (am, f_sess_id, now))
                        {
                          /* delete the session only if we were able to unlink it */
-                         acl_fa_delete_session (am, sw_if_index0, f_sess_id);
+                         acl_fa_two_stage_delete_session (am, sw_if_index0,
+                                                          f_sess_id, now);
                        }
                      acl_check_needed = 1;
                      trace_bitmap |= 0x40000000;
@@ -243,7 +246,7 @@ acl_fa_node_fn (vlib_main_t * vm,
            {
              if (!acl_fa_can_add_session (am, is_input, sw_if_index0))
                acl_fa_try_recycle_session (am, is_input, thread_index,
-                                           sw_if_index0);
+                                           sw_if_index0, now);
 
              if (acl_fa_can_add_session (am, is_input, sw_if_index0))
                {
index 8d79e42..5c55cb9 100644 (file)
@@ -18,8 +18,8 @@
 #define TCP_FLAGS_ACKSYN (TCP_FLAG_SYN + TCP_FLAG_ACK)
 
 #define ACL_FA_CONN_TABLE_DEFAULT_HASH_NUM_BUCKETS (64 * 1024)
-#define ACL_FA_CONN_TABLE_DEFAULT_HASH_MEMORY_SIZE (1<<30)
-#define ACL_FA_CONN_TABLE_DEFAULT_MAX_ENTRIES 1000000
+#define ACL_FA_CONN_TABLE_DEFAULT_HASH_MEMORY_SIZE (1ULL<<30)
+#define ACL_FA_CONN_TABLE_DEFAULT_MAX_ENTRIES 500000
 
 typedef union {
   u64 as_u64;
@@ -80,7 +80,8 @@ typedef struct {
   u32 link_prev_idx;      /* +4 bytes = 12 */
   u32 link_next_idx;      /* +4 bytes = 16 */
   u8 link_list_id;        /* +1 bytes = 17 */
-  u8 reserved1[7];        /* +7 bytes = 24 */
+  u8 deleted;             /* +1 bytes = 18 */
+  u8 reserved1[6];        /* +6 bytes = 24 */
   u64 reserved2[5];       /* +5*8 bytes = 64 */
 } fa_session_t;
 
@@ -120,12 +121,16 @@ CT_ASSERT_EQUAL(fa_session_t_size_is_128, sizeof(fa_session_t), 128);
 CT_ASSERT_EQUAL(fa_full_session_id_size_is_64, sizeof(fa_full_session_id_t), sizeof(u64));
 #undef CT_ASSERT_EQUAL
 
+#define FA_SESSION_BOGUS_INDEX ~0
+
 typedef struct {
   /* The pool of sessions managed by this worker */
   fa_session_t *fa_sessions_pool;
   /* per-worker ACL_N_TIMEOUTS of conn lists */
   u32 *fa_conn_list_head;
   u32 *fa_conn_list_tail;
+  /* expiry time set whenever an element is enqueued */
+  u64 *fa_conn_list_head_expiry_time;
   /* adds and deletes per-worker-per-interface */
   u64 *fa_session_dels_by_sw_if_index;
   u64 *fa_session_adds_by_sw_if_index;
index 103db35..465111a 100644 (file)
@@ -38,7 +38,7 @@ fa_session_get_shortest_timeout (acl_main_t * am)
 {
   int timeout_type;
   u64 timeout = ~0LL;
-  for (timeout_type = 0; timeout_type < ACL_N_TIMEOUTS; timeout_type++)
+  for (timeout_type = 0; timeout_type <= ACL_N_USER_TIMEOUTS; timeout_type++)
     {
       if (timeout > am->session_timeout_sec[timeout_type])
        {
@@ -107,12 +107,15 @@ acl_fa_verify_init_sessions (acl_main_t * am)
 static u64
 fa_session_get_list_timeout (acl_main_t * am, fa_session_t * sess)
 {
-  u64 timeout = am->vlib_main->clib_time.clocks_per_second;
+  u64 timeout = am->vlib_main->clib_time.clocks_per_second / 1000;
   /*
    * we have the shortest possible timeout type in all the lists
    * (see README-multicore for the rationale)
    */
-  timeout *= fa_session_get_shortest_timeout (am);
+  if (sess->link_list_id == ACL_TIMEOUT_PURGATORY)
+    timeout = fa_session_get_timeout (am, sess);
+  else
+    timeout *= fa_session_get_shortest_timeout (am);
   return timeout;
 }
 
@@ -121,28 +124,15 @@ acl_fa_get_list_head_expiry_time (acl_main_t * am,
                                  acl_fa_per_worker_data_t * pw, u64 now,
                                  u16 thread_index, int timeout_type)
 {
-  fa_session_t *sess =
-    get_session_ptr (am, thread_index, pw->fa_conn_list_head[timeout_type]);
-  /*
-   * We can not check just the index here because inbetween the worker thread might
-   * dequeue the connection from the head just as we are about to check it.
-   */
-  if (!is_valid_session_ptr (am, thread_index, sess))
-    {
-      return ~0LL;             // infinity.
-    }
-  else
-    {
-      u64 timeout_time =
-       sess->link_enqueue_time + fa_session_get_list_timeout (am, sess);
-      return timeout_time;
-    }
+  return pw->fa_conn_list_head_expiry_time[timeout_type];
 }
 
 static int
 acl_fa_conn_time_to_check (acl_main_t * am, acl_fa_per_worker_data_t * pw,
                           u64 now, u16 thread_index, u32 session_index)
 {
+  if (session_index == FA_SESSION_BOGUS_INDEX)
+    return 0;
   fa_session_t *sess = get_session_ptr (am, thread_index, session_index);
   u64 timeout_time =
     sess->link_enqueue_time + fa_session_get_list_timeout (am, sess);
@@ -165,24 +155,43 @@ acl_fa_check_idle_sessions (acl_main_t * am, u16 thread_index, u64 now)
 
   {
     u8 tt = 0;
+    int n_pending_swipes = 0;
     for (tt = 0; tt < ACL_N_TIMEOUTS; tt++)
       {
-       while ((vec_len (pw->expired) <
-               am->fa_max_deleted_sessions_per_interval)
-              && (~0 != pw->fa_conn_list_head[tt])
-              &&
-              (acl_fa_conn_time_to_check
-               (am, pw, now, thread_index, pw->fa_conn_list_head[tt])))
+       int n_expired = 0;
+       while (n_expired < am->fa_max_deleted_sessions_per_interval)
          {
            fsid.session_index = pw->fa_conn_list_head[tt];
-           elog_acl_maybe_trace_X2 (am,
-                                    "acl_fa_check_idle_sessions: expire session %d on thread %d",
-                                    "i4i4", (u32) fsid.session_index,
-                                    (u32) thread_index);
+           if (!acl_fa_conn_time_to_check
+               (am, pw, now, thread_index, pw->fa_conn_list_head[tt]))
+             {
+               break;
+             }
+           if (am->trace_sessions > 3)
+             {
+               elog_acl_maybe_trace_X3 (am,
+                                        "acl_fa_check_idle_sessions: expire session %d in list %d on thread %d",
+                                        "i4i4i4", (u32) fsid.session_index,
+                                        (u32) tt, (u32) thread_index);
+             }
            vec_add1 (pw->expired, fsid.session_index);
-           acl_fa_conn_list_delete_session (am, fsid);
+           n_expired++;
+           acl_fa_conn_list_delete_session (am, fsid, now);
          }
       }
+    for (tt = 0; tt < ACL_N_TIMEOUTS; tt++)
+      {
+       u32 session_index = pw->fa_conn_list_head[tt];
+       if (session_index == FA_SESSION_BOGUS_INDEX)
+         break;
+       fa_session_t *sess =
+         get_session_ptr (am, thread_index, session_index);
+       n_pending_swipes += sess->link_enqueue_time <= pw->swipe_end_time;
+      }
+    if (n_pending_swipes == 0)
+      {
+       pw->swipe_end_time = 0;
+      }
   }
 
   u32 *psid = NULL;
@@ -196,32 +205,60 @@ acl_fa_check_idle_sessions (acl_main_t * am, u16 thread_index, u64 now)
        u32 sw_if_index = sess->sw_if_index;
        u64 sess_timeout_time =
          sess->last_active_time + fa_session_get_timeout (am, sess);
-       if ((now < sess_timeout_time)
-           && (0 ==
-               clib_bitmap_get (pw->pending_clear_sw_if_index_bitmap,
-                                sw_if_index)))
+       int timeout_passed = (now >= sess_timeout_time);
+       int clearing_interface =
+         clib_bitmap_get (pw->pending_clear_sw_if_index_bitmap, sw_if_index);
+       if (am->trace_sessions > 3)
          {
-#ifdef FA_NODE_VERBOSE_DEBUG
-           clib_warning
-             ("ACL_FA_NODE_CLEAN: Restarting timer for session %d, sw_if_index %d",
-              (int) fsid.session_index, sess->sw_if_index);
-#endif
+           elog_acl_maybe_trace_X4 (am,
+                                    "acl_fa_check_idle_sessions: session %d sw_if_index %d timeout_passed %d clearing_interface %d",
+                                    "i4i4i4i4", (u32) fsid.session_index,
+                                    (u32) sess->sw_if_index,
+                                    (u32) timeout_passed,
+                                    (u32) clearing_interface);
+         }
+       if (timeout_passed || clearing_interface)
+         {
+           if (acl_fa_two_stage_delete_session (am, sw_if_index, fsid, now))
+             {
+               if (am->trace_sessions > 3)
+                 {
+                   elog_acl_maybe_trace_X2 (am,
+                                            "acl_fa_check_idle_sessions: deleted session %d sw_if_index %d",
+                                            "i4i4", (u32) fsid.session_index,
+                                            (u32) sess->sw_if_index);
+                 }
+               /* the session has been put */
+               pw->cnt_deleted_sessions++;
+             }
+           else
+             {
+               /* the connection marked as deleted and put to purgatory */
+               if (am->trace_sessions > 3)
+                 {
+                   elog_acl_maybe_trace_X2 (am,
+                                            "acl_fa_check_idle_sessions: session %d sw_if_index %d marked as deleted, put to purgatory",
+                                            "i4i4", (u32) fsid.session_index,
+                                            (u32) sess->sw_if_index);
+                 }
+             }
+         }
+       else
+
+         {
+           if (am->trace_sessions > 3)
+             {
+               elog_acl_maybe_trace_X2 (am,
+                                        "acl_fa_check_idle_sessions: restart timer for session %d sw_if_index %d",
+                                        "i4i4", (u32) fsid.session_index,
+                                        (u32) sess->sw_if_index);
+             }
            /* There was activity on the session, so the idle timeout
               has not passed. Enqueue for another time period. */
 
            acl_fa_conn_list_add_session (am, fsid, now);
            pw->cnt_session_timer_restarted++;
          }
-       else
-         {
-#ifdef FA_NODE_VERBOSE_DEBUG
-           clib_warning
-             ("ACL_FA_NODE_CLEAN: Deleting session %d, sw_if_index %d",
-              (int) fsid.session_index, sess->sw_if_index);
-#endif
-           acl_fa_delete_session (am, sw_if_index, fsid);
-           pw->cnt_deleted_sessions++;
-         }
       }
     else
       {
@@ -237,6 +274,10 @@ acl_fa_check_idle_sessions (acl_main_t * am, u16 thread_index, u64 now)
 
   if (pw->swipe_end_time && 0 == total_expired)
     pw->swipe_end_time = 0;
+
+  elog_acl_maybe_trace_X1 (am,
+                          "acl_fa_check_idle_sessions: done, total sessions expired: %d",
+                          "i4", (u32) total_expired);
   return (total_expired);
 }
 
@@ -271,6 +312,37 @@ static char *acl_fa_cleaner_error_strings[] = {
 static vlib_node_registration_t acl_fa_session_cleaner_process_node;
 static vlib_node_registration_t acl_fa_worker_session_cleaner_process_node;
 
+static void
+send_one_worker_interrupt (vlib_main_t * vm, acl_main_t * am,
+                          int thread_index)
+{
+  acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+  if (!pw->interrupt_is_pending)
+    {
+      pw->interrupt_is_pending = 1;
+      vlib_node_set_interrupt_pending (vlib_mains[thread_index],
+                                      acl_fa_worker_session_cleaner_process_node.index);
+      elog_acl_maybe_trace_X1 (am,
+                              "send_one_worker_interrupt: send interrupt to worker %u",
+                              "i4", ((u32) thread_index));
+      /* if the interrupt was requested, mark that done. */
+      /* pw->interrupt_is_needed = 0; */
+      CLIB_MEMORY_BARRIER ();
+    }
+}
+
+static int
+purgatory_has_connections (vlib_main_t * vm, acl_main_t * am,
+                          int thread_index)
+{
+  acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+
+  return (FA_SESSION_BOGUS_INDEX !=
+         pw->fa_conn_list_head[ACL_TIMEOUT_PURGATORY]);
+
+}
+
+
 /*
  * Per-worker thread interrupt-driven cleaner thread
  * to clean idle connections if there are no packets
@@ -333,6 +405,7 @@ acl_fa_worker_conn_cleaner_process (vlib_main_t * vm,
                                       "acl_fa_worker_conn_cleaner: now %lu, clearing done, nothing to do",
                                       "i8", now);
              pw->clear_in_process = 0;
+             pw->swipe_end_time = 0;
            }
          else
            {
@@ -358,7 +431,7 @@ acl_fa_worker_conn_cleaner_process (vlib_main_t * vm,
                           (u32) pw->clear_in_process);
   if (pw->clear_in_process)
     {
-      if (0 == num_expired)
+      if (pw->swipe_end_time == 0)
        {
          /* we were clearing but we could not process any more connections. time to stop. */
          clib_bitmap_zero (pw->pending_clear_sw_if_index_bitmap);
@@ -373,23 +446,19 @@ acl_fa_worker_conn_cleaner_process (vlib_main_t * vm,
                                   "acl_fa_worker_conn_cleaner: now %lu, more work to do - requesting interrupt",
                                   "i8", now);
          /* should continue clearing.. So could they please sent an interrupt again? */
-         pw->interrupt_is_needed = 1;
+         send_one_worker_interrupt (vm, am, thread_index);
+         // pw->interrupt_is_needed = 1;
        }
     }
   else
     {
-      if (num_expired >= am->fa_max_deleted_sessions_per_interval)
+      if (num_expired > 0)
        {
          /* there was too much work, we should get an interrupt ASAP */
-         pw->interrupt_is_needed = 1;
+         // pw->interrupt_is_needed = 1;
+         send_one_worker_interrupt (vm, am, thread_index);
          pw->interrupt_is_unwanted = 0;
        }
-      else if (num_expired <= am->fa_min_deleted_sessions_per_interval)
-       {
-         /* signal that they should trigger us less */
-         pw->interrupt_is_needed = 0;
-         pw->interrupt_is_unwanted = 1;
-       }
       else
        {
          /* the current rate of interrupts is ok */
@@ -401,26 +470,13 @@ acl_fa_worker_conn_cleaner_process (vlib_main_t * vm,
                               "i8i4i4", now, ((u32) pw->interrupt_is_needed),
                               ((u32) pw->interrupt_is_unwanted));
     }
-  pw->interrupt_generation = am->fa_interrupt_generation;
-  return 0;
-}
-
-static void
-send_one_worker_interrupt (vlib_main_t * vm, acl_main_t * am,
-                          int thread_index)
-{
-  acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
-  if (!pw->interrupt_is_pending)
+  /* be persistent about quickly deleting the connections from the purgatory */
+  if (purgatory_has_connections (vm, am, thread_index))
     {
-      pw->interrupt_is_pending = 1;
-      vlib_node_set_interrupt_pending (vlib_mains[thread_index],
-                                      acl_fa_worker_session_cleaner_process_node.index);
-      elog_acl_maybe_trace_X1 (am,
-                              "send_one_worker_interrupt: send interrupt to worker %d",
-                              "i4", ((u32) thread_index));
-      /* if the interrupt was requested, mark that done. */
-      /* pw->interrupt_is_needed = 0; */
+      send_one_worker_interrupt (vm, am, thread_index);
     }
+  pw->interrupt_generation = am->fa_interrupt_generation;
+  return 0;
 }
 
 static void
@@ -482,14 +538,14 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
              if ((head_expiry < next_expire) && !pw->interrupt_is_pending)
                {
                  elog_acl_maybe_trace_X3 (am,
-                                          "acl_fa_session_cleaner_process: now %lu, worker: %d tt: %d",
+                                          "acl_fa_session_cleaner_process: now %lu, worker: %u tt: %u",
                                           "i8i2i2", now, ti, tt);
                  elog_acl_maybe_trace_X2 (am,
                                           "acl_fa_session_cleaner_process: head expiry: %lu, is earlier than curr next expire: %lu",
                                           "i8i8", head_expiry, next_expire);
                  next_expire = head_expiry;
                }
-             if (~0 != pw->fa_conn_list_head[tt])
+             if (FA_SESSION_BOGUS_INDEX != pw->fa_conn_list_head[tt])
                {
                  has_pending_conns = 1;
                }
@@ -546,7 +602,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
            {
              am->fa_cleaner_cnt_delete_by_sw_index++;
              elog_acl_maybe_trace_X1 (am,
-                                      "acl_fa_session_cleaner_process: ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX %d",
+                                      "acl_fa_session_cleaner_process: ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX %u",
                                       "i4", *sw_if_index0);
              if (*sw_if_index0 == ~0)
                {
@@ -564,10 +620,9 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
                    }
                }
            }
-#ifdef FA_NODE_VERBOSE_DEBUG
-           clib_warning ("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX bitmap: %U",
-                         format_bitmap_hex, clear_sw_if_index_bitmap);
-#endif
+           acl_log_err
+             ("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX bitmap: %U, clear_all: %u",
+              format_bitmap_hex, clear_sw_if_index_bitmap, clear_all);
            vec_foreach (pw0, am->per_worker_data)
            {
              CLIB_MEMORY_BARRIER ();
@@ -575,7 +630,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
                {
                  CLIB_MEMORY_BARRIER ();
                  elog_acl_maybe_trace_X1 (am,
-                                          "ACL_FA_NODE_CLEAN: waiting previous cleaning cycle to finish on %d",
+                                          "ACL_FA_NODE_CLEAN: waiting previous cleaning cycle to finish on %u",
                                           "i4",
                                           (u32) (pw0 - am->per_worker_data));
                  vlib_process_suspend (vm, 0.0001);
@@ -587,7 +642,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
                }
              if (pw0->clear_in_process)
                {
-                 clib_warning
+                 acl_log_err
                    ("ERROR-BUG! Could not initiate cleaning on worker because another cleanup in progress");
                }
              else
@@ -603,6 +658,10 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
                      pw0->pending_clear_sw_if_index_bitmap =
                        clib_bitmap_dup (clear_sw_if_index_bitmap);
                    }
+                 acl_log_err
+                   ("ACL_FA_CLEANER: thread %u, pending clear bitmap: %U",
+                    (am->per_worker_data - pw0), format_bitmap_hex,
+                    pw0->pending_clear_sw_if_index_bitmap);
                  pw0->clear_in_process = 1;
                }
            }
@@ -610,11 +669,8 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
            send_interrupts_to_workers (vm, am);
 
            /* now wait till they all complete */
-#ifdef FA_NODE_VERBOSE_DEBUG
-           clib_warning ("CLEANER mains len: %d per-worker len: %d",
-                         vec_len (vlib_mains),
-                         vec_len (am->per_worker_data));
-#endif
+           acl_log_err ("CLEANER mains len: %u per-worker len: %d",
+                        vec_len (vlib_mains), vec_len (am->per_worker_data));
            vec_foreach (pw0, am->per_worker_data)
            {
              CLIB_MEMORY_BARRIER ();
@@ -622,7 +678,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
                {
                  CLIB_MEMORY_BARRIER ();
                  elog_acl_maybe_trace_X1 (am,
-                                          "ACL_FA_NODE_CLEAN: waiting for my cleaning cycle to finish on %d",
+                                          "ACL_FA_NODE_CLEAN: waiting for my cleaning cycle to finish on %u",
                                           "i4",
                                           (u32) (pw0 - am->per_worker_data));
                  vlib_process_suspend (vm, 0.0001);
@@ -633,11 +689,10 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
                    }
                }
            }
-#ifdef FA_NODE_VERBOSE_DEBUG
-           clib_warning ("ACL_FA_NODE_CLEAN: cleaning done");
-#endif
+           acl_log_err ("ACL_FA_NODE_CLEAN: cleaning done");
            clib_bitmap_free (clear_sw_if_index_bitmap);
          }
+         am->fa_cleaner_cnt_delete_by_sw_index_ok++;
          break;
        default:
 #ifdef FA_NODE_VERBOSE_DEBUG
index 1fc4981..709ecc8 100644 (file)
@@ -101,9 +101,16 @@ fa_session_get_timeout_type (acl_main_t * am, fa_session_t * sess)
 always_inline u64
 fa_session_get_timeout (acl_main_t * am, fa_session_t * sess)
 {
-  u64 timeout = am->vlib_main->clib_time.clocks_per_second;
-  int timeout_type = fa_session_get_timeout_type (am, sess);
-  timeout *= am->session_timeout_sec[timeout_type];
+  u64 timeout = (am->vlib_main->clib_time.clocks_per_second);
+  if (sess->link_list_id == ACL_TIMEOUT_PURGATORY)
+    {
+      timeout /= (1000000 / SESSION_PURGATORY_TIMEOUT_USEC);
+    }
+  else
+    {
+      int timeout_type = fa_session_get_timeout_type (am, sess);
+      timeout *= am->session_timeout_sec[timeout_type];
+    }
   return timeout;
 }
 
@@ -113,8 +120,12 @@ always_inline fa_session_t *
 get_session_ptr (acl_main_t * am, u16 thread_index, u32 session_index)
 {
   acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
-  fa_session_t *sess = pool_is_free_index (pw->fa_sessions_pool,
-                                          session_index) ? 0 :
+  if (session_index > vec_len (pw->fa_sessions_pool))
+    {
+      return 0;
+    }
+
+  fa_session_t *sess = (session_index > vec_len (pw->fa_sessions_pool)) ? 0 :
     pool_elt_at_index (pw->fa_sessions_pool,
                       session_index);
   return sess;
@@ -135,7 +146,9 @@ acl_fa_conn_list_add_session (acl_main_t * am, fa_full_session_id_t sess_id,
 {
   fa_session_t *sess =
     get_session_ptr (am, sess_id.thread_index, sess_id.session_index);
-  u8 list_id = fa_session_get_timeout_type (am, sess);
+  u8 list_id =
+    sess->deleted ? ACL_TIMEOUT_PURGATORY : fa_session_get_timeout_type (am,
+                                                                        sess);
   uword thread_index = os_get_thread_index ();
   acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
   /* the retrieved session thread index must be necessarily the same as the one in the key */
@@ -144,9 +157,9 @@ acl_fa_conn_list_add_session (acl_main_t * am, fa_full_session_id_t sess_id,
   ASSERT (sess->thread_index == thread_index);
   sess->link_enqueue_time = now;
   sess->link_list_id = list_id;
-  sess->link_next_idx = ~0;
+  sess->link_next_idx = FA_SESSION_BOGUS_INDEX;
   sess->link_prev_idx = pw->fa_conn_list_tail[list_id];
-  if (~0 != pw->fa_conn_list_tail[list_id])
+  if (FA_SESSION_BOGUS_INDEX != pw->fa_conn_list_tail[list_id])
     {
       fa_session_t *prev_sess =
        get_session_ptr (am, thread_index, pw->fa_conn_list_tail[list_id]);
@@ -164,15 +177,18 @@ acl_fa_conn_list_add_session (acl_main_t * am, fa_full_session_id_t sess_id,
   pw->serviced_sw_if_index_bitmap =
     clib_bitmap_set (pw->serviced_sw_if_index_bitmap, sess->sw_if_index, 1);
 
-  if (~0 == pw->fa_conn_list_head[list_id])
+  if (FA_SESSION_BOGUS_INDEX == pw->fa_conn_list_head[list_id])
     {
       pw->fa_conn_list_head[list_id] = sess_id.session_index;
+      /* set the head expiry time because it is the first element */
+      pw->fa_conn_list_head_expiry_time[list_id] =
+       now + fa_session_get_timeout (am, sess);
     }
 }
 
 static int
 acl_fa_conn_list_delete_session (acl_main_t * am,
-                                fa_full_session_id_t sess_id)
+                                fa_full_session_id_t sess_id, u64 now)
 {
   uword thread_index = os_get_thread_index ();
   acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
@@ -186,9 +202,15 @@ acl_fa_conn_list_delete_session (acl_main_t * am,
     }
   fa_session_t *sess =
     get_session_ptr (am, sess_id.thread_index, sess_id.session_index);
+  u64 next_expiry_time = ~0ULL;
   /* we should never try to delete the session with another thread index */
-  ASSERT (sess->thread_index == thread_index);
-  if (~0 != sess->link_prev_idx)
+  if (sess->thread_index != os_get_thread_index ())
+    {
+      clib_error
+       ("Attempting to delete session belonging to thread %d by thread %d",
+        sess->thread_index, thread_index);
+    }
+  if (FA_SESSION_BOGUS_INDEX != sess->link_prev_idx)
     {
       fa_session_t *prev_sess =
        get_session_ptr (am, thread_index, sess->link_prev_idx);
@@ -196,17 +218,20 @@ acl_fa_conn_list_delete_session (acl_main_t * am,
       ASSERT (prev_sess->link_list_id == sess->link_list_id);
       prev_sess->link_next_idx = sess->link_next_idx;
     }
-  if (~0 != sess->link_next_idx)
+  if (FA_SESSION_BOGUS_INDEX != sess->link_next_idx)
     {
       fa_session_t *next_sess =
        get_session_ptr (am, thread_index, sess->link_next_idx);
       /* The next session must be in the same list as the one we are deleting */
       ASSERT (next_sess->link_list_id == sess->link_list_id);
       next_sess->link_prev_idx = sess->link_prev_idx;
+      next_expiry_time = now + fa_session_get_timeout (am, next_sess);
     }
   if (pw->fa_conn_list_head[sess->link_list_id] == sess_id.session_index)
     {
       pw->fa_conn_list_head[sess->link_list_id] = sess->link_next_idx;
+      pw->fa_conn_list_head_expiry_time[sess->link_list_id] =
+       next_expiry_time;
     }
   if (pw->fa_conn_list_tail[sess->link_list_id] == sess_id.session_index)
     {
@@ -219,7 +244,7 @@ always_inline int
 acl_fa_restart_timer_for_session (acl_main_t * am, u64 now,
                                  fa_full_session_id_t sess_id)
 {
-  if (acl_fa_conn_list_delete_session (am, sess_id))
+  if (acl_fa_conn_list_delete_session (am, sess_id, now))
     {
       acl_fa_conn_list_add_session (am, sess_id, now);
       return 1;
@@ -346,50 +371,105 @@ reverse_session_add_del (acl_main_t * am, const int is_ip6,
 }
 
 always_inline void
-acl_fa_delete_session (acl_main_t * am, u32 sw_if_index,
-                      fa_full_session_id_t sess_id)
+acl_fa_deactivate_session (acl_main_t * am, u32 sw_if_index,
+                          fa_full_session_id_t sess_id)
 {
-  void *oldheap = clib_mem_set_heap (am->acl_mheap);
   fa_session_t *sess =
     get_session_ptr (am, sess_id.thread_index, sess_id.session_index);
   ASSERT (sess->thread_index == os_get_thread_index ());
   clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &sess->info.kv, 0);
 
   reverse_session_add_del (am, sess->info.pkt.is_ip6, &sess->info.kv, 0);
+  sess->deleted = 1;
+  clib_smp_atomic_add (&am->fa_session_total_deactivations, 1);
+}
 
+always_inline void
+acl_fa_put_session (acl_main_t * am, u32 sw_if_index,
+                   fa_full_session_id_t sess_id)
+{
+  if (sess_id.thread_index != os_get_thread_index ())
+    {
+      clib_error
+       ("Attempting to delete session belonging to thread %d by thread %d",
+        sess_id.thread_index, os_get_thread_index ());
+    }
+  void *oldheap = clib_mem_set_heap (am->acl_mheap);
   acl_fa_per_worker_data_t *pw = &am->per_worker_data[sess_id.thread_index];
   pool_put_index (pw->fa_sessions_pool, sess_id.session_index);
   /* Deleting from timer structures not needed,
      as the caller must have dealt with the timers. */
   vec_validate (pw->fa_session_dels_by_sw_if_index, sw_if_index);
   clib_mem_set_heap (oldheap);
-  pw->fa_session_dels_by_sw_if_index[sw_if_index]++;
+  clib_smp_atomic_add (&pw->fa_session_dels_by_sw_if_index[sw_if_index], 1);
   clib_smp_atomic_add (&am->fa_session_total_dels, 1);
 }
 
+always_inline int
+acl_fa_two_stage_delete_session (acl_main_t * am, u32 sw_if_index,
+                                fa_full_session_id_t sess_id, u64 now)
+{
+  fa_session_t *sess =
+    get_session_ptr (am, sess_id.thread_index, sess_id.session_index);
+  if (sess->deleted)
+    {
+      acl_fa_put_session (am, sw_if_index, sess_id);
+      return 1;
+    }
+  else
+    {
+      acl_fa_deactivate_session (am, sw_if_index, sess_id);
+      acl_fa_conn_list_add_session (am, sess_id, now);
+      return 0;
+    }
+}
+
 always_inline int
 acl_fa_can_add_session (acl_main_t * am, int is_input, u32 sw_if_index)
 {
   u64 curr_sess_count;
   curr_sess_count = am->fa_session_total_adds - am->fa_session_total_dels;
-  return (curr_sess_count < am->fa_conn_table_max_entries);
+  return (curr_sess_count + vec_len (vlib_mains) <
+         am->fa_conn_table_max_entries);
 }
 
 
 always_inline void
 acl_fa_try_recycle_session (acl_main_t * am, int is_input, u16 thread_index,
-                           u32 sw_if_index)
+                           u32 sw_if_index, u64 now)
 {
   /* try to recycle a TCP transient session */
   acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
-  u8 timeout_type = ACL_TIMEOUT_TCP_TRANSIENT;
-  fa_full_session_id_t sess_id;
-  sess_id.session_index = pw->fa_conn_list_head[timeout_type];
-  if (~0 != sess_id.session_index)
+  fa_full_session_id_t volatile sess_id;
+  int n_recycled = 0;
+
+  /* clean up sessions from purgatory, if we can */
+  sess_id.session_index = pw->fa_conn_list_head[ACL_TIMEOUT_PURGATORY];
+  while ((FA_SESSION_BOGUS_INDEX != sess_id.session_index)
+        && n_recycled < am->fa_max_deleted_sessions_per_interval)
+    {
+      sess_id.thread_index = thread_index;
+      fa_session_t *sess =
+       get_session_ptr (am, sess_id.thread_index, sess_id.session_index);
+      if (sess->link_enqueue_time + fa_session_get_timeout (am, sess) < now)
+       {
+         acl_fa_conn_list_delete_session (am, sess_id, now);
+         /* the interface that needs a session may not be the one the purged session belongs to, so account the deletion to the session's own sw_if_index */
+         acl_fa_put_session (am, sess->sw_if_index, sess_id);
+         n_recycled++;
+       }
+      else
+       break;                  /* too early to try to recycle from here, bail out */
+      sess_id.session_index = pw->fa_conn_list_head[ACL_TIMEOUT_PURGATORY];
+    }
+  sess_id.session_index = pw->fa_conn_list_head[ACL_TIMEOUT_TCP_TRANSIENT];
+  if (FA_SESSION_BOGUS_INDEX != sess_id.session_index)
     {
       sess_id.thread_index = thread_index;
-      acl_fa_conn_list_delete_session (am, sess_id);
-      acl_fa_delete_session (am, sw_if_index, sess_id);
+      acl_fa_conn_list_delete_session (am, sess_id, now);
+      acl_fa_deactivate_session (am, sw_if_index, sess_id);
+      /* the session is now marked deleted, so this enqueues it on the purgatory list */
+      acl_fa_conn_list_add_session (am, sess_id, now);
     }
 }
 
@@ -419,26 +499,31 @@ acl_fa_add_session (acl_main_t * am, int is_input, int is_ip6,
   kv.key[3] = pkv->key[3];
   kv.key[4] = pkv->key[4];
   kv.value = f_sess_id.as_u64;
+  if (kv.value == ~0)
+    {
+      clib_error ("Adding session with invalid value");
+    }
 
   memcpy (sess, pkv, sizeof (pkv->key));
   sess->last_active_time = now;
   sess->sw_if_index = sw_if_index;
   sess->tcp_flags_seen.as_u16 = 0;
   sess->thread_index = thread_index;
-  sess->link_list_id = ~0;
-  sess->link_prev_idx = ~0;
-  sess->link_next_idx = ~0;
+  sess->link_list_id = ACL_TIMEOUT_UNUSED;
+  sess->link_prev_idx = FA_SESSION_BOGUS_INDEX;
+  sess->link_next_idx = FA_SESSION_BOGUS_INDEX;
+  sess->deleted = 0;
+
+  acl_fa_conn_list_add_session (am, f_sess_id, now);
 
   ASSERT (am->fa_sessions_hash_is_initialized == 1);
-  clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &kv, 1);
 
   reverse_session_add_del (am, is_ip6, &kv, 1);
-
-  acl_fa_conn_list_add_session (am, f_sess_id, now);
+  clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &kv, 1);
 
   vec_validate (pw->fa_session_adds_by_sw_if_index, sw_if_index);
   clib_mem_set_heap (oldheap);
-  pw->fa_session_adds_by_sw_if_index[sw_if_index]++;
+  clib_smp_atomic_add (&pw->fa_session_adds_by_sw_if_index[sw_if_index], 1);
   clib_smp_atomic_add (&am->fa_session_total_adds, 1);
   return sess;
 }