acl-plugin: rework the optimization 7383, fortify acl-plugin memory behavior (VPP...
[vpp.git] / src / plugins / acl / fa_node.c
index 3c23c80..74079a2 100644 (file)
@@ -330,7 +330,7 @@ static void
 acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
                 int is_input, int is_l2_path, fa_5tuple_t * p5tuple_pkt)
 {
-  int l3_offset = 14;
+  int l3_offset = ethernet_buffer_header_size(b0);
   int l4_offset;
   u16 ports[2];
   u16 proto;
@@ -599,29 +599,38 @@ fa_session_get_timeout (acl_main_t * am, fa_session_t * sess)
 }
 
 static void
-acl_fa_ifc_init_sessions (acl_main_t * am, int sw_if_index0)
+acl_fa_verify_init_sessions (acl_main_t * am) /* One-time setup: allocates every per-worker session pool and initializes am->fa_sessions_hash; does nothing once am->fa_sessions_hash_is_initialized is set. */
 {
-  /// FIXME-MULTICORE: lock around this function
-#ifdef FA_NODE_VERBOSE_DEBUG
-  clib_warning
-    ("Initializing bihash for sw_if_index %d num buckets %lu memory size %llu",
-     sw_if_index0, am->fa_conn_table_hash_num_buckets,
-     am->fa_conn_table_hash_memory_size);
-#endif
-  BV (clib_bihash_init) (&am->fa_sessions_hash,
+  if (!am->fa_sessions_hash_is_initialized) { /* everything below runs only while the flag is still clear */
+    u16 wk;
+    /* Allocate the per-worker sessions pools */
+    for (wk = 0; wk < vec_len (am->per_worker_data); wk++) {
+      acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk];
+      pool_alloc_aligned(pw->fa_sessions_pool, am->fa_conn_table_max_entries, CLIB_CACHE_LINE_BYTES); /* sized from am->fa_conn_table_max_entries, aligned to CLIB_CACHE_LINE_BYTES */
+    }
+
+    /* ... and the interface session hash table */
+    BV (clib_bihash_init) (&am->fa_sessions_hash,
                         "ACL plugin FA session bihash",
                         am->fa_conn_table_hash_num_buckets,
                         am->fa_conn_table_hash_memory_size);
-  am->fa_sessions_hash_is_initialized = 1;
+    am->fa_sessions_hash_is_initialized = 1; /* set last, after the pools and the hash are ready; NOTE(review): no lock is taken in this function - confirm callers serialize */
+  }
 }
 
 static inline fa_session_t *get_session_ptr(acl_main_t *am, u16 thread_index, u32 session_index)
 {
   acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
-  fa_session_t *sess = pw->fa_sessions_pool + session_index;
+  fa_session_t *sess = pool_is_free_index (pw->fa_sessions_pool, session_index) ? 0 : pool_elt_at_index(pw->fa_sessions_pool, session_index); /* yields 0 when session_index refers to a free slot of this thread's pool, so callers have to cope with a null result */
   return sess;
 }
 
+static inline int is_valid_session_ptr(acl_main_t *am, u16 thread_index, fa_session_t *sess) /* Nonzero when sess is non-null and its element offset from the start of the given thread's fa_sessions_pool is below pool_len(). */
+{
+  acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+  return ((sess != 0) && ((sess - pw->fa_sessions_pool) < pool_len(pw->fa_sessions_pool))); /* range check only: whether the slot is currently free is not tested here */
+}
+
 static void
 acl_fa_conn_list_add_session (acl_main_t * am, fa_full_session_id_t sess_id, u64 now)
 {
@@ -648,9 +657,6 @@ acl_fa_conn_list_add_session (acl_main_t * am, fa_full_session_id_t sess_id, u64
 
   if (~0 == pw->fa_conn_list_head[list_id]) {
     pw->fa_conn_list_head[list_id] = sess_id.session_index;
-    /* If it is a first conn in any list, kick the cleaner */
-    vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
-                                 ACL_FA_CLEANER_RESCHEDULE, 0);
   }
 }
 
@@ -725,6 +731,7 @@ acl_fa_track_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
 static void
 acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t sess_id)
 {
+  void *oldheap = clib_mem_set_heap(am->acl_mheap);
   fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index);
   ASSERT(sess->thread_index == os_get_thread_index ());
   BV (clib_bihash_add_del) (&am->fa_sessions_hash,
@@ -733,8 +740,9 @@ acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t se
   pool_put_index (pw->fa_sessions_pool, sess_id.session_index);
   /* Deleting from timer structures not needed,
      as the caller must have dealt with the timers. */
-  vec_validate (am->fa_session_dels_by_sw_if_index, sw_if_index);
-  am->fa_session_dels_by_sw_if_index[sw_if_index]++;
+  vec_validate (pw->fa_session_dels_by_sw_if_index, sw_if_index);
+  clib_mem_set_heap (oldheap);
+  pw->fa_session_dels_by_sw_if_index[sw_if_index]++;
   clib_smp_atomic_add(&am->fa_session_total_dels, 1);
 }
 
@@ -749,10 +757,14 @@ acl_fa_can_add_session (acl_main_t * am, int is_input, u32 sw_if_index)
 static u64
 acl_fa_get_list_head_expiry_time(acl_main_t *am, acl_fa_per_worker_data_t *pw, u64 now, u16 thread_index, int timeout_type)
 {
-  if (~0 == pw->fa_conn_list_head[timeout_type]) {
+  fa_session_t *sess = get_session_ptr(am, thread_index, pw->fa_conn_list_head[timeout_type]);
+  /*
+   * We cannot check just the index here, because in the meantime the worker thread
+   * might dequeue the connection from the head just as we are about to check it.
+   */
+  if (!is_valid_session_ptr(am, thread_index, sess)) {
     return ~0LL; // infinity.
   } else {
-    fa_session_t *sess = get_session_ptr(am, thread_index, pw->fa_conn_list_head[timeout_type]);
     u64 timeout_time =
               sess->link_enqueue_time + fa_session_get_list_timeout (am, sess);
     return timeout_time;
@@ -867,6 +879,7 @@ acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
   clib_bihash_kv_40_8_t kv;
   fa_full_session_id_t f_sess_id;
   uword thread_index = os_get_thread_index();
+  void *oldheap = clib_mem_set_heap(am->acl_mheap);
   acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
 
   f_sess_id.thread_index = thread_index;
@@ -893,17 +906,14 @@ acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
 
 
 
-  if (!acl_fa_ifc_has_sessions (am, sw_if_index))
-    {
-      acl_fa_ifc_init_sessions (am, sw_if_index);
-    }
-
+  ASSERT(am->fa_sessions_hash_is_initialized == 1);
   BV (clib_bihash_add_del) (&am->fa_sessions_hash,
                            &kv, 1);
   acl_fa_conn_list_add_session(am, f_sess_id, now);
 
-  vec_validate (am->fa_session_adds_by_sw_if_index, sw_if_index);
-  am->fa_session_adds_by_sw_if_index[sw_if_index]++;
+  vec_validate (pw->fa_session_adds_by_sw_if_index, sw_if_index);
+  clib_mem_set_heap (oldheap);
+  pw->fa_session_adds_by_sw_if_index[sw_if_index]++;
   clib_smp_atomic_add(&am->fa_session_total_adds, 1);
 }
 
@@ -931,7 +941,6 @@ acl_fa_node_fn (vlib_main_t * vm,
   u32 pkts_acl_permit = 0;
   u32 pkts_restart_session_timer = 0;
   u32 trace_bitmap = 0;
-  u32 feature_bitmap0;
   acl_main_t *am = &acl_main;
   fa_5tuple_t fa_5tuple, kv_sess;
   clib_bihash_kv_40_8_t value_sess;
@@ -977,8 +986,6 @@ acl_fa_node_fn (vlib_main_t * vm,
            sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
          else
            sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
-         if (is_l2_path)
-           feature_bitmap0 = vnet_buffer (b0)->l2.feature_bitmap;
 
          /*
           * Extract the L3/L4 matching info into a 5-tuple structure,
@@ -1089,9 +1096,7 @@ acl_fa_node_fn (vlib_main_t * vm,
          if (action > 0)
            {
              if (is_l2_path)
-               next0 =
-                 feat_bitmap_get_next_node_index (l2_feat_next_node_index,
-                                                  feature_bitmap0);
+               next0 = vnet_l2_feature_next (b0, l2_feat_next_node_index, 0);
              else
                vnet_feature_next (sw_if_index0, &next0, b0);
            }
@@ -1342,8 +1347,10 @@ acl_fa_worker_conn_cleaner_process(vlib_main_t * vm,
      if (num_expired >= am->fa_max_deleted_sessions_per_interval) {
        /* there was too much work, we should get an interrupt ASAP */
        pw->interrupt_is_needed = 1;
+       pw->interrupt_is_unwanted = 0;
      } else if (num_expired <= am->fa_min_deleted_sessions_per_interval) {
        /* signal that they should trigger us less */
+       pw->interrupt_is_needed = 0;
        pw->interrupt_is_unwanted = 1;
      } else {
        /* the current rate of interrupts is ok */
@@ -1359,11 +1366,11 @@ send_one_worker_interrupt (vlib_main_t * vm, acl_main_t *am, int thread_index)
 {
   acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
   if (!pw->interrupt_is_pending) {
+    pw->interrupt_is_pending = 1;
     vlib_node_set_interrupt_pending (vlib_mains[thread_index],
                   acl_fa_worker_session_cleaner_process_node.index);
-    pw->interrupt_is_pending = 1;
     /* if the interrupt was requested, mark that done. */
-    pw->interrupt_is_needed = 0;
+    /* pw->interrupt_is_needed = 0; */
   }
 }
 
@@ -1430,8 +1437,8 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
         }
       }
 
-      /* If no pending connections then no point in timing out */
-      if (!has_pending_conns)
+      /* If no pending connections and no ACL applied then no point in timing out */
+      if (!has_pending_conns && (0 == am->fa_total_enabled_count))
         {
           am->fa_cleaner_cnt_wait_without_timeout++;
           (void) vlib_process_wait_for_event (vm);
@@ -1465,6 +1472,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
          {
             uword *clear_sw_if_index_bitmap = 0;
            uword *sw_if_index0;
+            int clear_all = 0;
 #ifdef FA_NODE_VERBOSE_DEBUG
            clib_warning("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX received");
 #endif
@@ -1476,12 +1484,26 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
                ("ACL_FA_NODE_CLEAN: ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX: %d",
                 *sw_if_index0);
 #endif
-              clear_sw_if_index_bitmap = clib_bitmap_set(clear_sw_if_index_bitmap, *sw_if_index0, 1);
+              if (*sw_if_index0 == ~0)
+                {
+                  clear_all = 1;
+                }
+              else
+                {
+                  if (!pool_is_free_index (am->vnet_main->interface_main.sw_interfaces, *sw_if_index0))
+                    {
+                      clear_sw_if_index_bitmap = clib_bitmap_set(clear_sw_if_index_bitmap, *sw_if_index0, 1);
+                    }
+                }
            }
 #ifdef FA_NODE_VERBOSE_DEBUG
            clib_warning("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX bitmap: %U", format_bitmap_hex, clear_sw_if_index_bitmap);
 #endif
            vec_foreach(pw0, am->per_worker_data) {
+              if ((pw0 == am->per_worker_data) && (vec_len(vlib_mains) > 1)) {
+                /* thread 0 in multithreaded scenario is not used */
+                continue;
+              }
               CLIB_MEMORY_BARRIER ();
              while (pw0->clear_in_process) {
                 CLIB_MEMORY_BARRIER ();
@@ -1496,7 +1518,15 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
               if (pw0->clear_in_process) {
                 clib_warning("ERROR-BUG! Could not initiate cleaning on worker because another cleanup in progress");
              } else {
-                pw0->pending_clear_sw_if_index_bitmap = clib_bitmap_dup(clear_sw_if_index_bitmap);
+                if (clear_all)
+                  {
+                    /* if we need to clear all, then just clear the interfaces that we are servicing */
+                    pw0->pending_clear_sw_if_index_bitmap = clib_bitmap_dup(pw0->serviced_sw_if_index_bitmap);
+                  }
+                else
+                  {
+                    pw0->pending_clear_sw_if_index_bitmap = clib_bitmap_dup(clear_sw_if_index_bitmap);
+                  }
                 pw0->clear_in_process = 1;
               }
             }
@@ -1508,6 +1538,10 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
            clib_warning("CLEANER mains len: %d per-worker len: %d", vec_len(vlib_mains), vec_len(am->per_worker_data));
 #endif
            vec_foreach(pw0, am->per_worker_data) {
+              if ((pw0 == am->per_worker_data) && (vec_len(vlib_mains) > 1)) {
+                /* thread 0 in multithreaded scenario is not used */
+                continue;
+              }
               CLIB_MEMORY_BARRIER ();
              while (pw0->clear_in_process) {
                 CLIB_MEMORY_BARRIER ();
@@ -1549,6 +1583,10 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
       int interrupts_unwanted = 0;
 
       vec_foreach(pw0, am->per_worker_data) {
+        if ((pw0 == am->per_worker_data) && (vec_len(vlib_mains) > 1)) {
+          /* thread 0 in multithreaded scenario is not used */
+          continue;
+        }
         if (pw0->interrupt_is_needed) {
           interrupts_needed++;
           /* the per-worker value is reset when sending the interrupt */
@@ -1561,6 +1599,8 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
       if (interrupts_needed) {
         /* they need more interrupts, do less waiting around next time */
         am->fa_current_cleaner_timer_wait_interval /= 2;
+        /* never drop to a zero wait, though - we need to leave room for others to run */
+        am->fa_current_cleaner_timer_wait_interval += 1; 
       } else if (interrupts_unwanted) {
         /* slowly increase the amount of sleep up to a limit */
         if (am->fa_current_cleaner_timer_wait_interval < max_timer_wait_interval)
@@ -1577,22 +1617,39 @@ void
 acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
 {
   acl_main_t *am = &acl_main;
+  if (enable_disable) {
+    acl_fa_verify_init_sessions(am);
+    am->fa_total_enabled_count++;
+    void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
+    vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
+                                 ACL_FA_CLEANER_RESCHEDULE, 0);
+    clib_mem_set_heap (oldheap);
+  } else {
+    am->fa_total_enabled_count--;
+  }
+
   if (is_input)
     {
+      ASSERT(clib_bitmap_get(am->fa_in_acl_on_sw_if_index, sw_if_index) != enable_disable);
+      void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
       vnet_feature_enable_disable ("ip4-unicast", "acl-plugin-in-ip4-fa",
                                   sw_if_index, enable_disable, 0, 0);
       vnet_feature_enable_disable ("ip6-unicast", "acl-plugin-in-ip6-fa",
                                   sw_if_index, enable_disable, 0, 0);
+      clib_mem_set_heap (oldheap);
       am->fa_in_acl_on_sw_if_index =
        clib_bitmap_set (am->fa_in_acl_on_sw_if_index, sw_if_index,
                         enable_disable);
     }
   else
     {
+      ASSERT(clib_bitmap_get(am->fa_out_acl_on_sw_if_index, sw_if_index) != enable_disable);
+      void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
       vnet_feature_enable_disable ("ip4-output", "acl-plugin-out-ip4-fa",
                                   sw_if_index, enable_disable, 0, 0);
       vnet_feature_enable_disable ("ip6-output", "acl-plugin-out-ip6-fa",
                                   sw_if_index, enable_disable, 0, 0);
+      clib_mem_set_heap (oldheap);
       am->fa_out_acl_on_sw_if_index =
        clib_bitmap_set (am->fa_out_acl_on_sw_if_index, sw_if_index,
                         enable_disable);
@@ -1603,9 +1660,11 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
 #ifdef FA_NODE_VERBOSE_DEBUG
       clib_warning("ENABLE-DISABLE: clean the connections on interface %d", sw_if_index);
 #endif
+      void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
       vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
                                 ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX,
                                 sw_if_index);
+      clib_mem_set_heap (oldheap);
     }
 }