acl-plugin: multicore: use pool_init_fixed for per-worker preallocated pools
diff --git a/src/plugins/acl/fa_node.c b/src/plugins/acl/fa_node.c
index 3181a22..5fa4509 100644
@@ -20,7 +20,7 @@
 #include <vnet/pg/pg.h>
 #include <vppinfra/error.h>
 #include <acl/acl.h>
-#include "bihash_40_8.h"
+#include <vppinfra/bihash_40_8.h>
 
 #include <vppinfra/bihash_template.h>
 #include <vppinfra/bihash_template.c>
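
The session lookup table in this file is an instance of the vppinfra "bihash" template: <vppinfra/bihash_40_8.h> selects the 40-byte-key / 8-byte-value variant (the change above simply takes that header from vppinfra instead of a local copy), and bihash_template.h / bihash_template.c then generate the declarations and code for that variant. A minimal sketch of how such a hash is used, assuming it is compiled inside the VPP source tree; the BV() helper and the demo_* names are local to the sketch:

    #include <string.h>
    #include <vppinfra/bihash_40_8.h>
    #include <vppinfra/bihash_template.h>
    /* bihash_template.c is included by exactly one .c file per variant;
       fa_node.c does that for 40_8, so other users only need the headers. */

    #define BV(a) a##_40_8               /* token-pasting helper, as the ACL plugin uses */

    static clib_bihash_40_8_t demo_hash; /* illustrative */

    static void
    demo_bihash_usage (void)
    {
      clib_bihash_kv_40_8_t kv, result;

      /* BV (clib_bihash_init) expands to clib_bihash_init_40_8 */
      BV (clib_bihash_init) (&demo_hash, "demo", 64 /* buckets */, 1 << 20 /* bytes */);

      memset (&kv, 0, sizeof (kv));
      kv.key[0] = 42;
      kv.value = 1;
      BV (clib_bihash_add_del) (&demo_hash, &kv, 1 /* is_add */);

      if (BV (clib_bihash_search) (&demo_hash, &kv, &result) == 0)
        {
          /* found: result.value is 1 */
        }
    }
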
@@ -39,6 +39,31 @@ typedef struct
   u8 action;
 } acl_fa_trace_t;
 
+static u8 *
+format_fa_5tuple (u8 * s, va_list * args)
+{
+  fa_5tuple_t *p5t = va_arg (*args, fa_5tuple_t *);
+
+  return format(s, "%s sw_if_index %d (lsb16 %d) l3 %s%s %U -> %U"
+                   " l4 proto %d l4_valid %d port %d -> %d tcp flags (%s) %02x rsvd %x",
+                p5t->pkt.is_input ? "input" : "output",
+                p5t->pkt.sw_if_index, p5t->l4.lsb_of_sw_if_index, p5t->pkt.is_ip6 ? "ip6" : "ip4",
+                p5t->pkt.is_nonfirst_fragment ? " non-initial fragment" : "",
+                format_ip46_address, &p5t->addr[0], p5t->pkt.is_ip6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
+                format_ip46_address, &p5t->addr[1], p5t->pkt.is_ip6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
+                p5t->l4.proto, p5t->pkt.l4_valid,
+                p5t->l4.port[0], p5t->l4.port[1],
+                p5t->pkt.tcp_flags_valid ? "valid": "invalid",
+                p5t->pkt.tcp_flags,
+                p5t->pkt.flags_reserved);
+}
+
+u8 *
+format_acl_plugin_5tuple (u8 * s, va_list * args)
+{
+  return format_fa_5tuple(s, args);
+}
+
 /* packet trace format function */
 static u8 *
 format_acl_fa_trace (u8 * s, va_list * args)
@@ -55,6 +80,9 @@ format_acl_fa_trace (u8 * s, va_list * args)
            t->match_rule_index, t->trace_bitmap,
            t->packet_info[0], t->packet_info[1], t->packet_info[2],
            t->packet_info[3], t->packet_info[4], t->packet_info[5]);
+
+  /* Also print the packet_info in a human-readable form */
+  s = format (s, "\n   %U", format_fa_5tuple, t->packet_info);
   return s;
 }
 
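
format_fa_5tuple follows the standard vppinfra formatter convention: a function of type u8 *(u8 *s, va_list *args) that appends to the vector s and is invoked through the %U directive, which is how the trace line above pulls it in. A minimal sketch of the convention, with an illustrative struct and names that are not from the plugin:

    #include <vppinfra/format.h>
    #include <vppinfra/error.h>

    typedef struct { u8 proto; u16 port; } demo_tuple_t;   /* illustrative */

    static u8 *
    format_demo_tuple (u8 * s, va_list * args)
    {
      demo_tuple_t *t = va_arg (*args, demo_tuple_t *);
      /* Append to the vector and hand it back; never print directly. */
      return format (s, "proto %d port %d", t->proto, t->port);
    }

    static void
    demo_show_tuple (demo_tuple_t * t)
    {
      /* %U consumes a formatter plus its argument(s) from the vararg list */
      u8 *s = format (0, "tuple: %U", format_demo_tuple, t);
      clib_warning ("%v", s);   /* %v prints a u8 vector */
      vec_free (s);
    }
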
@@ -170,7 +198,8 @@ single_acl_match_5tuple (acl_main_t * am, u32 acl_index, fa_5tuple_t * pkt_5tupl
       clib_warning
        ("ACL_FA_NODE_DBG acl %d rule %d pkt dst addr %U match rule addr %U/%d",
         acl_index, i, format_ip46_address, &pkt_5tuple->addr[1],
-        IP46_TYPE_ANY, format_ip46_address, &r->dst, IP46_TYPE_ANY,
+        r->is_ipv6 ? IP46_TYPE_IP6: IP46_TYPE_IP4, format_ip46_address,
+         &r->dst, r->is_ipv6 ? IP46_TYPE_IP6: IP46_TYPE_IP4,
         r->dst_prefixlen);
 #endif
 
@@ -182,7 +211,8 @@ single_acl_match_5tuple (acl_main_t * am, u32 acl_index, fa_5tuple_t * pkt_5tupl
       clib_warning
        ("ACL_FA_NODE_DBG acl %d rule %d pkt src addr %U match rule addr %U/%d",
         acl_index, i, format_ip46_address, &pkt_5tuple->addr[0],
-        IP46_TYPE_ANY, format_ip46_address, &r->src, IP46_TYPE_ANY,
+        r->is_ipv6 ? IP46_TYPE_IP6: IP46_TYPE_IP4, format_ip46_address,
+         &r->src, r->is_ipv6 ? IP46_TYPE_IP6: IP46_TYPE_IP4,
         r->src_prefixlen);
       clib_warning
        ("ACL_FA_NODE_DBG acl %d rule %d trying to match pkt proto %d with rule %d",
@@ -330,16 +360,24 @@ static void
 acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
                 int is_input, int is_l2_path, fa_5tuple_t * p5tuple_pkt)
 {
-  int l3_offset = ethernet_buffer_header_size(b0);
+  int l3_offset;
   int l4_offset;
   u16 ports[2];
   u16 proto;
+
   /* IP4 and IP6 protocol numbers of ICMP */
   static u8 icmp_protos[] = { IP_PROTOCOL_ICMP, IP_PROTOCOL_ICMP6 };
 
-  if (is_input && !(is_l2_path))
+  if (is_l2_path)
     {
-      l3_offset = 0;
+      l3_offset = ethernet_buffer_header_size(b0);
+    }
+  else
+    {
+      if (is_input)
+        l3_offset = 0;
+      else
+        l3_offset = vnet_buffer(b0)->ip.save_rewrite_length;
     }
 
   /* key[0..3] contains src/dst address and is cleared/set below */
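
The rewritten offset computation distinguishes three cases: on the L2 path the IP header sits behind the Ethernet header(s), so ethernet_buffer_header_size(b0) is the right offset; on the L3 input path current_data already points at the IP header (offset 0); and on the L3 output path the node runs after the rewrite has been prepended, so the saved rewrite length has to be skipped. Once l3_offset is known, the headers are reached relative to the buffer's current pointer; a minimal sketch with an illustrative helper, assuming a valid buffer inside a graph node:

    #include <vlib/vlib.h>
    #include <vnet/ip/ip.h>

    /* Illustrative: read the IPv4 protocol field at a pre-computed L3 offset. */
    static u8
    demo_ip4_proto_at_offset (vlib_buffer_t * b0, int l3_offset)
    {
      ip4_header_t *ip4 =
        (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + l3_offset);
      return ip4->protocol;
    }
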
@@ -606,7 +644,13 @@ acl_fa_verify_init_sessions (acl_main_t * am)
     /* Allocate the per-worker sessions pools */
     for (wk = 0; wk < vec_len (am->per_worker_data); wk++) {
       acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk];
-      pool_alloc_aligned(pw->fa_sessions_pool, am->fa_conn_table_max_entries, CLIB_CACHE_LINE_BYTES);
+
+      /*
+      * Instead of preallocating the pool and its free bitmap by hand, use pool_init_fixed:
+      * pool_alloc_aligned(pw->fa_sessions_pool, am->fa_conn_table_max_entries, CLIB_CACHE_LINE_BYTES);
+      * clib_bitmap_validate(pool_header(pw->fa_sessions_pool)->free_bitmap, am->fa_conn_table_max_entries);
+      */
+      pool_init_fixed(pw->fa_sessions_pool, am->fa_conn_table_max_entries);
     }
 
     /* ... and the interface session hash table */
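
This is the change the subject line refers to: pool_init_fixed sizes the per-worker session pool for exactly fa_conn_table_max_entries elements up front, allocating both the element storage and the free-element bookkeeping, so session creation in the datapath never grows (or moves) the pool. A small sketch of the pool lifecycle with the vppinfra pool macros, assuming the clib heap is already initialized (as vlib does at startup); the element type and names are stand-ins:

    #include <vppinfra/pool.h>

    typedef struct { u64 key; u32 flags; } demo_session_t;   /* stand-in */

    static void
    demo_fixed_pool (void)
    {
      demo_session_t *sessions = 0;   /* pools, like vectors, start out as NULL */
      demo_session_t *s;
      u32 max_sessions = 1 << 16;
      u32 index;

      /* One up-front allocation sized for max_sessions elements. */
      pool_init_fixed (sessions, max_sessions);

      pool_get (sessions, s);         /* take a free slot */
      s->key = 42;
      s->flags = 0;

      index = s - sessions;           /* stable index, e.g. what goes into the bihash */

      s = pool_elt_at_index (sessions, index);   /* index -> element, asserts if free */
      pool_put (sessions, s);         /* return the slot */
    }

As the replaced comment suggests, pool_alloc_aligned reserved only the element storage, while the free-element bitmap could still grow later at pool_put time; pool_init_fixed covers both in one step.
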
@@ -621,14 +665,14 @@ acl_fa_verify_init_sessions (acl_main_t * am)
 static inline fa_session_t *get_session_ptr(acl_main_t *am, u16 thread_index, u32 session_index)
 {
   acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
-  fa_session_t *sess = pw->fa_sessions_pool + session_index;
+  fa_session_t *sess = pool_is_free_index (pw->fa_sessions_pool, session_index) ? 0 : pool_elt_at_index(pw->fa_sessions_pool, session_index);
   return sess;
 }
 
 static inline int is_valid_session_ptr(acl_main_t *am, u16 thread_index, fa_session_t *sess)
 {
   acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
-  return ((sess - pw->fa_sessions_pool) < pool_len(pw->fa_sessions_pool));
+  return ((sess != 0) && ((sess - pw->fa_sessions_pool) < pool_len(pw->fa_sessions_pool)));
 }
 
 static void
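
get_session_ptr now guards the index instead of doing raw pointer arithmetic: a free (or out-of-range) index yields a NULL pointer, and is_valid_session_ptr treats that NULL as invalid. The same guard, reduced to a minimal sketch with a stand-in element type:

    #include <vppinfra/pool.h>

    typedef struct { u64 opaque; } demo_session_t;   /* stand-in */

    /* Return the element only if 'index' names a live pool entry, else NULL. */
    static inline demo_session_t *
    demo_get_session_ptr (demo_session_t * pool, u32 index)
    {
      if (pool_is_free_index (pool, index))   /* also true for out-of-range indices */
        return 0;
      return pool_elt_at_index (pool, index); /* would assert on a free element */
    }
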
@@ -731,6 +775,7 @@ acl_fa_track_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
 static void
 acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t sess_id)
 {
+  void *oldheap = clib_mem_set_heap(am->acl_mheap);
   fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index);
   ASSERT(sess->thread_index == os_get_thread_index ());
   BV (clib_bihash_add_del) (&am->fa_sessions_hash,
@@ -740,6 +785,7 @@ acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t se
   /* Deleting from timer structures not needed,
      as the caller must have dealt with the timers. */
   vec_validate (pw->fa_session_dels_by_sw_if_index, sw_if_index);
+  clib_mem_set_heap (oldheap);
   pw->fa_session_dels_by_sw_if_index[sw_if_index]++;
   clib_smp_atomic_add(&am->fa_session_total_dels, 1);
 }
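
Session deletion now runs its allocating calls (the bihash update and the vec_validate of the per-interface counter vector) on the plugin's private heap, am->acl_mheap: clib_mem_set_heap installs a heap as the calling thread's allocation target and returns the previous one, so the shape is save, switch, do the work that may allocate, restore. A minimal sketch of that pattern; the private heap here is just a parameter, only the vppinfra calls are real:

    #include <vppinfra/mem.h>
    #include <vppinfra/vec.h>

    /* Grow a counter vector on a private heap, then restore the caller's heap. */
    static void
    demo_bump_counter_on_heap (void *private_heap, u32 ** counters, u32 index)
    {
      void *oldheap = clib_mem_set_heap (private_heap);   /* returns the previous heap */

      vec_validate (*counters, index);   /* may allocate -> lands on private_heap */

      clib_mem_set_heap (oldheap);       /* restore before unrelated work */
      (*counters)[index] += 1;           /* plain stores don't care which heap is set */
    }

Note how the diff restores the heap before the counter increment and the atomic add: only the calls that can allocate need the private heap.
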
@@ -869,7 +915,7 @@ acl_fa_try_recycle_session (acl_main_t * am, int is_input, u16 thread_index, u32
   }
 }
 
-static void
+static fa_session_t *
 acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
                    fa_5tuple_t * p5tuple)
 {
@@ -877,6 +923,7 @@ acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
   clib_bihash_kv_40_8_t kv;
   fa_full_session_id_t f_sess_id;
   uword thread_index = os_get_thread_index();
+  void *oldheap = clib_mem_set_heap(am->acl_mheap);
   acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
 
   f_sess_id.thread_index = thread_index;
@@ -909,8 +956,10 @@ acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
   acl_fa_conn_list_add_session(am, f_sess_id, now);
 
   vec_validate (pw->fa_session_adds_by_sw_if_index, sw_if_index);
+  clib_mem_set_heap (oldheap);
   pw->fa_session_adds_by_sw_if_index[sw_if_index]++;
   clib_smp_atomic_add(&am->fa_session_total_adds, 1);
+  return sess;
 }
 
 static int
@@ -1075,8 +1124,10 @@ acl_fa_node_fn (vlib_main_t * vm,
 
                  if (acl_fa_can_add_session (am, is_input, sw_if_index0))
                    {
-                      acl_fa_add_session (am, is_input, sw_if_index0, now,
-                                         &kv_sess);
+                      fa_session_t *sess = acl_fa_add_session (am, is_input, sw_if_index0, now,
+                                                              &kv_sess);
+                      acl_fa_track_session (am, is_input, sw_if_index0, now,
+                                            sess, &fa_5tuple);
                      pkts_new_session += 1;
                    }
                  else
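
acl_fa_add_session now hands the freshly created session back to its caller, and the node immediately passes it to acl_fa_track_session instead of re-resolving it from the session id, saving a lookup on the new-session path; with the fixed-size per-worker pool above, the element pointer cannot be invalidated by later pool growth. A compact sketch of that create-then-use flow; everything here is a stand-in except the pool macros:

    #include <vppinfra/pool.h>

    typedef struct { u64 last_active_time; u32 packet_count; } demo_session_t;  /* stand-in */

    static demo_session_t *demo_sessions;   /* per-worker pool in the real code */

    /* Create a session and return the element so the caller can keep using it. */
    static demo_session_t *
    demo_add_session (u64 now)
    {
      demo_session_t *sess;
      pool_get (demo_sessions, sess);   /* in the plugin the pool is pre-sized, so elements never move */
      sess->last_active_time = now;
      sess->packet_count = 0;
      return sess;
    }

    static void
    demo_track_session (demo_session_t * sess, u64 now)
    {
      sess->last_active_time = now;     /* no second index -> pointer lookup needed */
      sess->packet_count += 1;
    }
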
@@ -1354,6 +1405,7 @@ acl_fa_worker_conn_cleaner_process(vlib_main_t * vm,
        pw->interrupt_is_unwanted = 0;
      }
    }
+   pw->interrupt_generation = am->fa_interrupt_generation;
    return 0;
 }
 
@@ -1397,7 +1449,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
 
   am->fa_current_cleaner_timer_wait_interval = max_timer_wait_interval;
   am->fa_cleaner_node_index = acl_fa_session_cleaner_process_node.index;
-
+  am->fa_interrupt_generation = 1;
   while (1)
     {
       now = clib_cpu_time_now ();
@@ -1496,10 +1548,6 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
            clib_warning("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX bitmap: %U", format_bitmap_hex, clear_sw_if_index_bitmap);
 #endif
            vec_foreach(pw0, am->per_worker_data) {
-              if ((pw0 == am->per_worker_data) && (vec_len(vlib_mains) > 1)) {
-                /* thread 0 in multithreaded scenario is not used */
-                continue;
-              }
               CLIB_MEMORY_BARRIER ();
              while (pw0->clear_in_process) {
                 CLIB_MEMORY_BARRIER ();
@@ -1534,10 +1582,6 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
            clib_warning("CLEANER mains len: %d per-worker len: %d", vec_len(vlib_mains), vec_len(am->per_worker_data));
 #endif
            vec_foreach(pw0, am->per_worker_data) {
-              if ((pw0 == am->per_worker_data) && (vec_len(vlib_mains) > 1)) {
-                /* thread 0 in multithreaded scenario is not used */
-                continue;
-              }
               CLIB_MEMORY_BARRIER ();
              while (pw0->clear_in_process) {
                 CLIB_MEMORY_BARRIER ();
@@ -1574,15 +1618,28 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
       if (event_data)
        _vec_len (event_data) = 0;
 
+      /*
+       * If the interrupts have not been processed yet, wait a bit,
+       * but only up to a point.
+       */
+      int need_more_wait = 0;
+      int max_wait_cycles = 100;
+      do {
+        need_more_wait = 0;
+        vec_foreach(pw0, am->per_worker_data) {
+          if (pw0->interrupt_generation != am->fa_interrupt_generation) {
+            need_more_wait = 1;
+          }
+        }
+        if (need_more_wait) {
+          vlib_process_suspend(vm, 0.0001);
+        }
+      } while (need_more_wait && (--max_wait_cycles > 0));
 
       int interrupts_needed = 0;
       int interrupts_unwanted = 0;
 
       vec_foreach(pw0, am->per_worker_data) {
-        if ((pw0 == am->per_worker_data) && (vec_len(vlib_mains) > 1)) {
-          /* thread 0 in multithreaded scenario is not used */
-          continue;
-        }
         if (pw0->interrupt_is_needed) {
           interrupts_needed++;
           /* the per-worker value is reset when sending the interrupt */
@@ -1603,6 +1660,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
           am->fa_current_cleaner_timer_wait_interval += cpu_cps * am->fa_cleaner_wait_time_increment;
       }
       am->fa_cleaner_cnt_event_cycles++;
+      am->fa_interrupt_generation++;
     }
   /* NOT REACHED */
   return 0;
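
The cleaner process and the per-worker interrupt handlers now synchronize through a generation counter: the cleaner bumps am->fa_interrupt_generation once per cycle (seeding it with 1 at start), each worker records the value it has serviced in pw->interrupt_generation, and before evaluating the per-worker flags the cleaner naps in 100 us steps, bounded by max_wait_cycles, until every worker has caught up. A stripped-down sketch of the same handshake in plain C; it uses C11 atomics where the plugin relies on aligned word stores and CLIB_MEMORY_BARRIER, and the scheduling (vlib_process_suspend) is reduced to a caller-supplied nap callback:

    #include <stdatomic.h>
    #include <stdbool.h>

    #define N_WORKERS        4
    #define MAX_WAIT_CYCLES  100

    typedef struct {
      atomic_uint interrupt_generation;      /* last generation this worker handled */
    } worker_t;

    typedef struct {
      atomic_uint fa_interrupt_generation;   /* bumped by the cleaner each cycle */
      worker_t workers[N_WORKERS];
    } main_state_t;

    /* Worker side: after servicing the interrupt, acknowledge the current generation. */
    static void
    worker_interrupt_handler (main_state_t * m, int wk)
    {
      /* ... per-worker cleanup work happens here ... */
      atomic_store (&m->workers[wk].interrupt_generation,
                    atomic_load (&m->fa_interrupt_generation));
    }

    /* Cleaner side: wait (bounded) until every worker acknowledged, then advance. */
    static void
    cleaner_cycle (main_state_t * m, void (*nap) (void))
    {
      unsigned gen = atomic_load (&m->fa_interrupt_generation);
      int wait_cycles = MAX_WAIT_CYCLES;
      bool need_more_wait;

      do {
        need_more_wait = false;
        for (int wk = 0; wk < N_WORKERS; wk++)
          if (atomic_load (&m->workers[wk].interrupt_generation) != gen)
            need_more_wait = true;
        if (need_more_wait && nap)
          nap ();                  /* vlib_process_suspend (vm, 0.0001) in the plugin */
      } while (need_more_wait && --wait_cycles > 0);

      atomic_fetch_add (&m->fa_interrupt_generation, 1);   /* start the next generation */
    }
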
@@ -1616,8 +1674,10 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
   if (enable_disable) {
     acl_fa_verify_init_sessions(am);
     am->fa_total_enabled_count++;
+    void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
     vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
                                  ACL_FA_CLEANER_RESCHEDULE, 0);
+    clib_mem_set_heap (oldheap);
   } else {
     am->fa_total_enabled_count--;
   }
@@ -1625,10 +1685,12 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
   if (is_input)
     {
       ASSERT(clib_bitmap_get(am->fa_in_acl_on_sw_if_index, sw_if_index) != enable_disable);
+      void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
       vnet_feature_enable_disable ("ip4-unicast", "acl-plugin-in-ip4-fa",
                                   sw_if_index, enable_disable, 0, 0);
       vnet_feature_enable_disable ("ip6-unicast", "acl-plugin-in-ip6-fa",
                                   sw_if_index, enable_disable, 0, 0);
+      clib_mem_set_heap (oldheap);
       am->fa_in_acl_on_sw_if_index =
        clib_bitmap_set (am->fa_in_acl_on_sw_if_index, sw_if_index,
                         enable_disable);
@@ -1636,10 +1698,12 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
   else
     {
       ASSERT(clib_bitmap_get(am->fa_out_acl_on_sw_if_index, sw_if_index) != enable_disable);
+      void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
       vnet_feature_enable_disable ("ip4-output", "acl-plugin-out-ip4-fa",
                                   sw_if_index, enable_disable, 0, 0);
       vnet_feature_enable_disable ("ip6-output", "acl-plugin-out-ip6-fa",
                                   sw_if_index, enable_disable, 0, 0);
+      clib_mem_set_heap (oldheap);
       am->fa_out_acl_on_sw_if_index =
        clib_bitmap_set (am->fa_out_acl_on_sw_if_index, sw_if_index,
                         enable_disable);
@@ -1650,9 +1714,11 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
 #ifdef FA_NODE_VERBOSE_DEBUG
       clib_warning("ENABLE-DISABLE: clean the connections on interface %d", sw_if_index);
 #endif
+      void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
       vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
                                 ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX,
                                 sw_if_index);
+      clib_mem_set_heap (oldheap);
     }
 }
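
The enable/disable path applies the same heap discipline in the other direction: before calling into infrastructure that allocates on the main VLIB heap (vlib_process_signal_event, vnet_feature_enable_disable) it switches to am->vlib_main->heap_base and restores the previous heap afterwards, since by this point the plugin may be running with its private heap installed. If the pattern keeps spreading, it could be wrapped in a pair of tiny helpers; these are purely hypothetical and not part of the patch:

    #include <vlib/vlib.h>
    #include <vppinfra/mem.h>

    /* Hypothetical convenience wrappers around the switch/restore pattern. */
    static inline void *
    demo_enter_main_heap (vlib_main_t * vm)
    {
      return clib_mem_set_heap (vm->heap_base);
    }

    static inline void
    demo_leave_heap (void *oldheap)
    {
      clib_mem_set_heap (oldheap);
    }

    /*
     * Usage sketch, mirroring acl_fa_enable_disable:
     *
     *   void *oldheap = demo_enter_main_heap (am->vlib_main);
     *   vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
     *                              ACL_FA_CLEANER_RESCHEDULE, 0);
     *   demo_leave_heap (oldheap);
     */
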