acl-plugin: performance optimizations for established connections
[vpp.git] / src / plugins / acl / session_inlines.h
index d43e550..76c6384 100644 (file)
 
 /* ICMPv4 invert type for stateful ACL */
 static const u8 icmp4_invmap[] = {
-  [ICMP4_echo_reply] = ICMP4_echo_request + 1,
-  [ICMP4_timestamp_reply] = ICMP4_timestamp_request + 1,
-  [ICMP4_information_reply] = ICMP4_information_request + 1,
-  [ICMP4_address_mask_reply] = ICMP4_address_mask_request + 1
+  /*
+   * Indexed by the ICMPv4 request type; each value is the matching reply
+   * type plus one. Slots that are not listed are implicitly zero.
+   * NOTE(review): presumably read through icmp_invmap[] in
+   * reverse_l4_u64_slowpath_valid(), which tests for non-zero and then
+   * subtracts the one - confirm against the table definition.
+   */
+  [ICMP4_echo_request] = ICMP4_echo_reply + 1,
+  [ICMP4_timestamp_request] = ICMP4_timestamp_reply + 1,
+  [ICMP4_information_request] = ICMP4_information_reply + 1,
+  [ICMP4_address_mask_request] = ICMP4_address_mask_reply + 1
 };
 
 /* Supported ICMPv4 messages for session creation */
@@ -32,8 +32,8 @@ static const u8 icmp4_valid_new[] = {
 
 /* ICMPv6 invert type for stateful ACL */
 static const u8 icmp6_invmap[] = {
-  [ICMP6_echo_reply - 128] = ICMP6_echo_request + 1,
-  [ICMP6_node_information_response - 128] = ICMP6_node_information_request + 1
+  /*
+   * Indexed by (ICMPv6 request type - 128); each value is the matching
+   * reply/response type plus one. Slots that are not listed are implicitly
+   * zero.
+   * NOTE(review): the consumer is expected to apply the same -128 offset
+   * and to subtract the one again - confirm at the lookup site.
+   */
+  [ICMP6_echo_request - 128] = ICMP6_echo_reply + 1,
+  [ICMP6_node_information_request - 128] = ICMP6_node_information_response + 1
 };
 
 /* Supported ICMPv6 messages for session creation */
@@ -70,7 +70,7 @@ acl_fa_ifc_has_out_acl (acl_main_t * am, int sw_if_index0)
 always_inline int
 fa_session_get_timeout_type (acl_main_t * am, fa_session_t * sess)
 {
-  /* seen both SYNs and ACKs but not FINs means we are in establshed state */
+  /* seen both SYNs and ACKs but not FINs means we are in established state */
   u16 masked_flags =
     sess->tcp_flags_seen.as_u16 & ((TCP_FLAGS_RSTFINACKSYN << 8) +
                                   TCP_FLAGS_RSTFINACKSYN);
@@ -101,23 +101,37 @@ fa_session_get_timeout_type (acl_main_t * am, fa_session_t * sess)
+/*
+ * Return the timeout of a session expressed in clock ticks.
+ * A session whose link_list_id is ACL_TIMEOUT_PURGATORY gets the purgatory
+ * interval; any other session gets session_timeout_sec[] for its timeout
+ * type, multiplied by clocks-per-second.
+ */
 always_inline u64
 fa_session_get_timeout (acl_main_t * am, fa_session_t * sess)
 {
-  u64 timeout = am->vlib_main->clib_time.clocks_per_second;
-  int timeout_type = fa_session_get_timeout_type (am, sess);
-  timeout *= am->session_timeout_sec[timeout_type];
+  u64 timeout = (am->vlib_main->clib_time.clocks_per_second);
+  if (sess->link_list_id == ACL_TIMEOUT_PURGATORY)
+    {
+      /*
+       * The divisor is the number of purgatory intervals per second,
+       * computed with integer division.
+       * NOTE(review): it becomes zero if SESSION_PURGATORY_TIMEOUT_USEC
+       * is larger than 1000000 - confirm the constant's value.
+       */
+      timeout /= (1000000 / SESSION_PURGATORY_TIMEOUT_USEC);
+    }
+  else
+    {
+      int timeout_type = fa_session_get_timeout_type (am, sess);
+      timeout *= am->session_timeout_sec[timeout_type];
+    }
   return timeout;
 }
 
+/*
+ * Return the element at session_index in the session pool of the given
+ * worker thread. No range check is made on session_index here.
+ */
+always_inline fa_session_t *
+get_session_ptr_no_check (acl_main_t * am, u16 thread_index,
+                         u32 session_index)
+{
+  fa_session_t *pool = am->per_worker_data[thread_index].fa_sessions_pool;
+
+  return pool_elt_at_index (pool, session_index);
+}
 
 
+/*
+ * Return the session at session_index in the given worker's pool, or 0
+ * when the index lies beyond the end of the pool vector.
+ */
 always_inline fa_session_t *
 get_session_ptr (acl_main_t * am, u16 thread_index, u32 session_index)
 {
   acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
-  fa_session_t *sess = pool_is_free_index (pw->fa_sessions_pool,
-                                          session_index) ? 0 :
-    pool_elt_at_index (pw->fa_sessions_pool,
-                      session_index);
-  return sess;
+
+  /*
+   * Only the upper bound is checked. The removed code also returned 0 for
+   * an in-range slot that was free; that case is no longer filtered here.
+   */
+  if (PREDICT_FALSE (session_index >= vec_len (pw->fa_sessions_pool)))
+    return 0;
+
+  return pool_elt_at_index (pw->fa_sessions_pool, session_index);
 }
 
 always_inline int
@@ -135,7 +149,9 @@ acl_fa_conn_list_add_session (acl_main_t * am, fa_full_session_id_t sess_id,
 {
   fa_session_t *sess =
     get_session_ptr (am, sess_id.thread_index, sess_id.session_index);
-  u8 list_id = fa_session_get_timeout_type (am, sess);
+  u8 list_id =
+    sess->deleted ? ACL_TIMEOUT_PURGATORY : fa_session_get_timeout_type (am,
+                                                                        sess);
   uword thread_index = os_get_thread_index ();
   acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
   /* the retrieved session thread index must be necessarily the same as the one in the key */
@@ -144,9 +160,9 @@ acl_fa_conn_list_add_session (acl_main_t * am, fa_full_session_id_t sess_id,
   ASSERT (sess->thread_index == thread_index);
   sess->link_enqueue_time = now;
   sess->link_list_id = list_id;
-  sess->link_next_idx = ~0;
+  sess->link_next_idx = FA_SESSION_BOGUS_INDEX;
   sess->link_prev_idx = pw->fa_conn_list_tail[list_id];
-  if (~0 != pw->fa_conn_list_tail[list_id])
+  if (FA_SESSION_BOGUS_INDEX != pw->fa_conn_list_tail[list_id])
     {
       fa_session_t *prev_sess =
        get_session_ptr (am, thread_index, pw->fa_conn_list_tail[list_id]);
@@ -164,15 +180,18 @@ acl_fa_conn_list_add_session (acl_main_t * am, fa_full_session_id_t sess_id,
   pw->serviced_sw_if_index_bitmap =
     clib_bitmap_set (pw->serviced_sw_if_index_bitmap, sess->sw_if_index, 1);
 
-  if (~0 == pw->fa_conn_list_head[list_id])
+  if (FA_SESSION_BOGUS_INDEX == pw->fa_conn_list_head[list_id])
     {
       pw->fa_conn_list_head[list_id] = sess_id.session_index;
+      /* set the head expiry time because it is the first element */
+      pw->fa_conn_list_head_expiry_time[list_id] =
+       now + fa_session_get_timeout (am, sess);
     }
 }
 
 static int
 acl_fa_conn_list_delete_session (acl_main_t * am,
-                                fa_full_session_id_t sess_id)
+                                fa_full_session_id_t sess_id, u64 now)
 {
   uword thread_index = os_get_thread_index ();
   acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
@@ -186,9 +205,15 @@ acl_fa_conn_list_delete_session (acl_main_t * am,
     }
   fa_session_t *sess =
     get_session_ptr (am, sess_id.thread_index, sess_id.session_index);
+  u64 next_expiry_time = ~0ULL;
   /* we should never try to delete the session with another thread index */
-  ASSERT (sess->thread_index == thread_index);
-  if (~0 != sess->link_prev_idx)
+  if (sess->thread_index != os_get_thread_index ())
+    {
+      clib_error
+       ("Attempting to delete session belonging to thread %d by thread %d",
+        sess->thread_index, thread_index);
+    }
+  if (FA_SESSION_BOGUS_INDEX != sess->link_prev_idx)
     {
       fa_session_t *prev_sess =
        get_session_ptr (am, thread_index, sess->link_prev_idx);
@@ -196,17 +221,20 @@ acl_fa_conn_list_delete_session (acl_main_t * am,
       ASSERT (prev_sess->link_list_id == sess->link_list_id);
       prev_sess->link_next_idx = sess->link_next_idx;
     }
-  if (~0 != sess->link_next_idx)
+  if (FA_SESSION_BOGUS_INDEX != sess->link_next_idx)
     {
       fa_session_t *next_sess =
        get_session_ptr (am, thread_index, sess->link_next_idx);
       /* The next session must be in the same list as the one we are deleting */
       ASSERT (next_sess->link_list_id == sess->link_list_id);
       next_sess->link_prev_idx = sess->link_prev_idx;
+      next_expiry_time = now + fa_session_get_timeout (am, next_sess);
     }
   if (pw->fa_conn_list_head[sess->link_list_id] == sess_id.session_index)
     {
       pw->fa_conn_list_head[sess->link_list_id] = sess->link_next_idx;
+      pw->fa_conn_list_head_expiry_time[sess->link_list_id] =
+       next_expiry_time;
     }
   if (pw->fa_conn_list_tail[sess->link_list_id] == sess_id.session_index)
     {
@@ -219,7 +247,7 @@ always_inline int
 acl_fa_restart_timer_for_session (acl_main_t * am, u64 now,
                                  fa_full_session_id_t sess_id)
 {
-  if (acl_fa_conn_list_delete_session (am, sess_id))
+  if (acl_fa_conn_list_delete_session (am, sess_id, now))
     {
       acl_fa_conn_list_add_session (am, sess_id, now);
       return 1;
@@ -227,25 +255,38 @@ acl_fa_restart_timer_for_session (acl_main_t * am, u64 now,
   else
     {
       /*
-       * Our thread does not own this connection, so we can not delete
-       * The session. To avoid the complicated signaling, we simply
-       * pick the list waiting time to be the shortest of the timeouts.
-       * This way we do not have to do anything special, and let
-       * the regular requeue check take care of everything.
+       * Our thread does not own this connection, so we can not requeue
+       * The session. So we post the signal to the owner.
        */
+      aclp_post_session_change_request (am, sess_id.thread_index,
+                                       sess_id.session_index,
+                                       ACL_FA_REQ_SESS_RESCHEDULE);
       return 0;
     }
 }
 
+/*
+ * Return 1 when any of the six l3_zero_pad elements of the 5-tuple is
+ * non-zero, 0 when all of them are zero.
+ */
+always_inline int
+is_ip6_5tuple (fa_5tuple_t * p5t)
+{
+  /* bitwise OR: all six elements are combined without short-circuiting */
+  int any_pad_set = (p5t->l3_zero_pad[0]
+                    | p5t->l3_zero_pad[1]
+                    | p5t->l3_zero_pad[2]
+                    | p5t->l3_zero_pad[3]
+                    | p5t->l3_zero_pad[4]
+                    | p5t->l3_zero_pad[5]) != 0;
+
+  return any_pad_set;
+}
 
+/*
+ * Record activity on an existing session: stamp last_active_time with
+ * 'now' and accumulate the packet's TCP flags into the per-direction
+ * tcp_flags_seen byte selected by is_input.
+ * am, sw_if_index and pkt_len are not used by this body.
+ * Always returns 3. NOTE(review): the meaning of that value is not
+ * visible here - confirm at the call sites.
+ */
 always_inline u8
 acl_fa_track_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
-                     fa_session_t * sess, fa_5tuple_t * pkt_5tuple)
+                     fa_session_t * sess, fa_5tuple_t * pkt_5tuple,
+                     u32 pkt_len)
 {
   sess->last_active_time = now;
-  if (pkt_5tuple->pkt.tcp_flags_valid)
+  u8 old_flags = sess->tcp_flags_seen.as_u8[is_input];
+  u8 new_flags = old_flags | pkt_5tuple->pkt.tcp_flags;
+
+  /* store only when the flags are valid and would add a new bit */
+  int flags_need_update = pkt_5tuple->pkt.tcp_flags_valid
+    && (old_flags != new_flags);
+  if (PREDICT_FALSE (flags_need_update))
     {
-      sess->tcp_flags_seen.as_u8[is_input] |= pkt_5tuple->pkt.tcp_flags;
+      sess->tcp_flags_seen.as_u8[is_input] = new_flags;
     }
   return 3;
 }
@@ -260,12 +301,12 @@ reverse_l4_u64_fastpath (u64 l4, int is_ip6)
   l4o.port[0] = l4i.port[1];
 
   l4o.non_port_l4_data = l4i.non_port_l4_data;
-  l4o.is_input = 1 - l4i.is_input;
+  l4o.l4_flags = l4i.l4_flags ^ FA_SK_L4_FLAG_IS_INPUT;
   return l4o.as_u64;
 }
 
-always_inline u64
-reverse_l4_u64_slowpath (u64 l4, int is_ip6)
+always_inline int
+reverse_l4_u64_slowpath_valid (u64 l4, int is_ip6, u64 * out)
 {
   fa_session_l4_key_t l4i = {.as_u64 = l4 };
   fa_session_l4_key_t l4o;
@@ -294,69 +335,107 @@ reverse_l4_u64_slowpath (u64 l4, int is_ip6)
        * The other messages will be forwarded without creating a reverse session.
        */
 
-      if (type >= 0 && (type <= icmp_valid_new_size[is_ip6])
-         && (icmp_valid_new[is_ip6][type])
-         && (type <= icmp_invmap_size[is_ip6]) && icmp_invmap[is_ip6][type])
+      int valid_reverse_sess = (type >= 0
+                               && (type <= icmp_valid_new_size[is_ip6])
+                               && (icmp_valid_new[is_ip6][type])
+                               && (type <= icmp_invmap_size[is_ip6])
+                               && icmp_invmap[is_ip6][type]);
+      if (valid_reverse_sess)
        {
-         /*
-          * we set the inverse direction and correct the port,
-          * if it is okay to add the reverse session.
-          * If not, then the same session will be added twice
-          * to bihash, which is the same as adding just one session.
-          */
-         l4o.is_input = 1 - l4i.is_input;
+         l4o.l4_flags = l4i.l4_flags ^ FA_SK_L4_FLAG_IS_INPUT;
          l4o.port[0] = icmp_invmap[is_ip6][type] - 1;
        }
 
-      return l4o.as_u64;
+      *out = l4o.as_u64;
+      return valid_reverse_sess;
     }
   else
-    return reverse_l4_u64_fastpath (l4, is_ip6);
+    *out = reverse_l4_u64_fastpath (l4, is_ip6);
+
+  return 1;
 }
 
-always_inline u64
-reverse_l4_u64 (u64 l4, int is_ip6)
+/*
+ * Add (is_add != 0) or delete (is_add == 0) the reverse-direction entry of
+ * an IPv6 session key in fa_ip6_sessions_hash. The reverse key carries the
+ * same value as the forward key.
+ */
+always_inline void
+reverse_session_add_del_ip6 (acl_main_t * am,
+                            clib_bihash_kv_40_8_t * pkv, int is_add)
 {
-  fa_session_l4_key_t l4i = {.as_u64 = l4 };
-
-  if (PREDICT_FALSE (l4i.is_slowpath))
+  clib_bihash_kv_40_8_t kv2;
+  /* key[0..1] and key[2..3] trade places */
+  kv2.key[0] = pkv->key[2];
+  kv2.key[1] = pkv->key[3];
+  kv2.key[2] = pkv->key[0];
+  kv2.key[3] = pkv->key[1];
+  /* the last u64 needs special treatment (ports, etc.) so we do it last */
+  kv2.value = pkv->value;
+  if (PREDICT_FALSE (is_session_l4_key_u64_slowpath (pkv->key[4])))
     {
-      return reverse_l4_u64_slowpath (l4, is_ip6);
+      /* slow path: the hash is touched only if a reverse key is valid */
+      if (reverse_l4_u64_slowpath_valid (pkv->key[4], 1, &kv2.key[4]))
+       clib_bihash_add_del_40_8 (&am->fa_ip6_sessions_hash, &kv2, is_add);
     }
   else
     {
-      return reverse_l4_u64_fastpath (l4, is_ip6);
+      kv2.key[4] = reverse_l4_u64_fastpath (pkv->key[4], 1);
+      clib_bihash_add_del_40_8 (&am->fa_ip6_sessions_hash, &kv2, is_add);
     }
 }
 
+/*
+ * Add (is_add != 0) or delete (is_add == 0) the reverse-direction entry of
+ * an IPv4 session key in fa_ip4_sessions_hash. The reverse key carries the
+ * same value as the forward key.
+ */
 always_inline void
-reverse_session_add_del (acl_main_t * am, const int is_ip6,
-                        clib_bihash_kv_40_8_t * pkv, int is_add)
+reverse_session_add_del_ip4 (acl_main_t * am,
+                            clib_bihash_kv_16_8_t * pkv, int is_add)
 {
-  clib_bihash_kv_40_8_t kv2;
-  /* the first 4xu64 is two addresses, so just swap them */
-  kv2.key[0] = pkv->key[2];
-  kv2.key[1] = pkv->key[3];
-  kv2.key[2] = pkv->key[0];
-  kv2.key[3] = pkv->key[1];
-  /* the last u64 needs special treatment (ports, etc.) */
-  kv2.key[4] = reverse_l4_u64 (pkv->key[4], is_ip6);
+  clib_bihash_kv_16_8_t kv2;
+  /*
+   * exchange the two 32-bit halves of key[0]
+   * (presumably the two IPv4 addresses - confirm against fa_5tuple_t)
+   */
+  kv2.key[0] =
+    ((pkv->key[0] & 0xffffffff) << 32) | ((pkv->key[0] >> 32) & 0xffffffff);
+  /* the last u64 needs special treatment (ports, etc.) so we do it last */
   kv2.value = pkv->value;
-  clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &kv2, is_add);
+  if (PREDICT_FALSE (is_session_l4_key_u64_slowpath (pkv->key[1])))
+    {
+      /* slow path: the hash is touched only if a reverse key is valid */
+      if (reverse_l4_u64_slowpath_valid (pkv->key[1], 0, &kv2.key[1]))
+       clib_bihash_add_del_16_8 (&am->fa_ip4_sessions_hash, &kv2, is_add);
+    }
+  else
+    {
+      kv2.key[1] = reverse_l4_u64_fastpath (pkv->key[1], 0);
+      clib_bihash_add_del_16_8 (&am->fa_ip4_sessions_hash, &kv2, is_add);
+    }
 }
 
+/*
+ * Take a session out of the lookup tables without releasing its pool
+ * element: delete the forward and reverse hash entries (IPv6 or IPv4
+ * table, chosen by sess->is_ip6), set sess->deleted and bump
+ * fa_session_total_deactivations. The hash operations run with the heap
+ * switched to am->acl_mheap; the previous heap is restored on exit.
+ * sw_if_index is not used by this body.
+ * NOTE(review): sess is dereferenced without a null check although
+ * get_session_ptr() can return 0 - confirm callers pass a valid index.
+ */
 always_inline void
-acl_fa_delete_session (acl_main_t * am, u32 sw_if_index,
-                      fa_full_session_id_t sess_id)
+acl_fa_deactivate_session (acl_main_t * am, u32 sw_if_index,
+                          fa_full_session_id_t sess_id)
 {
-  void *oldheap = clib_mem_set_heap (am->acl_mheap);
   fa_session_t *sess =
     get_session_ptr (am, sess_id.thread_index, sess_id.session_index);
   ASSERT (sess->thread_index == os_get_thread_index ());
-  clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &sess->info.kv, 0);
+  void *oldheap = clib_mem_set_heap (am->acl_mheap);
+  if (sess->is_ip6)
+    {
+      clib_bihash_add_del_40_8 (&am->fa_ip6_sessions_hash,
+                               &sess->info.kv_40_8, 0);
+      reverse_session_add_del_ip6 (am, &sess->info.kv_40_8, 0);
+    }
+  else
+    {
+      clib_bihash_add_del_16_8 (&am->fa_ip4_sessions_hash,
+                               &sess->info.kv_16_8, 0);
+      reverse_session_add_del_ip4 (am, &sess->info.kv_16_8, 0);
+    }
 
-  reverse_session_add_del (am, sess->info.pkt.is_ip6, &sess->info.kv, 0);
+  sess->deleted = 1;
+  clib_atomic_fetch_add (&am->fa_session_total_deactivations, 1);
+  clib_mem_set_heap (oldheap);
+}
 
+always_inline void
+acl_fa_put_session (acl_main_t * am, u32 sw_if_index,
+                   fa_full_session_id_t sess_id)
+{
+  if (sess_id.thread_index != os_get_thread_index ())
+    {
+      clib_error
+       ("Attempting to delete session belonging to thread %d by thread %d",
+        sess_id.thread_index, os_get_thread_index ());
+    }
+  void *oldheap = clib_mem_set_heap (am->acl_mheap);
   acl_fa_per_worker_data_t *pw = &am->per_worker_data[sess_id.thread_index];
   pool_put_index (pw->fa_sessions_pool, sess_id.session_index);
   /* Deleting from timer structures not needed,
@@ -364,7 +443,26 @@ acl_fa_delete_session (acl_main_t * am, u32 sw_if_index,
   vec_validate (pw->fa_session_dels_by_sw_if_index, sw_if_index);
   clib_mem_set_heap (oldheap);
   pw->fa_session_dels_by_sw_if_index[sw_if_index]++;
-  clib_smp_atomic_add (&am->fa_session_total_dels, 1);
+  clib_atomic_fetch_add (&am->fa_session_total_dels, 1);
+}
+
+/*
+ * Advance a session one step through the two-stage deletion.
+ * A session not yet marked deleted is deactivated and put back on a
+ * connection list (returns 0); a session already marked deleted has its
+ * pool element released (returns 1).
+ */
+always_inline int
+acl_fa_two_stage_delete_session (acl_main_t * am, u32 sw_if_index,
+                                fa_full_session_id_t sess_id, u64 now)
+{
+  fa_session_t *sess =
+    get_session_ptr (am, sess_id.thread_index, sess_id.session_index);
+
+  if (!sess->deleted)
+    {
+      /* first stage: drop from the hash tables, then requeue */
+      acl_fa_deactivate_session (am, sw_if_index, sess_id);
+      acl_fa_conn_list_add_session (am, sess_id, now);
+      return 0;
+    }
+
+  /* second stage: give the pool element back */
+  acl_fa_put_session (am, sw_if_index, sess_id);
+  return 1;
 }
 
 always_inline int
@@ -372,35 +470,56 @@ acl_fa_can_add_session (acl_main_t * am, int is_input, u32 sw_if_index)
 {
   u64 curr_sess_count;
   curr_sess_count = am->fa_session_total_adds - am->fa_session_total_dels;
-  return (curr_sess_count < am->fa_conn_table_max_entries);
+  return (curr_sess_count + vec_len (vlib_mains) <
+         am->fa_conn_table_max_entries);
 }
 
 
+/*
+ * Try to make room for new sessions on the given worker.
+ * First releases timed-out sessions from the head of the purgatory list,
+ * at most fa_max_deleted_sessions_per_interval of them; then deactivates
+ * the head of the TCP transient list, if any, and requeues it.
+ * is_input is not used by this body.
+ */
 always_inline void
 acl_fa_try_recycle_session (acl_main_t * am, int is_input, u16 thread_index,
-                           u32 sw_if_index)
+                           u32 sw_if_index, u64 now)
 {
   /* try to recycle a TCP transient session */
   acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
-  u8 timeout_type = ACL_TIMEOUT_TCP_TRANSIENT;
-  fa_full_session_id_t sess_id;
-  sess_id.session_index = pw->fa_conn_list_head[timeout_type];
-  if (~0 != sess_id.session_index)
+  fa_full_session_id_t volatile sess_id;
+  int n_recycled = 0;
+
+  /* clean up sessions from purgatory, if we can */
+  sess_id.session_index = pw->fa_conn_list_head[ACL_TIMEOUT_PURGATORY];
+  while ((FA_SESSION_BOGUS_INDEX != sess_id.session_index)
+        && n_recycled < am->fa_max_deleted_sessions_per_interval)
+    {
+      sess_id.thread_index = thread_index;
+      fa_session_t *sess =
+       get_session_ptr (am, sess_id.thread_index, sess_id.session_index);
+      if (sess->link_enqueue_time + fa_session_get_timeout (am, sess) < now)
+       {
+         acl_fa_conn_list_delete_session (am, sess_id, now);
+         /* interface that needs the sessions may not be the interface of the session. */
+         acl_fa_put_session (am, sess->sw_if_index, sess_id);
+         n_recycled++;
+       }
+      else
+       break;                  /* too early to try to recycle from here, bail out */
+      /* re-read the list head, which the delete above may have moved */
+      sess_id.session_index = pw->fa_conn_list_head[ACL_TIMEOUT_PURGATORY];
+    }
+  sess_id.session_index = pw->fa_conn_list_head[ACL_TIMEOUT_TCP_TRANSIENT];
+  if (FA_SESSION_BOGUS_INDEX != sess_id.session_index)
     {
       sess_id.thread_index = thread_index;
-      acl_fa_conn_list_delete_session (am, sess_id);
-      acl_fa_delete_session (am, sw_if_index, sess_id);
+      acl_fa_conn_list_delete_session (am, sess_id, now);
+      acl_fa_deactivate_session (am, sw_if_index, sess_id);
+      /* this goes to purgatory list */
+      acl_fa_conn_list_add_session (am, sess_id, now);
     }
 }
 
 
-always_inline fa_session_t *
+always_inline fa_full_session_id_t
 acl_fa_add_session (acl_main_t * am, int is_input, int is_ip6,
                    u32 sw_if_index, u64 now, fa_5tuple_t * p5tuple,
                    u16 current_policy_epoch)
 {
-  clib_bihash_kv_40_8_t *pkv = &p5tuple->kv;
-  clib_bihash_kv_40_8_t kv;
   fa_full_session_id_t f_sess_id;
   uword thread_index = os_get_thread_index ();
   void *oldheap = clib_mem_set_heap (am->acl_mheap);
@@ -409,48 +528,143 @@ acl_fa_add_session (acl_main_t * am, int is_input, int is_ip6,
   f_sess_id.thread_index = thread_index;
   fa_session_t *sess;
 
+  if (f_sess_id.as_u64 == ~0)
+    {
+      clib_error ("Adding session with invalid value");
+    }
+
   pool_get_aligned (pw->fa_sessions_pool, sess, CLIB_CACHE_LINE_BYTES);
   f_sess_id.session_index = sess - pw->fa_sessions_pool;
   f_sess_id.intf_policy_epoch = current_policy_epoch;
 
-  kv.key[0] = pkv->key[0];
-  kv.key[1] = pkv->key[1];
-  kv.key[2] = pkv->key[2];
-  kv.key[3] = pkv->key[3];
-  kv.key[4] = pkv->key[4];
-  kv.value = f_sess_id.as_u64;
+  if (is_ip6)
+    {
+      sess->info.kv_40_8.key[0] = p5tuple->kv_40_8.key[0];
+      sess->info.kv_40_8.key[1] = p5tuple->kv_40_8.key[1];
+      sess->info.kv_40_8.key[2] = p5tuple->kv_40_8.key[2];
+      sess->info.kv_40_8.key[3] = p5tuple->kv_40_8.key[3];
+      sess->info.kv_40_8.key[4] = p5tuple->kv_40_8.key[4];
+      sess->info.kv_40_8.value = f_sess_id.as_u64;
+    }
+  else
+    {
+      sess->info.kv_16_8.key[0] = p5tuple->kv_16_8.key[0];
+      sess->info.kv_16_8.key[1] = p5tuple->kv_16_8.key[1];
+      sess->info.kv_16_8.value = f_sess_id.as_u64;
+    }
 
-  memcpy (sess, pkv, sizeof (pkv->key));
   sess->last_active_time = now;
   sess->sw_if_index = sw_if_index;
   sess->tcp_flags_seen.as_u16 = 0;
   sess->thread_index = thread_index;
-  sess->link_list_id = ~0;
-  sess->link_prev_idx = ~0;
-  sess->link_next_idx = ~0;
-
-  ASSERT (am->fa_sessions_hash_is_initialized == 1);
-  clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &kv, 1);
-
-  reverse_session_add_del (am, is_ip6, &kv, 1);
+  sess->link_list_id = ACL_TIMEOUT_UNUSED;
+  sess->link_prev_idx = FA_SESSION_BOGUS_INDEX;
+  sess->link_next_idx = FA_SESSION_BOGUS_INDEX;
+  sess->deleted = 0;
+  sess->is_ip6 = is_ip6;
 
   acl_fa_conn_list_add_session (am, f_sess_id, now);
 
+  ASSERT (am->fa_sessions_hash_is_initialized == 1);
+  if (is_ip6)
+    {
+      reverse_session_add_del_ip6 (am, &sess->info.kv_40_8, 1);
+      clib_bihash_add_del_40_8 (&am->fa_ip6_sessions_hash,
+                               &sess->info.kv_40_8, 1);
+    }
+  else
+    {
+      reverse_session_add_del_ip4 (am, &sess->info.kv_16_8, 1);
+      clib_bihash_add_del_16_8 (&am->fa_ip4_sessions_hash,
+                               &sess->info.kv_16_8, 1);
+    }
+
   vec_validate (pw->fa_session_adds_by_sw_if_index, sw_if_index);
   clib_mem_set_heap (oldheap);
   pw->fa_session_adds_by_sw_if_index[sw_if_index]++;
-  clib_smp_atomic_add (&am->fa_session_total_adds, 1);
-  return sess;
+  clib_atomic_fetch_add (&am->fa_session_total_adds, 1);
+  return f_sess_id;
 }
 
+/*
+ * Look up the session for a 5-tuple in the IPv6 or IPv4 session hash,
+ * selected by is_ip6.
+ * Returns 1 when an entry was found and 0 otherwise. *pvalue_sess receives
+ * the value stored with the entry, or ~0ULL when nothing was found.
+ * sw_if_index0 is not used by this body.
+ */
 always_inline int
-acl_fa_find_session (acl_main_t * am, u32 sw_if_index0, fa_5tuple_t * p5tuple,
-                    clib_bihash_kv_40_8_t * pvalue_sess)
+acl_fa_find_session (acl_main_t * am, int is_ip6, u32 sw_if_index0,
+                    fa_5tuple_t * p5tuple, u64 * pvalue_sess)
+{
+  int res = 0;
+  if (is_ip6)
+    {
+      clib_bihash_kv_40_8_t kv_result;
+      /*
+       * preset the value so that a failed search hands back a defined
+       * result, the same way acl_fa_find_session_with_hash() does
+       */
+      kv_result.value = ~0ULL;
+      res = (clib_bihash_search_inline_2_40_8
+            (&am->fa_ip6_sessions_hash, &p5tuple->kv_40_8, &kv_result) == 0);
+      *pvalue_sess = kv_result.value;
+    }
+  else
+    {
+      clib_bihash_kv_16_8_t kv_result;
+      /* see above: never copy out an uninitialized value on a miss */
+      kv_result.value = ~0ULL;
+      res = (clib_bihash_search_inline_2_16_8
+            (&am->fa_ip4_sessions_hash, &p5tuple->kv_16_8, &kv_result) == 0);
+      *pvalue_sess = kv_result.value;
+    }
+  return res;
+}
+
+/*
+ * Compute the bihash hash of a 5-tuple key, using the 40_8 hash for IPv6
+ * and the 16_8 hash for IPv4. am and sw_if_index0 are not used.
+ */
+always_inline u64
+acl_fa_make_session_hash (acl_main_t * am, int is_ip6, u32 sw_if_index0,
+                         fa_5tuple_t * p5tuple)
+{
+  return is_ip6 ? clib_bihash_hash_40_8 (&p5tuple->kv_40_8)
+    : clib_bihash_hash_16_8 (&p5tuple->kv_16_8);
+}
+
+/*
+ * Issue a bucket prefetch for a precomputed hash in the IPv6 or IPv4
+ * session hash table, selected by is_ip6.
+ */
+always_inline void
+acl_fa_prefetch_session_bucket_for_hash (acl_main_t * am, int is_ip6,
+                                        u64 hash)
 {
-  return (clib_bihash_search_40_8
-         (&am->fa_sessions_hash, &p5tuple->kv, pvalue_sess) == 0);
+  if (is_ip6)
+    clib_bihash_prefetch_bucket_40_8 (&am->fa_ip6_sessions_hash, hash);
+  else
+    clib_bihash_prefetch_bucket_16_8 (&am->fa_ip4_sessions_hash, hash);
 }
 
+/*
+ * Issue a data prefetch for a precomputed hash in the IPv6 or IPv4
+ * session hash table, selected by is_ip6.
+ */
+always_inline void
+acl_fa_prefetch_session_data_for_hash (acl_main_t * am, int is_ip6, u64 hash)
+{
+  if (!is_ip6)
+    {
+      clib_bihash_prefetch_data_16_8 (&am->fa_ip4_sessions_hash, hash);
+      return;
+    }
+
+  clib_bihash_prefetch_data_40_8 (&am->fa_ip6_sessions_hash, hash);
+}
+
+/*
+ * Look up the session for a 5-tuple using a hash computed beforehand.
+ * Returns 1 when an entry was found and 0 otherwise. *pvalue_sess receives
+ * the value of the search result, which is preset to ~0ULL before the
+ * search. sw_if_index0 is not used by this body.
+ */
+always_inline int
+acl_fa_find_session_with_hash (acl_main_t * am, int is_ip6, u32 sw_if_index0,
+                              u64 hash, fa_5tuple_t * p5tuple,
+                              u64 * pvalue_sess)
+{
+  if (is_ip6)
+    {
+      clib_bihash_kv_40_8_t hit6;
+      int rv6;
+
+      hit6.value = ~0ULL;
+      rv6 = clib_bihash_search_inline_2_with_hash_40_8
+       (&am->fa_ip6_sessions_hash, hash, &p5tuple->kv_40_8, &hit6);
+      *pvalue_sess = hit6.value;
+      return rv6 == 0;
+    }
+
+  clib_bihash_kv_16_8_t hit4;
+  int rv4;
+
+  hit4.value = ~0ULL;
+  rv4 = clib_bihash_search_inline_2_with_hash_16_8
+    (&am->fa_ip4_sessions_hash, hash, &p5tuple->kv_16_8, &hit4);
+  *pvalue_sess = hit4.value;
+  return rv4 == 0;
+}
+
+
 /*
  * fd.io coding-style-patch-verification: ON
  *