acl-plugin: implement an optional session reclassification when ACL is (re-)applied
[vpp.git] / src / plugins / acl / fa_node.c
index c71429e..833b0fa 100644 (file)
 #include <vnet/vnet.h>
 #include <vnet/pg/pg.h>
 #include <vppinfra/error.h>
+
+
 #include <acl/acl.h>
-#include "bihash_40_8.h"
+#include <vnet/ip/icmp46_packet.h>
+
+#include <plugins/acl/fa_node.h>
+#include <plugins/acl/acl.h>
+#include <plugins/acl/lookup_context.h>
+#include <plugins/acl/public_inlines.h>
 
+#include <vppinfra/bihash_40_8.h>
 #include <vppinfra/bihash_template.h>
 #include <vppinfra/bihash_template.c>
 
-#include "fa_node.h"
-
 typedef struct
 {
   u32 next_index;
   u32 sw_if_index;
+  u32 lc_index;
   u32 match_acl_in_index;
   u32 match_rule_index;
   u64 packet_info[6];
@@ -38,6 +45,61 @@ typedef struct
   u8 action;
 } acl_fa_trace_t;
 
+/* ICMPv4 invert type for stateful ACL */
+static const u8 icmp4_invmap[] = {
+  [ICMP4_echo_reply] = ICMP4_echo_request + 1,
+  [ICMP4_timestamp_reply] = ICMP4_timestamp_request + 1,
+  [ICMP4_information_reply] = ICMP4_information_request + 1,
+  [ICMP4_address_mask_reply] = ICMP4_address_mask_request + 1
+};
+
+/* Supported ICMPv4 messages for session creation */
+static const u8 icmp4_valid_new[] = {
+  [ICMP4_echo_request] = 1,
+  [ICMP4_timestamp_request] = 1,
+  [ICMP4_information_request] = 1,
+  [ICMP4_address_mask_request] = 1
+};
+
+/* ICMPv6 invert type for stateful ACL */
+static const u8 icmp6_invmap[] = {
+  [ICMP6_echo_reply - 128]   = ICMP6_echo_request + 1,
+  [ICMP6_node_information_response - 128] = ICMP6_node_information_request + 1
+};
+
+/* Supported ICMPv6 messages for session creation */
+static const u8 icmp6_valid_new[] = {
+  [ICMP6_echo_request - 128] = 1,
+  [ICMP6_node_information_request - 128] = 1
+};
+
+/* IP4 and IP6 protocol numbers of ICMP */
+static u8 icmp_protos[] = { IP_PROTOCOL_ICMP, IP_PROTOCOL_ICMP6 };
+
+static u8 *
+format_fa_5tuple (u8 * s, va_list * args)
+{
+  fa_5tuple_t *p5t = va_arg (*args, fa_5tuple_t *);
+
+  return format(s, "lc_index %d (lsb16 of sw_if_index %d) l3 %s%s %U -> %U"
+                   " l4 proto %d l4_valid %d port %d -> %d tcp flags (%s) %02x rsvd %x",
+                p5t->pkt.lc_index, p5t->l4.lsb_of_sw_if_index, p5t->pkt.is_ip6 ? "ip6" : "ip4",
+                p5t->pkt.is_nonfirst_fragment ? " non-initial fragment" : "",
+                format_ip46_address, &p5t->addr[0], p5t->pkt.is_ip6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
+                format_ip46_address, &p5t->addr[1], p5t->pkt.is_ip6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
+                p5t->l4.proto, p5t->pkt.l4_valid,
+                p5t->l4.port[0], p5t->l4.port[1],
+                p5t->pkt.tcp_flags_valid ? "valid": "invalid",
+                p5t->pkt.tcp_flags,
+                p5t->pkt.flags_reserved);
+}
+
+u8 *
+format_acl_plugin_5tuple (u8 * s, va_list * args)
+{
+  return format_fa_5tuple(s, args);
+}
+
 /* packet trace format function */
 static u8 *
 format_acl_fa_trace (u8 * s, va_list * args)
@@ -48,12 +110,15 @@ format_acl_fa_trace (u8 * s, va_list * args)
 
   s =
     format (s,
-           "acl-plugin: sw_if_index %d, next index %d, action: %d, match: acl %d rule %d trace_bits %08x\n"
+           "acl-plugin: lc_index: %d, sw_if_index %d, next index %d, action: %d, match: acl %d rule %d trace_bits %08x\n"
            "  pkt info %016llx %016llx %016llx %016llx %016llx %016llx",
-           t->sw_if_index, t->next_index, t->action, t->match_acl_in_index,
+           t->lc_index, t->sw_if_index, t->next_index, t->action, t->match_acl_in_index,
            t->match_rule_index, t->trace_bitmap,
            t->packet_info[0], t->packet_info[1], t->packet_info[2],
            t->packet_info[3], t->packet_info[4], t->packet_info[5]);
+
+  /* Now also print out the packet_info in a form usable by humans */
+  s = format (s, "\n   %U", format_fa_5tuple, t->packet_info);
   return s;
 }
 
@@ -83,434 +148,88 @@ static char *acl_fa_error_strings[] = {
 };
 /* *INDENT-ON* */
 
-static void *
-get_ptr_to_offset (vlib_buffer_t * b0, int offset)
-{
-  u8 *p = vlib_buffer_get_current (b0) + offset;
-  return p;
-}
-
-
 static int
-fa_acl_match_addr (ip46_address_t * addr1, ip46_address_t * addr2,
-                  int prefixlen, int is_ip6)
+acl_fa_ifc_has_sessions (acl_main_t * am, int sw_if_index0)
 {
-  if (prefixlen == 0)
-    {
-      /* match any always succeeds */
-      return 1;
-    }
-  if (is_ip6)
-    {
-      if (memcmp (addr1, addr2, prefixlen / 8))
-       {
-         /* If the starting full bytes do not match, no point in bittwidling the thumbs further */
-         return 0;
-       }
-      if (prefixlen % 8)
-       {
-         u8 b1 = *((u8 *) addr1 + 1 + prefixlen / 8);
-         u8 b2 = *((u8 *) addr2 + 1 + prefixlen / 8);
-         u8 mask0 = (0xff - ((1 << (8 - (prefixlen % 8))) - 1));
-         return (b1 & mask0) == b2;
-       }
-      else
-       {
-         /* The prefix fits into integer number of bytes, so nothing left to do */
-         return 1;
-       }
-    }
-  else
-    {
-      uint32_t a1 = ntohl (addr1->ip4.as_u32);
-      uint32_t a2 = ntohl (addr2->ip4.as_u32);
-      uint32_t mask0 = 0xffffffff - ((1 << (32 - prefixlen)) - 1);
-      return (a1 & mask0) == a2;
-    }
+  return am->fa_sessions_hash_is_initialized;
 }
 
 static int
-fa_acl_match_port (u16 port, u16 port_first, u16 port_last, int is_ip6)
+acl_fa_ifc_has_in_acl (acl_main_t * am, int sw_if_index0)
 {
-  return ((port >= port_first) && (port <= port_last));
+  int it_has = clib_bitmap_get (am->fa_in_acl_on_sw_if_index, sw_if_index0);
+  return it_has;
 }
 
-int
-acl_match_5tuple (acl_main_t * am, u32 acl_index, fa_5tuple_t * pkt_5tuple,
-                 int is_ip6, u8 * r_action, u32 * r_acl_match_p,
-                 u32 * r_rule_match_p, u32 * trace_bitmap)
+static int
+acl_fa_ifc_has_out_acl (acl_main_t * am, int sw_if_index0)
 {
-  int i;
-  acl_list_t *a;
-  acl_rule_t *r;
-
-  if (pool_is_free_index (am->acls, acl_index))
-    {
-      if (r_acl_match_p)
-       *r_acl_match_p = acl_index;
-      if (r_rule_match_p)
-       *r_rule_match_p = -1;
-      /* the ACL does not exist but is used for policy. Block traffic. */
-      return 0;
-    }
-  a = am->acls + acl_index;
-  for (i = 0; i < a->count; i++)
-    {
-      r = a->rules + i;
-      if (is_ip6 != r->is_ipv6)
-       {
-         continue;
-       }
-      if (!fa_acl_match_addr
-         (&pkt_5tuple->addr[1], &r->dst, r->dst_prefixlen, is_ip6))
-       continue;
-
-#ifdef FA_NODE_VERBOSE_DEBUG
-      clib_warning
-       ("ACL_FA_NODE_DBG acl %d rule %d pkt dst addr %U match rule addr %U/%d",
-        acl_index, i, format_ip46_address, &pkt_5tuple->addr[1],
-        IP46_TYPE_ANY, format_ip46_address, &r->dst, IP46_TYPE_ANY,
-        r->dst_prefixlen);
-#endif
-
-      if (!fa_acl_match_addr
-         (&pkt_5tuple->addr[0], &r->src, r->src_prefixlen, is_ip6))
-       continue;
-
-#ifdef FA_NODE_VERBOSE_DEBUG
-      clib_warning
-       ("ACL_FA_NODE_DBG acl %d rule %d pkt src addr %U match rule addr %U/%d",
-        acl_index, i, format_ip46_address, &pkt_5tuple->addr[0],
-        IP46_TYPE_ANY, format_ip46_address, &r->src, IP46_TYPE_ANY,
-        r->src_prefixlen);
-      clib_warning
-       ("ACL_FA_NODE_DBG acl %d rule %d trying to match pkt proto %d with rule %d",
-        acl_index, i, pkt_5tuple->l4.proto, r->proto);
-#endif
-      if (r->proto)
-       {
-         if (pkt_5tuple->l4.proto != r->proto)
-           continue;
-
-          if (PREDICT_FALSE (pkt_5tuple->pkt.is_nonfirst_fragment &&
-                     am->l4_match_nonfirst_fragment))
-          {
-            /* non-initial fragment with frag match configured - match this rule */
-            *trace_bitmap |= 0x80000000;
-            *r_action = r->is_permit;
-            if (r_acl_match_p)
-             *r_acl_match_p = acl_index;
-            if (r_rule_match_p)
-             *r_rule_match_p = i;
-            return 1;
-          }
-
-         /* A sanity check just to ensure we are about to match the ports extracted from the packet */
-         if (PREDICT_FALSE (!pkt_5tuple->pkt.l4_valid))
-           continue;
-
-#ifdef FA_NODE_VERBOSE_DEBUG
-         clib_warning
-           ("ACL_FA_NODE_DBG acl %d rule %d pkt proto %d match rule %d",
-            acl_index, i, pkt_5tuple->l4.proto, r->proto);
-#endif
-
-         if (!fa_acl_match_port
-             (pkt_5tuple->l4.port[0], r->src_port_or_type_first,
-              r->src_port_or_type_last, is_ip6))
-           continue;
-
-#ifdef FA_NODE_VERBOSE_DEBUG
-         clib_warning
-           ("ACL_FA_NODE_DBG acl %d rule %d pkt sport %d match rule [%d..%d]",
-            acl_index, i, pkt_5tuple->l4.port[0], r->src_port_or_type_first,
-            r->src_port_or_type_last);
-#endif
-
-         if (!fa_acl_match_port
-             (pkt_5tuple->l4.port[1], r->dst_port_or_code_first,
-              r->dst_port_or_code_last, is_ip6))
-           continue;
-
-#ifdef FA_NODE_VERBOSE_DEBUG
-         clib_warning
-           ("ACL_FA_NODE_DBG acl %d rule %d pkt dport %d match rule [%d..%d]",
-            acl_index, i, pkt_5tuple->l4.port[1], r->dst_port_or_code_first,
-            r->dst_port_or_code_last);
-#endif
-         if (pkt_5tuple->pkt.tcp_flags_valid
-             && ((pkt_5tuple->pkt.tcp_flags & r->tcp_flags_mask) !=
-                 r->tcp_flags_value))
-           continue;
-       }
-      /* everything matches! */
-#ifdef FA_NODE_VERBOSE_DEBUG
-      clib_warning ("ACL_FA_NODE_DBG acl %d rule %d FULL-MATCH, action %d",
-                   acl_index, i, r->is_permit);
-#endif
-      *r_action = r->is_permit;
-      if (r_acl_match_p)
-       *r_acl_match_p = acl_index;
-      if (r_rule_match_p)
-       *r_rule_match_p = i;
-      return 1;
-    }
-  return 0;
+  int it_has = clib_bitmap_get (am->fa_out_acl_on_sw_if_index, sw_if_index0);
+  return it_has;
 }
 
-static u8
-full_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2,
-                      int is_ip6, int is_input, u32 * acl_match_p,
-                      u32 * rule_match_p, u32 * trace_bitmap)
+/* Session keys match the packets received, and mirror the packets sent */
+static u32
+acl_make_5tuple_session_key (acl_main_t * am, int is_input, int is_ip6,
+                             u32 sw_if_index, fa_5tuple_t * p5tuple_pkt,
+                             fa_5tuple_t * p5tuple_sess)
 {
-  acl_main_t *am = &acl_main;
-  int i;
-  u32 *acl_vector;
-  u8 action = 0;
+  int src_index = is_input ? 0 : 1;
+  int dst_index = is_input ? 1 : 0;
+  u32 valid_new_sess = 1;
+  p5tuple_sess->addr[src_index] = p5tuple_pkt->addr[0];
+  p5tuple_sess->addr[dst_index] = p5tuple_pkt->addr[1];
+  p5tuple_sess->l4.as_u64 = p5tuple_pkt->l4.as_u64;
 
-  if (is_input)
+  if (PREDICT_TRUE(p5tuple_pkt->l4.proto != icmp_protos[is_ip6]))
     {
-      vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index);
-      acl_vector = am->input_acl_vec_by_sw_if_index[sw_if_index];
+      p5tuple_sess->l4.port[src_index] = p5tuple_pkt->l4.port[0];
+      p5tuple_sess->l4.port[dst_index] = p5tuple_pkt->l4.port[1];
     }
   else
     {
-      vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index);
-      acl_vector = am->output_acl_vec_by_sw_if_index[sw_if_index];
-    }
-  for (i = 0; i < vec_len (acl_vector); i++)
-    {
-#ifdef FA_NODE_VERBOSE_DEBUG
-      clib_warning ("ACL_FA_NODE_DBG: Trying to match ACL: %d",
-                   acl_vector[i]);
-#endif
-      if (acl_match_5tuple
-         (am, acl_vector[i], pkt_5tuple, is_ip6, &action,
-          acl_match_p, rule_match_p, trace_bitmap))
-       {
-         return action;
-       }
-    }
-  if (vec_len (acl_vector) > 0)
-    {
-      /* If there are ACLs and none matched, deny by default */
-      return 0;
-    }
-#ifdef FA_NODE_VERBOSE_DEBUG
-  clib_warning ("ACL_FA_NODE_DBG: No ACL on sw_if_index %d", sw_if_index);
-#endif
-  /* Deny by default. If there are no ACLs defined we should not be here. */
-  return 0;
-}
-
-static int
-offset_within_packet (vlib_buffer_t * b0, int offset)
-{
-  /* For the purposes of this code, "within" means we have at least 8 bytes after it */
-  return (offset <= (b0->current_length - 8));
-}
-
-static void
-acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
-                int is_input, int is_l2_path, fa_5tuple_t * p5tuple_pkt)
-{
-  int l3_offset = 14;
-  int l4_offset;
-  u16 ports[2];
-  u16 proto;
-  /* IP4 and IP6 protocol numbers of ICMP */
-  static u8 icmp_protos[] = { IP_PROTOCOL_ICMP, IP_PROTOCOL_ICMP6 };
-
-  if (is_input && !(is_l2_path))
-    {
-      l3_offset = 0;
-    }
+      static const u8 * icmp_invmap[] = { icmp4_invmap, icmp6_invmap };
+      static const u8 * icmp_valid_new[] = { icmp4_valid_new, icmp6_valid_new };
+      static const u8 icmp_invmap_size[] = { sizeof(icmp4_invmap),
+                                             sizeof(icmp6_invmap) };
+      static const u8 icmp_valid_new_size[] = { sizeof(icmp4_valid_new),
+                                                sizeof(icmp6_valid_new) };
+      int type = is_ip6 ? p5tuple_pkt->l4.port[0]-128: p5tuple_pkt->l4.port[0];
 
-  /* key[0..3] contains src/dst address and is cleared/set below */
-  /* Remainder of the key and per-packet non-key data */
-  p5tuple_pkt->kv.key[4] = 0;
-  p5tuple_pkt->kv.value = 0;
+      p5tuple_sess->l4.port[0] = p5tuple_pkt->l4.port[0];
+      p5tuple_sess->l4.port[1] = p5tuple_pkt->l4.port[1];
 
-  if (is_ip6)
-    {
-      clib_memcpy (&p5tuple_pkt->addr,
-                  get_ptr_to_offset (b0,
-                                     offsetof (ip6_header_t,
-                                               src_address) + l3_offset),
-                  sizeof (p5tuple_pkt->addr));
-      proto =
-       *(u8 *) get_ptr_to_offset (b0,
-                                  offsetof (ip6_header_t,
-                                            protocol) + l3_offset);
-      l4_offset = l3_offset + sizeof (ip6_header_t);
-#ifdef FA_NODE_VERBOSE_DEBUG
-      clib_warning ("ACL_FA_NODE_DBG: proto: %d, l4_offset: %d", proto,
-                   l4_offset);
-#endif
-      /* IP6 EH handling is here, increment l4_offset if needs to, update the proto */
-      int need_skip_eh = clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto);
-      if (PREDICT_FALSE (need_skip_eh))
-       {
-         while (need_skip_eh && offset_within_packet (b0, l4_offset))
-           {
-             /* Fragment header needs special handling */
-             if (PREDICT_FALSE(ACL_EH_FRAGMENT == proto))
-               {
-                 proto = *(u8 *) get_ptr_to_offset (b0, l4_offset);
-                 u16 frag_offset;
-                 clib_memcpy (&frag_offset, get_ptr_to_offset (b0, 2 + l4_offset), sizeof(frag_offset));
-                 frag_offset = ntohs(frag_offset) >> 3;
-                 if (frag_offset)
-                   {
-                      p5tuple_pkt->pkt.is_nonfirst_fragment = 1;
-                      /* invalidate L4 offset so we don't try to find L4 info */
-                      l4_offset += b0->current_length;
-                   }
-                 else
-                   {
-                     /* First fragment: skip the frag header and move on. */
-                     l4_offset += 8;
-                   }
-               }
-              else
-                {
-                 u8 nwords = *(u8 *) get_ptr_to_offset (b0, 1 + l4_offset);
-                 proto = *(u8 *) get_ptr_to_offset (b0, l4_offset);
-                 l4_offset += 8 * (1 + (u16) nwords);
-                }
-#ifdef FA_NODE_VERBOSE_DEBUG
-             clib_warning ("ACL_FA_NODE_DBG: new proto: %d, new offset: %d",
-                           proto, l4_offset);
-#endif
-             need_skip_eh =
-               clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto);
-           }
-       }
-    }
-  else
-    {
-      p5tuple_pkt->kv.key[0] = 0;
-      p5tuple_pkt->kv.key[1] = 0;
-      p5tuple_pkt->kv.key[2] = 0;
-      p5tuple_pkt->kv.key[3] = 0;
-      clib_memcpy (&p5tuple_pkt->addr[0].ip4,
-                  get_ptr_to_offset (b0,
-                                     offsetof (ip4_header_t,
-                                               src_address) + l3_offset),
-                  sizeof (p5tuple_pkt->addr[0].ip4));
-      clib_memcpy (&p5tuple_pkt->addr[1].ip4,
-                  get_ptr_to_offset (b0,
-                                     offsetof (ip4_header_t,
-                                               dst_address) + l3_offset),
-                  sizeof (p5tuple_pkt->addr[1].ip4));
-      proto =
-       *(u8 *) get_ptr_to_offset (b0,
-                                  offsetof (ip4_header_t,
-                                            protocol) + l3_offset);
-      l4_offset = l3_offset + sizeof (ip4_header_t);
-      u16 flags_and_fragment_offset;
-      clib_memcpy (&flags_and_fragment_offset,
-                   get_ptr_to_offset (b0,
-                                      offsetof (ip4_header_t,
-                                                flags_and_fragment_offset)) + l3_offset,
-                                                sizeof(flags_and_fragment_offset));
-      flags_and_fragment_offset = ntohs (flags_and_fragment_offset);
-
-      /* non-initial fragments have non-zero offset */
-      if ((PREDICT_FALSE(0xfff & flags_and_fragment_offset)))
+      /*
+       * Invert ICMP type for valid icmp_invmap messages:
+       *  1) input node with outbound ACL interface
+       *  2) output node with inbound ACL interface
+       *
+       */
+      if ((is_input && acl_fa_ifc_has_out_acl(am, sw_if_index)) ||
+          (!is_input && acl_fa_ifc_has_in_acl(am, sw_if_index)))
         {
-          p5tuple_pkt->pkt.is_nonfirst_fragment = 1;
-          /* invalidate L4 offset so we don't try to find L4 info */
-          l4_offset += b0->current_length;
+          if (type >= 0 &&
+              type < icmp_invmap_size[is_ip6] &&
+              icmp_invmap[is_ip6][type])
+            {
+              p5tuple_sess->l4.port[0] = icmp_invmap[is_ip6][type] - 1;
+            }
         }
 
-    }
-  p5tuple_pkt->l4.proto = proto;
-  if (PREDICT_TRUE (offset_within_packet (b0, l4_offset)))
-    {
-      p5tuple_pkt->pkt.l4_valid = 1;
-      if (icmp_protos[is_ip6] == proto)
-       {
-         /* type */
-         p5tuple_pkt->l4.port[0] =
-           *(u8 *) get_ptr_to_offset (b0,
-                                      l4_offset + offsetof (icmp46_header_t,
-                                                            type));
-         /* code */
-         p5tuple_pkt->l4.port[1] =
-           *(u8 *) get_ptr_to_offset (b0,
-                                      l4_offset + offsetof (icmp46_header_t,
-                                                            code));
-       }
-      else if ((IPPROTO_TCP == proto) || (IPPROTO_UDP == proto))
-       {
-         clib_memcpy (&ports,
-                      get_ptr_to_offset (b0,
-                                         l4_offset + offsetof (tcp_header_t,
-                                                               src_port)),
-                      sizeof (ports));
-         p5tuple_pkt->l4.port[0] = ntohs (ports[0]);
-         p5tuple_pkt->l4.port[1] = ntohs (ports[1]);
-
-         p5tuple_pkt->pkt.tcp_flags =
-           *(u8 *) get_ptr_to_offset (b0,
-                                      l4_offset + offsetof (tcp_header_t,
-                                                            flags));
-         p5tuple_pkt->pkt.tcp_flags_valid = (proto == IPPROTO_TCP);
-       }
       /*
-       * FIXME: rather than the above conditional, here could
-       * be a nice generic mechanism to extract two L4 values:
-       *
-       * have a per-protocol array of 4 elements like this:
-       *   u8 offset; to take the byte from, off L4 header
-       *   u8 mask; to mask it with, before storing
-       *
-       * this way we can describe UDP, TCP and ICMP[46] semantics,
-       * and add a sort of FPM-type behavior for other protocols.
-       *
-       * Of course, is it faster ? and is it needed ?
-       *
+       * Only ICMP messages listed in the icmp4_valid_new/icmp6_valid_new tables
+       * are allowed to create a stateful ACL session.
+       * Other messages are forwarded without creating a reflexive session entry.
        */
+      if (type < 0 ||
+          type >= icmp_valid_new_size[is_ip6] ||
+          !icmp_valid_new[is_ip6][type])
+        {
+          valid_new_sess = 0;
+        }
     }
-}
 
-
-/* Session keys match the packets received, and mirror the packets sent */
-static void
-acl_make_5tuple_session_key (int is_input, fa_5tuple_t * p5tuple_pkt,
-                            fa_5tuple_t * p5tuple_sess)
-{
-  int src_index = is_input ? 0 : 1;
-  int dst_index = is_input ? 1 : 0;
-  p5tuple_sess->addr[src_index] = p5tuple_pkt->addr[0];
-  p5tuple_sess->addr[dst_index] = p5tuple_pkt->addr[1];
-  p5tuple_sess->l4.as_u64 = p5tuple_pkt->l4.as_u64;
-  p5tuple_sess->l4.port[src_index] = p5tuple_pkt->l4.port[0];
-  p5tuple_sess->l4.port[dst_index] = p5tuple_pkt->l4.port[1];
-}
-
-
-static int
-acl_fa_ifc_has_sessions (acl_main_t * am, int sw_if_index0)
-{
-  int has_sessions =
-    clib_bitmap_get (am->fa_sessions_on_sw_if_index, sw_if_index0);
-  return has_sessions;
-}
-
-static int
-acl_fa_ifc_has_in_acl (acl_main_t * am, int sw_if_index0)
-{
-  int it_has = clib_bitmap_get (am->fa_in_acl_on_sw_if_index, sw_if_index0);
-  return it_has;
-}
-
-static int
-acl_fa_ifc_has_out_acl (acl_main_t * am, int sw_if_index0)
-{
-  int it_has = clib_bitmap_get (am->fa_out_acl_on_sw_if_index, sw_if_index0);
-  return it_has;
+    return valid_new_sess;
 }
 
 
@@ -542,6 +261,39 @@ fa_session_get_timeout_type (acl_main_t * am, fa_session_t * sess)
 }
 
 
+static u64
+fa_session_get_shortest_timeout(acl_main_t * am)
+{
+  int timeout_type;
+  u64 timeout = ~0LL;
+  for(timeout_type = 0; timeout_type < ACL_N_TIMEOUTS; timeout_type++) {
+    if (timeout > am->session_timeout_sec[timeout_type]) {
+      timeout = am->session_timeout_sec[timeout_type];
+    }
+  }
+  return timeout;
+}
+
+/*
+ * Get the timeout of the session in a list since its enqueue time.
+ */
+
+static u64
+fa_session_get_list_timeout (acl_main_t * am, fa_session_t * sess)
+{
+  u64 timeout = am->vlib_main->clib_time.clocks_per_second;
+  /*
+   * we have the shortest possible timeout type in all the lists
+   * (see README-multicore for the rationale)
+   */
+  timeout *= fa_session_get_shortest_timeout(am);
+  return timeout;
+}
+
+/*
+ * Get the idle timeout of a session.
+ */
+
 static u64
 fa_session_get_timeout (acl_main_t * am, fa_session_t * sess)
 {
@@ -552,85 +304,129 @@ fa_session_get_timeout (acl_main_t * am, fa_session_t * sess)
 }
 
 static void
-acl_fa_ifc_init_sessions (acl_main_t * am, int sw_if_index0)
+acl_fa_verify_init_sessions (acl_main_t * am)
 {
-#ifdef FA_NODE_VERBOSE_DEBUG
-  clib_warning
-    ("Initializing bihash for sw_if_index %d num buckets %lu memory size %llu",
-     sw_if_index0, am->fa_conn_table_hash_num_buckets,
-     am->fa_conn_table_hash_memory_size);
-#endif
-  vec_validate (am->fa_sessions_by_sw_if_index, sw_if_index0);
-  BV (clib_bihash_init) (&am->fa_sessions_by_sw_if_index
-                        [sw_if_index0], "ACL plugin FA session bihash",
+  if (!am->fa_sessions_hash_is_initialized) {
+    u16 wk;
+    /* Allocate the per-worker sessions pools */
+    for (wk = 0; wk < vec_len (am->per_worker_data); wk++) {
+      acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk];
+
+      /*
+      * // In lieu of trying to preallocate the pool and its free bitmap, rather use pool_init_fixed
+      * pool_alloc_aligned(pw->fa_sessions_pool, am->fa_conn_table_max_entries, CLIB_CACHE_LINE_BYTES);
+      * clib_bitmap_validate(pool_header(pw->fa_sessions_pool)->free_bitmap, am->fa_conn_table_max_entries);
+      */
+      pool_init_fixed(pw->fa_sessions_pool, am->fa_conn_table_max_entries);
+    }
+
+    /* ... and the interface session hash table */
+    BV (clib_bihash_init) (&am->fa_sessions_hash,
+                        "ACL plugin FA session bihash",
                         am->fa_conn_table_hash_num_buckets,
                         am->fa_conn_table_hash_memory_size);
-  am->fa_sessions_on_sw_if_index =
-    clib_bitmap_set (am->fa_sessions_on_sw_if_index, sw_if_index0, 1);
+    am->fa_sessions_hash_is_initialized = 1;
+  }
+}
+
+static inline fa_session_t *get_session_ptr(acl_main_t *am, u16 thread_index, u32 session_index)
+{
+  acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+  fa_session_t *sess = pool_is_free_index (pw->fa_sessions_pool, session_index) ? 0 : pool_elt_at_index(pw->fa_sessions_pool, session_index);
+  return sess;
+}
+
+static inline int is_valid_session_ptr(acl_main_t *am, u16 thread_index, fa_session_t *sess)
+{
+  acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+  return ((sess != 0) && ((sess - pw->fa_sessions_pool) < pool_len(pw->fa_sessions_pool)));
 }
 
 static void
-acl_fa_conn_list_add_session (acl_main_t * am, u32 sess_id, u64 now)
+acl_fa_conn_list_add_session (acl_main_t * am, fa_full_session_id_t sess_id, u64 now)
 {
-  fa_session_t *sess = am->fa_sessions_pool + sess_id;
+  fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index);
   u8 list_id = fa_session_get_timeout_type(am, sess);
+  uword thread_index = os_get_thread_index ();
+  acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+  /* the retrieved session thread index must be necessarily the same as the one in the key */
+  ASSERT (sess->thread_index == sess_id.thread_index);
+  /* the retrieved session thread index must be the same as current thread */
+  ASSERT (sess->thread_index == thread_index);
   sess->link_enqueue_time = now;
   sess->link_list_id = list_id;
   sess->link_next_idx = ~0;
-  sess->link_prev_idx = am->fa_conn_list_tail[list_id];
-  if (~0 != am->fa_conn_list_tail[list_id]) {
-    fa_session_t *prev_sess = am->fa_sessions_pool + am->fa_conn_list_tail[list_id];
-    prev_sess->link_next_idx = sess_id;
+  sess->link_prev_idx = pw->fa_conn_list_tail[list_id];
+  if (~0 != pw->fa_conn_list_tail[list_id]) {
+    fa_session_t *prev_sess = get_session_ptr(am, thread_index, pw->fa_conn_list_tail[list_id]);
+    prev_sess->link_next_idx = sess_id.session_index;
+    /* We should never try to link with a session on another thread */
+    ASSERT(prev_sess->thread_index == sess->thread_index);
   }
-  am->fa_conn_list_tail[list_id] = sess_id;
-
-  if (~0 == am->fa_conn_list_head[list_id]) {
-    am->fa_conn_list_head[list_id] = sess_id;
-    /* If it is a first conn in any list, kick off the cleaner */
-    vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
-                                 ACL_FA_CLEANER_RESCHEDULE, 0);
+  pw->fa_conn_list_tail[list_id] = sess_id.session_index;
+  
+#ifdef FA_NODE_VERBOSE_DEBUG
+    clib_warning("FA-SESSION-DEBUG: add session id %d on thread %d sw_if_index %d", sess_id.session_index, thread_index, sess->sw_if_index);
+#endif
+  pw->serviced_sw_if_index_bitmap = clib_bitmap_set(pw->serviced_sw_if_index_bitmap, sess->sw_if_index, 1);
 
+  if (~0 == pw->fa_conn_list_head[list_id]) {
+    pw->fa_conn_list_head[list_id] = sess_id.session_index;
   }
 }
 
-static void
-acl_fa_conn_list_delete_session (acl_main_t *am, u32 sess_id)
+static int
+acl_fa_conn_list_delete_session (acl_main_t *am, fa_full_session_id_t sess_id)
 {
-  fa_session_t *sess = am->fa_sessions_pool + sess_id;
+  uword thread_index = os_get_thread_index ();
+  acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+  if (thread_index != sess_id.thread_index) {
+    /* If another thread attempts to delete the session, fail it. */
+#ifdef FA_NODE_VERBOSE_DEBUG
+    clib_warning("thread id in key %d != curr thread index %d, not deleting", sess_id.thread_index, thread_index);
+#endif
+    return 0;
+  }
+  fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index);
+  /* we should never try to delete the session with another thread index */
+  ASSERT(sess->thread_index == thread_index);
   if (~0 != sess->link_prev_idx) {
-    fa_session_t *prev_sess = am->fa_sessions_pool + sess->link_prev_idx;
+    fa_session_t *prev_sess = get_session_ptr(am, thread_index, sess->link_prev_idx);
+    /* the previous session must be in the same list as this one */
+    ASSERT(prev_sess->link_list_id == sess->link_list_id);
     prev_sess->link_next_idx = sess->link_next_idx;
-    if (prev_sess->link_list_id != sess->link_list_id)
-      clib_warning("(prev_sess->link_list_id != sess->link_list_id)");
   }
   if (~0 != sess->link_next_idx) {
-    fa_session_t *next_sess = am->fa_sessions_pool + sess->link_next_idx;
+    fa_session_t *next_sess = get_session_ptr(am, thread_index, sess->link_next_idx);
+    /* The next session must be in the same list as the one we are deleting */
+    ASSERT(next_sess->link_list_id == sess->link_list_id);
     next_sess->link_prev_idx = sess->link_prev_idx;
-    if (next_sess->link_list_id != sess->link_list_id)
-      clib_warning("(next_sess->link_list_id != sess->link_list_id)");
   }
-  if (am->fa_conn_list_head[sess->link_list_id] == sess_id) {
-    am->fa_conn_list_head[sess->link_list_id] = sess->link_next_idx;
+  if (pw->fa_conn_list_head[sess->link_list_id] == sess_id.session_index) {
+    pw->fa_conn_list_head[sess->link_list_id] = sess->link_next_idx;
   }
-  if (am->fa_conn_list_tail[sess->link_list_id] == sess_id) {
-    am->fa_conn_list_tail[sess->link_list_id] = sess->link_prev_idx;
+  if (pw->fa_conn_list_tail[sess->link_list_id] == sess_id.session_index) {
+    pw->fa_conn_list_tail[sess->link_list_id] = sess->link_prev_idx;
   }
+  return 1;
 }
 
-
-int
-acl_fa_session_is_dead (acl_main_t * am, u32 sw_if_index, u64 now,
-                       u32 sess_id)
-{
-  return 0;
-}
-
-static void
-acl_fa_restart_timer_for_session (acl_main_t * am, u64 now, u32 sess_id)
+static int
+acl_fa_restart_timer_for_session (acl_main_t * am, u64 now, fa_full_session_id_t sess_id)
 {
-  // fa_session_t *sess = am->fa_sessions_pool + sess_id;
-  acl_fa_conn_list_delete_session(am, sess_id);
-  acl_fa_conn_list_add_session(am, sess_id, now);
+  if (acl_fa_conn_list_delete_session(am, sess_id)) {
+    acl_fa_conn_list_add_session(am, sess_id, now);
+    return 1;
+  } else {
+    /*
+     * Our thread does not own this connection, so we cannot delete
+     * the session. To avoid the complicated signaling, we simply
+     * pick the list waiting time to be the shortest of the timeouts.
+     * This way we do not have to do anything special, and let
+     * the regular requeue check take care of everything.
+     */
+    return 0;
+  }
 }
 
 
@@ -648,92 +444,202 @@ acl_fa_track_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
 
 
 static void
-acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, u32 sess_id)
+acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t sess_id)
 {
-  fa_session_t *sess = (fa_session_t *) am->fa_sessions_pool + sess_id;
-  BV (clib_bihash_add_del) (&am->fa_sessions_by_sw_if_index[sw_if_index],
+  void *oldheap = clib_mem_set_heap(am->acl_mheap);
+  fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index);
+  ASSERT(sess->thread_index == os_get_thread_index ());
+  BV (clib_bihash_add_del) (&am->fa_sessions_hash,
                            &sess->info.kv, 0);
-  pool_put_index (am->fa_sessions_pool, sess_id);
-  /* Deleting from timer wheel not needed, as the cleaner deals with the timers. */
-  vec_validate (am->fa_session_dels_by_sw_if_index, sw_if_index);
-  am->fa_session_dels_by_sw_if_index[sw_if_index]++;
+  acl_fa_per_worker_data_t *pw = &am->per_worker_data[sess_id.thread_index];
+  pool_put_index (pw->fa_sessions_pool, sess_id.session_index);
+  /* Deleting from timer structures not needed,
+     as the caller must have dealt with the timers. */
+  vec_validate (pw->fa_session_dels_by_sw_if_index, sw_if_index);
+  clib_mem_set_heap (oldheap);
+  pw->fa_session_dels_by_sw_if_index[sw_if_index]++;
+  clib_smp_atomic_add(&am->fa_session_total_dels, 1);
 }
 
 static int
 acl_fa_can_add_session (acl_main_t * am, int is_input, u32 sw_if_index)
 {
-  u64 curr_sess;
-  vec_validate (am->fa_session_adds_by_sw_if_index, sw_if_index);
-  vec_validate (am->fa_session_dels_by_sw_if_index, sw_if_index);
-  curr_sess =
-    am->fa_session_adds_by_sw_if_index[sw_if_index] -
-    am->fa_session_dels_by_sw_if_index[sw_if_index];
-  return (curr_sess < am->fa_conn_table_max_entries);
+  u64 curr_sess_count;
+  curr_sess_count = am->fa_session_total_adds - am->fa_session_total_dels;
+  return (curr_sess_count < am->fa_conn_table_max_entries);
+}
+
+static u64
+acl_fa_get_list_head_expiry_time(acl_main_t *am, acl_fa_per_worker_data_t *pw, u64 now, u16 thread_index, int timeout_type)
+{
+  fa_session_t *sess = get_session_ptr(am, thread_index, pw->fa_conn_list_head[timeout_type]);
+  /*
+   * We cannot check just the index here because in between the worker thread might
+   * dequeue the connection from the head just as we are about to check it.
+   */
+  if (!is_valid_session_ptr(am, thread_index, sess)) {
+    return ~0LL; // infinity.
+  } else {
+    u64 timeout_time =
+              sess->link_enqueue_time + fa_session_get_list_timeout (am, sess);
+    return timeout_time;
+  }
+}
+
+static int
+acl_fa_conn_time_to_check (acl_main_t *am, acl_fa_per_worker_data_t *pw, u64 now, u16 thread_index, u32 session_index)
+{
+  fa_session_t *sess = get_session_ptr(am, thread_index, session_index);
+  u64 timeout_time =
+              sess->link_enqueue_time + fa_session_get_list_timeout (am, sess);
+  return (timeout_time < now) || (sess->link_enqueue_time <= pw->swipe_end_time);
+}
+
+/*
+ * see if there are sessions ready to be checked,
+ * do the maintenance (requeue or delete), and
+ * return the total number of sessions reclaimed.
+ */
+static int
+acl_fa_check_idle_sessions(acl_main_t *am, u16 thread_index, u64 now)
+{
+  acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+  fa_full_session_id_t fsid;
+  fsid.thread_index = thread_index;
+  int total_expired = 0;
+
+  {
+    u8 tt = 0;
+    for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) {
+      while((vec_len(pw->expired) < am->fa_max_deleted_sessions_per_interval)
+           && (~0 != pw->fa_conn_list_head[tt])
+           && (acl_fa_conn_time_to_check(am, pw, now, thread_index,
+                                         pw->fa_conn_list_head[tt]))) {
+       fsid.session_index = pw->fa_conn_list_head[tt];
+        elog_acl_maybe_trace_X2(am, "acl_fa_check_idle_sessions: expire session %d on thread %d", "i4i4", (u32)fsid.session_index, (u32)thread_index);
+       vec_add1(pw->expired, fsid.session_index);
+       acl_fa_conn_list_delete_session(am, fsid);
+      }
+    }
+  }
+
+  u32 *psid = NULL;
+  vec_foreach (psid, pw->expired)
+  {
+    fsid.session_index = *psid;
+    if (!pool_is_free_index (pw->fa_sessions_pool, fsid.session_index))
+      {
+       fa_session_t *sess = get_session_ptr(am, thread_index, fsid.session_index);
+       u32 sw_if_index = sess->sw_if_index;
+       u64 sess_timeout_time =
+         sess->last_active_time + fa_session_get_timeout (am, sess);
+       if ((now < sess_timeout_time) && (0 == clib_bitmap_get(pw->pending_clear_sw_if_index_bitmap, sw_if_index)))
+         {
+#ifdef FA_NODE_VERBOSE_DEBUG
+           clib_warning ("ACL_FA_NODE_CLEAN: Restarting timer for session %d, sw_if_index %d",
+              (int) fsid.session_index, sess->sw_if_index);
+#endif
+           /* There was activity on the session, so the idle timeout
+              has not passed. Enqueue for another time period. */
+
+           acl_fa_conn_list_add_session(am, fsid, now);
+           pw->cnt_session_timer_restarted++;
+         }
+       else
+         {
+#ifdef FA_NODE_VERBOSE_DEBUG
+           clib_warning ("ACL_FA_NODE_CLEAN: Deleting session %d, sw_if_index %d",
+              (int) fsid.session_index, sess->sw_if_index);
+#endif
+           acl_fa_delete_session (am, sw_if_index, fsid);
+           pw->cnt_deleted_sessions++;
+         }
+      }
+    else
+      {
+       pw->cnt_already_deleted_sessions++;
+      }
+  }
+  total_expired = vec_len(pw->expired);
+  /* zero out the vector which we have acted on */
+  if (pw->expired)
+    _vec_len (pw->expired) = 0;
+  /* if we were advancing and reached the end
+   * (no more sessions to recycle), reset the fast-forward timestamp */
+
+  if (pw->swipe_end_time && 0 == total_expired)
+    pw->swipe_end_time = 0;
+  return (total_expired);
 }
 
 always_inline void
-acl_fa_try_recycle_session (acl_main_t * am, int is_input, u32 sw_if_index)
+acl_fa_try_recycle_session (acl_main_t * am, int is_input, u16 thread_index, u32 sw_if_index)
 {
   /* try to recycle a TCP transient session */
+  acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
   u8 timeout_type = ACL_TIMEOUT_TCP_TRANSIENT;
-  u32 sess_id = am->fa_conn_list_head[timeout_type];
-  if (~0 != sess_id) {
+  fa_full_session_id_t sess_id;
+  sess_id.session_index = pw->fa_conn_list_head[timeout_type];
+  if (~0 != sess_id.session_index) {
+    sess_id.thread_index = thread_index;
     acl_fa_conn_list_delete_session(am, sess_id);
     acl_fa_delete_session(am, sw_if_index, sess_id);
   }
 }
 
-static void
+static fa_session_t *
 acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
-                   fa_5tuple_t * p5tuple)
+                   fa_5tuple_t * p5tuple, u16 current_policy_epoch)
 {
   clib_bihash_kv_40_8_t *pkv = &p5tuple->kv;
   clib_bihash_kv_40_8_t kv;
-  u32 sess_id;
-  fa_session_t *sess;
+  fa_full_session_id_t f_sess_id;
+  uword thread_index = os_get_thread_index();
+  void *oldheap = clib_mem_set_heap(am->acl_mheap);
+  acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
 
-  pool_get (am->fa_sessions_pool, sess);
-  sess_id = sess - am->fa_sessions_pool;
+  f_sess_id.thread_index = thread_index;
+  fa_session_t *sess;
 
+  pool_get_aligned (pw->fa_sessions_pool, sess, CLIB_CACHE_LINE_BYTES);
+  f_sess_id.session_index = sess - pw->fa_sessions_pool;
+  f_sess_id.intf_policy_epoch = current_policy_epoch;
 
   kv.key[0] = pkv->key[0];
   kv.key[1] = pkv->key[1];
   kv.key[2] = pkv->key[2];
   kv.key[3] = pkv->key[3];
   kv.key[4] = pkv->key[4];
-  kv.value = sess_id;
+  kv.value = f_sess_id.as_u64;
 
   memcpy (sess, pkv, sizeof (pkv->key));
   sess->last_active_time = now;
   sess->sw_if_index = sw_if_index;
   sess->tcp_flags_seen.as_u16 = 0;
-  sess->reserved1 = 0;
+  sess->thread_index = thread_index;
   sess->link_list_id = ~0;
   sess->link_prev_idx = ~0;
   sess->link_next_idx = ~0;
 
 
 
-  if (!acl_fa_ifc_has_sessions (am, sw_if_index))
-    {
-      acl_fa_ifc_init_sessions (am, sw_if_index);
-    }
-
-  BV (clib_bihash_add_del) (&am->fa_sessions_by_sw_if_index[sw_if_index],
+  ASSERT(am->fa_sessions_hash_is_initialized == 1);
+  BV (clib_bihash_add_del) (&am->fa_sessions_hash,
                            &kv, 1);
-  acl_fa_conn_list_add_session(am, sess_id, now);
+  acl_fa_conn_list_add_session(am, f_sess_id, now);
 
-  vec_validate (am->fa_session_adds_by_sw_if_index, sw_if_index);
-  am->fa_session_adds_by_sw_if_index[sw_if_index]++;
+  vec_validate (pw->fa_session_adds_by_sw_if_index, sw_if_index);
+  clib_mem_set_heap (oldheap);
+  pw->fa_session_adds_by_sw_if_index[sw_if_index]++;
+  clib_smp_atomic_add(&am->fa_session_total_adds, 1);
+  return sess;
 }
 
 static int
 acl_fa_find_session (acl_main_t * am, u32 sw_if_index0, fa_5tuple_t * p5tuple,
                     clib_bihash_kv_40_8_t * pvalue_sess)
 {
-  return (BV (clib_bihash_search)
-         (&am->fa_sessions_by_sw_if_index[sw_if_index0], &p5tuple->kv,
-          pvalue_sess) == 0);
+  return (clib_bihash_search_40_8 (&am->fa_sessions_hash, &p5tuple->kv, pvalue_sess) == 0);
 }
 
 
@@ -751,12 +657,13 @@ acl_fa_node_fn (vlib_main_t * vm,
   u32 pkts_acl_permit = 0;
   u32 pkts_restart_session_timer = 0;
   u32 trace_bitmap = 0;
-  u32 feature_bitmap0;
   acl_main_t *am = &acl_main;
   fa_5tuple_t fa_5tuple, kv_sess;
   clib_bihash_kv_40_8_t value_sess;
   vlib_node_runtime_t *error_node;
   u64 now = clib_cpu_time_now ();
+  uword thread_index = os_get_thread_index ();
+  acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -777,10 +684,13 @@ acl_fa_node_fn (vlib_main_t * vm,
          u32 next0 = 0;
          u8 action = 0;
          u32 sw_if_index0;
+         u32 lc_index0;
          int acl_check_needed = 1;
          u32 match_acl_in_index = ~0;
+         u32 match_acl_pos = ~0;
          u32 match_rule_index = ~0;
          u8 error0 = 0;
+         u32 valid_new_sess;
 
          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
@@ -796,24 +706,35 @@ acl_fa_node_fn (vlib_main_t * vm,
            sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
          else
            sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
-         if (is_l2_path)
-           feature_bitmap0 = vnet_buffer (b0)->l2.feature_bitmap;
 
+         if (is_input)
+           lc_index0 = am->input_lc_index_by_sw_if_index[sw_if_index0];
+         else
+           lc_index0 = am->output_lc_index_by_sw_if_index[sw_if_index0];
+
+          u32 **p_epoch_vec = is_input ? &am->input_policy_epoch_by_sw_if_index
+                                       :  &am->output_policy_epoch_by_sw_if_index;
+          u16 current_policy_epoch = sw_if_index0 < vec_len(*p_epoch_vec) ? vec_elt(*p_epoch_vec, sw_if_index0) : (is_input * FA_POLICY_EPOCH_IS_INPUT);
          /*
           * Extract the L3/L4 matching info into a 5-tuple structure,
           * then create a session key whose layout is independent on forward or reverse
           * direction of the packet.
           */
 
-         acl_fill_5tuple (am, b0, is_ip6, is_input, is_l2_path, &fa_5tuple);
-         acl_make_5tuple_session_key (is_input, &fa_5tuple, &kv_sess);
+         acl_plugin_fill_5tuple_inline (lc_index0, b0, is_ip6, is_input, is_l2_path, (fa_5tuple_opaque_t *)&fa_5tuple);
+          fa_5tuple.l4.lsb_of_sw_if_index = sw_if_index0 & 0xffff;
+         fa_5tuple.pkt.lc_index = lc_index0;
+         valid_new_sess = acl_make_5tuple_session_key (am, is_input, is_ip6, sw_if_index0,  &fa_5tuple, &kv_sess);
+          fa_5tuple.pkt.is_ip6 = is_ip6;
+          // XXDEL fa_5tuple.pkt.is_input = is_input;
+          fa_5tuple.pkt.mask_type_index_lsb = ~0;
 #ifdef FA_NODE_VERBOSE_DEBUG
          clib_warning
-           ("ACL_FA_NODE_DBG: session 5-tuple %016llx %016llx %016llx %016llx %016llx %016llx",
+           ("ACL_FA_NODE_DBG: session 5-tuple %016llx %016llx %016llx %016llx %016llx %016llx",
             kv_sess.kv.key[0], kv_sess.kv.key[1], kv_sess.kv.key[2],
             kv_sess.kv.key[3], kv_sess.kv.key[4], kv_sess.kv.value);
          clib_warning
-           ("ACL_FA_NODE_DBG: packet 5-tuple %016llx %016llx %016llx %016llx %016llx %016llx",
+           ("ACL_FA_NODE_DBG: packet 5-tuple %016llx %016llx %016llx %016llx %016llx %016llx",
             fa_5tuple.kv.key[0], fa_5tuple.kv.key[1], fa_5tuple.kv.key[2],
             fa_5tuple.kv.key[3], fa_5tuple.kv.key[4], fa_5tuple.kv.value);
 #endif
@@ -827,16 +748,19 @@ acl_fa_node_fn (vlib_main_t * vm,
                {
                  trace_bitmap |= 0x80000000;
                  error0 = ACL_FA_ERROR_ACL_EXIST_SESSION;
-                 // FIXME assert(value_sess.value == (0xffffffff & value_sess.value));
-                 u32 sess_id = value_sess.value;
-                 fa_session_t *sess = am->fa_sessions_pool + sess_id;
+                 fa_full_session_id_t f_sess_id;
+
+                  f_sess_id.as_u64 = value_sess.value;
+                  ASSERT(f_sess_id.thread_index < vec_len(vlib_mains));
+
+                 fa_session_t *sess = get_session_ptr(am, f_sess_id.thread_index, f_sess_id.session_index);
                  int old_timeout_type =
                    fa_session_get_timeout_type (am, sess);
                  action =
                    acl_fa_track_session (am, is_input, sw_if_index0, now,
                                          sess, &fa_5tuple);
                  /* expose the session id to the tracer */
-                 match_rule_index = sess_id;
+                 match_rule_index = f_sess_id.session_index;
                  int new_timeout_type =
                    fa_session_get_timeout_type (am, sess);
                  acl_check_needed = 0;
@@ -844,20 +768,49 @@ acl_fa_node_fn (vlib_main_t * vm,
                  /* Tracking might have changed the session timeout type, e.g. from transient to established */
                  if (PREDICT_FALSE (old_timeout_type != new_timeout_type))
                    {
-                     acl_fa_restart_timer_for_session (am, now, sess_id);
+                     acl_fa_restart_timer_for_session (am, now, f_sess_id);
                      pkts_restart_session_timer++;
                      trace_bitmap |=
                        0x00010000 + ((0xff & old_timeout_type) << 8) +
                        (0xff & new_timeout_type);
                    }
+                  /*
+                   * I estimate the likelihood to be very low - the VPP needs
+                   * to have >64K interfaces to start with and then on
+                   * exactly 64K indices apart needs to be exactly the same
+                   * 5-tuple... Anyway, since this probability is nonzero -
+                   * print an error and drop the unlucky packet.
+                   * If this shows up in real world, we would need to bump
+                   * the hash key length.
+                   */
+                 if (PREDICT_FALSE(sess->sw_if_index != sw_if_index0)) {
+                    clib_warning("BUG: session LSB16(sw_if_index) and 5-tuple collision!");
+                    acl_check_needed = 0;
+                    action = 0;
+                  }
+                  if (PREDICT_FALSE(am->reclassify_sessions)) {
+                   /* if the MSB of policy epoch matches but not the LSB means it is a stale session */
+                   if ( (0 == ((current_policy_epoch ^ f_sess_id.intf_policy_epoch) & FA_POLICY_EPOCH_IS_INPUT))
+                        && (current_policy_epoch != f_sess_id.intf_policy_epoch) ) {
+                      /* delete session and increment the counter */
+                      vec_validate (pw->fa_session_epoch_change_by_sw_if_index, sw_if_index0);
+                      vec_elt (pw->fa_session_epoch_change_by_sw_if_index, sw_if_index0)++;
+                      if(acl_fa_conn_list_delete_session(am, f_sess_id)) {
+                        /* delete the session only if we were able to unlink it */
+                        acl_fa_delete_session (am, sw_if_index0, f_sess_id);
+                      }
+                      acl_check_needed = 1;
+                      trace_bitmap |= 0x40000000;
+                    }
+                  }
                }
            }
 
          if (acl_check_needed)
            {
-             action =
-               full_acl_match_5tuple (sw_if_index0, &fa_5tuple, is_l2_path,
-                                      is_ip6, is_input, &match_acl_in_index,
+              action = 0; /* deny by default */
+             acl_plugin_match_5tuple_inline (lc_index0, (fa_5tuple_opaque_t *)&fa_5tuple,
+                                      is_ip6, &action, &match_acl_pos, &match_acl_in_index,
                                       &match_rule_index, &trace_bitmap);
              error0 = action;
              if (1 == action)
@@ -865,13 +818,25 @@ acl_fa_node_fn (vlib_main_t * vm,
              if (2 == action)
                {
                  if (!acl_fa_can_add_session (am, is_input, sw_if_index0))
-                    acl_fa_try_recycle_session (am, is_input, sw_if_index0);
+                    acl_fa_try_recycle_session (am, is_input, thread_index, sw_if_index0);
 
                  if (acl_fa_can_add_session (am, is_input, sw_if_index0))
                    {
-                      acl_fa_add_session (am, is_input, sw_if_index0, now,
-                                         &kv_sess);
-                     pkts_new_session += 1;
+                      if (PREDICT_TRUE (valid_new_sess)) {
+                        fa_session_t *sess = acl_fa_add_session (am, is_input,
+                                                                 sw_if_index0,
+                                                                 now, &kv_sess, current_policy_epoch);
+                        acl_fa_track_session (am, is_input, sw_if_index0, now,
+                                              sess, &fa_5tuple);
+                        pkts_new_session += 1;
+                      } else {
+                        /*
+                         *  ICMP packets with non-icmp_valid_new type will be
+                         *  forwarded without being dropped.
+                         */
+                        action = 1;
+                        pkts_acl_permit += 1;
+                      }
                    }
                  else
                    {
@@ -886,18 +851,20 @@ acl_fa_node_fn (vlib_main_t * vm,
          if (action > 0)
            {
              if (is_l2_path)
-               next0 =
-                 feat_bitmap_get_next_node_index (l2_feat_next_node_index,
-                                                  feature_bitmap0);
+               next0 = vnet_l2_feature_next (b0, l2_feat_next_node_index, 0);
              else
                vnet_feature_next (sw_if_index0, &next0, b0);
            }
+#ifdef FA_NODE_VERBOSE_DEBUG
+          clib_warning("ACL_FA_NODE_DBG: sw_if_index %d lc_index %d action %d acl_index %d rule_index %d", sw_if_index0, lc_index0, action, match_acl_in_index, match_rule_index);
+#endif
 
          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              acl_fa_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
              t->sw_if_index = sw_if_index0;
+             t->lc_index = lc_index0;
              t->next_index = next0;
              t->match_acl_in_index = match_acl_in_index;
              t->match_rule_index = match_rule_index;
@@ -1024,16 +991,9 @@ acl_out_ip4_fa_node_fn (vlib_main_t * vm,
 }
 
 /*
- * This process performs all the connection clean up - both for idle connections,
- * as well as receiving the signals to clean up the connections in case of sw_if_index deletion,
- * or (maybe in the future) the connection deletion due to policy reasons.
- *
- * The previous iteration (l2sess) attempted to clean up the connections in small increments,
- * in-band, but the problem it tried to preemptively address (process starvation) is yet to be seen.
- *
- * The approach with a single thread deleting the connections is simpler, thus we use it until
- * there is a real starvation problem to solve.
- *
+ * This process ensures the connection cleanup happens every so often
+ * even in absence of traffic, as well as provides general orchestration
+ * for requests like connection deletion on a given sw_if_index.
  */
 
 
@@ -1056,92 +1016,185 @@ static char *acl_fa_cleaner_error_strings[] = {
 #undef _
 };
 
-static int
-acl_fa_clean_sessions_by_sw_if_index (acl_main_t *am, u32 sw_if_index, u32 *count)
-{
-
-  int undeleted = 0;
-  fa_session_t *sess;
-  uword *dv = NULL;
-  uword *ii;
-
-  pool_foreach(sess, am->fa_sessions_pool, ({
-    if ( (~0 == sw_if_index) || (sw_if_index == sess->sw_if_index) )
-      vec_add1(dv, sess-am->fa_sessions_pool);
-  }));
-  vec_foreach(ii, dv)
-  {
-    sess =  pool_elt_at_index(am->fa_sessions_pool, *ii);
-    acl_fa_delete_session(am, sess->sw_if_index, *ii);
-    (*count)++;
-  }
-
-  pool_foreach(sess, am->fa_sessions_pool, ({
-    if ( (~0 == sw_if_index) || (sw_if_index == sess->sw_if_index) )
-      undeleted++;
-  }));
-  if (undeleted == 0)
-    {
-      if (~0 == sw_if_index)
-        {
-          /* FIXME: clean-up tables ? */
-        }
-      else
-        {
-          /* FIXME: clean-up tables ? */
-        }
-    }
-  return (undeleted == 0);
-}
 /* *INDENT-ON* */
 
 static vlib_node_registration_t acl_fa_session_cleaner_process_node;
+static vlib_node_registration_t acl_fa_worker_session_cleaner_process_node;
 
-static int
-acl_fa_conn_time_to_check (acl_main_t *am, u64 now, u32 session_index)
+/*
+ * Per-worker thread interrupt-driven cleaner thread
+ * to clean idle connections if there are no packets
+ */
+static uword
+acl_fa_worker_conn_cleaner_process(vlib_main_t * vm,
+              vlib_node_runtime_t * rt, vlib_frame_t * f)
 {
-  fa_session_t *sess = am->fa_sessions_pool + session_index;
-  u64 timeout_time =
-              sess->link_enqueue_time + fa_session_get_timeout (am, sess);
-  return (timeout_time < now);
+   acl_main_t *am = &acl_main;
+   u64 now = clib_cpu_time_now ();
+   u16 thread_index = os_get_thread_index ();
+   acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+   int num_expired;
+   elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner interrupt: now %lu", "i8", now);
+   /* allow another interrupt to be queued */
+   pw->interrupt_is_pending = 0;
+   if (pw->clear_in_process) {
+     if (0 == pw->swipe_end_time) {
+       /*
+        * Someone has just set the flag to start clearing.
+        * we do this by combing through the connections up to a "time T"
+        * which is now, and requeueing everything except the expired
+        * connections and those matching the interface(s) being cleared.
+        */
+
+       /*
+        * first filter the sw_if_index bitmap that they want from us, by
+        * a bitmap of sw_if_index for which we actually have connections.
+        */
+       if ((pw->pending_clear_sw_if_index_bitmap == 0)
+           || (pw->serviced_sw_if_index_bitmap == 0)) {
+         elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner: now %lu, someone tried to call clear but one of the bitmaps are empty", "i8", now);
+        clib_bitmap_zero(pw->pending_clear_sw_if_index_bitmap);
+       } else {
+#ifdef FA_NODE_VERBOSE_DEBUG
+         clib_warning("WORKER-CLEAR: (before and) swiping sw-if-index bitmap: %U, my serviced bitmap %U",
+                      format_bitmap_hex, pw->pending_clear_sw_if_index_bitmap,
+                      format_bitmap_hex, pw->serviced_sw_if_index_bitmap);
+#endif
+         pw->pending_clear_sw_if_index_bitmap = clib_bitmap_and(pw->pending_clear_sw_if_index_bitmap,
+                                                             pw->serviced_sw_if_index_bitmap);
+       }
+
+       if (clib_bitmap_is_zero(pw->pending_clear_sw_if_index_bitmap)) {
+         /* if the cross-section is a zero vector, no need to do anything. */
+         elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner: now %lu, clearing done, nothing to do", "i8", now);
+         pw->clear_in_process = 0;
+       } else {
+#ifdef FA_NODE_VERBOSE_DEBUG
+         clib_warning("WORKER-CLEAR: swiping sw-if-index bitmap: %U, my serviced bitmap %U",
+                      format_bitmap_hex, pw->pending_clear_sw_if_index_bitmap,
+                      format_bitmap_hex, pw->serviced_sw_if_index_bitmap);
+#endif
+         elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner: swiping until %lu", "i8", now);
+         /* swipe through the connection lists until enqueue timestamps become above "now" */
+         pw->swipe_end_time = now;
+       }
+     }
+   }
+   num_expired = acl_fa_check_idle_sessions(am, thread_index, now);
+   // clib_warning("WORKER-CLEAR: checked %d sessions (clear_in_progress: %d)", num_expired, pw->clear_in_process);
+   elog_acl_maybe_trace_X2(am, "acl_fa_worker_conn_cleaner: checked %d sessions (clear_in_process: %d)", "i4i4", (u32)num_expired, (u32)pw->clear_in_process);
+   if (pw->clear_in_process) {
+     if (0 == num_expired) {
+       /* we were clearing but we could not process any more connections. time to stop. */
+       clib_bitmap_zero(pw->pending_clear_sw_if_index_bitmap);
+       pw->clear_in_process = 0;
+       elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner: now %lu, clearing done - all done", "i8", now);
+     } else {
+       elog_acl_maybe_trace_X1(am, "acl_fa_worker_conn_cleaner: now %lu, more work to do - requesting interrupt", "i8", now);
+       /* should continue clearing. So could they please send an interrupt again? */
+       pw->interrupt_is_needed = 1;
+     }
+   } else {
+     if (num_expired >= am->fa_max_deleted_sessions_per_interval) {
+       /* there was too much work, we should get an interrupt ASAP */
+       pw->interrupt_is_needed = 1;
+       pw->interrupt_is_unwanted = 0;
+     } else if (num_expired <= am->fa_min_deleted_sessions_per_interval) {
+       /* signal that they should trigger us less */
+       pw->interrupt_is_needed = 0;
+       pw->interrupt_is_unwanted = 1;
+     } else {
+       /* the current rate of interrupts is ok */
+       pw->interrupt_is_needed = 0;
+       pw->interrupt_is_unwanted = 0;
+     }
+     elog_acl_maybe_trace_X3(am, "acl_fa_worker_conn_cleaner: now %lu, interrupt needed: %u, interrupt unwanted: %u", "i8i4i4", now, ((u32)pw->interrupt_is_needed), ((u32)pw->interrupt_is_unwanted));
+   }
+   pw->interrupt_generation = am->fa_interrupt_generation;
+   return 0;
 }
 
+static void
+send_one_worker_interrupt (vlib_main_t * vm, acl_main_t *am, int thread_index)
+{
+  acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+  if (!pw->interrupt_is_pending) {
+    pw->interrupt_is_pending = 1;
+    vlib_node_set_interrupt_pending (vlib_mains[thread_index],
+                  acl_fa_worker_session_cleaner_process_node.index);
+    elog_acl_maybe_trace_X1(am, "send_one_worker_interrupt: send interrupt to worker %d", "i4", ((u32)thread_index));
+    /* if the interrupt was requested, mark that done. */
+    /* pw->interrupt_is_needed = 0; */
+  }
+}
+
+static void
+send_interrupts_to_workers (vlib_main_t * vm, acl_main_t *am)
+{
+  int i;
+  /* Can't use vec_len(am->per_worker_data) since the threads might not have come up yet; */
+  int n_threads = vec_len(vlib_mains);
+  for (i = 0; i < n_threads; i++) {
+    send_one_worker_interrupt(vm, am, i);
+  }
+}
 
+/* centralized process to drive per-worker cleaners */
 static uword
 acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
                                vlib_frame_t * f)
 {
   acl_main_t *am = &acl_main;
-  u64 now = clib_cpu_time_now ();
+  u64 now;
   f64 cpu_cps = vm->clib_time.clocks_per_second;
   u64 next_expire;
-  /* We should call timer wheel at least twice a second */
+  /* We should check if there are connections to clean up - at least twice a second */
   u64 max_timer_wait_interval = cpu_cps / 2;
-  am->fa_current_cleaner_timer_wait_interval = max_timer_wait_interval;
-
-  u32 *expired = NULL;
   uword event_type, *event_data = 0;
+  acl_fa_per_worker_data_t *pw0;
 
+  am->fa_current_cleaner_timer_wait_interval = max_timer_wait_interval;
   am->fa_cleaner_node_index = acl_fa_session_cleaner_process_node.index;
-
+  am->fa_interrupt_generation = 1;
   while (1)
     {
-      u32 count_deleted_sessions = 0;
-      u32 count_already_deleted = 0;
       now = clib_cpu_time_now ();
       next_expire = now + am->fa_current_cleaner_timer_wait_interval;
       int has_pending_conns = 0;
+      u16 ti;
       u8 tt;
-      for(tt = 0; tt < ACL_N_TIMEOUTS; tt++)
-        {
-          if (~0 != am->fa_conn_list_head[tt])
+
+      /*
+       * walk over all per-thread list heads of different timeouts,
+       * and see if there are any connections pending.
+       * If there aren't - we do not need to wake up until the
+       * worker code signals that it has added a connection.
+       *
+       * Also, while we are at it, calculate the earliest we need to wake up.
+       */
+      for(ti = 0; ti < vec_len(vlib_mains); ti++) {
+        if (ti >= vec_len(am->per_worker_data)) {
+          continue;
+        }
+        acl_fa_per_worker_data_t *pw = &am->per_worker_data[ti];
+        for(tt = 0; tt < vec_len(pw->fa_conn_list_head); tt++) {
+          u64 head_expiry = acl_fa_get_list_head_expiry_time(am, pw, now, ti, tt);
+          if ((head_expiry < next_expire) && !pw->interrupt_is_pending) {
+            elog_acl_maybe_trace_X3(am, "acl_fa_session_cleaner_process: now %lu, worker: %d tt: %d", "i8i2i2", now, ti, tt);
+            elog_acl_maybe_trace_X2(am, "acl_fa_session_cleaner_process: head expiry: %lu, is earlier than curr next expire: %lu", "i8i8", head_expiry, next_expire);
+            next_expire = head_expiry;
+         }
+          if (~0 != pw->fa_conn_list_head[tt]) {
             has_pending_conns = 1;
+          }
         }
+      }
 
-      /* If no pending connections then no point in timing out */
-      if (!has_pending_conns)
+      /* If no pending connections and no ACL applied then no point in timing out */
+      if (!has_pending_conns && (0 == am->fa_total_enabled_count))
         {
           am->fa_cleaner_cnt_wait_without_timeout++;
+          elog_acl_maybe_trace_X1(am, "acl_conn_cleaner: now %lu entering wait without timeout", "i8", now);
           (void) vlib_process_wait_for_event (vm);
           event_type = vlib_process_get_events (vm, &event_data);
         }
@@ -1155,16 +1208,13 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
            }
          else
            {
-             /* Timing wheel code is happier if it is called regularly */
-             if (timeout > 0.5)
-               timeout = 0.5;
               am->fa_cleaner_cnt_wait_with_timeout++;
+              elog_acl_maybe_trace_X2(am, "acl_conn_cleaner: now %lu entering wait with timeout %.6f sec", "i8f8", now, timeout);
              (void) vlib_process_wait_for_event_or_clock (vm, timeout);
              event_type = vlib_process_get_events (vm, &event_data);
            }
        }
 
-      now = clib_cpu_time_now ();
       switch (event_type)
        {
        case ~0:
@@ -1175,22 +1225,77 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
          break;
        case ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX:
          {
+            uword *clear_sw_if_index_bitmap = 0;
            uword *sw_if_index0;
+            int clear_all = 0;
+            now = clib_cpu_time_now ();
+            elog_acl_maybe_trace_X1(am, "acl_fa_session_cleaner_process: now %lu, received ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX", "i8", now);
            vec_foreach (sw_if_index0, event_data)
            {
               am->fa_cleaner_cnt_delete_by_sw_index++;
+              elog_acl_maybe_trace_X1(am, "acl_fa_session_cleaner_process: ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX %d", "i4", *sw_if_index0);
+              if (*sw_if_index0 == ~0)
+                {
+                  clear_all = 1;
+                }
+              else
+                {
+                  if (!pool_is_free_index (am->vnet_main->interface_main.sw_interfaces, *sw_if_index0))
+                    {
+                      clear_sw_if_index_bitmap = clib_bitmap_set(clear_sw_if_index_bitmap, *sw_if_index0, 1);
+                    }
+                }
+           }
 #ifdef FA_NODE_VERBOSE_DEBUG
-             clib_warning
-               ("ACL_FA_NODE_CLEAN: ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX: %d",
-                *sw_if_index0);
+           clib_warning("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX bitmap: %U", format_bitmap_hex, clear_sw_if_index_bitmap);
 #endif
-             u32 count = 0;
-             int result =
-               acl_fa_clean_sessions_by_sw_if_index (am, *sw_if_index0,
-                                                     &count);
-             count_deleted_sessions += count;
-              am->fa_cleaner_cnt_delete_by_sw_index_ok += result;
-           }
+           vec_foreach(pw0, am->per_worker_data) {
+              CLIB_MEMORY_BARRIER ();
+             while (pw0->clear_in_process) {
+                CLIB_MEMORY_BARRIER ();
+                elog_acl_maybe_trace_X1(am, "ACL_FA_NODE_CLEAN: waiting previous cleaning cycle to finish on %d", "i4", (u32)(pw0 - am->per_worker_data));
+                vlib_process_suspend(vm, 0.0001);
+                if (pw0->interrupt_is_needed) {
+                  send_one_worker_interrupt(vm, am, (pw0 - am->per_worker_data));
+                }
+              }
+              if (pw0->clear_in_process) {
+                clib_warning("ERROR-BUG! Could not initiate cleaning on worker because another cleanup in progress");
+             } else {
+                if (clear_all)
+                  {
+                    /* if we need to clear all, then just clear the interfaces that we are servicing */
+                    pw0->pending_clear_sw_if_index_bitmap = clib_bitmap_dup(pw0->serviced_sw_if_index_bitmap);
+                  }
+                else
+                  {
+                    pw0->pending_clear_sw_if_index_bitmap = clib_bitmap_dup(clear_sw_if_index_bitmap);
+                  }
+                pw0->clear_in_process = 1;
+              }
+            }
+            /* send some interrupts so they can start working */
+            send_interrupts_to_workers(vm, am);
+
+            /* now wait till they all complete */
+#ifdef FA_NODE_VERBOSE_DEBUG
+           clib_warning("CLEANER mains len: %d per-worker len: %d", vec_len(vlib_mains), vec_len(am->per_worker_data));
+#endif
+           vec_foreach(pw0, am->per_worker_data) {
+              CLIB_MEMORY_BARRIER ();
+             while (pw0->clear_in_process) {
+                CLIB_MEMORY_BARRIER ();
+                elog_acl_maybe_trace_X1(am, "ACL_FA_NODE_CLEAN: waiting for my cleaning cycle to finish on %d", "i4", (u32)(pw0 - am->per_worker_data));
+                vlib_process_suspend(vm, 0.0001);
+                if (pw0->interrupt_is_needed) {
+                  send_one_worker_interrupt(vm, am, (pw0 - am->per_worker_data));
+                }
+              }
+            }
+#ifdef FA_NODE_VERBOSE_DEBUG
+            clib_warning("ACL_FA_NODE_CLEAN: cleaning done");
+#endif
+            clib_bitmap_free(clear_sw_if_index_bitmap);
          }
          break;
        default:
@@ -1206,74 +1311,54 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
          break;
        }
 
-      {
-        u8 tt = 0;
-        for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) {
-          while((vec_len(expired) < 2*am->fa_max_deleted_sessions_per_interval)
-                && (~0 != am->fa_conn_list_head[tt])
-                && (acl_fa_conn_time_to_check(am, now,
-                                              am->fa_conn_list_head[tt]))) {
-            u32 sess_id = am->fa_conn_list_head[tt];
-            vec_add1(expired, sess_id);
-            acl_fa_conn_list_delete_session(am, sess_id);
-          }
-        }
-      }
+      send_interrupts_to_workers(vm, am);
 
-      u32 *psid = NULL;
-      vec_foreach (psid, expired)
-      {
-       u32 session_index = *psid;
-       if (!pool_is_free_index (am->fa_sessions_pool, session_index))
-         {
-           fa_session_t *sess = am->fa_sessions_pool + session_index;
-           u32 sw_if_index = sess->sw_if_index;
-           u64 sess_timeout_time =
-             sess->last_active_time + fa_session_get_timeout (am, sess);
-           if (now < sess_timeout_time)
-             {
-               /* clib_warning ("ACL_FA_NODE_CLEAN: Restarting timer for session %d",
-                  (int) session_index); */
-
-                /* There was activity on the session, so the idle timeout
-                   has not passed. Enqueue for another time period. */
-
-                acl_fa_conn_list_add_session(am, session_index, now);
-
-               /* FIXME: When/if moving to timer wheel,
-                   pretend we did this in the past,
-                   at last_active moment, so the timer is accurate */
-                am->fa_cleaner_cnt_timer_restarted++;
-             }
-           else
-             {
-               /* clib_warning ("ACL_FA_NODE_CLEAN: Deleting session %d",
-                  (int) session_index); */
-               acl_fa_delete_session (am, sw_if_index, session_index);
-                count_deleted_sessions++;
-             }
-         }
-       else
-         {
-           count_already_deleted++;
-         }
-      }
-      if (expired)
-       _vec_len (expired) = 0;
       if (event_data)
        _vec_len (event_data) = 0;
 
-      if (count_deleted_sessions > am->fa_max_deleted_sessions_per_interval) {
-        /* if there was too many sessions to delete, do less waiting around next time */
+      /*
+       * If the interrupts were not processed yet, ensure we wait a bit,
+       * but up to a point.
+       */
+      int need_more_wait = 0;
+      int max_wait_cycles = 100;
+      do {
+        need_more_wait = 0;
+        vec_foreach(pw0, am->per_worker_data) {
+          if (pw0->interrupt_generation != am->fa_interrupt_generation) {
+            need_more_wait = 1;
+          }
+        }
+        if (need_more_wait) {
+          vlib_process_suspend(vm, 0.0001);
+        }
+      } while (need_more_wait && (--max_wait_cycles > 0));
+
+      int interrupts_needed = 0;
+      int interrupts_unwanted = 0;
+
+      vec_foreach(pw0, am->per_worker_data) {
+        if (pw0->interrupt_is_needed) {
+          interrupts_needed++;
+          /* the per-worker value is reset when sending the interrupt */
+        }
+        if (pw0->interrupt_is_unwanted) {
+          interrupts_unwanted++;
+          pw0->interrupt_is_unwanted = 0;
+        }
+      }
+      if (interrupts_needed) {
+        /* they need more interrupts, do less waiting around next time */
         am->fa_current_cleaner_timer_wait_interval /= 2;
-      } else if (count_deleted_sessions < am->fa_min_deleted_sessions_per_interval) {
-        /* Too few deleted sessions, slowly increase the amount of sleep up to a limit */
+        /* never go into zero-wait either though - we need to give the space to others */
+        am->fa_current_cleaner_timer_wait_interval += 1;
+      } else if (interrupts_unwanted) {
+        /* slowly increase the amount of sleep up to a limit */
         if (am->fa_current_cleaner_timer_wait_interval < max_timer_wait_interval)
           am->fa_current_cleaner_timer_wait_interval += cpu_cps * am->fa_cleaner_wait_time_increment;
       }
       am->fa_cleaner_cnt_event_cycles++;
-      am->fa_cleaner_cnt_deleted_sessions += count_deleted_sessions;
-      am->fa_cleaner_cnt_already_deleted += count_already_deleted;
+      am->fa_interrupt_generation++;
     }
   /* NOT REACHED */
   return 0;
@@ -1284,22 +1369,39 @@ void
 acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
 {
   acl_main_t *am = &acl_main;
+  if (enable_disable) {
+    acl_fa_verify_init_sessions(am);
+    am->fa_total_enabled_count++;
+    void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
+    vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
+                                 ACL_FA_CLEANER_RESCHEDULE, 0);
+    clib_mem_set_heap (oldheap);
+  } else {
+    am->fa_total_enabled_count--;
+  }
+
   if (is_input)
     {
+      ASSERT(clib_bitmap_get(am->fa_in_acl_on_sw_if_index, sw_if_index) != enable_disable);
+      void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
       vnet_feature_enable_disable ("ip4-unicast", "acl-plugin-in-ip4-fa",
                                   sw_if_index, enable_disable, 0, 0);
       vnet_feature_enable_disable ("ip6-unicast", "acl-plugin-in-ip6-fa",
                                   sw_if_index, enable_disable, 0, 0);
+      clib_mem_set_heap (oldheap);
       am->fa_in_acl_on_sw_if_index =
        clib_bitmap_set (am->fa_in_acl_on_sw_if_index, sw_if_index,
                         enable_disable);
     }
   else
     {
+      ASSERT(clib_bitmap_get(am->fa_out_acl_on_sw_if_index, sw_if_index) != enable_disable);
+      void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
       vnet_feature_enable_disable ("ip4-output", "acl-plugin-out-ip4-fa",
                                   sw_if_index, enable_disable, 0, 0);
       vnet_feature_enable_disable ("ip6-output", "acl-plugin-out-ip6-fa",
                                   sw_if_index, enable_disable, 0, 0);
+      clib_mem_set_heap (oldheap);
       am->fa_out_acl_on_sw_if_index =
        clib_bitmap_set (am->fa_out_acl_on_sw_if_index, sw_if_index,
                         enable_disable);
@@ -1307,16 +1409,38 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
   if ((!enable_disable) && (!acl_fa_ifc_has_in_acl (am, sw_if_index))
       && (!acl_fa_ifc_has_out_acl (am, sw_if_index)))
     {
+#ifdef FA_NODE_VERBOSE_DEBUG
+      clib_warning("ENABLE-DISABLE: clean the connections on interface %d", sw_if_index);
+#endif
+      void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
       vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
                                 ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX,
                                 sw_if_index);
+      clib_mem_set_heap (oldheap);
     }
 }
 
+void
+show_fa_sessions_hash(vlib_main_t * vm, u32 verbose)
+{
+  acl_main_t *am = &acl_main;
+  if (am->fa_sessions_hash_is_initialized) {
+    vlib_cli_output(vm, "\nSession lookup hash table:\n%U\n\n",
+                  BV (format_bihash), &am->fa_sessions_hash, verbose);
+  } else {
+    vlib_cli_output(vm, "\nSession lookup hash table is not allocated.\n\n");
+  }
+}
 
 
 /* *INDENT-OFF* */
 
+VLIB_REGISTER_NODE (acl_fa_worker_session_cleaner_process_node, static) = {
+  .function = acl_fa_worker_conn_cleaner_process,
+  .name = "acl-plugin-fa-worker-cleaner-process",
+  .type = VLIB_NODE_TYPE_INPUT,
+  .state = VLIB_NODE_STATE_INTERRUPT,
+};
 
 VLIB_REGISTER_NODE (acl_fa_session_cleaner_process_node, static) = {
   .function = acl_fa_session_cleaner_process,