acl-plugin: create forward and return sessions in lieu of making a special per-packet... 23/12723/4
authorAndrew Yourtchenko <ayourtch@gmail.com>
Thu, 24 May 2018 14:53:27 +0000 (16:53 +0200)
committerDamjan Marion <dmarion.lists@gmail.com>
Sat, 26 May 2018 16:56:02 +0000 (16:56 +0000)
Using a separate session key has proven to be tricky for the following reasons:

- it's a lot of storage to have what looks to be nearly identical to 5tuple,
just maybe with some fields swapped

- shuffling the fields from 5tuple adds to memory pressure

- the fact that the fields do not coincide with the packet memory
  means for any staged processing we need to use up a lot of memory

Thus, just add two entries into the bihash table pointing to
the same session entry, so we could match the packets from either
direction.

With this we have the key layout of L3 info (which takes up
the majority of space for IPv6 case) the same as in the packet,
thus, opening up the possibility for other optimizations.

Not having to create and store a separate session key
should also give us a small performance win in itself.

Also, add the routine to show the session bihash in a better
way than a bunch of numbers.

Alas, the memory usage in the bihash obviously doubles.

Change-Id: I8fd2ed4714ad7fc447c4fa224d209bc0b736b371
Signed-off-by: Andrew Yourtchenko <ayourtch@gmail.com>
src/plugins/acl/dataplane_node.c
src/plugins/acl/fa_node.h
src/plugins/acl/public_inlines.h
src/plugins/acl/sess_mgmt_node.c
src/plugins/acl/session_inlines.h

index 98e9661..58db6f7 100644 (file)
@@ -82,7 +82,7 @@ acl_fa_node_fn (vlib_main_t * vm,
   u32 pkts_restart_session_timer = 0;
   u32 trace_bitmap = 0;
   acl_main_t *am = &acl_main;
-  fa_5tuple_t fa_5tuple, kv_sess;
+  fa_5tuple_t fa_5tuple;
   clib_bihash_kv_40_8_t value_sess;
   vlib_node_runtime_t *error_node;
   u64 now = clib_cpu_time_now ();
@@ -114,7 +114,6 @@ acl_fa_node_fn (vlib_main_t * vm,
          u32 match_acl_pos = ~0;
          u32 match_rule_index = ~0;
          u8 error0 = 0;
-         u32 valid_new_sess;
 
          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
@@ -144,25 +143,15 @@ acl_fa_node_fn (vlib_main_t * vm,
                                                             sw_if_index0)
            : (is_input * FA_POLICY_EPOCH_IS_INPUT);
          /*
-          * Extract the L3/L4 matching info into a 5-tuple structure,
-          * then create a session key whose layout is independent on forward or reverse
-          * direction of the packet.
+          * Extract the L3/L4 matching info into a 5-tuple structure.
           */
 
          acl_plugin_fill_5tuple_inline (lc_index0, b0, is_ip6, is_input,
                                         is_l2_path,
                                         (fa_5tuple_opaque_t *) & fa_5tuple);
          fa_5tuple.l4.lsb_of_sw_if_index = sw_if_index0 & 0xffff;
-         valid_new_sess =
-           acl_make_5tuple_session_key (am, is_input, is_ip6, sw_if_index0,
-                                        &fa_5tuple, &kv_sess);
-         // XXDEL fa_5tuple.pkt.is_input = is_input;
          fa_5tuple.pkt.mask_type_index_lsb = ~0;
 #ifdef FA_NODE_VERBOSE_DEBUG
-         clib_warning
-           ("ACL_FA_NODE_DBG: session 5-tuple %016llx %016llx %016llx %016llx %016llx %016llx",
-            kv_sess.kv.key[0], kv_sess.kv.key[1], kv_sess.kv.key[2],
-            kv_sess.kv.key[3], kv_sess.kv.key[4], kv_sess.kv.value);
          clib_warning
            ("ACL_FA_NODE_DBG: packet 5-tuple %016llx %016llx %016llx %016llx %016llx %016llx",
             fa_5tuple.kv.key[0], fa_5tuple.kv.key[1], fa_5tuple.kv.key[2],
@@ -174,7 +163,7 @@ acl_fa_node_fn (vlib_main_t * vm,
          if (acl_fa_ifc_has_sessions (am, sw_if_index0))
            {
              if (acl_fa_find_session
-                 (am, sw_if_index0, &kv_sess, &value_sess))
+                 (am, sw_if_index0, &fa_5tuple, &value_sess))
                {
                  trace_bitmap |= 0x80000000;
                  error0 = ACL_FA_ERROR_ACL_EXIST_SESSION;
@@ -272,26 +261,14 @@ acl_fa_node_fn (vlib_main_t * vm,
 
                  if (acl_fa_can_add_session (am, is_input, sw_if_index0))
                    {
-                     if (PREDICT_TRUE (valid_new_sess))
-                       {
-                         fa_session_t *sess =
-                           acl_fa_add_session (am, is_input,
-                                               sw_if_index0,
-                                               now, &kv_sess,
-                                               current_policy_epoch);
-                         acl_fa_track_session (am, is_input, sw_if_index0,
-                                               now, sess, &fa_5tuple);
-                         pkts_new_session += 1;
-                       }
-                     else
-                       {
-                         /*
-                          *  ICMP packets with non-icmp_valid_new type will be
-                          *  forwared without being dropped.
-                          */
-                         action = 1;
-                         pkts_acl_permit += 1;
-                       }
+                     fa_session_t *sess =
+                       acl_fa_add_session (am, is_input, is_ip6,
+                                           sw_if_index0,
+                                           now, &fa_5tuple,
+                                           current_policy_epoch);
+                     acl_fa_track_session (am, is_input, sw_if_index0,
+                                           now, sess, &fa_5tuple);
+                     pkts_new_session += 1;
                    }
                  else
                    {
index 263cf14..8d79e42 100644 (file)
@@ -39,8 +39,16 @@ typedef union {
   u64 as_u64;
   struct {
     u16 port[2];
-    u16 proto;
-    u16 lsb_of_sw_if_index;
+    union {
+      struct {
+        u8 proto;
+        u8 is_input: 1;
+        u8 is_slowpath: 1;
+        u8 reserved0: 6;
+        u16 lsb_of_sw_if_index;
+      };
+      u32 non_port_l4_data;
+    };
   };
 } fa_session_l4_key_t;
 
index a2b8fc9..3e6c95a 100644 (file)
@@ -192,7 +192,7 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
   int l3_offset;
   int l4_offset;
   u16 ports[2];
-  u16 proto;
+  u8 proto;
 
   if (is_l2_path)
     {
@@ -307,6 +307,8 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
 
     }
   p5tuple_pkt->l4.proto = proto;
+  p5tuple_pkt->l4.is_input = is_input;
+
   if (PREDICT_TRUE (offset_within_packet (b0, l4_offset)))
     {
       p5tuple_pkt->pkt.l4_valid = 1;
@@ -322,6 +324,7 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
            *(u8 *) get_ptr_to_offset (b0,
                                       l4_offset + offsetof (icmp46_header_t,
                                                             code));
+          p5tuple_pkt->l4.is_slowpath = 1;
        }
       else if ((IP_PROTOCOL_TCP == proto) || (IP_PROTOCOL_UDP == proto))
        {
@@ -338,21 +341,12 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
                                       l4_offset + offsetof (tcp_header_t,
                                                             flags));
          p5tuple_pkt->pkt.tcp_flags_valid = (proto == IP_PROTOCOL_TCP);
+          p5tuple_pkt->l4.is_slowpath = 0;
        }
-      /*
-       * FIXME: rather than the above conditional, here could
-       * be a nice generic mechanism to extract two L4 values:
-       *
-       * have a per-protocol array of 4 elements like this:
-       *   u8 offset; to take the byte from, off L4 header
-       *   u8 mask; to mask it with, before storing
-       *
-       * this way we can describe UDP, TCP and ICMP[46] semantics,
-       * and add a sort of FPM-type behavior for other protocols.
-       *
-       * Of course, is it faster ? and is it needed ?
-       *
-       */
+      else
+        {
+          p5tuple_pkt->l4.is_slowpath = 1;
+        }
     }
 }
 
index b4faf55..103db35 100644 (file)
@@ -48,6 +48,26 @@ fa_session_get_shortest_timeout (acl_main_t * am)
   return timeout;
 }
 
+static u8 *
+format_session_bihash_5tuple (u8 * s, va_list * args)
+{
+  fa_5tuple_t *p5t = va_arg (*args, fa_5tuple_t *);
+  fa_full_session_id_t *sess = (void *) &p5t->pkt;
+
+  return format (s, "l3 %U -> %U"
+                " l4 lsb_of_sw_if_index %d proto %d l4_is_input %d l4_slow_path %d l4_reserved0 %d port %d -> %d | sess id %d thread id %d epoch %04x",
+                format_ip46_address, &p5t->addr[0],
+                IP46_TYPE_ANY,
+                format_ip46_address, &p5t->addr[1],
+                IP46_TYPE_ANY,
+                p5t->l4.lsb_of_sw_if_index,
+                p5t->l4.proto, p5t->l4.is_input, p5t->l4.is_slowpath,
+                p5t->l4.reserved0, p5t->l4.port[0], p5t->l4.port[1],
+                sess->session_index, sess->thread_index,
+                sess->intf_policy_epoch);
+}
+
+
 static void
 acl_fa_verify_init_sessions (acl_main_t * am)
 {
@@ -73,6 +93,8 @@ acl_fa_verify_init_sessions (acl_main_t * am)
                             "ACL plugin FA session bihash",
                             am->fa_conn_table_hash_num_buckets,
                             am->fa_conn_table_hash_memory_size);
+      clib_bihash_set_kvp_format_fn_40_8 (&am->fa_sessions_hash,
+                                         format_session_bihash_5tuple);
       am->fa_sessions_hash_is_initialized = 1;
     }
 }
index e75582b..d43e550 100644 (file)
@@ -67,72 +67,6 @@ acl_fa_ifc_has_out_acl (acl_main_t * am, int sw_if_index0)
   return it_has;
 }
 
-/* Session keys match the packets received, and mirror the packets sent */
-always_inline u32
-acl_make_5tuple_session_key (acl_main_t * am, int is_input, int is_ip6,
-                            u32 sw_if_index, fa_5tuple_t * p5tuple_pkt,
-                            fa_5tuple_t * p5tuple_sess)
-{
-  int src_index = is_input ? 0 : 1;
-  int dst_index = is_input ? 1 : 0;
-  u32 valid_new_sess = 1;
-  p5tuple_sess->addr[src_index] = p5tuple_pkt->addr[0];
-  p5tuple_sess->addr[dst_index] = p5tuple_pkt->addr[1];
-  p5tuple_sess->l4.as_u64 = p5tuple_pkt->l4.as_u64;
-
-  if (PREDICT_TRUE (p5tuple_pkt->l4.proto != icmp_protos[is_ip6]))
-    {
-      p5tuple_sess->l4.port[src_index] = p5tuple_pkt->l4.port[0];
-      p5tuple_sess->l4.port[dst_index] = p5tuple_pkt->l4.port[1];
-    }
-  else
-    {
-      static const u8 *icmp_invmap[] = { icmp4_invmap, icmp6_invmap };
-      static const u8 *icmp_valid_new[] =
-       { icmp4_valid_new, icmp6_valid_new };
-      static const u8 icmp_invmap_size[] = { sizeof (icmp4_invmap),
-       sizeof (icmp6_invmap)
-      };
-      static const u8 icmp_valid_new_size[] = { sizeof (icmp4_valid_new),
-       sizeof (icmp6_valid_new)
-      };
-      int type =
-       is_ip6 ? p5tuple_pkt->l4.port[0] - 128 : p5tuple_pkt->l4.port[0];
-
-      p5tuple_sess->l4.port[0] = p5tuple_pkt->l4.port[0];
-      p5tuple_sess->l4.port[1] = p5tuple_pkt->l4.port[1];
-
-      /*
-       * Invert ICMP type for valid icmp_invmap messages:
-       *  1) input node with outbound ACL interface
-       *  2) output node with inbound ACL interface
-       *
-       */
-      if ((is_input && acl_fa_ifc_has_out_acl (am, sw_if_index)) ||
-         (!is_input && acl_fa_ifc_has_in_acl (am, sw_if_index)))
-       {
-         if (type >= 0 &&
-             type <= icmp_invmap_size[is_ip6] && icmp_invmap[is_ip6][type])
-           {
-             p5tuple_sess->l4.port[0] = icmp_invmap[is_ip6][type] - 1;
-           }
-       }
-
-      /*
-       * ONLY ICMP messages defined in icmp4_valid_new/icmp6_valid_new table
-       * are allowed to create stateful ACL.
-       * The other messages will be forwarded without creating a reflexive ACL.
-       */
-      if (type < 0 ||
-         type > icmp_valid_new_size[is_ip6] || !icmp_valid_new[is_ip6][type])
-       {
-         valid_new_sess = 0;
-       }
-    }
-
-  return valid_new_sess;
-}
-
 always_inline int
 fa_session_get_timeout_type (acl_main_t * am, fa_session_t * sess)
 {
@@ -316,6 +250,100 @@ acl_fa_track_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
   return 3;
 }
 
+always_inline u64
+reverse_l4_u64_fastpath (u64 l4, int is_ip6)
+{
+  fa_session_l4_key_t l4i = {.as_u64 = l4 };
+  fa_session_l4_key_t l4o;
+
+  l4o.port[1] = l4i.port[0];
+  l4o.port[0] = l4i.port[1];
+
+  l4o.non_port_l4_data = l4i.non_port_l4_data;
+  l4o.is_input = 1 - l4i.is_input;
+  return l4o.as_u64;
+}
+
+always_inline u64
+reverse_l4_u64_slowpath (u64 l4, int is_ip6)
+{
+  fa_session_l4_key_t l4i = {.as_u64 = l4 };
+  fa_session_l4_key_t l4o;
+
+  if (l4i.proto == icmp_protos[is_ip6])
+    {
+      static const u8 *icmp_invmap[] = { icmp4_invmap, icmp6_invmap };
+      static const u8 *icmp_valid_new[] =
+       { icmp4_valid_new, icmp6_valid_new };
+      static const u8 icmp_invmap_size[] = { sizeof (icmp4_invmap),
+       sizeof (icmp6_invmap)
+      };
+      static const u8 icmp_valid_new_size[] = { sizeof (icmp4_valid_new),
+       sizeof (icmp6_valid_new)
+      };
+      int type = is_ip6 ? l4i.port[0] - 128 : l4i.port[0];
+
+      l4o.non_port_l4_data = l4i.non_port_l4_data;
+      l4o.port[0] = l4i.port[0];
+      l4o.port[1] = l4i.port[1];
+
+
+      /*
+       * ONLY ICMP messages defined in icmp4_valid_new/icmp6_valid_new table
+       * are allowed to create stateful ACL.
+       * The other messages will be forwarded without creating a reverse session.
+       */
+
+      if (type >= 0 && (type <= icmp_valid_new_size[is_ip6])
+         && (icmp_valid_new[is_ip6][type])
+         && (type <= icmp_invmap_size[is_ip6]) && icmp_invmap[is_ip6][type])
+       {
+         /*
+          * we set the inverse direction and correct the port,
+          * if it is okay to add the reverse session.
+          * If not, then the same session will be added twice
+          * to bihash, which is the same as adding just one session.
+          */
+         l4o.is_input = 1 - l4i.is_input;
+         l4o.port[0] = icmp_invmap[is_ip6][type] - 1;
+       }
+
+      return l4o.as_u64;
+    }
+  else
+    return reverse_l4_u64_fastpath (l4, is_ip6);
+}
+
+always_inline u64
+reverse_l4_u64 (u64 l4, int is_ip6)
+{
+  fa_session_l4_key_t l4i = {.as_u64 = l4 };
+
+  if (PREDICT_FALSE (l4i.is_slowpath))
+    {
+      return reverse_l4_u64_slowpath (l4, is_ip6);
+    }
+  else
+    {
+      return reverse_l4_u64_fastpath (l4, is_ip6);
+    }
+}
+
+always_inline void
+reverse_session_add_del (acl_main_t * am, const int is_ip6,
+                        clib_bihash_kv_40_8_t * pkv, int is_add)
+{
+  clib_bihash_kv_40_8_t kv2;
+  /* the first 4xu64 is two addresses, so just swap them */
+  kv2.key[0] = pkv->key[2];
+  kv2.key[1] = pkv->key[3];
+  kv2.key[2] = pkv->key[0];
+  kv2.key[3] = pkv->key[1];
+  /* the last u64 needs special treatment (ports, etc.) */
+  kv2.key[4] = reverse_l4_u64 (pkv->key[4], is_ip6);
+  kv2.value = pkv->value;
+  clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &kv2, is_add);
+}
 
 always_inline void
 acl_fa_delete_session (acl_main_t * am, u32 sw_if_index,
@@ -326,6 +354,9 @@ acl_fa_delete_session (acl_main_t * am, u32 sw_if_index,
     get_session_ptr (am, sess_id.thread_index, sess_id.session_index);
   ASSERT (sess->thread_index == os_get_thread_index ());
   clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &sess->info.kv, 0);
+
+  reverse_session_add_del (am, sess->info.pkt.is_ip6, &sess->info.kv, 0);
+
   acl_fa_per_worker_data_t *pw = &am->per_worker_data[sess_id.thread_index];
   pool_put_index (pw->fa_sessions_pool, sess_id.session_index);
   /* Deleting from timer structures not needed,
@@ -362,9 +393,11 @@ acl_fa_try_recycle_session (acl_main_t * am, int is_input, u16 thread_index,
     }
 }
 
+
 always_inline fa_session_t *
-acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
-                   fa_5tuple_t * p5tuple, u16 current_policy_epoch)
+acl_fa_add_session (acl_main_t * am, int is_input, int is_ip6,
+                   u32 sw_if_index, u64 now, fa_5tuple_t * p5tuple,
+                   u16 current_policy_epoch)
 {
   clib_bihash_kv_40_8_t *pkv = &p5tuple->kv;
   clib_bihash_kv_40_8_t kv;
@@ -396,10 +429,11 @@ acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
   sess->link_prev_idx = ~0;
   sess->link_next_idx = ~0;
 
-
-
   ASSERT (am->fa_sessions_hash_is_initialized == 1);
   clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &kv, 1);
+
+  reverse_session_add_del (am, is_ip6, &kv, 1);
+
   acl_fa_conn_list_add_session (am, f_sess_id, now);
 
   vec_validate (pw->fa_session_adds_by_sw_if_index, sw_if_index);