nat: optimize flow matching in ED NAT 36/31336/3
author Klement Sekera <ksekera@cisco.com>
Wed, 17 Feb 2021 17:48:35 +0000 (18:48 +0100)
committer Ole Trøan <otroan@employees.org>
Fri, 26 Feb 2021 13:15:32 +0000 (13:15 +0000)
This saves 6 clocks in nat44-ed-in2out node. (112->106 per packet)

Type: improvement
Signed-off-by: Klement Sekera <ksekera@cisco.com>
Change-Id: I48e757e7f4b6b0d250a432a4659fe6955fc52a07

src/plugins/nat/nat44-ed/nat44_ed.c
src/plugins/nat/nat44-ed/nat44_ed.h
src/plugins/nat/nat44-ed/nat44_ed_in2out.c
src/plugins/nat/nat44-ed/nat44_ed_inlines.h
src/plugins/nat/nat44-ed/nat44_ed_out2in.c

index d9d35fc..fcc3e6b 100644 (file)
@@ -3318,15 +3318,15 @@ nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f)
       f->rewrite.dport = f->match.dport;
     }
   if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE &&
-      f->rewrite.icmp_id != f->match.icmp_id)
+      f->rewrite.icmp_id != f->match.sport)
     {
       f->l4_csum_delta =
        ip_csum_add_even (f->l4_csum_delta, f->rewrite.icmp_id);
-      f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.icmp_id);
+      f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport);
     }
   else
     {
-      f->rewrite.icmp_id = f->match.icmp_id;
+      f->rewrite.icmp_id = f->match.sport;
     }
   if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
     {
index 10d1207..902e5e3 100644 (file)
@@ -204,15 +204,22 @@ typedef CLIB_PACKED(struct
 }) per_vrf_sessions_t;
 /* *INDENT-ON* */
 
-typedef struct
+typedef union
 {
-  ip4_address_t saddr, daddr;
-  u32 fib_index;
-  u16 sport, dport;
-  u16 icmp_id;
-  u8 proto;
+  struct
+  {
+    ip4_address_t saddr, daddr;
+    u16 sport; // ICMP id for ICMP case
+    u16 dport;
+    u32 fib_index : 24;
+    u8 proto;
+  };
+  u64 as_u64[2];
+  u64x2u as_u128;
 } nat_6t_t;
 
+STATIC_ASSERT_SIZEOF (nat_6t_t, 2 * sizeof (u64));
+
 typedef struct
 {
 #define NAT_FLOW_OP_SADDR_REWRITE   (1 << 1)
@@ -223,7 +230,15 @@ typedef struct
 #define NAT_FLOW_OP_TXFIB_REWRITE   (1 << 6)
   int ops;
   nat_6t_t match;
-  nat_6t_t rewrite;
+  struct
+  {
+    ip4_address_t saddr, daddr;
+    u16 sport;
+    u16 dport;
+    u32 fib_index;
+    u8 proto;
+    u16 icmp_id;
+  } rewrite;
   uword l3_csum_delta;
   uword l4_csum_delta;
 } nat_6t_flow_t;
index 784dea0..b99b336 100644 (file)
@@ -918,10 +918,11 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
 }
 
 static inline uword
-nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm,
-                                         vlib_node_runtime_t * node,
-                                         vlib_frame_t * frame,
-                                         int is_output_feature)
+nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
+                                         vlib_node_runtime_t *node,
+                                         vlib_frame_t *frame,
+                                         int is_output_feature,
+                                         int is_multi_worker)
 {
   u32 n_left_from, *from;
   snat_main_t *sm = &snat_main;
@@ -948,9 +949,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm,
       clib_bihash_kv_16_8_t kv0, value0;
       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
       nat_6t_flow_t *f = 0;
-      ip4_address_t lookup_saddr, lookup_daddr;
-      u16 lookup_sport, lookup_dport;
-      u8 lookup_protocol;
+      nat_6t_t lookup;
       int lookup_skipped = 0;
 
       b0 = *b;
@@ -981,6 +980,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm,
       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
       rx_fib_index0 =
        fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
+      lookup.fib_index = rx_fib_index0;
 
       if (PREDICT_FALSE (ip0->ttl == 1))
        {
@@ -1016,8 +1016,8 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm,
              goto trace0;
            }
          int err = nat_get_icmp_session_lookup_values (
-           b0, ip0, &lookup_saddr, &lookup_sport, &lookup_daddr,
-           &lookup_dport, &lookup_protocol);
+           b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
+           &lookup.dport, &lookup.proto);
          if (err != 0)
            {
              b0->error = node->errors[err];
@@ -1027,30 +1027,27 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm,
        }
       else
        {
-         lookup_protocol = ip0->protocol;
-         lookup_saddr = ip0->src_address;
-         lookup_daddr = ip0->dst_address;
-         lookup_sport = vnet_buffer (b0)->ip.reass.l4_src_port;
-         lookup_dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
+         lookup.proto = ip0->protocol;
+         lookup.saddr.as_u32 = ip0->src_address.as_u32;
+         lookup.daddr.as_u32 = ip0->dst_address.as_u32;
+         lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
+         lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
        }
 
       /* there might be a stashed index in vnet_buffer2 from handoff or
        * classify node, see if it can be used */
-      if (!pool_is_free_index (tsm->sessions,
+      if (is_multi_worker &&
+         !pool_is_free_index (tsm->sessions,
                               vnet_buffer2 (b0)->nat.cached_session_index))
        {
          s0 = pool_elt_at_index (tsm->sessions,
                                  vnet_buffer2 (b0)->nat.cached_session_index);
          if (PREDICT_TRUE (
-               nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport,
-                                  lookup_daddr, lookup_dport, lookup_protocol,
-                                  rx_fib_index0)
+               nat_6t_t_eq (&s0->i2o.match, &lookup)
                // for some hairpinning cases there are two "i2i" flows instead
                // of i2o and o2i as both hosts are on inside
                || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
-                   nat_6t_flow_match (
-                     &s0->o2i, b0, lookup_saddr, lookup_sport, lookup_daddr,
-                     lookup_dport, lookup_protocol, rx_fib_index0))))
+                   nat_6t_t_eq (&s0->o2i.match, &lookup))))
            {
              /* yes, this is the droid we're looking for */
              lookup_skipped = 1;
@@ -1059,8 +1056,8 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm,
          s0 = NULL;
        }
 
-      init_ed_k (&kv0, ip0->src_address, lookup_sport, ip0->dst_address,
-                lookup_dport, rx_fib_index0, lookup_protocol);
+      init_ed_k (&kv0, lookup.saddr, lookup.sport, lookup.daddr, lookup.dport,
+                lookup.fib_index, lookup.proto);
 
       // lookup flow
       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
@@ -1117,16 +1114,12 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm,
 
       b0->flags |= VNET_BUFFER_F_IS_NATED;
 
-      if (nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport,
-                            lookup_daddr, lookup_dport, lookup_protocol,
-                            rx_fib_index0))
+      if (nat_6t_t_eq (&s0->i2o.match, &lookup))
        {
          f = &s0->i2o;
        }
       else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING &&
-              nat_6t_flow_match (&s0->o2i, b0, lookup_saddr, lookup_sport,
-                                 lookup_daddr, lookup_dport, lookup_protocol,
-                                 rx_fib_index0))
+              nat_6t_t_eq (&s0->o2i.match, &lookup))
        {
          f = &s0->o2i;
        }
@@ -1459,7 +1452,14 @@ VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm,
                                     vlib_node_runtime_t * node,
                                     vlib_frame_t * frame)
 {
-  return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0);
+  if (snat_main.num_workers > 1)
+    {
+      return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1);
+    }
+  else
+    {
+      return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0);
+    }
 }
 
 VLIB_REGISTER_NODE (nat44_ed_in2out_node) = {
@@ -1477,7 +1477,14 @@ VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm,
                                            vlib_node_runtime_t * node,
                                            vlib_frame_t * frame)
 {
-  return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1);
+  if (snat_main.num_workers > 1)
+    {
+      return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1);
+    }
+  else
+    {
+      return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0);
+    }
 }
 
 VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = {
index 0623940..d1cd85e 100644 (file)
@@ -592,15 +592,9 @@ nat_6t_o2i_flow_init (snat_main_t *sm, u32 thread_idx, snat_session_t *s,
 }
 
 static_always_inline int
-nat_6t_flow_match (nat_6t_flow_t *f, vlib_buffer_t *b, ip4_address_t saddr,
-                  u16 sport, ip4_address_t daddr, u16 dport, u8 protocol,
-                  u32 fib_index)
-{
-  return f->match.daddr.as_u32 == daddr.as_u32 &&
-        f->match.dport == vnet_buffer (b)->ip.reass.l4_dst_port &&
-        f->match.proto == protocol && f->match.fib_index == fib_index &&
-        f->match.saddr.as_u32 == saddr.as_u32 &&
-        f->match.sport == vnet_buffer (b)->ip.reass.l4_src_port;
+nat_6t_t_eq (nat_6t_t *t1, nat_6t_t *t2)
+{
+  return t1->as_u64[0] == t2->as_u64[0] && t1->as_u64[1] == t2->as_u64[1];
 }
 
 static inline uword
index 4d354d3..90f2ec3 100644 (file)
@@ -793,17 +793,12 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
       clib_bihash_kv_16_8_t kv0, value0;
       nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
       nat_6t_flow_t *f = 0;
-      ip4_address_t lookup_saddr, lookup_daddr;
-      u16 lookup_sport, lookup_dport;
-      u8 lookup_protocol;
+      nat_6t_t lookup;
       int lookup_skipped = 0;
 
       b0 = *b;
       b++;
 
-      lookup_sport = vnet_buffer (b0)->ip.reass.l4_src_port;
-      lookup_dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
-
       /* Prefetch next iteration. */
       if (PREDICT_TRUE (n_left_from >= 2))
        {
@@ -818,6 +813,9 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
 
       next[0] = vnet_buffer2 (b0)->nat.arc_next;
 
+      lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port;
+      lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port;
+
       vnet_buffer (b0)->snat.flags = 0;
       ip0 = vlib_buffer_get_current (b0);
 
@@ -825,6 +823,8 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
       rx_fib_index0 =
        fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
 
+      lookup.fib_index = rx_fib_index0;
+
       if (PREDICT_FALSE (ip0->ttl == 1))
        {
          vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
@@ -851,8 +851,8 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
              goto trace0;
            }
          int err = nat_get_icmp_session_lookup_values (
-           b0, ip0, &lookup_saddr, &lookup_sport, &lookup_daddr,
-           &lookup_dport, &lookup_protocol);
+           b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr,
+           &lookup.dport, &lookup.proto);
          if (err != 0)
            {
              b0->error = node->errors[err];
@@ -862,26 +862,22 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
        }
       else
        {
-         lookup_saddr.as_u32 = ip0->src_address.as_u32;
-         lookup_daddr.as_u32 = ip0->dst_address.as_u32;
-         lookup_protocol = ip0->protocol;
+         lookup.saddr.as_u32 = ip0->src_address.as_u32;
+         lookup.daddr.as_u32 = ip0->dst_address.as_u32;
+         lookup.proto = ip0->protocol;
        }
 
       /* there might be a stashed index in vnet_buffer2 from handoff or
        * classify node, see if it can be used */
-      if (!pool_is_free_index (tsm->sessions,
+      if (is_multi_worker &&
+         !pool_is_free_index (tsm->sessions,
                               vnet_buffer2 (b0)->nat.cached_session_index))
        {
          s0 = pool_elt_at_index (tsm->sessions,
                                  vnet_buffer2 (b0)->nat.cached_session_index);
-         if (PREDICT_TRUE (
-               nat_6t_flow_match (&s0->o2i, b0, lookup_saddr, lookup_sport,
-                                  lookup_daddr, lookup_dport, lookup_protocol,
-                                  rx_fib_index0) ||
-               (s0->flags & SNAT_SESSION_FLAG_TWICE_NAT &&
-                nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport,
-                                   lookup_daddr, lookup_dport,
-                                   lookup_protocol, rx_fib_index0))))
+         if (PREDICT_TRUE (nat_6t_t_eq (&s0->o2i.match, &lookup)) ||
+             (s0->flags & SNAT_SESSION_FLAG_TWICE_NAT &&
+              nat_6t_t_eq (&s0->i2o.match, &lookup)))
            {
              /* yes, this is the droid we're looking for */
              lookup_skipped = 1;
@@ -890,8 +886,8 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
          s0 = NULL;
        }
 
-      init_ed_k (&kv0, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport,
-                rx_fib_index0, lookup_protocol);
+      init_ed_k (&kv0, lookup.saddr, lookup.sport, lookup.daddr, lookup.dport,
+                lookup.fib_index, lookup.proto);
 
       // lookup flow
       if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
@@ -944,16 +940,12 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
          goto trace0;
        }
 
-      if (nat_6t_flow_match (&s0->o2i, b0, lookup_saddr, lookup_sport,
-                            lookup_daddr, lookup_dport, lookup_protocol,
-                            rx_fib_index0))
+      if (nat_6t_t_eq (&s0->o2i.match, &lookup))
        {
          f = &s0->o2i;
        }
       else if (s0->flags & SNAT_SESSION_FLAG_TWICE_NAT &&
-              nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport,
-                                 lookup_daddr, lookup_dport, lookup_protocol,
-                                 rx_fib_index0))
+              nat_6t_t_eq (&s0->i2o.match, &lookup))
        {
          f = &s0->i2o;
        }
@@ -979,9 +971,7 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
            }
          else
            {
-             if (nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport,
-                                    lookup_daddr, lookup_dport,
-                                    lookup_protocol, rx_fib_index0))
+             if (nat_6t_t_eq (&s0->i2o.match, &lookup))
                {
                  f = &s0->i2o;
                }