nat: handoff traffic matching for dynamic NAT (refs/changes/29/21429/4)
author: Filip Varga <fivarga@cisco.com>
Mon, 12 Aug 2019 12:24:39 +0000 (14:24 +0200)
committer: Ole Trøan <otroan@employees.org>
Thu, 22 Aug 2019 14:13:42 +0000 (14:13 +0000)
Type: feature

Change-Id: I5c5af6f9acb340cc674323305104b8ce23e6d21d
Signed-off-by: Filip Varga <fivarga@cisco.com>
src/plugins/nat/in2out_ed.c
src/plugins/nat/nat.c
src/plugins/nat/nat.h
src/plugins/nat/nat44_cli.c
src/plugins/nat/nat44_hairpinning.c
src/plugins/nat/nat44_handoff.c
src/plugins/nat/nat_api.c
src/plugins/nat/nat_inlines.h
src/plugins/nat/out2in_ed.c

diff --git a/src/plugins/nat/in2out_ed.c b/src/plugins/nat/in2out_ed.c
index 60f820a..de00c6e 100644 (file)
 #include <nat/nat_syslog.h>
 #include <nat/nat_ha.h>
 
-#define foreach_nat_in2out_ed_error                     \
-_(UNSUPPORTED_PROTOCOL, "unsupported protocol")         \
-_(IN2OUT_PACKETS, "good in2out packets processed")      \
-_(OUT_OF_PORTS, "out of ports")                         \
-_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
-_(MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded")   \
-_(DROP_FRAGMENT, "drop fragment")                       \
-_(MAX_REASS, "maximum reassemblies exceeded")           \
-_(MAX_FRAG, "maximum fragments per reassembly exceeded")\
-_(NON_SYN, "non-SYN packet try to create session")      \
-_(TCP_PACKETS, "TCP packets")                           \
-_(UDP_PACKETS, "UDP packets")                           \
-_(ICMP_PACKETS, "ICMP packets")                         \
-_(OTHER_PACKETS, "other protocol packets")              \
-_(FRAGMENTS, "fragments")                               \
-_(CACHED_FRAGMENTS, "cached fragments")                 \
-_(PROCESSED_FRAGMENTS, "processed fragments")
-
-
-typedef enum
-{
-#define _(sym,str) NAT_IN2OUT_ED_ERROR_##sym,
-  foreach_nat_in2out_ed_error
-#undef _
-    NAT_IN2OUT_ED_N_ERROR,
-} nat_in2out_ed_error_t;
-
 static char *nat_in2out_ed_error_strings[] = {
 #define _(sym,string) string,
   foreach_nat_in2out_ed_error
@@ -100,55 +73,6 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args)
   return s;
 }
 
-static_always_inline int
-icmp_get_ed_key (ip4_header_t * ip0, nat_ed_ses_key_t * p_key0)
-{
-  icmp46_header_t *icmp0;
-  nat_ed_ses_key_t key0;
-  icmp_echo_header_t *echo0, *inner_echo0 = 0;
-  ip4_header_t *inner_ip0 = 0;
-  void *l4_header = 0;
-  icmp46_header_t *inner_icmp0;
-
-  icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
-  echo0 = (icmp_echo_header_t *) (icmp0 + 1);
-
-  if (!icmp_is_error_message (icmp0))
-    {
-      key0.proto = IP_PROTOCOL_ICMP;
-      key0.l_addr = ip0->src_address;
-      key0.r_addr = ip0->dst_address;
-      key0.l_port = echo0->identifier;
-      key0.r_port = 0;
-    }
-  else
-    {
-      inner_ip0 = (ip4_header_t *) (echo0 + 1);
-      l4_header = ip4_next_header (inner_ip0);
-      key0.proto = inner_ip0->protocol;
-      key0.r_addr = inner_ip0->src_address;
-      key0.l_addr = inner_ip0->dst_address;
-      switch (ip_proto_to_snat_proto (inner_ip0->protocol))
-       {
-       case SNAT_PROTOCOL_ICMP:
-         inner_icmp0 = (icmp46_header_t *) l4_header;
-         inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1);
-         key0.r_port = 0;
-         key0.l_port = inner_echo0->identifier;
-         break;
-       case SNAT_PROTOCOL_UDP:
-       case SNAT_PROTOCOL_TCP:
-         key0.l_port = ((tcp_udp_header_t *) l4_header)->dst_port;
-         key0.r_port = ((tcp_udp_header_t *) l4_header)->src_port;
-         break;
-       default:
-         return NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL;
-       }
-    }
-  *p_key0 = key0;
-  return 0;
-}
-
 #ifndef CLIB_MARCH_VARIANT
 int
 nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg)
@@ -497,7 +421,7 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
   if (ip->protocol == IP_PROTOCOL_ICMP)
     {
       key.as_u64[0] = key.as_u64[1] = 0;
-      if (icmp_get_ed_key (ip, &key))
+      if (get_icmp_i2o_ed_key (ip, &key))
        return 0;
       key.fib_index = 0;
       kv.key[0] = key.as_u64[0];
@@ -616,7 +540,7 @@ icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node,
   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
 
   key.as_u64[0] = key.as_u64[1] = 0;
-  err = icmp_get_ed_key (ip, &key);
+  err = get_icmp_i2o_ed_key (ip, &key);
   if (err != 0)
     {
       b->error = node->errors[err];
diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c
index 85c4202..248cd75 100755 (executable)
@@ -935,7 +935,7 @@ snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
          ip4_header_t ip = {
            .src_address = m->local_addr,
          };
-         vec_add1 (m->workers, sm->worker_in2out_cb (&ip, m->fib_index));
+         vec_add1 (m->workers, sm->worker_in2out_cb (&ip, m->fib_index, 0));
          tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
        }
       else
@@ -1302,7 +1302,8 @@ nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
              };
              bitmap =
                clib_bitmap_set (bitmap,
-                                sm->worker_in2out_cb (&ip, m->fib_index), 1);
+                                sm->worker_in2out_cb (&ip, m->fib_index, 0),
+                                1);
            }
        }
 
@@ -1390,7 +1391,7 @@ nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
                 .src_address = local->addr,
               };
               tsm = vec_elt_at_index (sm->per_thread_data,
-                                      sm->worker_in2out_cb (&ip, m->fib_index));
+                                      sm->worker_in2out_cb (&ip, m->fib_index, 0));
             }
           else
             tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
@@ -1543,7 +1544,8 @@ nat44_lb_static_mapping_add_del_local (ip4_address_t e_addr, u16 e_port,
            .src_address = local->addr,
          };
          tsm = vec_elt_at_index (sm->per_thread_data,
-                                 sm->worker_in2out_cb (&ip, m->fib_index));
+                                 sm->worker_in2out_cb (&ip, m->fib_index,
+                                                       0));
        }
       else
        tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
@@ -1596,7 +1598,7 @@ nat44_lb_static_mapping_add_del_local (ip4_address_t e_addr, u16 e_port,
         ip4_header_t ip;
         ip.src_address.as_u32 = local->addr.as_u32,
         bitmap = clib_bitmap_set (bitmap,
-                                  sm->worker_in2out_cb (&ip, local->fib_index),
+                                  sm->worker_in2out_cb (&ip, local->fib_index, 0),
                                   1);
       }
   }));
@@ -2160,6 +2162,7 @@ snat_set_workers (uword * bitmap)
     ({
       vec_add1(sm->workers, i);
       sm->per_thread_data[sm->first_worker_index + i].snat_thread_index = j;
+      sm->per_thread_data[sm->first_worker_index + i].thread_index = i;
       j++;
     }));
   /* *INDENT-ON* */
@@ -2297,7 +2300,7 @@ snat_init (vlib_main_t * vm)
   sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_DEFAULT;
   sm->forwarding_enabled = 0;
   sm->log_class = vlib_log_register_class ("nat", 0);
-  sm->log_level = SNAT_LOG_NONE;
+  sm->log_level = SNAT_LOG_ERROR;
   sm->mss_clamping = 0;
 
   node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
@@ -2579,7 +2582,7 @@ snat_static_mapping_match (snat_main_t * sm,
                  .src_address = local->addr,
                };
 
-               if (sm->worker_in2out_cb (&ip, m->fib_index) ==
+               if (sm->worker_in2out_cb (&ip, m->fib_index, 0) ==
                    thread_index)
                   {
                     vec_add1 (tmp, i);
@@ -2946,7 +2949,8 @@ format_ed_session_kvp (u8 * s, va_list * args)
 }
 
 static u32
-snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0)
+snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0,
+                          u8 is_output)
 {
   snat_main_t *sm = &snat_main;
   u32 next_worker_index = 0;
@@ -2965,7 +2969,8 @@ snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0)
 }
 
 static u32
-snat_get_worker_out2in_cb (ip4_header_t * ip0, u32 rx_fib_index0)
+snat_get_worker_out2in_cb (ip4_header_t * ip0, u32 rx_fib_index0,
+                          u8 is_output)
 {
   snat_main_t *sm = &snat_main;
   udp_header_t *udp;
@@ -3101,16 +3106,178 @@ no_reass:
 }
 
 static u32
-nat44_ed_get_worker_out2in_cb (ip4_header_t * ip, u32 rx_fib_index)
+nat44_ed_get_worker_in2out_cb (ip4_header_t * ip, u32 rx_fib_index,
+                              u8 is_output)
+{
+  snat_main_t *sm = &snat_main;
+  u32 next_worker_index = sm->first_worker_index;
+  u32 hash;
+
+  clib_bihash_kv_16_8_t kv16, value16;
+  snat_main_per_thread_data_t *tsm;
+  udp_header_t *udp;
+
+  if (PREDICT_FALSE (is_output))
+    {
+      u32 fib_index = sm->outside_fib_index;
+      nat_outside_fib_t *outside_fib;
+      fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
+      fib_prefix_t pfx = {
+       .fp_proto = FIB_PROTOCOL_IP4,
+       .fp_len = 32,
+       .fp_addr = {
+                   .ip4.as_u32 = ip->dst_address.as_u32,
+                   }
+       ,
+      };
+
+      udp = ip4_next_header (ip);
+
+      switch (vec_len (sm->outside_fibs))
+       {
+       case 0:
+         fib_index = sm->outside_fib_index;
+         break;
+       case 1:
+         fib_index = sm->outside_fibs[0].fib_index;
+         break;
+       default:
+            /* *INDENT-OFF* */
+            vec_foreach (outside_fib, sm->outside_fibs)
+              {
+                fei = fib_table_lookup (outside_fib->fib_index, &pfx);
+                if (FIB_NODE_INDEX_INVALID != fei)
+                  {
+                    if (fib_entry_get_resolving_interface (fei) != ~0)
+                      {
+                        fib_index = outside_fib->fib_index;
+                        break;
+                      }
+                  }
+              }
+            /* *INDENT-ON* */
+         break;
+       }
+
+      make_ed_kv (&kv16, &ip->src_address, &ip->dst_address,
+                 ip->protocol, fib_index, udp->src_port, udp->dst_port);
+
+      /* *INDENT-OFF* */
+      vec_foreach (tsm, sm->per_thread_data)
+        {
+          if (PREDICT_TRUE (!clib_bihash_search_16_8 (&tsm->out2in_ed,
+                                                      &kv16, &value16)))
+            {
+              next_worker_index += tsm->thread_index;
+
+              nat_elog_debug_handoff (
+                "HANDOFF IN2OUT-OUTPUT-FEATURE (session)",
+                next_worker_index, fib_index,
+               clib_net_to_host_u32 (ip->src_address.as_u32),
+               clib_net_to_host_u32 (ip->dst_address.as_u32));
+
+              return next_worker_index;
+            }
+        }
+      /* *INDENT-ON* */
+    }
+
+  hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
+    (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
+
+  if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
+    next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
+  else
+    next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
+
+  if (PREDICT_TRUE (!is_output))
+    {
+      nat_elog_debug_handoff ("HANDOFF IN2OUT",
+                             next_worker_index, rx_fib_index,
+                             clib_net_to_host_u32 (ip->src_address.as_u32),
+                             clib_net_to_host_u32 (ip->dst_address.as_u32));
+    }
+  else
+    {
+      nat_elog_debug_handoff ("HANDOFF IN2OUT-OUTPUT-FEATURE",
+                             next_worker_index, rx_fib_index,
+                             clib_net_to_host_u32 (ip->src_address.as_u32),
+                             clib_net_to_host_u32 (ip->dst_address.as_u32));
+    }
+
+  return next_worker_index;
+}
+
+static u32
+nat44_ed_get_worker_out2in_cb (ip4_header_t * ip, u32 rx_fib_index,
+                              u8 is_output)
 {
   snat_main_t *sm = &snat_main;
   clib_bihash_kv_8_8_t kv, value;
+  clib_bihash_kv_16_8_t kv16, value16;
+  snat_main_per_thread_data_t *tsm;
+
   u32 proto, next_worker_index = 0;
   udp_header_t *udp;
   u16 port;
   snat_static_mapping_t *m;
   u32 hash;
 
+  proto = ip_proto_to_snat_proto (ip->protocol);
+
+  if (PREDICT_TRUE (proto == SNAT_PROTOCOL_UDP || proto == SNAT_PROTOCOL_TCP))
+    {
+      udp = ip4_next_header (ip);
+
+      make_ed_kv (&kv16, &ip->dst_address, &ip->src_address,
+                 ip->protocol, rx_fib_index, udp->dst_port, udp->src_port);
+
+      /* *INDENT-OFF* */
+      vec_foreach (tsm, sm->per_thread_data)
+        {
+          if (PREDICT_TRUE (!clib_bihash_search_16_8 (&tsm->out2in_ed,
+                                                      &kv16, &value16)))
+            {
+              next_worker_index = sm->first_worker_index + tsm->thread_index;
+              nat_elog_debug_handoff ("HANDOFF OUT2IN (session)",
+                          next_worker_index, rx_fib_index,
+                         clib_net_to_host_u32 (ip->src_address.as_u32),
+                         clib_net_to_host_u32 (ip->dst_address.as_u32));
+              return next_worker_index;
+            }
+          }
+        /* *INDENT-ON* */
+    }
+  else if (proto == SNAT_PROTOCOL_ICMP)
+    {
+      nat_ed_ses_key_t key;
+
+      if (!get_icmp_o2i_ed_key (ip, &key))
+       {
+
+         key.fib_index = rx_fib_index;
+         kv16.key[0] = key.as_u64[0];
+         kv16.key[1] = key.as_u64[1];
+
+          /* *INDENT-OFF* */
+          vec_foreach (tsm, sm->per_thread_data)
+            {
+              if (PREDICT_TRUE (!clib_bihash_search_16_8 (&tsm->out2in_ed,
+                                                          &kv16, &value16)))
+                {
+                  next_worker_index = sm->first_worker_index +
+                                      tsm->thread_index;
+                  nat_elog_debug_handoff ("HANDOFF OUT2IN (session)",
+                              next_worker_index, rx_fib_index,
+                             clib_net_to_host_u32 (ip->src_address.as_u32),
+                             clib_net_to_host_u32 (ip->dst_address.as_u32));
+                  return next_worker_index;
+                }
+            }
+          /* *INDENT-ON* */
+       }
+    }
+
   /* first try static mappings without port */
   if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
     {
@@ -3119,17 +3286,17 @@ nat44_ed_get_worker_out2in_cb (ip4_header_t * ip, u32 rx_fib_index)
          (&sm->static_mapping_by_external, &kv, &value))
        {
          m = pool_elt_at_index (sm->static_mappings, value.value);
-         return m->workers[0];
+         next_worker_index = m->workers[0];
+         goto done;
        }
     }
 
-  proto = ip_proto_to_snat_proto (ip->protocol);
-
   /* unknown protocol */
   if (PREDICT_FALSE (proto == ~0))
     {
       /* use current thread */
-      return vlib_get_thread_index ();
+      next_worker_index = vlib_get_thread_index ();
+      goto done;
     }
 
   udp = ip4_next_header (ip);
@@ -3158,7 +3325,8 @@ nat44_ed_get_worker_out2in_cb (ip4_header_t * ip, u32 rx_fib_index)
              port = ((tcp_udp_header_t *) l4_header)->src_port;
              break;
            default:
-             return vlib_get_thread_index ();
+             next_worker_index = vlib_get_thread_index ();
+             goto done;
            }
        }
     }
@@ -3173,15 +3341,20 @@ nat44_ed_get_worker_out2in_cb (ip4_header_t * ip, u32 rx_fib_index)
        {
          m = pool_elt_at_index (sm->static_mappings, value.value);
          if (!is_lb_static_mapping (m))
-           return m->workers[0];
+           {
+             next_worker_index = m->workers[0];
+             goto done;
+           }
 
          hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
            (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
 
          if (PREDICT_TRUE (is_pow2 (_vec_len (m->workers))))
-           return m->workers[hash & (_vec_len (m->workers) - 1)];
+           next_worker_index =
+             m->workers[hash & (_vec_len (m->workers) - 1)];
          else
-           return m->workers[hash % _vec_len (m->workers)];
+           next_worker_index = m->workers[hash % _vec_len (m->workers)];
+         goto done;
        }
     }
 
@@ -3190,6 +3363,10 @@ nat44_ed_get_worker_out2in_cb (ip4_header_t * ip, u32 rx_fib_index)
   next_worker_index +=
     sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
 
+done:
+  nat_elog_debug_handoff ("HANDOFF OUT2IN", next_worker_index, rx_fib_index,
+                         clib_net_to_host_u32 (ip->src_address.as_u32),
+                         clib_net_to_host_u32 (ip->dst_address.as_u32));
   return next_worker_index;
 }
 
@@ -3650,7 +3827,7 @@ snat_config (vlib_main_t * vm, unformat_input_t * input)
     {
       if (sm->endpoint_dependent)
        {
-         sm->worker_in2out_cb = snat_get_worker_in2out_cb;
+         sm->worker_in2out_cb = nat44_ed_get_worker_in2out_cb;
          sm->worker_out2in_cb = nat44_ed_get_worker_out2in_cb;
          sm->in2out_node_index = nat44_ed_in2out_node.index;
          sm->in2out_output_node_index = nat44_ed_in2out_output_node.index;
@@ -3976,7 +4153,7 @@ nat44_del_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
   if (sm->num_workers > 1)
     tsm =
       vec_elt_at_index (sm->per_thread_data,
-                       sm->worker_in2out_cb (&ip, fib_index));
+                       sm->worker_in2out_cb (&ip, fib_index, 0));
   else
     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
 
@@ -4020,7 +4197,7 @@ nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
   if (sm->num_workers > 1)
     tsm =
       vec_elt_at_index (sm->per_thread_data,
-                       sm->worker_in2out_cb (&ip, fib_index));
+                       sm->worker_in2out_cb (&ip, fib_index, 0));
   else
     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
 
diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h
index 664af39..f7ce5f9 100644 (file)
@@ -177,6 +177,60 @@ typedef enum
 #undef _
 } snat_session_state_t;
 
+#define foreach_nat_in2out_ed_error                     \
+_(UNSUPPORTED_PROTOCOL, "unsupported protocol")         \
+_(IN2OUT_PACKETS, "good in2out packets processed")      \
+_(OUT_OF_PORTS, "out of ports")                         \
+_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
+_(MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded")   \
+_(DROP_FRAGMENT, "drop fragment")                       \
+_(MAX_REASS, "maximum reassemblies exceeded")           \
+_(MAX_FRAG, "maximum fragments per reassembly exceeded")\
+_(NON_SYN, "non-SYN packet try to create session")      \
+_(TCP_PACKETS, "TCP packets")                           \
+_(UDP_PACKETS, "UDP packets")                           \
+_(ICMP_PACKETS, "ICMP packets")                         \
+_(OTHER_PACKETS, "other protocol packets")              \
+_(FRAGMENTS, "fragments")                               \
+_(CACHED_FRAGMENTS, "cached fragments")                 \
+_(PROCESSED_FRAGMENTS, "processed fragments")
+
+typedef enum
+{
+#define _(sym,str) NAT_IN2OUT_ED_ERROR_##sym,
+  foreach_nat_in2out_ed_error
+#undef _
+    NAT_IN2OUT_ED_N_ERROR,
+} nat_in2out_ed_error_t;
+
+#define foreach_nat_out2in_ed_error                     \
+_(UNSUPPORTED_PROTOCOL, "unsupported protocol")         \
+_(OUT2IN_PACKETS, "good out2in packets processed")      \
+_(OUT_OF_PORTS, "out of ports")                         \
+_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
+_(NO_TRANSLATION, "no translation")                     \
+_(MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded")   \
+_(DROP_FRAGMENT, "drop fragment")                       \
+_(MAX_REASS, "maximum reassemblies exceeded")           \
+_(MAX_FRAG, "maximum fragments per reassembly exceeded")\
+_(NON_SYN, "non-SYN packet try to create session")      \
+_(TCP_PACKETS, "TCP packets")                           \
+_(UDP_PACKETS, "UDP packets")                           \
+_(ICMP_PACKETS, "ICMP packets")                         \
+_(OTHER_PACKETS, "other protocol packets")              \
+_(FRAGMENTS, "fragments")                               \
+_(CACHED_FRAGMENTS, "cached fragments")                 \
+_(PROCESSED_FRAGMENTS, "processed fragments")
+
+typedef enum
+{
+#define _(sym,str) NAT_OUT2IN_ED_ERROR_##sym,
+  foreach_nat_out2in_ed_error
+#undef _
+    NAT_OUT2IN_ED_N_ERROR,
+} nat_out2in_ed_error_t;
+
+
 /* Endpoint dependent TCP session state */
 #define NAT44_SES_I2O_FIN 1
 #define NAT44_SES_O2I_FIN 2
@@ -422,6 +476,9 @@ typedef struct
 
   /* NAT thread index */
   u32 snat_thread_index;
+
+  /* real thread index */
+  u32 thread_index;
 } snat_main_per_thread_data_t;
 
 struct snat_main_s;
@@ -438,7 +495,7 @@ typedef u32 (snat_icmp_match_function_t) (struct snat_main_s * sm,
 
 /* Return worker thread index for given packet */
 typedef u32 (snat_get_worker_function_t) (ip4_header_t * ip,
-                                         u32 rx_fib_index);
+                                         u32 rx_fib_index, u8 is_output);
 
 /* NAT address and port allacotaion function */
 typedef int (nat_alloc_out_addr_and_port_function_t) (snat_address_t *
@@ -771,7 +828,7 @@ do                                                \
       {                                           \
         ELOG_TYPE_DECLARE (e) =                   \
           {                                       \
-            .format = "nat-msg" _str,             \
+            .format = "nat-msg " _str,            \
             .format_args = "",                    \
           };                                      \
         ELOG_DATA (&sm->vlib_main->elog_main, e); \
@@ -803,6 +860,86 @@ do                                                       \
     }                                                    \
   } while (0);
 
+#define nat_elog_debug_handoff(_str, _tid, _fib, _src, _dst)                \
+do                                                                          \
+  {                                                                         \
+  if (PREDICT_FALSE (sm->log_level >= SNAT_LOG_DEBUG))                      \
+    {                                                                       \
+      ELOG_TYPE_DECLARE (e) =                                               \
+        {                                                                   \
+          .format = "nat-msg " _str " ip src: %d.%d.%d.%d dst: %d.%d.%d.%d" \
+                                    " tid from: %d to: %d fib: %d",         \
+        .format_args = "i1i1i1i1i1i1i1i1i4i4i4",                            \
+      };                                                                    \
+      CLIB_PACKED(struct                                                    \
+        {                                                                   \
+          u8 src_oct1;                                                      \
+          u8 src_oct2;                                                      \
+          u8 src_oct3;                                                      \
+          u8 src_oct4;                                                      \
+          u8 dst_oct1;                                                      \
+          u8 dst_oct2;                                                      \
+          u8 dst_oct3;                                                      \
+          u8 dst_oct4;                                                      \
+          u32 ftid;                                                         \
+          u32 ttid;                                                         \
+          u32 fib;                                                          \
+        }) *ed;                                                             \
+      ed = ELOG_DATA (&vlib_global_main.elog_main, e);                      \
+      ed->src_oct1 = _src >> 24;                                            \
+      ed->src_oct2 = _src >> 16;                                            \
+      ed->src_oct3 = _src >> 8;                                             \
+      ed->src_oct4 = _src;                                                  \
+      ed->dst_oct1 = _dst >> 24;                                            \
+      ed->dst_oct2 = _dst >> 16;                                            \
+      ed->dst_oct3 = _dst >> 8;                                             \
+      ed->dst_oct4 = _dst;                                                  \
+      ed->ftid = vlib_get_thread_index ();                                  \
+      ed->ttid = _tid;                                                      \
+      ed->fib = _fib;                                                       \
+    }                                                                       \
+  } while (0);
+
+#define nat_elog_debug_handoff_v2(_str, _prt, _fib, _src, _dst)              \
+do                                                                           \
+  {                                                                          \
+  if (PREDICT_FALSE (sm->log_level >= SNAT_LOG_DEBUG))                       \
+    {                                                                        \
+      ELOG_TYPE_DECLARE (e) =                                                \
+        {                                                                    \
+          .format = "nat-msg " _str " ip_src:%d.%d.%d.%d ip_dst:%d.%d.%d.%d" \
+                                    " tid:%d prt:%d fib:%d",                 \
+        .format_args = "i1i1i1i1i1i1i1i1i4i4i4",                             \
+      };                                                                     \
+      CLIB_PACKED(struct                                                     \
+        {                                                                    \
+          u8 src_oct1;                                                       \
+          u8 src_oct2;                                                       \
+          u8 src_oct3;                                                       \
+          u8 src_oct4;                                                       \
+          u8 dst_oct1;                                                       \
+          u8 dst_oct2;                                                       \
+          u8 dst_oct3;                                                       \
+          u8 dst_oct4;                                                       \
+          u32 tid;                                                           \
+          u32 prt;                                                           \
+          u32 fib;                                                           \
+        }) *ed;                                                              \
+      ed = ELOG_DATA (&vlib_global_main.elog_main, e);                       \
+      ed->src_oct1 = _src >> 24;                                             \
+      ed->src_oct2 = _src >> 16;                                             \
+      ed->src_oct3 = _src >> 8;                                              \
+      ed->src_oct4 = _src;                                                   \
+      ed->dst_oct1 = _dst >> 24;                                             \
+      ed->dst_oct2 = _dst >> 16;                                             \
+      ed->dst_oct3 = _dst >> 8;                                              \
+      ed->dst_oct4 = _dst;                                                   \
+      ed->tid = vlib_get_thread_index ();                                    \
+      ed->prt = _prt;                                                        \
+      ed->fib = _fib;                                                        \
+    }                                                                        \
+  } while (0);
+
 #define nat_elog_X1(_level, _fmt, _arg, _val1)         \
 do                                                     \
   {                                                    \
diff --git a/src/plugins/nat/nat44_cli.c b/src/plugins/nat/nat44_cli.c
index 4dcfb8d..e9d3ff5 100644 (file)
@@ -129,7 +129,7 @@ snat_set_log_level_command_fn (vlib_main_t * vm,
   if (!unformat_user (input, unformat_line_input, line_input))
     return 0;
 
-  if (!unformat (line_input, "level %d", &log_level))
+  if (!unformat (line_input, "%d", &log_level))
     {
       error = clib_error_return (0, "unknown input '%U'",
                                 format_unformat_error, line_input);
diff --git a/src/plugins/nat/nat44_hairpinning.c b/src/plugins/nat/nat44_hairpinning.c
index 81478f4..331e7ca 100644 (file)
@@ -391,7 +391,7 @@ nat44_ed_hairpinning_unknown_proto (snat_main_t * sm,
   snat_main_per_thread_data_t *tsm;
 
   if (sm->num_workers > 1)
-    ti = sm->worker_out2in_cb (ip, sm->outside_fib_index);
+    ti = sm->worker_out2in_cb (ip, sm->outside_fib_index, 0);
   else
     ti = sm->num_workers;
   tsm = &sm->per_thread_data[ti];
diff --git a/src/plugins/nat/nat44_handoff.c b/src/plugins/nat/nat44_handoff.c
index 04590e4..c97c958 100644 (file)
@@ -29,6 +29,7 @@ typedef struct
   u32 next_worker_index;
   u32 trace_index;
   u8 in2out;
+  u8 output;
 } nat44_handoff_trace_t;
 
 #define foreach_nat44_handoff_error                       \
@@ -57,12 +58,13 @@ format_nat44_handoff_trace (u8 * s, va_list * args)
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   nat44_handoff_trace_t *t = va_arg (*args, nat44_handoff_trace_t *);
-  char *tag;
+  char *tag, *output;
 
   tag = t->in2out ? "IN2OUT" : "OUT2IN";
+  output = t->output ? "OUTPUT-FEATURE" : "";
   s =
-    format (s, "NAT44_%s_WORKER_HANDOFF: next-worker %d trace index %d", tag,
-           t->next_worker_index, t->trace_index);
+    format (s, "NAT44_%s_WORKER_HANDOFF %s: next-worker %d trace index %d",
+           tag, output, t->next_worker_index, t->trace_index);
 
   return s;
 }
@@ -101,8 +103,8 @@ nat44_worker_handoff_fn_inline (vlib_main_t * vm,
   while (n_left_from >= 4)
     {
       u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
-      u32 rx_fib_index0 = 0, rx_fib_index1 = 0,
-       rx_fib_index2 = 0, rx_fib_index3 = 0;
+      u32 rx_fib_index0, rx_fib_index1, rx_fib_index2, rx_fib_index3;
+      u32 iph_offset0 = 0, iph_offset1 = 0, iph_offset2 = 0, iph_offset3 = 0;
       ip4_header_t *ip0, *ip1, *ip2, *ip3;
 
       if (PREDICT_TRUE (n_left_from >= 8))
@@ -117,32 +119,37 @@ nat44_worker_handoff_fn_inline (vlib_main_t * vm,
          CLIB_PREFETCH (&b[7]->data, CLIB_CACHE_LINE_BYTES, STORE);
        }
 
-      ip0 = vlib_buffer_get_current (b[0]);
-      ip1 = vlib_buffer_get_current (b[1]);
-      ip2 = vlib_buffer_get_current (b[2]);
-      ip3 = vlib_buffer_get_current (b[3]);
-
-      if (PREDICT_FALSE (is_in2out))
+      if (is_output)
        {
-         sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
-         sw_if_index1 = vnet_buffer (b[1])->sw_if_index[VLIB_RX];
-         sw_if_index2 = vnet_buffer (b[2])->sw_if_index[VLIB_RX];
-         sw_if_index3 = vnet_buffer (b[3])->sw_if_index[VLIB_RX];
-
-         rx_fib_index0 =
-           ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
-         rx_fib_index1 =
-           ip4_fib_table_get_index_for_sw_if_index (sw_if_index1);
-         rx_fib_index2 =
-           ip4_fib_table_get_index_for_sw_if_index (sw_if_index2);
-         rx_fib_index3 =
-           ip4_fib_table_get_index_for_sw_if_index (sw_if_index3);
+         iph_offset0 = vnet_buffer (b[0])->ip.save_rewrite_length;
+         iph_offset1 = vnet_buffer (b[1])->ip.save_rewrite_length;
+         iph_offset2 = vnet_buffer (b[2])->ip.save_rewrite_length;
+         iph_offset3 = vnet_buffer (b[3])->ip.save_rewrite_length;
        }
 
-      ti[0] = get_worker (ip0, rx_fib_index0);
-      ti[1] = get_worker (ip1, rx_fib_index1);
-      ti[2] = get_worker (ip2, rx_fib_index2);
-      ti[3] = get_worker (ip3, rx_fib_index3);
+      ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b[0]) +
+                             iph_offset0);
+      ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b[1]) +
+                             iph_offset1);
+      ip2 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b[2]) +
+                             iph_offset2);
+      ip3 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b[3]) +
+                             iph_offset3);
+
+      sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
+      sw_if_index1 = vnet_buffer (b[1])->sw_if_index[VLIB_RX];
+      sw_if_index2 = vnet_buffer (b[2])->sw_if_index[VLIB_RX];
+      sw_if_index3 = vnet_buffer (b[3])->sw_if_index[VLIB_RX];
+
+      rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
+      rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index1);
+      rx_fib_index2 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index2);
+      rx_fib_index3 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index3);
+
+      ti[0] = get_worker (ip0, rx_fib_index0, is_output);
+      ti[1] = get_worker (ip1, rx_fib_index1, is_output);
+      ti[2] = get_worker (ip2, rx_fib_index2, is_output);
+      ti[3] = get_worker (ip3, rx_fib_index3, is_output);
 
       if (ti[0] == thread_index)
        same_worker++;
@@ -172,19 +179,21 @@ nat44_worker_handoff_fn_inline (vlib_main_t * vm,
   while (n_left_from > 0)
     {
       u32 sw_if_index0;
-      u32 rx_fib_index0 = 0;
+      u32 rx_fib_index0;
+      u32 iph_offset0 = 0;
       ip4_header_t *ip0;
 
-      ip0 = vlib_buffer_get_current (b[0]);
 
-      if (PREDICT_FALSE (is_in2out))
-       {
-         sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
-         rx_fib_index0 =
-           ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
-       }
+      if (is_output)
+       iph_offset0 = vnet_buffer (b[0])->ip.save_rewrite_length;
+
+      ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b[0]) +
+                             iph_offset0);
+
+      sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
+      rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
 
-      ti[0] = get_worker (ip0, rx_fib_index0);
+      ti[0] = get_worker (ip0, rx_fib_index0, is_output);
 
       if (ti[0] == thread_index)
        same_worker++;
@@ -211,9 +220,10 @@ nat44_worker_handoff_fn_inline (vlib_main_t * vm,
              t->next_worker_index = ti[0];
              t->trace_index = vlib_buffer_get_trace_index (b[0]);
              t->in2out = is_in2out;
+             t->output = is_output;
 
-             b++;
-             ti++;
+             b += 1;
+             ti += 1;
            }
          else
            break;
index 5eaf767..ff46ae9 100644 (file)
@@ -1863,7 +1863,7 @@ vl_api_nat44_user_session_dump_t_handler (vl_api_nat44_user_session_dump_t *
   if (sm->num_workers > 1)
     tsm =
       vec_elt_at_index (sm->per_thread_data,
-                       sm->worker_in2out_cb (&ip, ukey.fib_index));
+                       sm->worker_in2out_cb (&ip, ukey.fib_index, 0));
   else
     tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
   if (clib_bihash_search_8_8 (&tsm->user_hash, &key, &value))
index ae8ed7f..e3a6f19 100644 (file)
@@ -343,6 +343,105 @@ make_sm_kv (clib_bihash_kv_8_8_t * kv, ip4_address_t * addr, u8 proto,
   kv->value = ~0ULL;
 }
 
+static_always_inline int /* 0 once *p_key0 has been written; see the default: branch for the other return */
+get_icmp_i2o_ed_key (ip4_header_t * ip0, nat_ed_ses_key_t * p_key0)
+{ /* Build a session key from the ICMP packet at ip0 (in2out variant) and store it in *p_key0. */
+  icmp46_header_t *icmp0;
+  nat_ed_ses_key_t key0; /* no initializer: only proto, l_addr, r_addr, l_port, r_port are set here */
+  icmp_echo_header_t *echo0, *inner_echo0 = 0;
+  ip4_header_t *inner_ip0 = 0;
+  void *l4_header = 0;
+  icmp46_header_t *inner_icmp0;
+
+  icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
+  echo0 = (icmp_echo_header_t *) (icmp0 + 1); /* data directly after the ICMP header */
+
+  if (!icmp_is_error_message (icmp0)) /* icmp_is_error_message() false: key comes from the outer packet */
+    {
+      key0.proto = IP_PROTOCOL_ICMP;
+      key0.l_addr = ip0->src_address; /* outer src -> l_addr, outer dst -> r_addr */
+      key0.r_addr = ip0->dst_address;
+      key0.l_port = echo0->identifier; /* echo identifier is stored in l_port */
+      key0.r_port = 0;
+    }
+  else /* ICMP error message: key comes from the packet embedded after the echo-sized header */
+    {
+      inner_ip0 = (ip4_header_t *) (echo0 + 1); /* embedded IP header is read right after echo0 */
+      l4_header = ip4_next_header (inner_ip0);
+      key0.proto = inner_ip0->protocol;
+      key0.r_addr = inner_ip0->src_address; /* inner src -> r_addr, inner dst -> l_addr (reverse of the branch above) */
+      key0.l_addr = inner_ip0->dst_address;
+      switch (ip_proto_to_snat_proto (inner_ip0->protocol))
+       {
+       case SNAT_PROTOCOL_ICMP:
+         inner_icmp0 = (icmp46_header_t *) l4_header;
+         inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1);
+         key0.r_port = 0;
+         key0.l_port = inner_echo0->identifier; /* embedded echo identifier is stored in l_port */
+         break;
+       case SNAT_PROTOCOL_UDP:
+       case SNAT_PROTOCOL_TCP:
+         key0.l_port = ((tcp_udp_header_t *) l4_header)->dst_port; /* inner dst port -> l_port, inner src port -> r_port */
+         key0.r_port = ((tcp_udp_header_t *) l4_header)->src_port;
+         break;
+       default: /* any other embedded protocol: bail out without writing *p_key0 */
+         return NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL; /* NOTE(review): if this enumerator is 0 the result equals the success return - confirm; get_icmp_o2i_ed_key returns -1 here */
+       }
+    }
+  *p_key0 = key0; /* whole struct is copied, including members not assigned above */
+  return 0;
+}
+
+
+static_always_inline int /* 0 once *p_key0 has been written, -1 for an unsupported embedded protocol */
+get_icmp_o2i_ed_key (ip4_header_t * ip0, nat_ed_ses_key_t * p_key0)
+{ /* Build a session key from the ICMP packet at ip0 (out2in variant) and store it in *p_key0. */
+  icmp46_header_t *icmp0;
+  nat_ed_ses_key_t key0; /* no initializer: only proto, l_addr, r_addr, l_port, r_port are set here */
+  icmp_echo_header_t *echo0, *inner_echo0 = 0;
+  ip4_header_t *inner_ip0;
+  void *l4_header = 0;
+  icmp46_header_t *inner_icmp0;
+
+  icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
+  echo0 = (icmp_echo_header_t *) (icmp0 + 1); /* data directly after the ICMP header */
+
+  if (!icmp_is_error_message (icmp0)) /* icmp_is_error_message() false: key comes from the outer packet */
+    {
+      key0.proto = IP_PROTOCOL_ICMP;
+      key0.l_addr = ip0->dst_address; /* outer dst -> l_addr, outer src -> r_addr */
+      key0.r_addr = ip0->src_address;
+      key0.l_port = echo0->identifier; /* echo identifier is stored in l_port */
+      key0.r_port = 0;
+    }
+  else /* ICMP error message: key comes from the packet embedded after the echo-sized header */
+    {
+      inner_ip0 = (ip4_header_t *) (echo0 + 1); /* embedded IP header is read right after echo0 */
+      l4_header = ip4_next_header (inner_ip0);
+      key0.proto = inner_ip0->protocol;
+      key0.l_addr = inner_ip0->src_address; /* inner src -> l_addr, inner dst -> r_addr (reverse of the branch above) */
+      key0.r_addr = inner_ip0->dst_address;
+      switch (ip_proto_to_snat_proto (inner_ip0->protocol))
+       {
+       case SNAT_PROTOCOL_ICMP:
+         inner_icmp0 = (icmp46_header_t *) l4_header;
+         inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1);
+         key0.l_port = inner_echo0->identifier; /* embedded echo identifier is stored in l_port */
+         key0.r_port = 0;
+         break;
+       case SNAT_PROTOCOL_UDP:
+       case SNAT_PROTOCOL_TCP:
+         key0.l_port = ((tcp_udp_header_t *) l4_header)->src_port; /* inner src port -> l_port, inner dst port -> r_port */
+         key0.r_port = ((tcp_udp_header_t *) l4_header)->dst_port;
+         break;
+       default: /* any other embedded protocol: bail out without writing *p_key0 */
+         return -1;
+       }
+    }
+  *p_key0 = key0; /* whole struct is copied, including members not assigned above */
+  return 0;
+}
+
 always_inline void
 mss_clamping (snat_main_t * sm, tcp_header_t * tcp, ip_csum_t * sum)
 {
index 560c23e..50abebd 100644 (file)
 #include <nat/nat_syslog.h>
 #include <nat/nat_ha.h>
 
-#define foreach_nat_out2in_ed_error                     \
-_(UNSUPPORTED_PROTOCOL, "unsupported protocol")         \
-_(OUT2IN_PACKETS, "good out2in packets processed")      \
-_(OUT_OF_PORTS, "out of ports")                         \
-_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
-_(NO_TRANSLATION, "no translation")                     \
-_(MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded")   \
-_(DROP_FRAGMENT, "drop fragment")                       \
-_(MAX_REASS, "maximum reassemblies exceeded")           \
-_(MAX_FRAG, "maximum fragments per reassembly exceeded")\
-_(NON_SYN, "non-SYN packet try to create session")      \
-_(TCP_PACKETS, "TCP packets")                           \
-_(UDP_PACKETS, "UDP packets")                           \
-_(ICMP_PACKETS, "ICMP packets")                         \
-_(OTHER_PACKETS, "other protocol packets")              \
-_(FRAGMENTS, "fragments")                               \
-_(CACHED_FRAGMENTS, "cached fragments")                 \
-_(PROCESSED_FRAGMENTS, "processed fragments")
-
-typedef enum
-{
-#define _(sym,str) NAT_OUT2IN_ED_ERROR_##sym,
-  foreach_nat_out2in_ed_error
-#undef _
-    NAT_OUT2IN_ED_N_ERROR,
-} nat_out2in_ed_error_t;
-
 static char *nat_out2in_ed_error_strings[] = {
 #define _(sym,string) string,
   foreach_nat_out2in_ed_error
@@ -345,55 +318,6 @@ create_session_for_static_mapping_ed (snat_main_t * sm,
   return s;
 }
 
-static_always_inline int
-icmp_get_ed_key (ip4_header_t * ip0, nat_ed_ses_key_t * p_key0)
-{
-  icmp46_header_t *icmp0;
-  nat_ed_ses_key_t key0;
-  icmp_echo_header_t *echo0, *inner_echo0 = 0;
-  ip4_header_t *inner_ip0;
-  void *l4_header = 0;
-  icmp46_header_t *inner_icmp0;
-
-  icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
-  echo0 = (icmp_echo_header_t *) (icmp0 + 1);
-
-  if (!icmp_is_error_message (icmp0))
-    {
-      key0.proto = IP_PROTOCOL_ICMP;
-      key0.l_addr = ip0->dst_address;
-      key0.r_addr = ip0->src_address;
-      key0.l_port = echo0->identifier;
-      key0.r_port = 0;
-    }
-  else
-    {
-      inner_ip0 = (ip4_header_t *) (echo0 + 1);
-      l4_header = ip4_next_header (inner_ip0);
-      key0.proto = inner_ip0->protocol;
-      key0.l_addr = inner_ip0->src_address;
-      key0.r_addr = inner_ip0->dst_address;
-      switch (ip_proto_to_snat_proto (inner_ip0->protocol))
-       {
-       case SNAT_PROTOCOL_ICMP:
-         inner_icmp0 = (icmp46_header_t *) l4_header;
-         inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1);
-         key0.l_port = inner_echo0->identifier;
-         key0.r_port = 0;
-         break;
-       case SNAT_PROTOCOL_UDP:
-       case SNAT_PROTOCOL_TCP:
-         key0.l_port = ((tcp_udp_header_t *) l4_header)->src_port;
-         key0.r_port = ((tcp_udp_header_t *) l4_header)->dst_port;
-         break;
-       default:
-         return -1;
-       }
-    }
-  *p_key0 = key0;
-  return 0;
-}
-
 static int
 next_src_nat (snat_main_t * sm, ip4_header_t * ip, u8 proto, u16 src_port,
              u16 dst_port, u32 thread_index, u32 rx_fib_index)
@@ -423,7 +347,7 @@ create_bypass_for_fwd (snat_main_t * sm, ip4_header_t * ip, u32 rx_fib_index,
 
   if (ip->protocol == IP_PROTOCOL_ICMP)
     {
-      if (icmp_get_ed_key (ip, &key))
+      if (get_icmp_o2i_ed_key (ip, &key))
        return;
     }
   else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
@@ -515,7 +439,7 @@ create_bypass_for_fwd_worker (snat_main_t * sm, ip4_header_t * ip,
   ip4_header_t ip_wkr = {
     .src_address = ip->dst_address,
   };
-  u32 thread_index = sm->worker_in2out_cb (&ip_wkr, rx_fib_index);
+  u32 thread_index = sm->worker_in2out_cb (&ip_wkr, rx_fib_index, 0);
 
   create_bypass_for_fwd (sm, ip, rx_fib_index, thread_index);
 }
@@ -540,7 +464,7 @@ icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node,
   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
   rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
 
-  if (icmp_get_ed_key (ip, &key))
+  if (get_icmp_o2i_ed_key (ip, &key))
     {
       b->error = node->errors[NAT_OUT2IN_ED_ERROR_UNSUPPORTED_PROTOCOL];
       next = NAT44_ED_OUT2IN_NEXT_DROP;