cnat: Fix session flag initialization
[vpp.git] / src / plugins / cnat / cnat_node.h
index 2e3b0e0..246fdb8 100644 (file)
 #define __CNAT_NODE_H__
 
 #include <vlibmemory/api.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
+
 #include <cnat/cnat_session.h>
 #include <cnat/cnat_client.h>
+#include <cnat/cnat_inline.h>
+#include <cnat/cnat_translation.h>
+
+#include <vnet/ip/ip4_inlines.h>
+#include <vnet/ip/ip6_inlines.h>
 
 typedef uword (*cnat_node_sub_t) (vlib_main_t * vm,
                                  vlib_node_runtime_t * node,
@@ -26,6 +34,77 @@ typedef uword (*cnat_node_sub_t) (vlib_main_t * vm,
                                  cnat_node_ctx_t * ctx, int rv,
                                  cnat_session_t * session);
 
+typedef struct cnat_trace_element_t_
+{
+  cnat_session_t session;
+  cnat_translation_t tr;
+  u32 sw_if_index[VLIB_N_RX_TX];
+  u32 snat_policy_result;
+  u8 flags;
+} cnat_trace_element_t;
+
+typedef enum cnat_trace_element_flag_t_
+{
+  CNAT_TRACE_SESSION_FOUND = (1 << 0),
+  CNAT_TRACE_SESSION_CREATED = (1 << 1),
+  CNAT_TRACE_TRANSLATION_FOUND = (1 << 2),
+  CNAT_TRACE_NO_NAT = (1 << 3),
+} cnat_trace_element_flag_t;
+
+static_always_inline void
+cnat_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *b,
+               cnat_session_t *session, const cnat_translation_t *ct,
+               u8 flags)
+{
+  cnat_trace_element_t *t;
+  if (NULL != ct)
+    flags |= CNAT_TRACE_TRANSLATION_FOUND;
+
+  t = vlib_add_trace (vm, node, b, sizeof (*t));
+  t->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_RX];
+  t->sw_if_index[VLIB_TX] = vnet_buffer (b)->sw_if_index[VLIB_TX];
+
+  if (flags & (CNAT_TRACE_SESSION_FOUND | CNAT_TRACE_SESSION_CREATED))
+    clib_memcpy (&t->session, session, sizeof (t->session));
+  if (flags & CNAT_TRACE_TRANSLATION_FOUND)
+    clib_memcpy (&t->tr, ct, sizeof (cnat_translation_t));
+  t->flags = flags;
+}
+
+static u8 *
+format_cnat_trace (u8 *s, va_list *args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  cnat_trace_element_t *t = va_arg (*args, cnat_trace_element_t *);
+  u32 indent = format_get_indent (s);
+  vnet_main_t *vnm = vnet_get_main ();
+
+  if (t->flags & CNAT_TRACE_SESSION_CREATED)
+    s = format (s, "created session");
+  else if (t->flags & CNAT_TRACE_SESSION_FOUND)
+    s = format (s, "found session");
+  else
+    s = format (s, "session not found");
+
+  if (t->flags & (CNAT_TRACE_NO_NAT))
+    s = format (s, " [policy:skip]");
+
+  s = format (s, "\n%Uin:%U out:%U ", format_white_space, indent,
+             format_vnet_sw_if_index_name, vnm, t->sw_if_index[VLIB_RX],
+             format_vnet_sw_if_index_name, vnm, t->sw_if_index[VLIB_TX]);
+
+  if (t->flags & (CNAT_TRACE_SESSION_CREATED | CNAT_TRACE_SESSION_FOUND))
+    s = format (s, "\n%U%U", format_white_space, indent, format_cnat_session,
+               &t->session, 1);
+
+  if (t->flags & CNAT_TRACE_TRANSLATION_FOUND)
+    s = format (s, "\n%Utranslation: %U", format_white_space, indent,
+               format_cnat_translation, &t->tr, 0);
+
+  return s;
+}
+
 static_always_inline u8
 icmp_type_is_error_message (u8 icmp_type)
 {
@@ -603,18 +682,25 @@ cnat_translation_ip6 (const cnat_session_t * session,
 }
 
 static_always_inline void
-cnat_session_make_key (vlib_buffer_t * b, ip_address_family_t af,
-                      clib_bihash_kv_40_48_t * bkey)
+cnat_session_make_key (vlib_buffer_t *b, ip_address_family_t af,
+                      cnat_session_location_t cs_loc, cnat_bihash_kv_t *bkey)
 {
   udp_header_t *udp;
   cnat_session_t *session = (cnat_session_t *) bkey;
+  u32 iph_offset = 0;
   session->key.cs_af = af;
-  session->key.__cs_pad[0] = 0;
-  session->key.__cs_pad[1] = 0;
+
+  session->key.cs_loc = cs_loc;
+  session->key.__cs_pad = 0;
+  if (cs_loc == CNAT_LOCATION_OUTPUT)
+    /* rewind buffer */
+    iph_offset = vnet_buffer (b)->ip.save_rewrite_length;
+
   if (AF_IP4 == af)
     {
       ip4_header_t *ip4;
-      ip4 = vlib_buffer_get_current (b);
+      ip4 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b) + iph_offset);
+
       if (PREDICT_FALSE (ip4->protocol == IP_PROTOCOL_ICMP))
        {
          icmp46_header_t *icmp = (icmp46_header_t *) (ip4 + 1);
@@ -645,7 +731,8 @@ cnat_session_make_key (vlib_buffer_t * b, ip_address_family_t af,
          else
            goto error;
        }
-      else
+      else if (ip4->protocol == IP_PROTOCOL_UDP ||
+              ip4->protocol == IP_PROTOCOL_TCP)
        {
          udp = (udp_header_t *) (ip4 + 1);
          ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
@@ -656,12 +743,13 @@ cnat_session_make_key (vlib_buffer_t * b, ip_address_family_t af,
          session->key.cs_port[VLIB_RX] = udp->src_port;
          session->key.cs_port[VLIB_TX] = udp->dst_port;
        }
-
+      else
+       goto error;
     }
   else
     {
       ip6_header_t *ip6;
-      ip6 = vlib_buffer_get_current (b);
+      ip6 = (ip6_header_t *) ((u8 *) vlib_buffer_get_current (b) + iph_offset);
       if (PREDICT_FALSE (ip6->protocol == IP_PROTOCOL_ICMP6))
        {
          icmp46_header_t *icmp = (icmp46_header_t *) (ip6 + 1);
@@ -692,7 +780,8 @@ cnat_session_make_key (vlib_buffer_t * b, ip_address_family_t af,
          else
            goto error;
        }
-      else
+      else if (ip6->protocol == IP_PROTOCOL_UDP ||
+              ip6->protocol == IP_PROTOCOL_TCP)
        {
          udp = (udp_header_t *) (ip6 + 1);
          ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
@@ -703,6 +792,8 @@ cnat_session_make_key (vlib_buffer_t * b, ip_address_family_t af,
          session->key.cs_port[VLIB_TX] = udp->dst_port;
          session->key.cs_proto = ip6->protocol;
        }
+      else
+       goto error;
     }
   return;
 
@@ -712,68 +803,96 @@ error:
   return;
 }
 
+static_always_inline cnat_ep_trk_t *
+cnat_load_balance (const cnat_translation_t *ct, ip_address_family_t af,
+                  ip4_header_t *ip4, ip6_header_t *ip6, u32 *dpoi_index)
+{
+  cnat_main_t *cm = &cnat_main;
+  const load_balance_t *lb0;
+  const dpo_id_t *dpo0;
+  u32 hash_c0, bucket0;
+
+  lb0 = load_balance_get (ct->ct_lb.dpoi_index);
+  if (PREDICT_FALSE (!lb0->lb_n_buckets))
+    return (NULL);
+
+  /* session table miss */
+  hash_c0 = (AF_IP4 == af ? ip4_compute_flow_hash (ip4, lb0->lb_hash_config) :
+                           ip6_compute_flow_hash (ip6, lb0->lb_hash_config));
+
+  if (PREDICT_FALSE (ct->lb_type == CNAT_LB_MAGLEV))
+    bucket0 = ct->lb_maglev[hash_c0 % cm->maglev_len];
+  else
+    bucket0 = hash_c0 % lb0->lb_n_buckets;
+
+  dpo0 = load_balance_get_fwd_bucket (lb0, bucket0);
+
+  *dpoi_index = dpo0->dpoi_index;
+
+  return &ct->ct_active_paths[bucket0];
+}
+
 /**
  * Create NAT sessions
+ * rsession_location is the location the (return) session will be
+ * matched at
  */
-
 static_always_inline void
-cnat_session_create (cnat_session_t * session, cnat_node_ctx_t * ctx,
+cnat_session_create (cnat_session_t *session, cnat_node_ctx_t *ctx,
+                    cnat_session_location_t rsession_location,
                     u8 rsession_flags)
 {
   cnat_client_t *cc;
-  clib_bihash_kv_40_48_t rkey;
+  cnat_bihash_kv_t rkey;
   cnat_session_t *rsession = (cnat_session_t *) & rkey;
-  clib_bihash_kv_40_48_t *bkey = (clib_bihash_kv_40_48_t *) session;
-  clib_bihash_kv_40_48_t rvalue;
+  cnat_bihash_kv_t *bkey = (cnat_bihash_kv_t *) session;
+  cnat_bihash_kv_t rvalue;
   int rv;
 
   session->value.cs_ts_index = cnat_timestamp_new (ctx->now);
-  clib_bihash_add_del_40_48 (&cnat_session_db, bkey, 1);
+  cnat_bihash_add_del (&cnat_session_db, bkey, 1);
 
-  /* is this the first time we've seen this source address */
-  cc = (AF_IP4 == ctx->af ?
-       cnat_client_ip4_find (&session->value.cs_ip[VLIB_RX].ip4) :
-       cnat_client_ip6_find (&session->value.cs_ip[VLIB_RX].ip6));
-
-  if (NULL == cc)
+  if (!(rsession_flags & CNAT_SESSION_FLAG_NO_CLIENT))
     {
-      u64 r0 = 17;
-      if (AF_IP4 == ctx->af)
-       r0 = (u64) session->value.cs_ip[VLIB_RX].ip4.as_u32;
-      else
-       {
-         r0 = r0 * 31 + session->value.cs_ip[VLIB_RX].ip6.as_u64[0];
-         r0 = r0 * 31 + session->value.cs_ip[VLIB_RX].ip6.as_u64[1];
-       }
+      /* is this the first time we've seen this source address */
+      cc = (AF_IP4 == ctx->af ?
+             cnat_client_ip4_find (&session->value.cs_ip[VLIB_RX].ip4) :
+             cnat_client_ip6_find (&session->value.cs_ip[VLIB_RX].ip6));
 
-      /* Rate limit */
-      if (!throttle_check (&cnat_throttle, ctx->thread_index, r0, ctx->seed))
+      if (NULL == cc)
        {
-         cnat_learn_arg_t l;
-         l.addr.version = ctx->af;
-         ip46_address_copy (&l.addr.ip, &session->value.cs_ip[VLIB_RX]);
+         ip_address_t addr;
+         uword *p;
+         u32 refcnt;
+
+         addr.version = ctx->af;
+         ip46_address_copy (&addr.ip, &session->value.cs_ip[VLIB_RX]);
+
+         /* Throttle */
+         clib_spinlock_lock (&cnat_client_db.throttle_lock);
+
+         p = hash_get_mem (cnat_client_db.throttle_mem, &addr);
+         if (p)
+           {
+             refcnt = p[0] + 1;
+             hash_set_mem (cnat_client_db.throttle_mem, &addr, refcnt);
+           }
+         else
+           hash_set_mem_alloc (&cnat_client_db.throttle_mem, &addr, 0);
+
+         clib_spinlock_unlock (&cnat_client_db.throttle_lock);
+
          /* fire client create to the main thread */
-         vl_api_rpc_call_main_thread (cnat_client_learn,
-                                      (u8 *) & l, sizeof (l));
+         if (!p)
+           vl_api_rpc_call_main_thread (cnat_client_learn, (u8 *) &addr,
+                                        sizeof (addr));
        }
       else
        {
-         /* Will still need to count those for session refcnt */
-         ip_address_t *addr;
-         clib_spinlock_lock (&cnat_client_db.throttle_pool_lock
-                             [ctx->thread_index]);
-         pool_get (cnat_client_db.throttle_pool[ctx->thread_index], addr);
-         addr->version = ctx->af;
-         ip46_address_copy (&addr->ip, &session->value.cs_ip[VLIB_RX]);
-         clib_spinlock_unlock (&cnat_client_db.throttle_pool_lock
-                               [ctx->thread_index]);
+         /* Refcount reverse session */
+         cnat_client_cnt_session (cc);
        }
     }
-  else
-    {
-      /* Refcount reverse session */
-      cnat_client_cnt_session (cc);
-    }
 
   /* create the reverse flow key */
   ip46_address_copy (&rsession->key.cs_ip[VLIB_RX],
@@ -781,14 +900,14 @@ cnat_session_create (cnat_session_t * session, cnat_node_ctx_t * ctx,
   ip46_address_copy (&rsession->key.cs_ip[VLIB_TX],
                     &session->value.cs_ip[VLIB_RX]);
   rsession->key.cs_proto = session->key.cs_proto;
-  rsession->key.__cs_pad[0] = 0;
-  rsession->key.__cs_pad[1] = 0;
+  rsession->key.cs_loc = rsession_location;
+  rsession->key.__cs_pad = 0;
   rsession->key.cs_af = ctx->af;
   rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX];
   rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX];
 
   /* First search for existing reverse session */
-  rv = clib_bihash_search_inline_2_40_48 (&cnat_session_db, &rkey, &rvalue);
+  rv = cnat_bihash_search_i2 (&cnat_session_db, &rkey, &rvalue);
   if (!rv)
     {
       /* Reverse session already exists
@@ -803,26 +922,24 @@ cnat_session_create (cnat_session_t * session, cnat_node_ctx_t * ctx,
                     &session->key.cs_ip[VLIB_RX]);
   rsession->value.cs_ts_index = session->value.cs_ts_index;
   rsession->value.cs_lbi = INDEX_INVALID;
-  rsession->value.flags = rsession_flags;
+  rsession->value.flags = rsession_flags | CNAT_SESSION_IS_RETURN;
   rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX];
   rsession->value.cs_port[VLIB_RX] = session->key.cs_port[VLIB_TX];
 
-  clib_bihash_add_del_40_48 (&cnat_session_db, &rkey, 1);
+  cnat_bihash_add_del (&cnat_session_db, &rkey, 1);
 }
 
 always_inline uword
-cnat_node_inline (vlib_main_t * vm,
-                 vlib_node_runtime_t * node,
-                 vlib_frame_t * frame,
-                 cnat_node_sub_t cnat_sub,
-                 ip_address_family_t af, u8 do_trace)
+cnat_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+                 vlib_frame_t *frame, cnat_node_sub_t cnat_sub,
+                 ip_address_family_t af, cnat_session_location_t cs_loc,
+                 u8 do_trace)
 {
   u32 n_left, *from, thread_index;
   vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
   vlib_buffer_t **b = bufs;
   u16 nexts[VLIB_FRAME_SIZE], *next;
   f64 now;
-  u64 seed;
 
   thread_index = vm->thread_index;
   from = vlib_frame_vector_args (frame);
@@ -830,26 +947,25 @@ cnat_node_inline (vlib_main_t * vm,
   next = nexts;
   vlib_get_buffers (vm, from, bufs, n_left);
   now = vlib_time_now (vm);
-  seed = throttle_seed (&cnat_throttle, thread_index, vlib_time_now (vm));
   cnat_session_t *session[4];
-  clib_bihash_kv_40_48_t bkey[4], bvalue[4];
+  cnat_bihash_kv_t bkey[4], bvalue[4];
   u64 hash[4];
   int rv[4];
 
-  cnat_node_ctx_t ctx = { now, seed, thread_index, af, do_trace };
+  cnat_node_ctx_t ctx = { now, thread_index, af, do_trace };
 
   if (n_left >= 8)
     {
       /* Kickstart our state */
-      cnat_session_make_key (b[3], af, &bkey[3]);
-      cnat_session_make_key (b[2], af, &bkey[2]);
-      cnat_session_make_key (b[1], af, &bkey[1]);
-      cnat_session_make_key (b[0], af, &bkey[0]);
-
-      hash[3] = clib_bihash_hash_40_48 (&bkey[3]);
-      hash[2] = clib_bihash_hash_40_48 (&bkey[2]);
-      hash[1] = clib_bihash_hash_40_48 (&bkey[1]);
-      hash[0] = clib_bihash_hash_40_48 (&bkey[0]);
+      cnat_session_make_key (b[3], af, cs_loc, &bkey[3]);
+      cnat_session_make_key (b[2], af, cs_loc, &bkey[2]);
+      cnat_session_make_key (b[1], af, cs_loc, &bkey[1]);
+      cnat_session_make_key (b[0], af, cs_loc, &bkey[0]);
+
+      hash[3] = cnat_bihash_hash (&bkey[3]);
+      hash[2] = cnat_bihash_hash (&bkey[2]);
+      hash[1] = cnat_bihash_hash (&bkey[1]);
+      hash[0] = cnat_bihash_hash (&bkey[0]);
     }
 
   while (n_left >= 8)
@@ -862,53 +978,45 @@ cnat_node_inline (vlib_main_t * vm,
          vlib_prefetch_buffer_header (b[8], LOAD);
        }
 
-      rv[3] =
-       clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db,
-                                                    hash[3], &bkey[3],
-                                                    &bvalue[3]);
+      rv[3] = cnat_bihash_search_i2_hash (&cnat_session_db, hash[3], &bkey[3],
+                                         &bvalue[3]);
       session[3] = (cnat_session_t *) (rv[3] ? &bkey[3] : &bvalue[3]);
       next[3] = cnat_sub (vm, node, b[3], &ctx, rv[3], session[3]);
 
-      rv[2] =
-       clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db,
-                                                    hash[2], &bkey[2],
-                                                    &bvalue[2]);
+      rv[2] = cnat_bihash_search_i2_hash (&cnat_session_db, hash[2], &bkey[2],
+                                         &bvalue[2]);
       session[2] = (cnat_session_t *) (rv[2] ? &bkey[2] : &bvalue[2]);
       next[2] = cnat_sub (vm, node, b[2], &ctx, rv[2], session[2]);
 
-      rv[1] =
-       clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db,
-                                                    hash[1], &bkey[1],
-                                                    &bvalue[1]);
+      rv[1] = cnat_bihash_search_i2_hash (&cnat_session_db, hash[1], &bkey[1],
+                                         &bvalue[1]);
       session[1] = (cnat_session_t *) (rv[1] ? &bkey[1] : &bvalue[1]);
       next[1] = cnat_sub (vm, node, b[1], &ctx, rv[1], session[1]);
 
-      rv[0] =
-       clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db,
-                                                    hash[0], &bkey[0],
-                                                    &bvalue[0]);
+      rv[0] = cnat_bihash_search_i2_hash (&cnat_session_db, hash[0], &bkey[0],
+                                         &bvalue[0]);
       session[0] = (cnat_session_t *) (rv[0] ? &bkey[0] : &bvalue[0]);
       next[0] = cnat_sub (vm, node, b[0], &ctx, rv[0], session[0]);
 
-      cnat_session_make_key (b[7], af, &bkey[3]);
-      cnat_session_make_key (b[6], af, &bkey[2]);
-      cnat_session_make_key (b[5], af, &bkey[1]);
-      cnat_session_make_key (b[4], af, &bkey[0]);
+      cnat_session_make_key (b[7], af, cs_loc, &bkey[3]);
+      cnat_session_make_key (b[6], af, cs_loc, &bkey[2]);
+      cnat_session_make_key (b[5], af, cs_loc, &bkey[1]);
+      cnat_session_make_key (b[4], af, cs_loc, &bkey[0]);
 
-      hash[3] = clib_bihash_hash_40_48 (&bkey[3]);
-      hash[2] = clib_bihash_hash_40_48 (&bkey[2]);
-      hash[1] = clib_bihash_hash_40_48 (&bkey[1]);
-      hash[0] = clib_bihash_hash_40_48 (&bkey[0]);
+      hash[3] = cnat_bihash_hash (&bkey[3]);
+      hash[2] = cnat_bihash_hash (&bkey[2]);
+      hash[1] = cnat_bihash_hash (&bkey[1]);
+      hash[0] = cnat_bihash_hash (&bkey[0]);
 
-      clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[3]);
-      clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[2]);
-      clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[1]);
-      clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[0]);
+      cnat_bihash_prefetch_bucket (&cnat_session_db, hash[3]);
+      cnat_bihash_prefetch_bucket (&cnat_session_db, hash[2]);
+      cnat_bihash_prefetch_bucket (&cnat_session_db, hash[1]);
+      cnat_bihash_prefetch_bucket (&cnat_session_db, hash[0]);
 
-      clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[3]);
-      clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[2]);
-      clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[1]);
-      clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[0]);
+      cnat_bihash_prefetch_data (&cnat_session_db, hash[3]);
+      cnat_bihash_prefetch_data (&cnat_session_db, hash[2]);
+      cnat_bihash_prefetch_data (&cnat_session_db, hash[1]);
+      cnat_bihash_prefetch_data (&cnat_session_db, hash[0]);
 
       b += 4;
       next += 4;
@@ -917,9 +1025,8 @@ cnat_node_inline (vlib_main_t * vm,
 
   while (n_left > 0)
     {
-      cnat_session_make_key (b[0], af, &bkey[0]);
-      rv[0] = clib_bihash_search_inline_2_40_48 (&cnat_session_db,
-                                                &bkey[0], &bvalue[0]);
+      cnat_session_make_key (b[0], af, cs_loc, &bkey[0]);
+      rv[0] = cnat_bihash_search_i2 (&cnat_session_db, &bkey[0], &bvalue[0]);
 
       session[0] = (cnat_session_t *) (rv[0] ? &bkey[0] : &bvalue[0]);
       next[0] = cnat_sub (vm, node, b[0], &ctx, rv[0], session[0]);