cnat: Fix conflicting rsession 09/35209/3
authorNathan Skrzypczak <nathan.skrzypczak@gmail.com>
Wed, 2 Feb 2022 18:31:43 +0000 (19:31 +0100)
committerBeno�t Ganne <bganne@cisco.com>
Fri, 18 Mar 2022 11:33:51 +0000 (11:33 +0000)
When dNAT-ing to a VIP, it can happen
that the return session conflicts with
another forward session than the one
we own.

This patchs adds a rsession_flags
CNAT_SESSION_RETRY_SNAT that makes cnat_session_create
search for a free src port to use for the
resulting return session.

It also makes forward & return session
share their fate in the session scanner.

Type: fix

Change-Id: Id0edf59abf8e5bc0c0d8941ba289c4563c77dee0
Signed-off-by: Nathan Skrzypczak <nathan.skrzypczak@gmail.com>
src/plugins/cnat/cnat_node.h
src/plugins/cnat/cnat_node_feature.c
src/plugins/cnat/cnat_session.c
src/plugins/cnat/cnat_session.h

index 246fdb8..80d803c 100644 (file)
@@ -846,11 +846,55 @@ cnat_session_create (cnat_session_t *session, cnat_node_ctx_t *ctx,
   cnat_bihash_kv_t rkey;
   cnat_session_t *rsession = (cnat_session_t *) & rkey;
   cnat_bihash_kv_t *bkey = (cnat_bihash_kv_t *) session;
-  cnat_bihash_kv_t rvalue;
-  int rv;
+  int rv, n_retries = 0;
+  static u32 sport_seed = 0;
 
   session->value.cs_ts_index = cnat_timestamp_new (ctx->now);
-  cnat_bihash_add_del (&cnat_session_db, bkey, 1);
+
+  /* First create the return session */
+  ip46_address_copy (&rsession->key.cs_ip[VLIB_RX],
+                    &session->value.cs_ip[VLIB_TX]);
+  ip46_address_copy (&rsession->key.cs_ip[VLIB_TX],
+                    &session->value.cs_ip[VLIB_RX]);
+  rsession->key.cs_proto = session->key.cs_proto;
+  rsession->key.cs_loc = rsession_location;
+  rsession->key.__cs_pad = 0;
+  rsession->key.cs_af = ctx->af;
+  rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX];
+  rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX];
+
+  ip46_address_copy (&rsession->value.cs_ip[VLIB_RX],
+                    &session->key.cs_ip[VLIB_TX]);
+  ip46_address_copy (&rsession->value.cs_ip[VLIB_TX],
+                    &session->key.cs_ip[VLIB_RX]);
+  rsession->value.cs_ts_index = session->value.cs_ts_index;
+  rsession->value.cs_lbi = INDEX_INVALID;
+  rsession->value.flags = rsession_flags | CNAT_SESSION_IS_RETURN;
+  rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX];
+  rsession->value.cs_port[VLIB_RX] = session->key.cs_port[VLIB_TX];
+
+retry_add_ression:
+  rv = cnat_bihash_add_del (&cnat_session_db, &rkey,
+                           2 /* add but don't overwrite */);
+  if (rv)
+    {
+      if (!(rsession_flags & CNAT_SESSION_RETRY_SNAT))
+       return;
+
+      /* return session add failed pick an new random src port */
+      rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX] =
+       random_u32 (&sport_seed);
+      if (n_retries++ < 100)
+       goto retry_add_ression;
+      else
+       {
+         clib_warning ("Could not find a free port after 100 tries");
+         /* translate this packet, but don't create state */
+         return;
+       }
+    }
+
+  cnat_bihash_add_del (&cnat_session_db, bkey, 1 /* add */);
 
   if (!(rsession_flags & CNAT_SESSION_FLAG_NO_CLIENT))
     {
@@ -894,39 +938,6 @@ cnat_session_create (cnat_session_t *session, cnat_node_ctx_t *ctx,
        }
     }
 
-  /* create the reverse flow key */
-  ip46_address_copy (&rsession->key.cs_ip[VLIB_RX],
-                    &session->value.cs_ip[VLIB_TX]);
-  ip46_address_copy (&rsession->key.cs_ip[VLIB_TX],
-                    &session->value.cs_ip[VLIB_RX]);
-  rsession->key.cs_proto = session->key.cs_proto;
-  rsession->key.cs_loc = rsession_location;
-  rsession->key.__cs_pad = 0;
-  rsession->key.cs_af = ctx->af;
-  rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX];
-  rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX];
-
-  /* First search for existing reverse session */
-  rv = cnat_bihash_search_i2 (&cnat_session_db, &rkey, &rvalue);
-  if (!rv)
-    {
-      /* Reverse session already exists
-         cleanup before creating for refcnts */
-      cnat_session_t *found_rsession = (cnat_session_t *) & rvalue;
-      cnat_session_free (found_rsession);
-    }
-  /* add the reverse flow */
-  ip46_address_copy (&rsession->value.cs_ip[VLIB_RX],
-                    &session->key.cs_ip[VLIB_TX]);
-  ip46_address_copy (&rsession->value.cs_ip[VLIB_TX],
-                    &session->key.cs_ip[VLIB_RX]);
-  rsession->value.cs_ts_index = session->value.cs_ts_index;
-  rsession->value.cs_lbi = INDEX_INVALID;
-  rsession->value.flags = rsession_flags | CNAT_SESSION_IS_RETURN;
-  rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX];
-  rsession->value.cs_port[VLIB_RX] = session->key.cs_port[VLIB_TX];
-
-  cnat_bihash_add_del (&cnat_session_db, &rkey, 1);
 }
 
 always_inline uword
index aced4cd..76aa893 100644 (file)
@@ -321,7 +321,8 @@ cnat_output_feature_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
 
       trace_flags |= CNAT_TRACE_SESSION_CREATED;
       cnat_session_create (session, ctx, CNAT_LOCATION_INPUT,
-                          CNAT_SESSION_FLAG_NO_CLIENT);
+                          CNAT_SESSION_FLAG_NO_CLIENT |
+                            CNAT_SESSION_RETRY_SNAT);
     }
 
   if (AF_IP4 == ctx->af)
index 216d257..fa04de6 100644 (file)
@@ -172,15 +172,43 @@ cnat_session_purge (void)
   return (0);
 }
 
+void
+cnat_reverse_session_free (cnat_session_t *session)
+{
+  cnat_bihash_kv_t bkey, bvalue;
+  cnat_session_t *rsession = (cnat_session_t *) &bkey;
+  int rv;
+
+  ip46_address_copy (&rsession->key.cs_ip[VLIB_RX],
+                    &session->value.cs_ip[VLIB_TX]);
+  ip46_address_copy (&rsession->key.cs_ip[VLIB_TX],
+                    &session->value.cs_ip[VLIB_RX]);
+  rsession->key.cs_proto = session->key.cs_proto;
+  rsession->key.cs_loc = session->key.cs_loc == CNAT_LOCATION_OUTPUT ?
+                          CNAT_LOCATION_INPUT :
+                          CNAT_LOCATION_OUTPUT;
+  rsession->key.__cs_pad = 0;
+  rsession->key.cs_af = session->key.cs_af;
+  rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX];
+  rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX];
+
+  rv = cnat_bihash_search_i2 (&cnat_session_db, &bkey, &bvalue);
+  if (!rv)
+    {
+      /* other session is in bihash */
+      cnat_session_t *rsession = (cnat_session_t *) &bvalue;
+      cnat_session_free (rsession);
+    }
+}
+
 u64
 cnat_session_scan (vlib_main_t * vm, f64 start_time, int i)
 {
   BVT (clib_bihash) * h = &cnat_session_db;
   int j, k;
 
-  /* Don't scan the l2 fib if it hasn't been instantiated yet */
   if (alloc_arena (h) == 0)
-    return 0.0;
+    return 0;
 
   for ( /* caller saves starting point */ ; i < h->nbuckets; i++)
     {
@@ -219,6 +247,9 @@ cnat_session_scan (vlib_main_t * vm, f64 start_time, int i)
                  cnat_timestamp_exp (session->value.cs_ts_index))
                {
                  /* age it */
+                 cnat_reverse_session_free (session);
+                 /* this should be last as deleting the session memset it to
+                  * 0xff */
                  cnat_session_free (session);
 
                  /*
index 072bb10..a0a28c9 100644 (file)
@@ -129,6 +129,11 @@ typedef enum cnat_session_flag_t_
 
   /* Debug flag marking return sessions */
   CNAT_SESSION_IS_RETURN = (1 << 4),
+
+  /** On conflicts when adding the return session, try to sNAT the
+   * forward session, and dNAT the return session with a random port */
+  CNAT_SESSION_RETRY_SNAT = (1 << 5),
+
 } cnat_session_flag_t;
 
 typedef enum cnat_session_location_t_