cnat: Ip ICMP error support 43/28743/4
author: Nathan Skrzypczak <nathan.skrzypczak@gmail.com>
Tue, 8 Sep 2020 13:16:08 +0000 (15:16 +0200)
committer: Dave Barach <openvpp@barachs.net>
Fri, 25 Sep 2020 19:55:39 +0000 (19:55 +0000)
Type: feature

Add CNAT translation for ICMPv4 and ICMPv6 error messages:
the inner packet is translated according
to existing sessions.

Change-Id: If118751988f44ef96b800878596296d1ab8ab6f8
Signed-off-by: Nathan Skrzypczak <nathan.skrzypczak@gmail.com>
src/plugins/cnat/cnat_client.c
src/plugins/cnat/cnat_node.h
src/plugins/cnat/cnat_node_snat.c
src/plugins/cnat/cnat_node_vip.c
src/plugins/cnat/cnat_session.h
src/plugins/cnat/test/test_cnat.py

index 10d9966..314000d 100644 (file)
@@ -61,12 +61,10 @@ cnat_client_free_by_ip (ip46_address_t * ip, u8 af)
   cnat_client_t *cc;
   cc = (AF_IP4 == af ?
        cnat_client_ip4_find (&ip->ip4) : cnat_client_ip6_find (&ip->ip6));
-  /* This can happen if the translation gets deleted
-     before the session */
-  if (NULL == cc)
-    return;
+  ASSERT (NULL != cc);
+
   if ((0 == cnat_client_uncnt_session (cc))
-      && (cc->flags & CNAT_FLAG_EXPIRES))
+      && (cc->flags & CNAT_FLAG_EXPIRES) && (0 == cc->tr_refcnt))
     cnat_client_destroy (cc);
 }
 
@@ -101,7 +99,6 @@ cnat_client_throttle_pool_process ()
       /* *INDENT-ON* */
       vec_foreach (ai, del_vec)
       {
-       /* Free session */
        addr = pool_elt_at_index (cnat_client_db.throttle_pool[i], *ai);
        pool_put (cnat_client_db.throttle_pool[i], addr);
       }
@@ -127,7 +124,7 @@ cnat_client_translation_deleted (index_t cci)
   ASSERT (!(cc->flags & CNAT_FLAG_EXPIRES));
   cc->tr_refcnt--;
 
-  if (0 == cc->tr_refcnt)
+  if (0 == cc->tr_refcnt && 0 == cc->session_refcnt)
     cnat_client_destroy (cc);
 }
 
@@ -171,6 +168,8 @@ cnat_client_add (const ip_address_t * ip, u8 flags)
   cci = cc - cnat_client_pool;
   cc->parent_cci = cci;
   cc->flags = flags;
+  cc->tr_refcnt = 0;
+  cc->session_refcnt = 0;
 
   ip_address_copy (&cc->cc_ip, ip);
   cnat_client_db_add (cc);
@@ -238,9 +237,16 @@ cnat_client_dpo_interpose (const dpo_id_t * original,
 int
 cnat_client_purge (void)
 {
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  int nthreads;
+  nthreads = tm->n_threads + 1;
   ASSERT (0 == hash_elts (cnat_client_db.crd_cip6));
   ASSERT (0 == hash_elts (cnat_client_db.crd_cip4));
   ASSERT (0 == pool_elts (cnat_client_pool));
+  for (int i = 0; i < nthreads; i++)
+    {
+      ASSERT (0 == pool_elts (cnat_client_db.throttle_pool[i]));
+    }
   return (0);
 }
 
index 58e81c1..a396796 100644 (file)
@@ -26,6 +26,42 @@ typedef uword (*cnat_node_sub_t) (vlib_main_t * vm,
                                  cnat_node_ctx_t * ctx, int rv,
                                  cnat_session_t * session);
 
+static_always_inline u8
+icmp_type_is_error_message (u8 icmp_type)
+{
+  switch (icmp_type)
+    {
+    case ICMP4_destination_unreachable:
+    case ICMP4_time_exceeded:
+    case ICMP4_parameter_problem:
+    case ICMP4_source_quench:
+    case ICMP4_redirect:
+    case ICMP4_alternate_host_address:
+      return 1;
+    }
+  return 0;
+}
+
+static_always_inline u8
+icmp6_type_is_error_message (u8 icmp_type)
+{
+  switch (icmp_type)
+    {
+    case ICMP6_destination_unreachable:
+    case ICMP6_time_exceeded:
+    case ICMP6_parameter_problem:
+      return 1;
+    }
+  return 0;
+}
+
+static_always_inline u8
+cmp_ip6_address (const ip6_address_t * a1, const ip6_address_t * a2)
+{
+  return ((a1->as_u64[0] == a2->as_u64[0])
+         && (a1->as_u64[1] == a2->as_u64[1]));
+}
+
 /**
  * Inline translation functions
  */
@@ -38,44 +74,52 @@ has_ip6_address (ip6_address_t * a)
 
 static_always_inline void
 cnat_ip4_translate_l4 (ip4_header_t * ip4, udp_header_t * udp,
-                      u16 * checksum,
+                      ip_csum_t * sum,
                       ip4_address_t new_addr[VLIB_N_DIR],
                       u16 new_port[VLIB_N_DIR])
 {
   u16 old_port[VLIB_N_DIR];
   ip4_address_t old_addr[VLIB_N_DIR];
-  ip_csum_t sum;
+
+  /* Fastpath no checksum */
+  if (PREDICT_TRUE (0 == *sum))
+    {
+      udp->dst_port = new_port[VLIB_TX];
+      udp->src_port = new_port[VLIB_RX];
+      return;
+    }
 
   old_port[VLIB_TX] = udp->dst_port;
   old_port[VLIB_RX] = udp->src_port;
   old_addr[VLIB_TX] = ip4->dst_address;
   old_addr[VLIB_RX] = ip4->src_address;
 
-  sum = *checksum;
   if (new_addr[VLIB_TX].as_u32)
-    sum =
-      ip_csum_update (sum, old_addr[VLIB_TX].as_u32, new_addr[VLIB_TX].as_u32,
-                     ip4_header_t, dst_address);
+    {
+      *sum =
+       ip_csum_update (*sum, old_addr[VLIB_TX].as_u32,
+                       new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
+    }
   if (new_port[VLIB_TX])
     {
       udp->dst_port = new_port[VLIB_TX];
-      sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
-                           ip4_header_t /* cheat */ ,
-                           length /* changed member */ );
+      *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
+                            ip4_header_t /* cheat */ ,
+                            length /* changed member */ );
     }
   if (new_addr[VLIB_RX].as_u32)
-    sum =
-      ip_csum_update (sum, old_addr[VLIB_RX].as_u32, new_addr[VLIB_RX].as_u32,
-                     ip4_header_t, src_address);
-
+    {
+      *sum =
+       ip_csum_update (*sum, old_addr[VLIB_RX].as_u32,
+                       new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
+    }
   if (new_port[VLIB_RX])
     {
       udp->src_port = new_port[VLIB_RX];
-      sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
-                           ip4_header_t /* cheat */ ,
-                           length /* changed member */ );
+      *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
+                            ip4_header_t /* cheat */ ,
+                            length /* changed member */ );
     }
-  *checksum = ip_csum_fold (sum);
 }
 
 static_always_inline void
@@ -125,6 +169,94 @@ cnat_tcp_update_session_lifetime (tcp_header_t * tcp, u32 index)
     }
 }
 
+static_always_inline void
+cnat_translation_icmp4 (ip4_header_t * outer_ip4, udp_header_t * outer_udp,
+                       ip4_address_t outer_new_addr[VLIB_N_DIR],
+                       u16 outer_new_port[VLIB_N_DIR], u8 snat_outer_ip)
+{
+  icmp46_header_t *icmp = (icmp46_header_t *) outer_udp;
+  ip4_address_t new_addr[VLIB_N_DIR];
+  ip4_address_t old_addr[VLIB_N_DIR];
+  u16 new_port[VLIB_N_DIR];
+  u16 old_port[VLIB_N_DIR];
+  ip_csum_t sum, old_ip_sum, inner_l4_sum, inner_l4_old_sum;
+
+  if (!icmp_type_is_error_message (icmp->type))
+    return;
+
+  ip4_header_t *ip4 = (ip4_header_t *) (icmp + 2);
+  udp_header_t *udp = (udp_header_t *) (ip4 + 1);
+  tcp_header_t *tcp = (tcp_header_t *) udp;
+
+  /* Swap inner ports */
+  new_addr[VLIB_TX] = outer_new_addr[VLIB_RX];
+  new_addr[VLIB_RX] = outer_new_addr[VLIB_TX];
+  new_port[VLIB_TX] = outer_new_port[VLIB_RX];
+  new_port[VLIB_RX] = outer_new_port[VLIB_TX];
+
+  old_addr[VLIB_TX] = ip4->dst_address;
+  old_addr[VLIB_RX] = ip4->src_address;
+  old_port[VLIB_RX] = udp->src_port;
+  old_port[VLIB_TX] = udp->dst_port;
+
+  sum = icmp->checksum;
+  old_ip_sum = ip4->checksum;
+
+  /* translate outer ip. */
+  if (!snat_outer_ip)
+    outer_new_addr[VLIB_RX] = outer_ip4->src_address;
+  cnat_ip4_translate_l3 (outer_ip4, outer_new_addr);
+
+  if (ip4->protocol == IP_PROTOCOL_TCP)
+    {
+      inner_l4_old_sum = inner_l4_sum = tcp->checksum;
+      cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port);
+      tcp->checksum = ip_csum_fold (inner_l4_sum);
+    }
+  else if (ip4->protocol == IP_PROTOCOL_UDP)
+    {
+      inner_l4_old_sum = inner_l4_sum = udp->checksum;
+      cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port);
+      udp->checksum = ip_csum_fold (inner_l4_sum);
+    }
+  else
+    return;
+
+  /* UDP/TCP checksum changed */
+  sum = ip_csum_update (sum, inner_l4_old_sum, inner_l4_sum,
+                       ip4_header_t, checksum);
+
+  /* UDP/TCP Ports changed */
+  if (old_port[VLIB_TX] && new_port[VLIB_TX])
+    sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
+                         ip4_header_t /* cheat */ ,
+                         length /* changed member */ );
+
+  if (old_port[VLIB_RX] && new_port[VLIB_RX])
+    sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
+                         ip4_header_t /* cheat */ ,
+                         length /* changed member */ );
+
+
+  cnat_ip4_translate_l3 (ip4, new_addr);
+  ip_csum_t new_ip_sum = ip4->checksum;
+  /* IP checksum changed */
+  sum = ip_csum_update (sum, old_ip_sum, new_ip_sum, ip4_header_t, checksum);
+
+  /* IP src/dst addr changed */
+  if (new_addr[VLIB_TX].as_u32)
+    sum =
+      ip_csum_update (sum, old_addr[VLIB_TX].as_u32, new_addr[VLIB_TX].as_u32,
+                     ip4_header_t, dst_address);
+
+  if (new_addr[VLIB_RX].as_u32)
+    sum =
+      ip_csum_update (sum, old_addr[VLIB_RX].as_u32, new_addr[VLIB_RX].as_u32,
+                     ip4_header_t, src_address);
+
+  icmp->checksum = ip_csum_fold (sum);
+}
+
 static_always_inline void
 cnat_translation_ip4 (const cnat_session_t * session,
                      ip4_header_t * ip4, udp_header_t * udp)
@@ -140,27 +272,26 @@ cnat_translation_ip4 (const cnat_session_t * session,
 
   if (ip4->protocol == IP_PROTOCOL_TCP)
     {
-      if (PREDICT_FALSE (tcp->checksum))
-       cnat_ip4_translate_l4 (ip4, udp, &tcp->checksum, new_addr, new_port);
-      else
-       {
-         udp->dst_port = new_port[VLIB_TX];
-         udp->src_port = new_port[VLIB_RX];
-       }
+      ip_csum_t sum = tcp->checksum;
+      cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port);
+      tcp->checksum = ip_csum_fold (sum);
+      cnat_ip4_translate_l3 (ip4, new_addr);
       cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index);
     }
   else if (ip4->protocol == IP_PROTOCOL_UDP)
     {
-      if (PREDICT_FALSE (udp->checksum))
-       cnat_ip4_translate_l4 (ip4, udp, &udp->checksum, new_addr, new_port);
-      else
-       {
-         udp->dst_port = new_port[VLIB_TX];
-         udp->src_port = new_port[VLIB_RX];
-       }
+      ip_csum_t sum = udp->checksum;
+      cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port);
+      udp->checksum = ip_csum_fold (sum);
+      cnat_ip4_translate_l3 (ip4, new_addr);
+    }
+  else if (ip4->protocol == IP_PROTOCOL_ICMP)
+    {
+      /* SNAT only if src_addr was translated */
+      u8 snat_outer_ip =
+       (ip4->src_address.as_u32 == session->key.cs_ip[VLIB_RX].ip4.as_u32);
+      cnat_translation_icmp4 (ip4, udp, new_addr, new_port, snat_outer_ip);
     }
-
-  cnat_ip4_translate_l3 (ip4, new_addr);
 }
 
 static_always_inline void
@@ -174,51 +305,163 @@ cnat_ip6_translate_l3 (ip6_header_t * ip6, ip6_address_t new_addr[VLIB_N_DIR])
 
 static_always_inline void
 cnat_ip6_translate_l4 (ip6_header_t * ip6, udp_header_t * udp,
-                      u16 * checksum,
+                      ip_csum_t * sum,
                       ip6_address_t new_addr[VLIB_N_DIR],
                       u16 new_port[VLIB_N_DIR])
 {
   u16 old_port[VLIB_N_DIR];
   ip6_address_t old_addr[VLIB_N_DIR];
-  ip_csum_t sum;
+
+  /* Fastpath no checksum */
+  if (PREDICT_TRUE (0 == *sum))
+    {
+      udp->dst_port = new_port[VLIB_TX];
+      udp->src_port = new_port[VLIB_RX];
+      return;
+    }
 
   old_port[VLIB_TX] = udp->dst_port;
   old_port[VLIB_RX] = udp->src_port;
   ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address);
   ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address);
 
-  sum = *checksum;
   if (has_ip6_address (&new_addr[VLIB_TX]))
     {
-      sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
-      sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
-      sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
-      sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
+      *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[0]);
+      *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[1]);
+      *sum = ip_csum_sub_even (*sum, old_addr[VLIB_TX].as_u64[0]);
+      *sum = ip_csum_sub_even (*sum, old_addr[VLIB_TX].as_u64[1]);
     }
 
   if (new_port[VLIB_TX])
     {
       udp->dst_port = new_port[VLIB_TX];
-      sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
-                           ip4_header_t /* cheat */ ,
-                           length /* changed member */ );
+      *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
+                            ip4_header_t /* cheat */ ,
+                            length /* changed member */ );
     }
   if (has_ip6_address (&new_addr[VLIB_RX]))
     {
-      sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
-      sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
-      sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
-      sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
+      *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[0]);
+      *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[1]);
+      *sum = ip_csum_sub_even (*sum, old_addr[VLIB_RX].as_u64[0]);
+      *sum = ip_csum_sub_even (*sum, old_addr[VLIB_RX].as_u64[1]);
     }
 
   if (new_port[VLIB_RX])
     {
       udp->src_port = new_port[VLIB_RX];
-      sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
-                           ip4_header_t /* cheat */ ,
-                           length /* changed member */ );
+      *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
+                            ip4_header_t /* cheat */ ,
+                            length /* changed member */ );
     }
-  *checksum = ip_csum_fold (sum);
+}
+
+static_always_inline void
+cnat_translation_icmp6 (ip6_header_t * outer_ip6, udp_header_t * outer_udp,
+                       ip6_address_t outer_new_addr[VLIB_N_DIR],
+                       u16 outer_new_port[VLIB_N_DIR], u8 snat_outer_ip)
+{
+  icmp46_header_t *icmp = (icmp46_header_t *) outer_udp;
+  ip6_address_t new_addr[VLIB_N_DIR];
+  ip6_address_t old_addr[VLIB_N_DIR];
+  ip6_address_t outer_old_addr[VLIB_N_DIR];
+  u16 new_port[VLIB_N_DIR];
+  u16 old_port[VLIB_N_DIR];
+  ip_csum_t sum, inner_l4_sum, inner_l4_old_sum;
+
+  if (!icmp6_type_is_error_message (icmp->type))
+    return;
+
+  ip6_header_t *ip6 = (ip6_header_t *) (icmp + 2);
+  udp_header_t *udp = (udp_header_t *) (ip6 + 1);
+  tcp_header_t *tcp = (tcp_header_t *) udp;
+
+  /* Swap inner ports */
+  ip6_address_copy (&new_addr[VLIB_RX], &outer_new_addr[VLIB_TX]);
+  ip6_address_copy (&new_addr[VLIB_TX], &outer_new_addr[VLIB_RX]);
+  new_port[VLIB_TX] = outer_new_port[VLIB_RX];
+  new_port[VLIB_RX] = outer_new_port[VLIB_TX];
+
+  ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address);
+  ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address);
+  old_port[VLIB_RX] = udp->src_port;
+  old_port[VLIB_TX] = udp->dst_port;
+
+  sum = icmp->checksum;
+  /* Translate outer ip */
+  ip6_address_copy (&outer_old_addr[VLIB_TX], &outer_ip6->dst_address);
+  ip6_address_copy (&outer_old_addr[VLIB_RX], &outer_ip6->src_address);
+  if (!snat_outer_ip)
+    ip6_address_copy (&outer_new_addr[VLIB_RX], &outer_ip6->src_address);
+  cnat_ip6_translate_l3 (outer_ip6, outer_new_addr);
+  if (has_ip6_address (&outer_new_addr[VLIB_TX]))
+    {
+      sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[0]);
+      sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[1]);
+      sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[0]);
+      sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[1]);
+    }
+
+  if (has_ip6_address (&outer_new_addr[VLIB_RX]))
+    {
+      sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[0]);
+      sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[1]);
+      sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[0]);
+      sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[1]);
+    }
+
+  if (ip6->protocol == IP_PROTOCOL_TCP)
+    {
+      inner_l4_old_sum = inner_l4_sum = tcp->checksum;
+      cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port);
+      tcp->checksum = ip_csum_fold (inner_l4_sum);
+    }
+  else if (ip6->protocol == IP_PROTOCOL_UDP)
+    {
+      inner_l4_old_sum = inner_l4_sum = udp->checksum;
+      cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port);
+      udp->checksum = ip_csum_fold (inner_l4_sum);
+    }
+  else
+    return;
+
+  /* UDP/TCP checksum changed */
+  sum = ip_csum_update (sum, inner_l4_old_sum, inner_l4_sum,
+                       ip4_header_t /* cheat */ ,
+                       checksum);
+
+  /* UDP/TCP Ports changed */
+  if (old_port[VLIB_TX] && new_port[VLIB_TX])
+    sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
+                         ip4_header_t /* cheat */ ,
+                         length /* changed member */ );
+
+  if (old_port[VLIB_RX] && new_port[VLIB_RX])
+    sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
+                         ip4_header_t /* cheat */ ,
+                         length /* changed member */ );
+
+
+  cnat_ip6_translate_l3 (ip6, new_addr);
+  /* IP src/dst addr changed */
+  if (has_ip6_address (&new_addr[VLIB_TX]))
+    {
+      sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
+      sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
+      sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
+      sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
+    }
+
+  if (has_ip6_address (&new_addr[VLIB_RX]))
+    {
+      sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
+      sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
+      sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
+      sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
+    }
+
+  icmp->checksum = ip_csum_fold (sum);
 }
 
 static_always_inline void
@@ -236,27 +479,26 @@ cnat_translation_ip6 (const cnat_session_t * session,
 
   if (ip6->protocol == IP_PROTOCOL_TCP)
     {
-      if (PREDICT_FALSE (tcp->checksum))
-       cnat_ip6_translate_l4 (ip6, udp, &tcp->checksum, new_addr, new_port);
-      else
-       {
-         udp->dst_port = new_port[VLIB_TX];
-         udp->src_port = new_port[VLIB_RX];
-       }
+      ip_csum_t sum = tcp->checksum;
+      cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port);
+      tcp->checksum = ip_csum_fold (sum);
+      cnat_ip6_translate_l3 (ip6, new_addr);
       cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index);
     }
   else if (ip6->protocol == IP_PROTOCOL_UDP)
     {
-      if (PREDICT_FALSE (udp->checksum))
-       cnat_ip6_translate_l4 (ip6, udp, &udp->checksum, new_addr, new_port);
-      else
-       {
-         udp->dst_port = new_port[VLIB_TX];
-         udp->src_port = new_port[VLIB_RX];
-       }
+      ip_csum_t sum = udp->checksum;
+      cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port);
+      udp->checksum = ip_csum_fold (sum);
+      cnat_ip6_translate_l3 (ip6, new_addr);
+    }
+  else if (ip6->protocol == IP_PROTOCOL_ICMP6)
+    {
+      /* SNAT only if src_addr was translated */
+      u8 snat_outer_ip = cmp_ip6_address (&ip6->src_address,
+                                         &session->key.cs_ip[VLIB_RX].ip6);
+      cnat_translation_icmp6 (ip6, udp, new_addr, new_port, snat_outer_ip);
     }
-
-  cnat_ip6_translate_l3 (ip6, new_addr);
 }
 
 static_always_inline void
@@ -265,36 +507,80 @@ cnat_session_make_key (vlib_buffer_t * b, ip_address_family_t af,
 {
   udp_header_t *udp;
   cnat_session_t *session = (cnat_session_t *) bkey;
+  session->key.cs_af = af;
+  session->key.__cs_pad[0] = 0;
+  session->key.__cs_pad[1] = 0;
   if (AF_IP4 == af)
     {
       ip4_header_t *ip4;
       ip4 = vlib_buffer_get_current (b);
-      udp = (udp_header_t *) (ip4 + 1);
-      session->key.cs_af = AF_IP4;
-      session->key.__cs_pad[0] = 0;
-      session->key.__cs_pad[1] = 0;
+      if (PREDICT_FALSE (ip4->protocol == IP_PROTOCOL_ICMP))
+       {
+         icmp46_header_t *icmp = (icmp46_header_t *) (ip4 + 1);
+         if (!icmp_type_is_error_message (icmp->type))
+           goto error;
+         ip4 = (ip4_header_t *) (icmp + 2);    /* Use inner packet */
+         udp = (udp_header_t *) (ip4 + 1);
+         /* Swap dst & src for search as ICMP payload is reversed */
+         ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
+                               &ip4->dst_address);
+         ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
+                               &ip4->src_address);
+         session->key.cs_proto = ip4->protocol;
+         session->key.cs_port[VLIB_TX] = udp->src_port;
+         session->key.cs_port[VLIB_RX] = udp->dst_port;
+       }
+      else
+       {
+         udp = (udp_header_t *) (ip4 + 1);
+         ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
+                               &ip4->dst_address);
+         ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
+                               &ip4->src_address);
+         session->key.cs_proto = ip4->protocol;
+         session->key.cs_port[VLIB_RX] = udp->src_port;
+         session->key.cs_port[VLIB_TX] = udp->dst_port;
+       }
 
-      ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX], &ip4->dst_address);
-      ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX], &ip4->src_address);
-      session->key.cs_port[VLIB_RX] = udp->src_port;
-      session->key.cs_port[VLIB_TX] = udp->dst_port;
-      session->key.cs_proto = ip4->protocol;
     }
   else
     {
       ip6_header_t *ip6;
       ip6 = vlib_buffer_get_current (b);
-      udp = (udp_header_t *) (ip6 + 1);
-      session->key.cs_af = AF_IP6;
-      session->key.__cs_pad[0] = 0;
-      session->key.__cs_pad[1] = 0;
-
-      ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX], &ip6->dst_address);
-      ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX], &ip6->src_address);
-      session->key.cs_port[VLIB_RX] = udp->src_port;
-      session->key.cs_port[VLIB_TX] = udp->dst_port;
-      session->key.cs_proto = ip6->protocol;
+      if (PREDICT_FALSE (ip6->protocol == IP_PROTOCOL_ICMP6))
+       {
+         icmp46_header_t *icmp = (icmp46_header_t *) (ip6 + 1);
+         if (!icmp6_type_is_error_message (icmp->type))
+           goto error;
+         ip6 = (ip6_header_t *) (icmp + 2);    /* Use inner packet */
+         udp = (udp_header_t *) (ip6 + 1);
+         /* Swap dst & src for search as ICMP payload is reversed */
+         ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX],
+                               &ip6->dst_address);
+         ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
+                               &ip6->src_address);
+         session->key.cs_proto = ip6->protocol;
+         session->key.cs_port[VLIB_TX] = udp->src_port;
+         session->key.cs_port[VLIB_RX] = udp->dst_port;
+       }
+      else
+       {
+         udp = (udp_header_t *) (ip6 + 1);
+         ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
+                               &ip6->dst_address);
+         ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX],
+                               &ip6->src_address);
+         session->key.cs_port[VLIB_RX] = udp->src_port;
+         session->key.cs_port[VLIB_TX] = udp->dst_port;
+         session->key.cs_proto = ip6->protocol;
+       }
     }
+  return;
+
+error:
+  /* Ensure we dont find anything */
+  session->key.cs_proto = 0;
+  return;
 }
 
 /**
@@ -312,32 +598,6 @@ cnat_session_create (cnat_session_t * session, cnat_node_ctx_t * ctx,
   clib_bihash_kv_40_48_t rvalue;
   int rv;
 
-  /* create the reverse flow key */
-  ip46_address_copy (&rsession->key.cs_ip[VLIB_RX],
-                    &session->value.cs_ip[VLIB_TX]);
-  ip46_address_copy (&rsession->key.cs_ip[VLIB_TX],
-                    &session->value.cs_ip[VLIB_RX]);
-  rsession->key.cs_proto = session->key.cs_proto;
-  rsession->key.__cs_pad[0] = 0;
-  rsession->key.__cs_pad[1] = 0;
-  rsession->key.cs_af = ctx->af;
-  rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX];
-  rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX];
-
-  /* First search for existing reverse session */
-  rv = clib_bihash_search_inline_2_40_48 (&cnat_session_db, &rkey, &rvalue);
-  if (!rv)
-    {
-      /* Reverse session already exists
-         corresponding client should also exist
-         we only need to refcnt the timestamp */
-      cnat_session_t *found_rsession = (cnat_session_t *) & rvalue;
-      session->value.cs_ts_index = found_rsession->value.cs_ts_index;
-      cnat_timestamp_inc_refcnt (session->value.cs_ts_index);
-      clib_bihash_add_del_40_48 (&cnat_session_db, bkey, 1 /* is_add */ );
-      goto create_rsession;
-    }
-
   session->value.cs_ts_index = cnat_timestamp_new (ctx->now);
   clib_bihash_add_del_40_48 (&cnat_session_db, bkey, 1);
 
@@ -382,10 +642,31 @@ cnat_session_create (cnat_session_t * session, cnat_node_ctx_t * ctx,
     }
   else
     {
+      /* Refcount reverse session */
       cnat_client_cnt_session (cc);
     }
 
-create_rsession:
+  /* create the reverse flow key */
+  ip46_address_copy (&rsession->key.cs_ip[VLIB_RX],
+                    &session->value.cs_ip[VLIB_TX]);
+  ip46_address_copy (&rsession->key.cs_ip[VLIB_TX],
+                    &session->value.cs_ip[VLIB_RX]);
+  rsession->key.cs_proto = session->key.cs_proto;
+  rsession->key.__cs_pad[0] = 0;
+  rsession->key.__cs_pad[1] = 0;
+  rsession->key.cs_af = ctx->af;
+  rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX];
+  rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX];
+
+  /* First search for existing reverse session */
+  rv = clib_bihash_search_inline_2_40_48 (&cnat_session_db, &rkey, &rvalue);
+  if (!rv)
+    {
+      /* Reverse session already exists
+         cleanup before creating for refcnts */
+      cnat_session_t *found_rsession = (cnat_session_t *) & rvalue;
+      cnat_session_free (found_rsession);
+    }
   /* add the reverse flow */
   ip46_address_copy (&rsession->value.cs_ip[VLIB_RX],
                     &session->key.cs_ip[VLIB_TX]);
index cc1421b..aaa9e16 100644 (file)
@@ -81,7 +81,8 @@ cnat_snat_inline (vlib_main_t * vm,
   vnet_feature_next (&arc_next0, b);
   next0 = arc_next0;
 
-  if (iproto != IP_PROTOCOL_UDP && iproto != IP_PROTOCOL_TCP)
+  if (iproto != IP_PROTOCOL_UDP && iproto != IP_PROTOCOL_TCP
+      && iproto != IP_PROTOCOL_ICMP && iproto != IP_PROTOCOL_ICMP6)
     {
       /* Dont translate */
       goto trace;
index 574b72f..10f228f 100644 (file)
@@ -95,7 +95,8 @@ cnat_vip_inline (vlib_main_t * vm,
 
   cc = cnat_client_get (vnet_buffer (b)->ip.adj_index[VLIB_TX]);
 
-  if (iproto != IP_PROTOCOL_UDP && iproto != IP_PROTOCOL_TCP)
+  if (iproto != IP_PROTOCOL_UDP && iproto != IP_PROTOCOL_TCP
+      && iproto != IP_PROTOCOL_ICMP && iproto != IP_PROTOCOL_ICMP6)
     {
       /* Dont translate & follow the fib programming */
       next0 = cc->cc_parent.dpoi_next_node;
@@ -214,6 +215,7 @@ cnat_vip_inline (vlib_main_t * vm,
        }
       session->value.cs_lbi = dpo0->dpoi_index;
 
+      /* refcnt session in current client */
       cnat_client_cnt_session (cc);
       cnat_session_create (session, ctx, rsession_flags);
       created_session = 1;
@@ -222,7 +224,6 @@ cnat_vip_inline (vlib_main_t * vm,
       vnet_buffer (b)->ip.adj_index[VLIB_TX] = session->value.cs_lbi;
     }
 
-
   if (AF_IP4 == ctx->af)
     cnat_translation_ip4 (session, ip4, udp0);
   else
index 9e1e893..4699dcc 100644 (file)
@@ -146,6 +146,11 @@ extern u64 cnat_session_scan (vlib_main_t * vm, f64 start_time, int i);
  */
 extern int cnat_session_purge (void);
 
+/**
+ * Free a session & update refcounts
+ */
+extern void cnat_session_free (cnat_session_t * session);
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
index 518d733..34cd8b5 100644 (file)
@@ -7,8 +7,11 @@ from vpp_ip import DpoProto
 
 from scapy.packet import Raw
 from scapy.layers.l2 import Ether
-from scapy.layers.inet import IP, UDP, TCP
-from scapy.layers.inet6 import IPv6
+from scapy.layers.inet import IP, UDP, TCP, ICMP
+from scapy.layers.inet import IPerror, TCPerror, UDPerror, ICMPerror
+from scapy.layers.inet6 import IPv6, IPerror6, ICMPv6DestUnreach
+
+import struct
 
 from ipaddress import ip_address, ip_network, \
     IPv4Address, IPv6Address, IPv4Network, IPv6Network
@@ -39,6 +42,10 @@ class Ep(object):
         return {'addr': self.ip,
                 'port': self.port}
 
+    @property
+    def isV6(self):
+        return ":" in self.ip
+
     def __str__(self):
         return ("%s:%d" % (self.ip, self.port))
 
@@ -180,10 +187,10 @@ class TestCNatTranslation(VppTestCase):
             i.admin_down()
         super(TestCNatTranslation, self).tearDown()
 
-    def cnat_create_translation(self, vip, nbr, isV6=False):
-        ip_v = "ip6" if isV6 else "ip4"
+    def cnat_create_translation(self, vip, nbr):
+        ip_v = "ip6" if vip.isV6 else "ip4"
         dep = Ep(getattr(self.pg1.remote_hosts[nbr], ip_v), 4000 + nbr)
-        sep = Ep("::", 0) if isV6 else Ep("0.0.0.0", 0)
+        sep = Ep("::", 0) if vip.isV6 else Ep("0.0.0.0", 0)
         t1 = VppCNatTranslation(
             self, vip.l4p, vip,
             [EpTuple(sep, dep), EpTuple(sep, dep)])
@@ -341,7 +348,7 @@ class TestCNatTranslation(VppTestCase):
 
         trs = []
         for nbr, vip in enumerate(vips):
-            trs.append(self.cnat_create_translation(vip, nbr, isV6=isV6))
+            trs.append(self.cnat_create_translation(vip, nbr))
 
         self.logger.info(self.vapi.cli("sh cnat client"))
         self.logger.info(self.vapi.cli("sh cnat translation"))
@@ -372,8 +379,10 @@ class TestCNatTranslation(VppTestCase):
             n_tries += 1
             sessions = self.vapi.cnat_session_dump()
             self.sleep(2)
+            print(self.vapi.cli("show cnat session verbose"))
 
         self.assertTrue(n_tries < 100)
+        self.vapi.cli("test cnat scanner off")
 
         #
         # load some flows again and purge
@@ -398,6 +407,109 @@ class TestCNatTranslation(VppTestCase):
         self.vapi.cnat_session_purge()
         self.assertFalse(self.vapi.cnat_session_dump())
 
+    def test_icmp(self):
+        # """ CNat Translation ICMP errors """
+        # An ICMP error forwarded to the client must embed the packet as
+        # the client sent it (dst = VIP); the outer source becomes the VIP
+        # only when the error is sent by the translated backend itself.
+        vips = [
+            Ep("30.0.0.1", 5555),
+            Ep("30.0.0.2", 5554),
+            Ep("30.0.0.2", 5553, UDP),
+            Ep("30::1", 6666),
+            Ep("30::2", 5553, UDP),
+        ]
+        sport = 1234
+
+        self.pg0.generate_remote_hosts(len(vips))
+        self.pg0.configure_ipv6_neighbors()
+        self.pg0.configure_ipv4_neighbors()
+
+        self.pg1.generate_remote_hosts(len(vips))
+        self.pg1.configure_ipv6_neighbors()
+        self.pg1.configure_ipv4_neighbors()
+
+        self.vapi.cli("test cnat scanner off")
+        # one translation per VIP, towards pg1 host nbr, port 4000 + nbr
+        for nbr, vip in enumerate(vips):
+            self.cnat_create_translation(vip, nbr)
+
+        self.logger.info(self.vapi.cli("sh cnat client"))
+        self.logger.info(self.vapi.cli("sh cnat translation"))
+
+        for nbr, vip in enumerate(vips):
+            if vip.isV6:
+                client_addr = self.pg0.remote_hosts[0].ip6
+                remote_addr = self.pg1.remote_hosts[nbr].ip6
+                remote2_addr = self.pg2.remote_hosts[0].ip6
+            else:
+                client_addr = self.pg0.remote_hosts[0].ip4
+                remote_addr = self.pg1.remote_hosts[nbr].ip4
+                remote2_addr = self.pg2.remote_hosts[0].ip4
+            IP46 = IPv6 if vip.isV6 else IP
+            # from client to vip
+            p1 = (Ether(dst=self.pg0.local_mac,
+                        src=self.pg0.remote_hosts[0].mac) /
+                  IP46(src=client_addr, dst=vip.ip) /
+                  vip.l4p(sport=sport, dport=vip.port) /
+                  Raw())
+
+            rxs = self.send_and_expect(self.pg0, p1 * N_PKTS, self.pg1)
+
+            for rx in rxs:
+                self.assert_packet_checksums_valid(rx)
+                self.assertEqual(rx[IP46].dst, remote_addr)
+                self.assertEqual(rx[vip.l4p].dport, 4000 + nbr)
+                self.assertEqual(rx[IP46].src, client_addr)
+                self.assertEqual(rx[vip.l4p].sport, sport)
+
+            # the translated packet is what the ICMP errors below embed
+            InnerIP = rxs[0][IP46]
+
+            # scapy layers to build the ICMP error and dissect its payload
+            ICMP46 = ICMPv6DestUnreach if vip.isV6 else ICMP
+            ICMPelem = ICMPv6DestUnreach(code=1) if vip.isV6 else ICMP(type=11)
+            TCPUDPError = TCPerror if vip.l4p == TCP else UDPerror
+            IP46error = IPerror6 if vip.isV6 else IPerror
+
+            # from vip to client, ICMP error
+            p1 = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) /
+                  IP46(src=remote_addr, dst=client_addr) /
+                  ICMPelem / InnerIP)
+
+            rxs = self.send_and_expect(self.pg1, p1 * N_PKTS, self.pg0)
+
+            for rx in rxs:
+                self.assert_packet_checksums_valid(rx)
+                self.assertEqual(rx[IP46].src, vip.ip)
+                self.assertEqual(rx[ICMP46][IP46error].src, client_addr)
+                self.assertEqual(rx[ICMP46][IP46error].dst, vip.ip)
+                self.assertEqual(rx[ICMP46][IP46error]
+                                 [TCPUDPError].sport, sport)
+                self.assertEqual(rx[ICMP46][IP46error]
+                                 [TCPUDPError].dport, vip.port)
+
+            # from other remote to client, ICMP error
+            # outside shouldn't be NAT-ed
+            p1 = (Ether(dst=self.pg2.local_mac, src=self.pg2.remote_mac) /
+                  IP46(src=remote2_addr, dst=client_addr) /
+                  ICMPelem / InnerIP)
+
+            # inject on pg2: the frame is built with pg2's MACs, not pg1's
+            rxs = self.send_and_expect(self.pg2, p1 * N_PKTS, self.pg0)
+
+            for rx in rxs:
+                self.assert_packet_checksums_valid(rx)
+                self.assertEqual(rx[IP46].src, remote2_addr)
+                self.assertEqual(rx[ICMP46][IP46error].src, client_addr)
+                self.assertEqual(rx[ICMP46][IP46error].dst, vip.ip)
+                self.assertEqual(rx[ICMP46][IP46error]
+                                 [TCPUDPError].sport, sport)
+                self.assertEqual(rx[ICMP46][IP46error]
+                                 [TCPUDPError].dport, vip.port)
+
+        self.vapi.cnat_session_purge()
+
     def test_cnat6(self):
         # """ CNat Translation ipv6 """
         vips = [
@@ -478,7 +590,7 @@ class TestCNatSourceNAT(VppTestCase):
 
     def cnat_test_sourcenat(self, srcNatAddr, l4p=TCP, isV6=False):
         ip_v = "ip6" if isV6 else "ip4"
-        ip_class = IPv6 if isV6 else IP
+        IP46 = IPv6 if isV6 else IP
         sports = [1234, 1235, 1236]
         dports = [6661, 6662, 6663]
 
@@ -493,14 +605,17 @@ class TestCNatSourceNAT(VppTestCase):
         t1 = self.cnat_set_snat_address(srcNatAddr, self.pg0, isV6)
 
         for nbr, remote_host in enumerate(self.pg1.remote_hosts):
+            if isV6:
+                client_addr = self.pg0.remote_hosts[0].ip6
+                remote_addr = self.pg1.remote_hosts[nbr].ip6
+            else:
+                client_addr = self.pg0.remote_hosts[0].ip4
+                remote_addr = self.pg1.remote_hosts[nbr].ip4
             # from pods to outside network
             p1 = (
-                Ether(
-                    dst=self.pg0.local_mac,
-                    src=self.pg0.remote_hosts[0].mac) /
-                ip_class(
-                    src=getattr(self.pg0.remote_hosts[0], ip_v),
-                    dst=getattr(remote_host, ip_v)) /
+                Ether(dst=self.pg0.local_mac,
+                      src=self.pg0.remote_hosts[0].mac) /
+                IP46(src=client_addr, dst=remote_addr) /
                 l4p(sport=sports[nbr], dport=dports[nbr]) /
                 Raw())
 
@@ -510,21 +625,16 @@ class TestCNatSourceNAT(VppTestCase):
                 self.pg1)
             for rx in rxs:
                 self.assert_packet_checksums_valid(rx)
-                self.assertEqual(
-                    rx[ip_class].dst,
-                    getattr(remote_host, ip_v))
+                self.assertEqual(rx[IP46].dst, remote_addr)
                 self.assertEqual(rx[l4p].dport, dports[nbr])
-                self.assertEqual(
-                    rx[ip_class].src,
-                    srcNatAddr)
+                self.assertEqual(rx[IP46].src, srcNatAddr)
                 sport = rx[l4p].sport
 
             # from outside to pods
             p2 = (
-                Ether(
-                    dst=self.pg1.local_mac,
-                    src=self.pg1.remote_hosts[nbr].mac) /
-                ip_class(src=getattr(remote_host, ip_v), dst=srcNatAddr) /
+                Ether(dst=self.pg1.local_mac,
+                      src=self.pg1.remote_hosts[nbr].mac) /
+                IP46(src=remote_addr, dst=srcNatAddr) /
                 l4p(sport=dports[nbr], dport=sport) /
                 Raw())
 
@@ -535,18 +645,14 @@ class TestCNatSourceNAT(VppTestCase):
 
             for rx in rxs:
                 self.assert_packet_checksums_valid(rx)
-                self.assertEqual(
-                    rx[ip_class].dst,
-                    getattr(self.pg0.remote_hosts[0], ip_v))
+                self.assertEqual(rx[IP46].dst, client_addr)
                 self.assertEqual(rx[l4p].dport, sports[nbr])
                 self.assertEqual(rx[l4p].sport, dports[nbr])
-                self.assertEqual(
-                    rx[ip_class].src,
-                    getattr(remote_host, ip_v))
+                self.assertEqual(rx[IP46].src, remote_addr)
 
             # add remote host to exclude list
             subnet_mask = 100 if isV6 else 16
-            subnet = getattr(remote_host, ip_v) + "/" + str(subnet_mask)
+            subnet = "%s/%d" % (remote_addr, subnet_mask)
             exclude_subnet = ip_network(subnet, strict=False)
 
             t1.cnat_exclude_subnet(exclude_subnet)
@@ -558,13 +664,9 @@ class TestCNatSourceNAT(VppTestCase):
                 self.pg1)
             for rx in rxs:
                 self.assert_packet_checksums_valid(rx)
-                self.assertEqual(
-                    rx[ip_class].dst,
-                    getattr(remote_host, ip_v))
+                self.assertEqual(rx[IP46].dst, remote_addr)
                 self.assertEqual(rx[l4p].dport, dports[nbr])
-                self.assertEqual(
-                    rx[ip_class].src,
-                    getattr(self.pg0.remote_hosts[0], ip_v))
+                self.assertEqual(rx[IP46].src, client_addr)
 
             # remove remote host from exclude list
             t1.cnat_exclude_subnet(exclude_subnet, isAdd=False)
@@ -577,13 +679,9 @@ class TestCNatSourceNAT(VppTestCase):
 
             for rx in rxs:
                 self.assert_packet_checksums_valid(rx)
-                self.assertEqual(
-                    rx[ip_class].dst,
-                    getattr(remote_host, ip_v))
+                self.assertEqual(rx[IP46].dst, remote_addr)
                 self.assertEqual(rx[l4p].dport, dports[nbr])
-                self.assertEqual(
-                    rx[ip_class].src,
-                    srcNatAddr)
+                self.assertEqual(rx[IP46].src, srcNatAddr)
 
     def test_cnat6_sourcenat(self):
         # """ CNat Source Nat ipv6 """