cnat: Destination based NAT 62/27162/27
authorNeale Ranns <nranns@cisco.com>
Tue, 19 May 2020 07:17:19 +0000 (07:17 +0000)
committerAndrew Yourtchenko <ayourtch@gmail.com>
Mon, 31 Aug 2020 09:23:32 +0000 (09:23 +0000)
Type: feature

Signed-off-by: Neale Ranns <nranns@cisco.com>
Change-Id: I64a99a4fbc674212944247793fd5c1fb701408cb

25 files changed:
MAINTAINERS
src/plugins/cnat/CMakeLists.txt [new file with mode: 0644]
src/plugins/cnat/FEATURE.yaml [new file with mode: 0644]
src/plugins/cnat/bihash_40_48.h [new file with mode: 0644]
src/plugins/cnat/cnat.api [new file with mode: 0644]
src/plugins/cnat/cnat.rst [new file with mode: 0644]
src/plugins/cnat/cnat_api.c [new file with mode: 0644]
src/plugins/cnat/cnat_client.c [new file with mode: 0644]
src/plugins/cnat/cnat_client.h [new file with mode: 0644]
src/plugins/cnat/cnat_error.def [new file with mode: 0644]
src/plugins/cnat/cnat_node.h [new file with mode: 0644]
src/plugins/cnat/cnat_node_snat.c [new file with mode: 0644]
src/plugins/cnat/cnat_node_vip.c [new file with mode: 0644]
src/plugins/cnat/cnat_scanner.c [new file with mode: 0644]
src/plugins/cnat/cnat_session.c [new file with mode: 0644]
src/plugins/cnat/cnat_session.h [new file with mode: 0644]
src/plugins/cnat/cnat_snat.c [new file with mode: 0644]
src/plugins/cnat/cnat_snat.h [new file with mode: 0644]
src/plugins/cnat/cnat_translation.c [new file with mode: 0644]
src/plugins/cnat/cnat_translation.h [new file with mode: 0644]
src/plugins/cnat/cnat_types.c [new file with mode: 0644]
src/plugins/cnat/cnat_types.h [new file with mode: 0644]
src/plugins/cnat/test/test_cnat.py [new file with mode: 0644]
src/vnet/ip/ip_types.c
src/vnet/lisp-cp/control.c

index 017557b..e928186 100644 (file)
@@ -682,6 +682,12 @@ I: urpf
 M:     Neale Ranns <nranns@cisco.com>
 F:     src/plugins/urpf
 
+Plugin - CNat
+I:     cnat
+M:     Nathan Skrzypczak <nathan.skrzypczak@gmail.com>
+M:     Neale Ranns <nranns@cisco.com>
+F:     src/plugins/cnat
+
 VPP Config Tooling
 I:     vpp_config
 M:     John DeNisco <jdenisco@cisco.com>
diff --git a/src/plugins/cnat/CMakeLists.txt b/src/plugins/cnat/CMakeLists.txt
new file mode 100644 (file)
index 0000000..b37b02c
--- /dev/null
@@ -0,0 +1,28 @@
+# Copyright (c) 2018 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(cnat
+  SOURCES
+  cnat_api.c
+  cnat_client.c
+  cnat_node_snat.c
+  cnat_node_vip.c
+  cnat_scanner.c
+  cnat_session.c
+  cnat_translation.c
+  cnat_types.c
+  cnat_snat.c
+
+  API_FILES
+  cnat.api
+)
diff --git a/src/plugins/cnat/FEATURE.yaml b/src/plugins/cnat/FEATURE.yaml
new file mode 100644 (file)
index 0000000..9deda2e
--- /dev/null
@@ -0,0 +1,17 @@
+---
+name: Cloud NAT
+maintainer: Nathan Skrzypczak <nathan.skrzypczak@gmail.com>
+features:
+  - Destination based address/port translation
+  - Conditional sourceNATing based on prefix exclusions
+
+description: "This plugin is intended to complement the VPP's plugin_nat for
+              Cloud use-cases. It allows for source/destination address/port
+              translation based on multiple criteria. It is intended to be modular
+              enough so that one could write a use-case optimised translation function
+              without having to deal with actually re-writing packets or maintaining
+              sessions.
+              This plugin supports multithreading. Workers share a unique bihash where
+              sessions are stored."
+state: development
+properties: [API, CLI, MULTITHREAD]
diff --git a/src/plugins/cnat/bihash_40_48.h b/src/plugins/cnat/bihash_40_48.h
new file mode 100644 (file)
index 0000000..df345ce
--- /dev/null
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#undef BIHASH_TYPE
+#undef BIHASH_KVP_PER_PAGE
+#undef BIHASH_32_64_SVM
+#undef BIHASH_ENABLE_STATS
+#undef BIHASH_KVP_AT_BUCKET_LEVEL
+#undef BIHASH_LAZY_INSTANTIATE
+#undef BIHASH_BUCKET_PREFETCH_CACHE_LINES
+
+#define BIHASH_TYPE _40_48
+#define BIHASH_KVP_PER_PAGE 2
+#define BIHASH_KVP_AT_BUCKET_LEVEL 1
+#define BIHASH_LAZY_INSTANTIATE 1
+#define BIHASH_BUCKET_PREFETCH_CACHE_LINES 2
+
+#ifndef __included_bihash_40_48_h__
+#define __included_bihash_40_48_h__
+
+#include <vppinfra/crc32.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/format.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/xxhash.h>
+
+/**
+ * Key/value pair stored in a 40_48 bihash:
+ * five u64 words (40 bytes) of key followed by six u64 words (48 bytes)
+ * of value.
+ */
+typedef struct
+{
+  u64 key[5];
+  u64 value[6];
+} clib_bihash_kv_40_48_t;
+
+/**
+ * Tell whether a key/value slot is unused.
+ *
+ * Free slots are filled with 0xff bytes; only the first key word and the
+ * first value word are inspected.
+ *
+ * @param v the key/value pair to inspect
+ * @return 1 when the slot is free, 0 otherwise
+ */
+static inline int
+clib_bihash_is_free_40_48 (const clib_bihash_kv_40_48_t * v)
+{
+  const u64 all_ones = ~0ULL;
+
+  return (v->key[0] == all_ones && v->value[0] == all_ones) ? 1 : 0;
+}
+
+/**
+ * Hash the 40-byte key of a key/value pair.
+ *
+ * Uses clib_crc32c over the whole key when clib_crc32c_uses_intrinsics is
+ * defined; otherwise XOR-folds the five key words and hashes the result
+ * with clib_xxhash.
+ *
+ * @param v the key/value pair whose key is hashed
+ * @return the hash of the key
+ */
+static inline u64
+clib_bihash_hash_40_48 (const clib_bihash_kv_40_48_t * v)
+{
+#ifdef clib_crc32c_uses_intrinsics
+  return clib_crc32c ((u8 *) v->key, 40);
+#else
+  u64 folded = 0;
+  int i;
+
+  for (i = 0; i < 5; i++)
+    folded ^= v->key[i];
+  return clib_xxhash (folded);
+#endif
+}
+
+/**
+ * Format function for a 40_48 key/value pair.
+ *
+ * Prints the five key words and the six value words as unsigned 64-bit
+ * decimals.
+ *
+ * @param s    vector the text is appended to
+ * @param args va_list carrying one clib_bihash_kv_40_48_t pointer
+ * @return the (possibly reallocated) vector
+ */
+static inline u8 *
+format_bihash_kvp_40_48 (u8 * s, va_list * args)
+{
+  clib_bihash_kv_40_48_t *v = va_arg (*args, clib_bihash_kv_40_48_t *);
+
+  /* every word is a u64, so every conversion must be %llu */
+  s =
+    format (s,
+           "key %llu %llu %llu %llu %llu value %llu %llu %llu %llu %llu %llu",
+           v->key[0], v->key[1], v->key[2], v->key[3], v->key[4],
+           v->value[0], v->value[1], v->value[2], v->value[3], v->value[4],
+           v->value[5]);
+  return s;
+}
+
+/**
+ * Compare two 40-byte (five u64 words) keys.
+ *
+ * All variants XOR the two keys word by word; the keys match when every
+ * XOR result is zero.
+ *
+ * @param a first key
+ * @param b second key
+ * @return non-zero when the keys are equal, 0 otherwise
+ */
+static inline int
+clib_bihash_key_compare_40_48 (u64 * a, u64 * b)
+{
+#if defined (CLIB_HAVE_VEC512)
+  /* NOTE(review): this loads eight u64 lanes from each key, i.e. three
+   * words beyond the 40-byte key; the 0x1f mask restricts the test to the
+   * first five lanes. Presumably safe because keys live inside an 88-byte
+   * key/value pair - confirm for callers passing a standalone key. */
+  u64x8 v;
+  v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b);
+  return (u64x8_is_zero_mask (v) & 0x1f) == 0;
+#elif defined (CLIB_HAVE_VEC256)
+  /* word 4 is folded in through the initializer, words 0-3 via one load */
+  u64x4 v = { a[4] ^ b[4], 0, 0, 0 };
+  v |= u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b);
+  return u64x4_is_all_zero (v);
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
+  /* word 4 through the initializer, words 0-1 and 2-3 via two loads */
+  u64x2 v = { a[4] ^ b[4], 0 };
+  v |= u64x2_load_unaligned (a) ^ u64x2_load_unaligned (b);
+  v |= u64x2_load_unaligned (a + 2) ^ u64x2_load_unaligned (b + 2);
+  return u64x2_is_all_zero (v);
+#else
+  /* scalar fallback: OR together the XOR of each of the five words */
+  return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3])
+         | (a[4] ^ b[4])) == 0;
+#endif
+}
+
+#undef __included_bihash_template_h__
+#include <vppinfra/bihash_template.h>
+
+#endif /* __included_bihash_40_48_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/cnat/cnat.api b/src/plugins/cnat/cnat.api
new file mode 100644 (file)
index 0000000..10af9b9
--- /dev/null
@@ -0,0 +1,136 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \file
+    This file defines the vpp control-plane API messages
+    used to control the CNat plugin
+*/
+
+option version = "0.1.0";
+import "vnet/ip/ip_types.api";
+import "vnet/fib/fib_types.api";
+
+/** \brief Flags that can be set on a translation
+    CNAT_TRANSLATION_ALLOC_PORT: presumably asks the plugin to allocate a
+    port for the translation - TODO confirm against cnat_translation.c
+*/
+enum cnat_translation_flags:u8
+{
+  CNAT_TRANSLATION_ALLOC_PORT = 1,
+};
+
+/** \brief An endpoint: an IP address and a port
+    @param addr - the IP address (v4 or v6)
+    @param port - the port; byte-swapped by the plugin's encode/decode
+                  helpers when moving to/from the internal representation
+*/
+typedef cnat_endpoint
+{
+  vl_api_address_t addr;
+  u16 port;
+};
+
+/** \brief A pair of endpoints describing one path (backend) of a translation
+    @param dst_ep - the destination endpoint
+    @param src_ep - the source endpoint
+*/
+typedef cnat_endpoint_tuple
+{
+  vl_api_cnat_endpoint_t dst_ep;
+  vl_api_cnat_endpoint_t src_ep;
+};
+
+/** \brief A translation
+    @param vip - the endpoint (address and port) being translated
+    @param id - the translation's ID
+    @param ip_proto - the IP protocol the translation applies to
+    @param is_real_ip - when zero the update handler adds the plugin's
+                        'exclusive' flag to the translation
+    @param flags - translation flags, passed through to the plugin
+    @param n_paths - number of entries in paths
+    @param paths - the paths (backends) of the translation
+*/
+typedef cnat_translation
+{
+  vl_api_cnat_endpoint_t vip;
+  u32 id;
+  vl_api_ip_proto_t ip_proto;
+  u8 is_real_ip;
+  u8 flags;
+  u8 n_paths;
+  vl_api_cnat_endpoint_tuple_t paths[n_paths];
+};
+
+/** \brief Create or update a translation
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param translation - the translation to program
+*/
+define cnat_translation_update
+{
+  u32 client_index;
+  u32 context;
+  vl_api_cnat_translation_t translation;
+};
+
+/** \brief Reply to cnat_translation_update
+    @param context - sender context, to match reply w/ request
+    @param retval - return value for the request
+    @param id - ID of the translation; ~0 when the request's IP protocol
+                could not be decoded
+*/
+define cnat_translation_update_reply
+{
+  u32 context;
+  i32 retval;
+  u32 id;
+};
+
+/** \brief Delete a translation
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param id - ID of the translation to delete
+*/
+autoreply define cnat_translation_del
+{
+  u32 client_index;
+  u32 context;
+  u32 id;
+};
+
+/** \brief Details of one translation, sent in response to
+           cnat_translation_dump
+    @param context - sender context copied from the dump request
+    @param translation - the translation
+*/
+define cnat_translation_details
+{
+  u32 context;
+  vl_api_cnat_translation_t translation;
+};
+
+/** \brief Dump all translations
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define cnat_translation_dump
+{
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Purge the plugin's state
+           The handler processes the client throttle pool, then purges
+           the sessions and the translations.
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+autoreply define cnat_session_purge
+{
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief A session
+    @param src - source endpoint of the session key
+    @param dst - destination endpoint of the session key
+    @param new - endpoint taken from the session's value (the rewrite)
+    @param ip_proto - the session's IP protocol
+    @param timestamp - NOTE(review): not written by the details handler
+                       in cnat_api.c, so it is sent as zero - confirm
+*/
+typedef cnat_session
+{
+  vl_api_cnat_endpoint_t src;
+  vl_api_cnat_endpoint_t dst;
+  vl_api_cnat_endpoint_t new;
+  vl_api_ip_proto_t ip_proto;
+  f64 timestamp;
+};
+
+/** \brief Details of one session, sent in response to cnat_session_dump
+    @param context - sender context copied from the dump request
+    @param session - the session
+*/
+define cnat_session_details
+{
+  u32 context;
+  vl_api_cnat_session_t session;
+};
+
+/** \brief Dump all sessions
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define cnat_session_dump
+{
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Set the addresses used for source NAT
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param snat_ip4 - the IPv4 source NAT address
+    @param snat_ip6 - the IPv6 source NAT address
+*/
+autoreply define cnat_set_snat_addresses
+{
+  u32 client_index;
+  u32 context;
+  vl_api_ip4_address_t snat_ip4;
+  vl_api_ip6_address_t snat_ip6;
+};
+
+/** \brief Add or delete a source NAT prefix
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_add - non-zero to add the prefix, zero to delete it
+    @param prefix - the prefix
+*/
+autoreply define cnat_add_del_snat_prefix
+{
+  u32 client_index;
+  u32 context;
+  u8 is_add;
+  vl_api_prefix_t prefix;
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/cnat/cnat.rst b/src/plugins/cnat/cnat.rst
new file mode 100644 (file)
index 0000000..f1b3def
--- /dev/null
@@ -0,0 +1,81 @@
+.. _dev_cnat:
+
+.. toctree::
+
+Cloud NAT
+=========
+
+Overview
+________
+
+This plugin covers specific NAT use-cases that come mostly
+from the container networking world. Contrary to the
+NAT concepts used for e.g. a home gateway, there is no notion
+of 'outside' and 'inside'. We handle Virtual (or Real) IPs and
+translations of the packets destined to them
+
+Terminology & Usage
+___________________
+
+Setting up the NAT consists in creating a translation
+that has several backends. A translation is a 3-tuple containing:
+a fully qualified IP address, a port and a protocol. All packets
+destined to it (ip, port) will then choose one of the backends,
+and follow its rewrite rules.
+
+A backend consists of four rewrites components (source & destination
+address, source & destination port) that shall be applied to packets
+on the way in, and reverted on the way back.
+
+Backends are equally load-balanced with a flow hash. The choice
+of a backend for a flow will trigger the creation of a NAT session,
+that will store the packet rewrite to do and the one to undo
+until the flow is reset or a timeout is reached
+
+Translating Addresses
+---------------------
+
+In this example, all packets destined to 30.0.0.2:80 will be
+rewritten so that their destination IP is 20.0.0.1 and destination
+port 8080. Here 30.0.0.2 has to be a virtual IP, it cannot be
+assigned to an interface
+
+.. code-block:: console
+
+  cnat translation add proto TCP vip 30.0.0.2 80 to ->20.0.0.1 8080
+
+
+If 30.0.0.2 is the address of an interface, we can use the following
+to do the same translation, and additionally change the source
+address to 1.2.3.4
+
+.. code-block:: console
+
+  cnat translation add proto TCP real 30.0.0.2 80 to 1.2.3.4->20.0.0.1 8080
+
+To show existing translations and sessions you can use
+
+.. code-block:: console
+
+  cnat show session verbose
+  cnat show translation
+
+
+SourceNATing outgoing traffic
+-----------------------------
+
+An independent part of the plugin allows changing the source address
+of outgoing traffic on a per-interface basis.
+
+.. code-block:: console
+
+  cnat snat with 30::1
+  cnat snat exclude 20::/100
+  ex_ctl _calico_master cnat snat exclude 10::/100
+  ex_ctl _calico_master set interface feature tap0 ip6-cnat-snat arc ip6-unicast
+
+
+
+Extending the NAT
+_________________
+
diff --git a/src/plugins/cnat/cnat_api.c b/src/plugins/cnat/cnat_api.c
new file mode 100644 (file)
index 0000000..014f75c
--- /dev/null
@@ -0,0 +1,317 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stddef.h>
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <cnat/cnat_translation.h>
+#include <cnat/cnat_session.h>
+#include <cnat/cnat_client.h>
+#include <cnat/cnat_snat.h>
+
+#include <vnet/ip/ip_types_api.h>
+
+#include <vpp/app/version.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+/* define message IDs */
+#include <vnet/format_fns.h>
+#include <cnat/cnat.api_enum.h>
+#include <cnat/cnat.api_types.h>
+
+/**
+ * Base message ID for the plugin
+ */
+static u32 cnat_base_msg_id;
+
+#define REPLY_MSG_ID_BASE cnat_base_msg_id
+
+#include <vlibapi/api_helper_macros.h>
+
+/**
+ * Convert an API endpoint into the plugin's internal representation.
+ *
+ * @param in  endpoint as carried in an API message
+ * @param out endpoint to fill; the port is byte-swapped with
+ *            clib_net_to_host_u16
+ */
+static void
+cnat_endpoint_decode (const vl_api_cnat_endpoint_t * in,
+                     cnat_endpoint_t * out)
+{
+  u16 host_port = clib_net_to_host_u16 (in->port);
+
+  ip_address_decode2 (&in->addr, &out->ce_ip);
+  out->ce_port = host_port;
+}
+
+/**
+ * Convert an API endpoint tuple (one path) into its internal
+ * representation by decoding both of its endpoints.
+ *
+ * @param in  tuple as carried in an API message
+ * @param out tuple to fill
+ */
+static void
+cnat_endpoint_tuple_decode (const vl_api_cnat_endpoint_tuple_t * in,
+                           cnat_endpoint_tuple_t * out)
+{
+  const vl_api_cnat_endpoint_t *api_src = &in->src_ep;
+  const vl_api_cnat_endpoint_t *api_dst = &in->dst_ep;
+
+  cnat_endpoint_decode (api_src, &out->src_ep);
+  cnat_endpoint_decode (api_dst, &out->dst_ep);
+}
+
+/**
+ * Convert an internal endpoint into its API representation.
+ *
+ * @param in  the plugin's endpoint
+ * @param out API endpoint to fill; the port is converted from host to
+ *            network byte order
+ */
+static void
+cnat_endpoint_encode (const cnat_endpoint_t * in,
+                     vl_api_cnat_endpoint_t * out)
+{
+  ip_address_encode2 (&in->ce_ip, &out->addr);
+  /* encoding goes host -> wire; same byte swap as before, but named for
+   * the direction actually taken */
+  out->port = clib_host_to_net_u16 (in->ce_port);
+}
+
+/**
+ * API handler: create or update a translation.
+ *
+ * Decodes the IP protocol, the paths and the VIP from the message, calls
+ * cnat_translation_update and replies with the returned translation ID.
+ * If the IP protocol cannot be decoded the reply carries the decode error
+ * in retval and an ID of ~0.
+ *
+ * @param mp the request message
+ */
+static void
+vl_api_cnat_translation_update_t_handler (vl_api_cnat_translation_update_t
+                                         * mp)
+{
+  vl_api_cnat_translation_update_reply_t *rmp;
+  cnat_endpoint_t vip;
+  cnat_endpoint_tuple_t *paths = NULL, *path;
+  ip_protocol_t ip_proto;
+  u32 id = ~0;
+  u8 flags;
+  int rv = 0;
+  u8 pi;
+
+  rv = ip_proto_decode (mp->translation.ip_proto, &ip_proto);
+
+  if (rv)
+    goto done;
+
+  /* NOTE(review): with n_paths == 0 the index passed here is -1;
+   * presumably vec_validate treats that as a no-op - confirm */
+  vec_validate (paths, mp->translation.n_paths - 1);
+
+  for (pi = 0; pi < mp->translation.n_paths; pi++)
+    {
+      path = &paths[pi];
+      cnat_endpoint_tuple_decode (&mp->translation.paths[pi], path);
+    }
+  cnat_endpoint_decode (&mp->translation.vip, &vip);
+
+  /* a translation that is not for a 'real' IP is marked exclusive */
+  flags = mp->translation.flags;
+  if (!mp->translation.is_real_ip)
+    flags |= CNAT_FLAG_EXCLUSIVE;
+  id = cnat_translation_update (&vip, ip_proto, paths, flags);
+
+  /* the temporary path vector is released here, after the update call */
+  vec_free (paths);
+
+done:
+  /* *INDENT-OFF* */
+  REPLY_MACRO2 (VL_API_CNAT_TRANSLATION_UPDATE_REPLY,
+  ({
+    rmp->id = htonl (id);
+  }));
+  /* *INDENT-ON* */
+}
+
+/**
+ * API handler: delete the translation whose ID is carried in the message
+ * and reply with the result of cnat_translation_delete.
+ *
+ * @param mp the request message
+ */
+static void
+vl_api_cnat_translation_del_t_handler (vl_api_cnat_translation_del_t * mp)
+{
+  vl_api_cnat_translation_del_reply_t *rmp;
+  int rv;
+
+  rv = cnat_translation_delete (ntohl (mp->id));
+
+  REPLY_MACRO (VL_API_CNAT_TRANSLATION_DEL_REPLY);
+}
+
+/**
+ * Context handed to the walk callbacks of the dump handlers
+ */
+typedef struct cnat_dump_walk_ctx_t_
+{
+  /* registration of the client the details messages are sent to */
+  vl_api_registration_t *rp;
+  /* context of the dump request, copied into every details message */
+  u32 context;
+} cnat_dump_walk_ctx_t;
+
+/**
+ * Walk callback: send the details of one translation to an API client.
+ *
+ * The message is sized for, and filled with, at most 255 paths since the
+ * API's n_paths field is a u8. Bounding the fill loop by that same count
+ * guarantees the writes stay inside the allocated message.
+ *
+ * @param cti  index of the translation
+ * @param args the walk context (cnat_dump_walk_ctx_t)
+ * @return WALK_CONTINUE, always
+ */
+static walk_rc_t
+cnat_translation_send_details (u32 cti, void *args)
+{
+  vl_api_cnat_translation_details_t *mp;
+  cnat_dump_walk_ctx_t *ctx;
+  vl_api_cnat_endpoint_tuple_t *path;
+  cnat_translation_t *ct;
+  cnat_ep_trk_t *trk;
+  size_t msg_size;
+  u32 n_tracked, pi;
+  u8 n_paths;
+
+  ctx = args;
+  ct = cnat_translation_get (cti);
+
+  /* clamp rather than truncate: the count must fit the u8 API field */
+  n_tracked = vec_len (ct->ct_paths);
+  n_paths = (n_tracked > 255) ? 255 : n_tracked;
+  msg_size = sizeof (*mp) + sizeof (mp->translation.paths[0]) * n_paths;
+
+  mp = vl_msg_api_alloc_zero (msg_size);
+  mp->_vl_msg_id = ntohs (VL_API_CNAT_TRANSLATION_DETAILS + cnat_base_msg_id);
+
+  /* fill in the message */
+  mp->context = ctx->context;
+  mp->translation.n_paths = n_paths;
+  mp->translation.id = htonl (cti);
+  cnat_endpoint_encode (&ct->ct_vip, &mp->translation.vip);
+  mp->translation.ip_proto = ip_proto_encode (ct->ct_proto);
+
+  /* only the n_paths entries the message has room for are written */
+  path = mp->translation.paths;
+  for (pi = 0; pi < n_paths; pi++)
+    {
+      trk = &ct->ct_paths[pi];
+      cnat_endpoint_encode (&trk->ct_ep[VLIB_TX], &path->dst_ep);
+      cnat_endpoint_encode (&trk->ct_ep[VLIB_RX], &path->src_ep);
+      path++;
+    }
+
+  vl_api_send_msg (ctx->rp, (u8 *) mp);
+
+  return (WALK_CONTINUE);
+}
+
+/**
+ * API handler: send one details message per translation to the
+ * requesting client. Nothing is sent when the client's registration
+ * cannot be found.
+ *
+ * @param mp the request message
+ */
+static void
+vl_api_cnat_translation_dump_t_handler (vl_api_cnat_translation_dump_t * mp)
+{
+  vl_api_registration_t *reg;
+  cnat_dump_walk_ctx_t walk_ctx;
+
+  reg = vl_api_client_index_to_registration (mp->client_index);
+  if (!reg)
+    return;
+
+  walk_ctx.rp = reg;
+  walk_ctx.context = mp->context;
+
+  cnat_translation_walk (cnat_translation_send_details, &walk_ctx);
+}
+
+/**
+ * Build an ip_address_t from an ip46_address_t and an address family.
+ *
+ * @param nh the address to copy
+ * @param af the address family to record
+ * @param ip the address to fill
+ */
+static void
+ip_address2_from_46 (const ip46_address_t * nh,
+                    ip_address_family_t af, ip_address_t * ip)
+{
+  ip_addr_46 (ip) = *nh;
+  ip_addr_version (ip) = af;
+}
+
+/**
+ * Walk callback: send the details of one session to an API client.
+ *
+ * The 'new' endpoint is taken from the session value (TX side); the
+ * 'src' and 'dst' endpoints from the RX and TX sides of the session key.
+ * The message's timestamp field is not written and so stays zero.
+ *
+ * NOTE(review): each port is byte-swapped here and then again inside
+ * cnat_endpoint_encode, so the value sent is the stored port unchanged -
+ * presumably because sessions keep ports in network order; confirm.
+ *
+ * @param session the session to describe
+ * @param args    the walk context (cnat_dump_walk_ctx_t)
+ * @return WALK_CONTINUE, always
+ */
+static walk_rc_t
+cnat_session_send_details (const cnat_session_t * session, void *args)
+{
+  vl_api_cnat_session_details_t *mp;
+  cnat_dump_walk_ctx_t *ctx;
+  cnat_endpoint_t ep;
+
+  ctx = args;
+
+  mp = vl_msg_api_alloc_zero (sizeof (*mp));
+  mp->_vl_msg_id = ntohs (VL_API_CNAT_SESSION_DETAILS + cnat_base_msg_id);
+
+  /* fill in the message */
+  mp->context = ctx->context;
+
+  /* ep is a scratch endpoint reused for each of the three encodings */
+  ip_address2_from_46 (&session->value.cs_ip[VLIB_TX], session->key.cs_af,
+                      &ep.ce_ip);
+  ep.ce_port = clib_host_to_net_u16 (session->value.cs_port[VLIB_TX]);
+  cnat_endpoint_encode (&ep, &mp->session.new);
+
+  ip_address2_from_46 (&session->key.cs_ip[VLIB_RX], session->key.cs_af,
+                      &ep.ce_ip);
+  ep.ce_port = clib_host_to_net_u16 (session->key.cs_port[VLIB_RX]);
+  cnat_endpoint_encode (&ep, &mp->session.src);
+
+  ip_address2_from_46 (&session->key.cs_ip[VLIB_TX], session->key.cs_af,
+                      &ep.ce_ip);
+  ep.ce_port = clib_host_to_net_u16 (session->key.cs_port[VLIB_TX]);
+  cnat_endpoint_encode (&ep, &mp->session.dst);
+
+  mp->session.ip_proto = ip_proto_encode (session->key.cs_proto);
+
+  vl_api_send_msg (ctx->rp, (u8 *) mp);
+
+  return (WALK_CONTINUE);
+}
+
+/**
+ * API handler: send one details message per session to the requesting
+ * client. Nothing is sent when the client's registration cannot be found.
+ *
+ * @param mp the request message
+ */
+static void
+vl_api_cnat_session_dump_t_handler (vl_api_cnat_session_dump_t * mp)
+{
+  vl_api_registration_t *reg;
+  cnat_dump_walk_ctx_t walk_ctx;
+
+  reg = vl_api_client_index_to_registration (mp->client_index);
+  if (!reg)
+    return;
+
+  walk_ctx.rp = reg;
+  walk_ctx.context = mp->context;
+
+  cnat_session_walk (cnat_session_send_details, &walk_ctx);
+}
+
+/**
+ * API handler: purge the sessions and the translations.
+ *
+ * The client throttle pool is processed first; the reply's retval is the
+ * bitwise OR of the two purge results.
+ *
+ * @param mp the request message
+ */
+static void
+vl_api_cnat_session_purge_t_handler (vl_api_cnat_session_purge_t * mp)
+{
+  vl_api_cnat_session_purge_reply_t *rmp;
+  int rv;
+
+  cnat_client_throttle_pool_process ();
+  rv = cnat_session_purge ();
+  rv |= cnat_translation_purge ();
+
+  REPLY_MACRO (VL_API_CNAT_SESSION_PURGE_REPLY);
+}
+
+/**
+ * API handler: store the IPv4 and IPv6 source NAT addresses carried in
+ * the message into cnat_main. Always replies with a retval of 0.
+ *
+ * @param mp the request message
+ */
+static void
+vl_api_cnat_set_snat_addresses_t_handler (vl_api_cnat_set_snat_addresses_t
+                                         * mp)
+{
+  vl_api_cnat_set_snat_addresses_reply_t *rmp;
+  int rv = 0;
+
+  ip4_address_decode (mp->snat_ip4, &cnat_main.snat_ip4);
+  ip6_address_decode (mp->snat_ip6, &cnat_main.snat_ip6);
+
+  REPLY_MACRO (VL_API_CNAT_SET_SNAT_ADDRESSES_REPLY);
+}
+
+/**
+ * API handler: add (is_add non-zero) or delete (is_add zero) a source
+ * NAT prefix and reply with the result of the operation.
+ *
+ * @param mp the request message
+ */
+static void
+  vl_api_cnat_add_del_snat_prefix_t_handler
+  (vl_api_cnat_add_del_snat_prefix_t * mp)
+{
+  vl_api_cnat_add_del_snat_prefix_reply_t *rmp;
+  ip_prefix_t pfx;
+  int rv;
+
+  ip_prefix_decode2 (&mp->prefix, &pfx);
+  if (mp->is_add)
+    rv = cnat_add_snat_prefix (&pfx);
+  else
+    rv = cnat_del_snat_prefix (&pfx);
+
+  REPLY_MACRO (VL_API_CNAT_ADD_DEL_SNAT_PREFIX_REPLY);
+}
+
+#include <cnat/cnat.api.c>
+
+/**
+ * Init function: record the base message ID returned by
+ * setup_message_id_table for the plugin's API messages.
+ *
+ * @param vm the vlib main (unused)
+ * @return NULL, i.e. no error
+ */
+static clib_error_t *
+cnat_api_init (vlib_main_t * vm)
+{
+  /* Ask for a correctly-sized block of API message decode slots */
+  cnat_base_msg_id = setup_message_id_table ();
+
+  return (NULL);
+}
+
+VLIB_INIT_FUNCTION (cnat_api_init);
+
+/* Plugin registration: version and one-line description */
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = {
+    .version = VPP_BUILD_VER,
+    .description = "CNat Translate",
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/cnat/cnat_client.c b/src/plugins/cnat/cnat_client.c
new file mode 100644 (file)
index 0000000..10d9966
--- /dev/null
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/dpo/drop_dpo.h>
+
+#include <cnat/cnat_client.h>
+#include <cnat/cnat_translation.h>
+
+/** Pool of clients; holds the originals and their interpose clones */
+cnat_client_t *cnat_client_pool;
+
+/** Client DB: IP address to pool index tables, and the throttle pools */
+cnat_client_db_t cnat_client_db;
+
+/** DPO type of the clients, registered in cnat_client_init */
+dpo_type_t cnat_client_dpo;
+
+/**
+ * Tell whether a client is an interpose clone. Clones are recognised by
+ * their invalid FIB entry index.
+ *
+ * @param cc the client
+ * @return non-zero when cc is a clone
+ */
+static_always_inline u8
+cnat_client_is_clone (cnat_client_t * cc)
+{
+  fib_node_index_t fei = cc->cc_fei;
+
+  return (fei == FIB_NODE_INDEX_INVALID);
+}
+
+/**
+ * Remove a client from the address lookup table matching its address
+ * family.
+ *
+ * @param cc the client to remove
+ */
+static void
+cnat_client_db_remove (cnat_client_t * cc)
+{
+  if (ip_addr_version (&cc->cc_ip) != AF_IP4)
+    {
+      hash_unset_mem_free (&cnat_client_db.crd_cip6,
+                          &ip_addr_v6 (&cc->cc_ip));
+      return;
+    }
+
+  hash_unset (cnat_client_db.crd_cip4, ip_addr_v4 (&cc->cc_ip).as_u32);
+}
+
+/**
+ * Destroy an original (non-clone) client: withdraw its FIB entry, remove
+ * it from the lookup tables, release its parent DPO and return it to the
+ * pool.
+ *
+ * NOTE(review): cnat_client_add sources a FIB entry for every client,
+ * but the entry is only deleted here when the client is not exclusive -
+ * confirm that leaving the exclusive entry in place is intended.
+ *
+ * @param cc the client to destroy
+ */
+static void
+cnat_client_destroy (cnat_client_t * cc)
+{
+  ASSERT (!cnat_client_is_clone (cc));
+  if (!(cc->flags & CNAT_FLAG_EXCLUSIVE))
+    {
+      ASSERT (fib_entry_is_sourced (cc->cc_fei, cnat_fib_source));
+      fib_table_entry_delete_index (cc->cc_fei, cnat_fib_source);
+      ASSERT (!fib_entry_is_sourced (cc->cc_fei, cnat_fib_source));
+    }
+  cnat_client_db_remove (cc);
+  dpo_reset (&cc->cc_parent);
+  pool_put (cnat_client_pool, cc);
+}
+
+/**
+ * Drop one session reference from the client with the given address and
+ * destroy the client when that count reaches zero and the client is
+ * flagged as expiring.
+ *
+ * @param ip the client's address
+ * @param af the address family of ip
+ */
+void
+cnat_client_free_by_ip (ip46_address_t * ip, u8 af)
+{
+  cnat_client_t *client;
+
+  if (AF_IP4 == af)
+    client = cnat_client_ip4_find (&ip->ip4);
+  else
+    client = cnat_client_ip6_find (&ip->ip6);
+
+  /* This can happen if the translation gets deleted
+     before the session */
+  if (NULL == client)
+    return;
+
+  if (0 != cnat_client_uncnt_session (client))
+    return;
+
+  if (client->flags & CNAT_FLAG_EXPIRES)
+    cnat_client_destroy (client);
+}
+
+/**
+ * Process the addresses stored in the per-thread throttle pools.
+ *
+ * For every stored address whose client now exists, the client's session
+ * count is incremented and the address is removed from the pool.
+ * Addresses without a client are left in place. Each thread's pool is
+ * walked with that thread's throttle lock held.
+ *
+ * Should be called before cnat_client_free_by_ip so that the session
+ * refcounts are up to date.
+ */
+void
+cnat_client_throttle_pool_process ()
+{
+  /* This processes ips stored in the throttle pool
+     to update session refcounts
+     and should be called before cnat_client_free_by_ip */
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  cnat_client_t *cc;
+  int nthreads;
+  u32 *del_vec = NULL, *ai;
+  ip_address_t *addr;
+  nthreads = tm->n_threads + 1;
+  for (int i = 0; i < nthreads; i++)
+    {
+      /* the scratch vector is reused, emptied, for each thread */
+      vec_reset_length (del_vec);
+      clib_spinlock_lock (&cnat_client_db.throttle_pool_lock[i]);
+      /* *INDENT-OFF* */
+      pool_foreach(addr, cnat_client_db.throttle_pool[i], ({
+       cc = (AF_IP4 == addr->version ?
+             cnat_client_ip4_find (&ip_addr_v4(addr)) :
+             cnat_client_ip6_find (&ip_addr_v6(addr)));
+       /* Client might not already be created */
+       if (NULL != cc)
+         {
+           cnat_client_cnt_session (cc);
+           vec_add1(del_vec, addr - cnat_client_db.throttle_pool[i]);
+         }
+      }));
+      /* *INDENT-ON* */
+      /* entries are freed after the walk, not while iterating the pool */
+      vec_foreach (ai, del_vec)
+      {
+       /* Free session */
+       addr = pool_elt_at_index (cnat_client_db.throttle_pool[i], *ai);
+       pool_put (cnat_client_db.throttle_pool[i], addr);
+      }
+      clib_spinlock_unlock (&cnat_client_db.throttle_pool_lock[i]);
+    }
+
+  /* release the scratch vector, it was leaked on every call otherwise */
+  vec_free (del_vec);
+}
+
+/**
+ * Take a translation reference on a client. The client must not be one
+ * that expires.
+ *
+ * @param cci index of the client
+ */
+void
+cnat_client_translation_added (index_t cci)
+{
+  cnat_client_t *client = cnat_client_get (cci);
+
+  ASSERT (!(client->flags & CNAT_FLAG_EXPIRES));
+  client->tr_refcnt += 1;
+}
+
+/**
+ * Release a translation reference on a client and destroy the client
+ * when no translation references it any more. The client must not be one
+ * that expires.
+ *
+ * @param cci index of the client
+ */
+void
+cnat_client_translation_deleted (index_t cci)
+{
+  cnat_client_t *client = cnat_client_get (cci);
+
+  ASSERT (!(client->flags & CNAT_FLAG_EXPIRES));
+
+  client->tr_refcnt -= 1;
+  if (client->tr_refcnt != 0)
+    return;
+
+  cnat_client_destroy (client);
+}
+
+/**
+ * Record a client in the address lookup table matching its address
+ * family; the value stored is the client's pool index.
+ *
+ * @param cc the client to add
+ */
+static void
+cnat_client_db_add (cnat_client_t * cc)
+{
+  index_t pool_index = cc - cnat_client_pool;
+
+  if (ip_addr_version (&cc->cc_ip) != AF_IP4)
+    {
+      hash_set_mem_alloc (&cnat_client_db.crd_cip6,
+                         &ip_addr_v6 (&cc->cc_ip), pool_index);
+      return;
+    }
+
+  hash_set (cnat_client_db.crd_cip4, ip_addr_v4 (&cc->cc_ip).as_u32,
+           pool_index);
+}
+
+
+/**
+ * Find or create the client for an IP address.
+ *
+ * When a client already exists for the address its index is returned and
+ * nothing else changes. Otherwise a client is allocated, recorded in the
+ * lookup tables, stacked on the drop DPO and a FIB entry is sourced for
+ * its address: exclusive when CNAT_FLAG_EXCLUSIVE is set in flags,
+ * interpose otherwise.
+ *
+ * @param ip    the client's address
+ * @param flags CNAT_FLAG_* flags stored on a newly created client
+ * @return the pool index of the client
+ */
+index_t
+cnat_client_add (const ip_address_t * ip, u8 flags)
+{
+  cnat_client_t *cc;
+  dpo_id_t tmp = DPO_INVALID;
+  fib_node_index_t fei;
+  dpo_proto_t dproto;
+  fib_prefix_t pfx;
+  index_t cci;
+  u32 fib_flags;
+
+  /* check again if we need this client */
+  cc = (AF_IP4 == ip->version ?
+       cnat_client_ip4_find (&ip->ip.ip4) :
+       cnat_client_ip6_find (&ip->ip.ip6));
+
+  if (NULL != cc)
+    return (cc - cnat_client_pool);
+
+
+  pool_get_aligned (cnat_client_pool, cc, CLIB_CACHE_LINE_BYTES);
+  cc->cc_locks = 1;
+  cci = cc - cnat_client_pool;
+  cc->parent_cci = cci;
+  cc->flags = flags;
+  /* the element is not zeroed by the pool, so a recycled slot would
+   * otherwise start with the previous owner's reference counts */
+  cc->tr_refcnt = 0;
+  cc->session_refcnt = 0;
+
+  ip_address_copy (&cc->cc_ip, ip);
+  cnat_client_db_add (cc);
+
+  ip_address_to_fib_prefix (&cc->cc_ip, &pfx);
+
+  dproto = fib_proto_to_dpo (pfx.fp_proto);
+  dpo_set (&tmp, cnat_client_dpo, dproto, cci);
+  dpo_stack (cnat_client_dpo, dproto, &cc->cc_parent, drop_dpo_get (dproto));
+
+  fib_flags = FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT;
+  fib_flags |= (flags & CNAT_FLAG_EXCLUSIVE) ?
+    FIB_ENTRY_FLAG_EXCLUSIVE : FIB_ENTRY_FLAG_INTERPOSE;
+
+  fei = fib_table_entry_special_dpo_add (CNAT_FIB_TABLE,
+                                        &pfx, cnat_fib_source, fib_flags,
+                                        &tmp);
+
+  /* re-fetch the client: adding the FIB entry can allocate clones from
+   * the same pool, which may move it */
+  cc = pool_elt_at_index (cnat_client_pool, cci);
+  cc->cc_fei = fei;
+
+  return (cci);
+}
+
+/**
+ * Learn a client: find or create the expiring client for the given
+ * address, count one session on it, then process the throttle pools.
+ *
+ * @param l the learn argument carrying the client's address
+ */
+void
+cnat_client_learn (const cnat_learn_arg_t * l)
+{
+  /* RPC call to add a client from the dataplane */
+  cnat_client_t *client;
+  index_t client_index;
+
+  client_index = cnat_client_add (&l->addr, CNAT_FLAG_EXPIRES);
+  client = pool_elt_at_index (cnat_client_pool, client_index);
+  cnat_client_cnt_session (client);
+
+  /* Process throttled calls if any */
+  cnat_client_throttle_pool_process ();
+}
+
+/**
+ * Make the interpose clone of a client DPO.
+ *
+ * A zeroed client is allocated from the pool, marked as a clone (invalid
+ * FIB entry index), given the original's parent index, flags and
+ * address, and stacked on the parent DPO provided by the FIB.
+ *
+ * @param original the client DPO being interposed
+ * @param parent   the DPO the clone is stacked on
+ * @param clone    set to the DPO of the newly created clone
+ */
+static void
+cnat_client_dpo_interpose (const dpo_id_t * original,
+                          const dpo_id_t * parent, dpo_id_t * clone)
+{
+  cnat_client_t *cc, *cc_clone;
+
+  /* allocate first, then fetch the original: the allocation may move
+   * the pool */
+  pool_get_zero (cnat_client_pool, cc_clone);
+  cc = cnat_client_get (original->dpoi_index);
+
+  cc_clone->cc_fei = FIB_NODE_INDEX_INVALID;
+  cc_clone->parent_cci = cc->parent_cci;
+  cc_clone->flags = cc->flags;
+  ip_address_copy (&cc_clone->cc_ip, &cc->cc_ip);
+
+  /* stack the clone on the FIB provided parent */
+  dpo_stack (cnat_client_dpo, original->dpoi_proto, &cc_clone->cc_parent,
+            parent);
+
+  /* return the clone */
+  dpo_set (clone,
+          cnat_client_dpo,
+          original->dpoi_proto, cc_clone - cnat_client_pool);
+}
+
+/**
+ * Check that no client is left: asserts that both address lookup tables
+ * and the client pool are empty. Nothing is freed here.
+ *
+ * @return 0, always
+ */
+int
+cnat_client_purge (void)
+{
+  ASSERT (0 == hash_elts (cnat_client_db.crd_cip6));
+  ASSERT (0 == hash_elts (cnat_client_db.crd_cip4));
+  ASSERT (0 == pool_elts (cnat_client_pool));
+  return (0);
+}
+
+/**
+ * Format function for a client.
+ *
+ * Prints the client's index, address, translation and session refcounts
+ * and its flags; for a clone, the index it is a clone of and its parent
+ * DPO are printed as well.
+ *
+ * @param s    vector the text is appended to
+ * @param args va_list carrying the client's index (index_t) followed by
+ *             the indent (u32)
+ * @return the (possibly reallocated) vector
+ */
+u8 *
+format_cnat_client (u8 * s, va_list * args)
+{
+  index_t cci = va_arg (*args, index_t);
+  u32 indent = va_arg (*args, u32);
+
+  cnat_client_t *cc = pool_elt_at_index (cnat_client_pool, cci);
+
+  s = format (s, "[%d] cnat-client:[%U] tr:%d sess:%d", cci,
+             format_ip_address, &cc->cc_ip,
+             cc->tr_refcnt, cc->session_refcnt);
+  if (cc->flags & CNAT_FLAG_EXPIRES)
+    s = format (s, " expires");
+
+  if (cc->flags & CNAT_FLAG_EXCLUSIVE)
+    s = format (s, " exclusive");
+
+  if (cnat_client_is_clone (cc))
+    s = format (s, "\n%Uclone of [%d]\n%U%U",
+               format_white_space, indent + 2, cc->parent_cci,
+               format_white_space, indent + 2,
+               format_dpo_id, &cc->cc_parent, indent + 4);
+
+  return (s);
+}
+
+
+/**
+ * CLI handler for "show cnat client".
+ *
+ * Without argument every client is printed, followed by the number of
+ * clients and of timestamps. With a numeric argument only the client at
+ * that pool index is printed, or an error line when no client exists at
+ * that index.
+ *
+ * @param vm    the vlib main
+ * @param input the command's arguments
+ * @param cmd   the command (unused)
+ * @return NULL on success, an error when the input cannot be parsed
+ */
+static clib_error_t *
+cnat_client_show (vlib_main_t * vm,
+                 unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  index_t cci;
+
+  cci = INDEX_INVALID;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "%d", &cci))
+       ;
+      else
+       return (clib_error_return (0, "unknown input '%U'",
+                                  format_unformat_error, input));
+    }
+
+  if (INDEX_INVALID == cci)
+    {
+      /* *INDENT-OFF* */
+      pool_foreach_index(cci, cnat_client_pool, ({
+        vlib_cli_output(vm, "%U", format_cnat_client, cci, 0);
+      }))
+      /* *INDENT-ON* */
+
+      vlib_cli_output (vm, "%d clients", pool_elts (cnat_client_pool));
+      vlib_cli_output (vm, "%d timestamps", pool_elts (cnat_timestamps));
+    }
+  else if (pool_is_free_index (cnat_client_pool, cci))
+    {
+      /* out of range or unused slot: there is no such client */
+      vlib_cli_output (vm, "Invalid client ID:%d", cci);
+    }
+  else
+    {
+      vlib_cli_output (vm, "%U", format_cnat_client, cci, 0);
+    }
+
+  return (NULL);
+}
+
+/* CLI registration of "show cnat client", handled by cnat_client_show */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cnat_client_show_cmd_node, static) = {
+  .path = "show cnat client",
+  .function = cnat_client_show,
+  .short_help = "show cnat client",
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/* NULL terminated list of node names given for the IPv4 client DPO */
+const static char *const cnat_client_dpo_ip4_nodes[] = {
+  "ip4-cnat-tx",
+  NULL,
+};
+
+/* NULL terminated list of node names given for the IPv6 client DPO */
+const static char *const cnat_client_dpo_ip6_nodes[] = {
+  "ip6-cnat-tx",
+  NULL,
+};
+
+/* per-DPO-protocol node lists, passed to dpo_register_new_type */
+const static char *const *const cnat_client_dpo_nodes[DPO_PROTO_NUM] = {
+  [DPO_PROTO_IP4] = cnat_client_dpo_ip4_nodes,
+  [DPO_PROTO_IP6] = cnat_client_dpo_ip6_nodes,
+};
+
+/**
+ * DPO lock callback: take one lock on the client the DPO refers to.
+ *
+ * @param dpo the client DPO
+ */
+static void
+cnat_client_dpo_lock (dpo_id_t * dpo)
+{
+  cnat_client_t *client = cnat_client_get (dpo->dpoi_index);
+
+  client->cc_locks += 1;
+}
+
+/**
+ * DPO unlock callback: release one lock on the client the DPO refers to.
+ *
+ * When the last lock is released the client, which must be a clone, is
+ * returned to the pool after releasing the parent DPO it was stacked on.
+ *
+ * @param dpo the client DPO
+ */
+static void
+cnat_client_dpo_unlock (dpo_id_t * dpo)
+{
+  cnat_client_t *cc;
+
+  cc = cnat_client_get (dpo->dpoi_index);
+
+  cc->cc_locks--;
+
+  if (0 == cc->cc_locks)
+    {
+      ASSERT (cnat_client_is_clone (cc));
+      /* drop the lock taken on the parent when the clone was stacked,
+       * it would be leaked once the clone is back in the pool */
+      dpo_reset (&cc->cc_parent);
+      pool_put (cnat_client_pool, cc);
+    }
+}
+
+/**
+ * DPO format callback: print the client the DPO refers to with
+ * format_cnat_client.
+ *
+ * @param s  vector the text is appended to
+ * @param ap va_list carrying the client's index (index_t) followed by
+ *           the indent (u32)
+ * @return the (possibly reallocated) vector
+ */
+u8 *
+format_cnat_client_dpo (u8 * s, va_list * ap)
+{
+  index_t client_index = va_arg (*ap, index_t);
+  u32 margin = va_arg (*ap, u32);
+
+  return (format (s, "%U", format_cnat_client, client_index, margin));
+}
+
+/* Virtual function table registered for the client DPO type */
+const static dpo_vft_t cnat_client_dpo_vft = {
+  .dv_lock = cnat_client_dpo_lock,
+  .dv_unlock = cnat_client_dpo_unlock,
+  .dv_format = format_cnat_client_dpo,
+  .dv_mk_interpose = cnat_client_dpo_interpose,
+};
+
+/**
+ * Init function: register the client DPO type, create the IPv6 address
+ * lookup table and set up the per-thread throttle pools and their locks.
+ *
+ * @param vm the vlib main (unused)
+ * @return NULL, i.e. no error
+ */
+static clib_error_t *
+cnat_client_init (vlib_main_t * vm)
+{
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  int nthreads = tm->n_threads + 1;
+  int i;
+  cnat_client_dpo = dpo_register_new_type (&cnat_client_dpo_vft,
+                                          cnat_client_dpo_nodes);
+
+  /* keyed by the IPv6 address itself, the value is a uword */
+  cnat_client_db.crd_cip6 = hash_create_mem (0,
+                                            sizeof (ip6_address_t),
+                                            sizeof (uword));
+
+  /* NOTE(review): validating index nthreads presumably yields
+   * nthreads + 1 entries while only nthreads locks are initialised
+   * below - looks like one entry more than needed; confirm */
+  vec_validate (cnat_client_db.throttle_pool, nthreads);
+  vec_validate (cnat_client_db.throttle_pool_lock, nthreads);
+  for (i = 0; i < nthreads; i++)
+    clib_spinlock_init (&cnat_client_db.throttle_pool_lock[i]);
+
+  return (NULL);
+}
+
+VLIB_INIT_FUNCTION (cnat_client_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/cnat/cnat_client.h b/src/plugins/cnat/cnat_client.h
new file mode 100644 (file)
index 0000000..9bc622d
--- /dev/null
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CNAT_CLIENT_H__
+#define __CNAT_CLIENT_H__
+
+#include <cnat/cnat_types.h>
+
+/**
+ * A client is a representation of an IP address behind the NAT.
+ * A client thus sends packets to a VIP.
+ * Clients are learned in the Data-plane when they send packets,
+ * but, since they make additions to the FIB they must be programmed
+ * in the main thread. They are aged out when they become idle.
+ *
+ * A client interposes in the FIB graph for the prefix corresponding
+ * to the client (e.g. client's-IP/32). As a result this client object
+ * is cloned as the interpose DPO. The clones are removed when the lock
+ * count drops to zero. The originals are removed when the client ages.
+ * At forwarding time the client performs the reverse translation and
+ * then ships the packet to where the FIB would send it.
+ */
+typedef struct cnat_client_t_
+{
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+
+  /**
+   * the client's IP address
+   */
+  ip_address_t cc_ip;
+
+  /**
+   * How to send packets to this client post translation
+   */
+  dpo_id_t cc_parent;
+
+  /**
+   * the FIB entry this client sources
+   */
+  fib_node_index_t cc_fei;
+
+  /**
+   * number of DPO locks, maintained by the DPO lock/unlock callbacks
+   */
+  u32 cc_locks;
+
+  /**
+   * Translations refcount for cleanup
+   */
+  u32 tr_refcnt;
+
+  /**
+   * Session refcount for cleanup.
+   * Updated atomically by cnat_client_cnt_session () and
+   * cnat_client_uncnt_session (), always on the parent client.
+   */
+  u32 session_refcnt;
+
+  /**
+   * Parent cnat_client index if cloned via interpose
+   * or own index if vanilla client.
+   * Used to get translations & update session_refcnt
+   */
+  index_t parent_cci;
+
+  /**
+   * Client flags (presumably cnat_entry_flag_t bits - to be confirmed)
+   */
+  u8 flags;
+} cnat_client_t;
+
+extern u8 *format_cnat_client (u8 * s, va_list * args);
+extern void cnat_client_free_by_ip (ip46_address_t * addr, u8 af);
+
+extern cnat_client_t *cnat_client_pool;
+extern dpo_type_t cnat_client_dpo;
+
+#define CC_INDEX_INVALID ((u32)(~0))
+
+/**
+ * Fetch the client stored at index i of the client pool
+ */
+static_always_inline cnat_client_t *
+cnat_client_get (index_t i)
+{
+  cnat_client_t *client = pool_elt_at_index (cnat_client_pool, i);
+
+  return client;
+}
+
+/**
+ * Argument block passed to cnat_client_learn ()
+ */
+typedef struct cnat_learn_arg_t_
+{
+  ip_address_t addr;		/* address of the client to learn */
+} cnat_learn_arg_t;
+
+/**
+ * A translation that references this VIP was deleted
+ */
+extern void cnat_client_translation_deleted (index_t cci);
+
+/**
+ * A translation that references this VIP was added
+ */
+extern void cnat_client_translation_added (index_t cci);
+/**
+ * Called in the main thread by RPC from the workers to learn a
+ * new client
+ */
+extern void cnat_client_learn (const cnat_learn_arg_t * l);
+
+extern index_t cnat_client_add (const ip_address_t * ip, u8 flags);
+
+/**
+ * Check all the clients were purged by translation & session purge
+ */
+extern int cnat_client_purge (void);
+
+/**
+ * CNat Client (dpo) flags
+ *
+ * Values are single bits so they can be OR-ed together. Note that the
+ * first flag uses bit 1; bit 0 is not assigned in this enum.
+ */
+typedef enum
+{
+  /* IP already present in the FIB, need to interpose dpo */
+  CNAT_FLAG_EXCLUSIVE = (1 << 1),
+  /* Prune this entry */
+  CNAT_FLAG_EXPIRES = (1 << 2),
+} cnat_entry_flag_t;
+
+
+extern void cnat_client_throttle_pool_process ();
+
+/**
+ * DB of clients
+ *
+ * crd_cip4 is looked up with the IPv4 address as a u32 key, crd_cip6
+ * with a pointer to the IPv6 address; in both the stored value is an
+ * index into cnat_client_pool.
+ * throttle_pool and throttle_pool_lock are indexed by thread index.
+ */
+typedef struct cnat_client_db_t_
+{
+  uword *crd_cip4;
+  uword *crd_cip6;
+  /* Pool of addresses that have been throttled
+     and need to be refcounted before calling
+     cnat_client_free_by_ip */
+  ip_address_t **throttle_pool;
+  clib_spinlock_t *throttle_pool_lock;
+} cnat_client_db_t;
+
+extern cnat_client_db_t cnat_client_db;
+
+/**
+ * Look up the client registered for an IPv4 address.
+ *
+ * @return the client, or NULL when the address is not in the DB
+ */
+static_always_inline cnat_client_t *
+cnat_client_ip4_find (const ip4_address_t * ip)
+{
+  uword *slot = hash_get (cnat_client_db.crd_cip4, ip->as_u32);
+
+  if (NULL == slot)
+    return (NULL);
+
+  return (pool_elt_at_index (cnat_client_pool, slot[0]));
+}
+
+/**
+ * Look up the pool index of the client registered for an IPv4 address.
+ *
+ * @return the index, or CC_INDEX_INVALID (all ones) when not found
+ */
+static_always_inline u32
+cnat_client_ip4_find_index (const ip4_address_t * ip)
+{
+  uword *slot = hash_get (cnat_client_db.crd_cip4, ip->as_u32);
+
+  return (slot ? (u32) slot[0] : CC_INDEX_INVALID);
+}
+
+/**
+ * Look up the client registered for an IPv6 address.
+ *
+ * @return the client, or NULL when the address is not in the DB
+ */
+static_always_inline cnat_client_t *
+cnat_client_ip6_find (const ip6_address_t * ip)
+{
+  uword *slot = hash_get_mem (cnat_client_db.crd_cip6, ip);
+
+  if (NULL == slot)
+    return (NULL);
+
+  return (pool_elt_at_index (cnat_client_pool, slot[0]));
+}
+
+/**
+ * Account one more session on this client's parent.
+ *
+ * @return the parent's session refcount after the increment
+ */
+static_always_inline u32
+cnat_client_cnt_session (cnat_client_t * cc)
+{
+  u32 *refcnt = &cnat_client_get (cc->parent_cci)->session_refcnt;
+
+  return clib_atomic_add_fetch (refcnt, 1);
+}
+
+/**
+ * Remove one session from this client's parent accounting.
+ *
+ * @return the parent's session refcount after the decrement
+ */
+static_always_inline u32
+cnat_client_uncnt_session (cnat_client_t * cc)
+{
+  u32 *refcnt = &cnat_client_get (cc->parent_cci)->session_refcnt;
+
+  return clib_atomic_sub_fetch (refcnt, 1);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
+#endif
diff --git a/src/plugins/cnat/cnat_error.def b/src/plugins/cnat/cnat_error.def
new file mode 100644 (file)
index 0000000..f7809d8
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* One cnat_error (SYMBOL, "description") entry per error; the including
+ * file defines cnat_error (presumably to build the CNAT_ERROR_* values
+ * and cnat_error_strings used by the nodes - to be confirmed). */
+cnat_error (NONE, "no error")
+cnat_error (EXHAUSTED_PORTS, "no more free ports")
+
+
diff --git a/src/plugins/cnat/cnat_node.h b/src/plugins/cnat/cnat_node.h
new file mode 100644 (file)
index 0000000..58e81c1
--- /dev/null
@@ -0,0 +1,535 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CNAT_NODE_H__
+#define __CNAT_NODE_H__
+
+#include <vlibmemory/api.h>
+#include <cnat/cnat_session.h>
+#include <cnat/cnat_client.h>
+
+/**
+ * Per-packet handler invoked by cnat_node_inline () for every buffer.
+ *
+ * rv is the result of the session table lookup (0 means a session was
+ * found). session points at the found entry on a hit, otherwise at the
+ * key that was searched for. The returned value is stored as the
+ * buffer's next index.
+ */
+typedef uword (*cnat_node_sub_t) (vlib_main_t * vm,
+                                 vlib_node_runtime_t * node,
+                                 vlib_buffer_t * b,
+                                 cnat_node_ctx_t * ctx, int rv,
+                                 cnat_session_t * session);
+
+/**
+ * Inline translation functions
+ */
+
+/* Return 1 when the IPv6 address is not all-zeros, 0 otherwise */
+static_always_inline u8
+has_ip6_address (ip6_address_t * a)
+{
+  /* non-zero as soon as either 64-bit half carries a bit */
+  return (0 != (a->as_u64[0] | a->as_u64[1]));
+}
+
+/* Rewrite the L4 ports of an IPv4 packet and incrementally patch the
+ * L4 checksum at *checksum for both the port and the address change.
+ * A zero new address or port means that item is left untouched.
+ * The IPv4 addresses themselves are not written here. */
+static_always_inline void
+cnat_ip4_translate_l4 (ip4_header_t * ip4, udp_header_t * udp,
+                       u16 * checksum,
+                       ip4_address_t new_addr[VLIB_N_DIR],
+                       u16 new_port[VLIB_N_DIR])
+{
+  /* snapshot of the values being replaced */
+  const u32 dst_addr_was = ip4->dst_address.as_u32;
+  const u32 src_addr_was = ip4->src_address.as_u32;
+  const u16 dst_port_was = udp->dst_port;
+  const u16 src_port_was = udp->src_port;
+  ip_csum_t acc = *checksum;
+
+  if (0 != new_addr[VLIB_TX].as_u32)
+    {
+      acc = ip_csum_update (acc, dst_addr_was, new_addr[VLIB_TX].as_u32,
+                            ip4_header_t, dst_address);
+    }
+
+  if (0 != new_port[VLIB_TX])
+    {
+      udp->dst_port = new_port[VLIB_TX];
+      acc = ip_csum_update (acc, dst_port_was, new_port[VLIB_TX],
+                            ip4_header_t /* cheat */ ,
+                            length /* changed member */ );
+    }
+
+  if (0 != new_addr[VLIB_RX].as_u32)
+    {
+      acc = ip_csum_update (acc, src_addr_was, new_addr[VLIB_RX].as_u32,
+                            ip4_header_t, src_address);
+    }
+
+  if (0 != new_port[VLIB_RX])
+    {
+      udp->src_port = new_port[VLIB_RX];
+      acc = ip_csum_update (acc, src_port_was, new_port[VLIB_RX],
+                            ip4_header_t /* cheat */ ,
+                            length /* changed member */ );
+    }
+
+  *checksum = ip_csum_fold (acc);
+}
+
+/* Rewrite the IPv4 source/destination addresses and incrementally
+ * patch the IPv4 header checksum. A zero new address means that
+ * address is left untouched. */
+static_always_inline void
+cnat_ip4_translate_l3 (ip4_header_t * ip4, ip4_address_t new_addr[VLIB_N_DIR])
+{
+  /* snapshot of the values being replaced */
+  const u32 dst_addr_was = ip4->dst_address.as_u32;
+  const u32 src_addr_was = ip4->src_address.as_u32;
+  ip_csum_t acc = ip4->checksum;
+
+  if (0 != new_addr[VLIB_TX].as_u32)
+    {
+      ip4->dst_address = new_addr[VLIB_TX];
+      acc = ip_csum_update (acc, dst_addr_was, new_addr[VLIB_TX].as_u32,
+                            ip4_header_t, dst_address);
+    }
+
+  if (0 != new_addr[VLIB_RX].as_u32)
+    {
+      ip4->src_address = new_addr[VLIB_RX];
+      acc = ip_csum_update (acc, src_addr_was, new_addr[VLIB_RX].as_u32,
+                            ip4_header_t, src_address);
+    }
+
+  ip4->checksum = ip_csum_fold (acc);
+}
+
+/* Adjust the lifetime of the timestamp entry at index from the TCP
+ * flags: FIN and RST both apply CNAT_DEFAULT_TCP_RST_TIMEOUT, SYN+ACK
+ * applies the configured tcp_max_age. */
+static_always_inline void
+cnat_tcp_update_session_lifetime (tcp_header_t * tcp, u32 index)
+{
+  if (PREDICT_FALSE (tcp_fin (tcp)))
+    cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
+
+  if (PREDICT_FALSE (tcp_rst (tcp)))
+    cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
+
+  if (PREDICT_FALSE (tcp_syn (tcp) && tcp_ack (tcp)))
+    cnat_timestamp_set_lifetime (index, cnat_main.tcp_max_age);
+}
+
+/* Apply the translation stored in session to an IPv4 packet: L4 first
+ * (TCP and UDP only), then the IPv4 header. TCP packets also refresh
+ * the session lifetime. When the L4 checksum field is zero only the
+ * ports are written and no checksum is computed. */
+static_always_inline void
+cnat_translation_ip4 (const cnat_session_t * session,
+                      ip4_header_t * ip4, udp_header_t * udp)
+{
+  tcp_header_t *tcp = (tcp_header_t *) udp;
+  ip4_address_t xlate_addr[VLIB_N_DIR];
+  u16 xlate_port[VLIB_N_DIR];
+
+  xlate_addr[VLIB_TX] = session->value.cs_ip[VLIB_TX].ip4;
+  xlate_addr[VLIB_RX] = session->value.cs_ip[VLIB_RX].ip4;
+  xlate_port[VLIB_TX] = session->value.cs_port[VLIB_TX];
+  xlate_port[VLIB_RX] = session->value.cs_port[VLIB_RX];
+
+  switch (ip4->protocol)
+    {
+    case IP_PROTOCOL_TCP:
+      if (PREDICT_FALSE (tcp->checksum))
+        {
+          cnat_ip4_translate_l4 (ip4, udp, &tcp->checksum, xlate_addr,
+                                 xlate_port);
+        }
+      else
+        {
+          udp->dst_port = xlate_port[VLIB_TX];
+          udp->src_port = xlate_port[VLIB_RX];
+        }
+      cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index);
+      break;
+
+    case IP_PROTOCOL_UDP:
+      if (PREDICT_FALSE (udp->checksum))
+        {
+          cnat_ip4_translate_l4 (ip4, udp, &udp->checksum, xlate_addr,
+                                 xlate_port);
+        }
+      else
+        {
+          udp->dst_port = xlate_port[VLIB_TX];
+          udp->src_port = xlate_port[VLIB_RX];
+        }
+      break;
+
+    default:
+      /* other protocols: only the L3 header is translated */
+      break;
+    }
+
+  cnat_ip4_translate_l3 (ip4, xlate_addr);
+}
+
+/* Overwrite the IPv6 destination and/or source address with the new
+ * ones; an all-zero new address leaves the header field untouched. */
+static_always_inline void
+cnat_ip6_translate_l3 (ip6_header_t * ip6, ip6_address_t new_addr[VLIB_N_DIR])
+{
+  ip6_address_t *new_dst = &new_addr[VLIB_TX];
+  ip6_address_t *new_src = &new_addr[VLIB_RX];
+
+  if (has_ip6_address (new_dst))
+    {
+      ip6_address_copy (&ip6->dst_address, new_dst);
+    }
+
+  if (has_ip6_address (new_src))
+    {
+      ip6_address_copy (&ip6->src_address, new_src);
+    }
+}
+
+/* Rewrite the L4 ports of an IPv6 packet and incrementally patch the
+ * L4 checksum at *checksum for both the port and the address change.
+ * An all-zero new address or a zero new port means that item is left
+ * untouched. The IPv6 addresses themselves are not written here. */
+static_always_inline void
+cnat_ip6_translate_l4 (ip6_header_t * ip6, udp_header_t * udp,
+                       u16 * checksum,
+                       ip6_address_t new_addr[VLIB_N_DIR],
+                       u16 new_port[VLIB_N_DIR])
+{
+  const u16 dst_port_was = udp->dst_port;
+  const u16 src_port_was = udp->src_port;
+  ip6_address_t dst_addr_was;
+  ip6_address_t src_addr_was;
+  ip_csum_t acc;
+
+  /* snapshot of the addresses being replaced */
+  ip6_address_copy (&dst_addr_was, &ip6->dst_address);
+  ip6_address_copy (&src_addr_was, &ip6->src_address);
+
+  acc = *checksum;
+
+  if (has_ip6_address (&new_addr[VLIB_TX]))
+    {
+      acc = ip_csum_add_even (acc, new_addr[VLIB_TX].as_u64[0]);
+      acc = ip_csum_add_even (acc, new_addr[VLIB_TX].as_u64[1]);
+      acc = ip_csum_sub_even (acc, dst_addr_was.as_u64[0]);
+      acc = ip_csum_sub_even (acc, dst_addr_was.as_u64[1]);
+    }
+
+  if (0 != new_port[VLIB_TX])
+    {
+      udp->dst_port = new_port[VLIB_TX];
+      acc = ip_csum_update (acc, dst_port_was, new_port[VLIB_TX],
+                            ip4_header_t /* cheat */ ,
+                            length /* changed member */ );
+    }
+
+  if (has_ip6_address (&new_addr[VLIB_RX]))
+    {
+      acc = ip_csum_add_even (acc, new_addr[VLIB_RX].as_u64[0]);
+      acc = ip_csum_add_even (acc, new_addr[VLIB_RX].as_u64[1]);
+      acc = ip_csum_sub_even (acc, src_addr_was.as_u64[0]);
+      acc = ip_csum_sub_even (acc, src_addr_was.as_u64[1]);
+    }
+
+  if (0 != new_port[VLIB_RX])
+    {
+      udp->src_port = new_port[VLIB_RX];
+      acc = ip_csum_update (acc, src_port_was, new_port[VLIB_RX],
+                            ip4_header_t /* cheat */ ,
+                            length /* changed member */ );
+    }
+
+  *checksum = ip_csum_fold (acc);
+}
+
+/* Apply the translation stored in session to an IPv6 packet: L4 first
+ * (TCP and UDP only), then the IPv6 addresses. TCP packets also refresh
+ * the session lifetime. When the L4 checksum field is zero only the
+ * ports are written and no checksum is computed. */
+static_always_inline void
+cnat_translation_ip6 (const cnat_session_t * session,
+                      ip6_header_t * ip6, udp_header_t * udp)
+{
+  tcp_header_t *tcp = (tcp_header_t *) udp;
+  ip6_address_t xlate_addr[VLIB_N_DIR];
+  u16 xlate_port[VLIB_N_DIR];
+
+  ip6_address_copy (&xlate_addr[VLIB_TX],
+                    &session->value.cs_ip[VLIB_TX].ip6);
+  ip6_address_copy (&xlate_addr[VLIB_RX],
+                    &session->value.cs_ip[VLIB_RX].ip6);
+  xlate_port[VLIB_TX] = session->value.cs_port[VLIB_TX];
+  xlate_port[VLIB_RX] = session->value.cs_port[VLIB_RX];
+
+  switch (ip6->protocol)
+    {
+    case IP_PROTOCOL_TCP:
+      if (PREDICT_FALSE (tcp->checksum))
+        {
+          cnat_ip6_translate_l4 (ip6, udp, &tcp->checksum, xlate_addr,
+                                 xlate_port);
+        }
+      else
+        {
+          udp->dst_port = xlate_port[VLIB_TX];
+          udp->src_port = xlate_port[VLIB_RX];
+        }
+      cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index);
+      break;
+
+    case IP_PROTOCOL_UDP:
+      if (PREDICT_FALSE (udp->checksum))
+        {
+          cnat_ip6_translate_l4 (ip6, udp, &udp->checksum, xlate_addr,
+                                 xlate_port);
+        }
+      else
+        {
+          udp->dst_port = xlate_port[VLIB_TX];
+          udp->src_port = xlate_port[VLIB_RX];
+        }
+      break;
+
+    default:
+      /* other protocols: only the addresses are translated */
+      break;
+    }
+
+  cnat_ip6_translate_l3 (ip6, xlate_addr);
+}
+
+/* Fill the key part of bkey (viewed as a cnat_session_t) from the
+ * packet at the buffer's current position: address family, both
+ * addresses, both ports and the IP protocol. The L4 header is taken
+ * to start right after the fixed IP header, and the ports are read
+ * from it whatever the protocol is. */
+static_always_inline void
+cnat_session_make_key (vlib_buffer_t * b, ip_address_family_t af,
+                       clib_bihash_kv_40_48_t * bkey)
+{
+  cnat_session_t *lookup = (cnat_session_t *) bkey;
+  udp_header_t *l4;
+
+  lookup->key.__cs_pad[0] = 0;
+  lookup->key.__cs_pad[1] = 0;
+
+  if (AF_IP4 != af)
+    {
+      ip6_header_t *ip6 = vlib_buffer_get_current (b);
+
+      l4 = (udp_header_t *) (ip6 + 1);
+      lookup->key.cs_af = AF_IP6;
+      lookup->key.cs_proto = ip6->protocol;
+      ip46_address_set_ip6 (&lookup->key.cs_ip[VLIB_TX], &ip6->dst_address);
+      ip46_address_set_ip6 (&lookup->key.cs_ip[VLIB_RX], &ip6->src_address);
+    }
+  else
+    {
+      ip4_header_t *ip4 = vlib_buffer_get_current (b);
+
+      l4 = (udp_header_t *) (ip4 + 1);
+      lookup->key.cs_af = AF_IP4;
+      lookup->key.cs_proto = ip4->protocol;
+      ip46_address_set_ip4 (&lookup->key.cs_ip[VLIB_TX], &ip4->dst_address);
+      ip46_address_set_ip4 (&lookup->key.cs_ip[VLIB_RX], &ip4->src_address);
+    }
+
+  lookup->key.cs_port[VLIB_RX] = l4->src_port;
+  lookup->key.cs_port[VLIB_TX] = l4->dst_port;
+}
+
+/**
+ * Create NAT sessions
+ *
+ * Adds the forward session and its reverse session to cnat_session_db.
+ * On entry the caller has filled session->key and the translated
+ * addresses/ports in session->value; this function sets
+ * session->value.cs_ts_index.
+ *
+ * The reverse key is the translated flow with RX and TX swapped. If a
+ * session with that key already exists its timestamp entry is shared
+ * (refcount incremented) and client accounting is skipped. Otherwise a
+ * new timestamp is allocated and the client for the translated source
+ * address is looked up: a known client gets its session count bumped,
+ * an unknown one is either sent to the main thread for learning or,
+ * when the throttle says no, recorded in this thread's throttle pool.
+ *
+ * @param session        forward session, stored in a bihash kv
+ * @param ctx            per-frame node context (af, now, thread, seed)
+ * @param rsession_flags flags stored in the reverse session's value
+ */
+
+static_always_inline void
+cnat_session_create (cnat_session_t * session, cnat_node_ctx_t * ctx,
+                    u8 rsession_flags)
+{
+  cnat_client_t *cc;
+  clib_bihash_kv_40_48_t rkey;
+  cnat_session_t *rsession = (cnat_session_t *) & rkey;
+  clib_bihash_kv_40_48_t *bkey = (clib_bihash_kv_40_48_t *) session;
+  clib_bihash_kv_40_48_t rvalue;
+  int rv;
+
+  /* create the reverse flow key: the translated flow, directions swapped */
+  ip46_address_copy (&rsession->key.cs_ip[VLIB_RX],
+                    &session->value.cs_ip[VLIB_TX]);
+  ip46_address_copy (&rsession->key.cs_ip[VLIB_TX],
+                    &session->value.cs_ip[VLIB_RX]);
+  rsession->key.cs_proto = session->key.cs_proto;
+  rsession->key.__cs_pad[0] = 0;
+  rsession->key.__cs_pad[1] = 0;
+  rsession->key.cs_af = ctx->af;
+  rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX];
+  rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX];
+
+  /* First search for existing reverse session */
+  rv = clib_bihash_search_inline_2_40_48 (&cnat_session_db, &rkey, &rvalue);
+  if (!rv)
+    {
+      /* Reverse session already exists
+         corresponding client should also exist
+         we only need to refcnt the timestamp */
+      cnat_session_t *found_rsession = (cnat_session_t *) & rvalue;
+      session->value.cs_ts_index = found_rsession->value.cs_ts_index;
+      cnat_timestamp_inc_refcnt (session->value.cs_ts_index);
+      clib_bihash_add_del_40_48 (&cnat_session_db, bkey, 1 /* is_add */ );
+      goto create_rsession;
+    }
+
+  session->value.cs_ts_index = cnat_timestamp_new (ctx->now);
+  clib_bihash_add_del_40_48 (&cnat_session_db, bkey, 1);
+
+  /* is this the first time we've seen this source address */
+  cc = (AF_IP4 == ctx->af ?
+       cnat_client_ip4_find (&session->value.cs_ip[VLIB_RX].ip4) :
+       cnat_client_ip6_find (&session->value.cs_ip[VLIB_RX].ip6));
+
+  if (NULL == cc)
+    {
+      u64 r0 = 17;
+      if (AF_IP4 == ctx->af)
+       r0 = (u64) session->value.cs_ip[VLIB_RX].ip4.as_u32;
+      else
+       {
+         r0 = r0 * 31 + session->value.cs_ip[VLIB_RX].ip6.as_u64[0];
+         r0 = r0 * 31 + session->value.cs_ip[VLIB_RX].ip6.as_u64[1];
+       }
+
+      /* Rate limit: r0 is the per-address value fed to the throttle */
+      if (!throttle_check (&cnat_throttle, ctx->thread_index, r0, ctx->seed))
+       {
+         cnat_learn_arg_t l;
+         l.addr.version = ctx->af;
+         ip46_address_copy (&l.addr.ip, &session->value.cs_ip[VLIB_RX]);
+         /* fire client create to the main thread */
+         vl_api_rpc_call_main_thread (cnat_client_learn,
+                                      (u8 *) & l, sizeof (l));
+       }
+      else
+       {
+         /* Will still need to count those for session refcnt */
+         ip_address_t *addr;
+         clib_spinlock_lock (&cnat_client_db.throttle_pool_lock
+                             [ctx->thread_index]);
+         pool_get (cnat_client_db.throttle_pool[ctx->thread_index], addr);
+         addr->version = ctx->af;
+         ip46_address_copy (&addr->ip, &session->value.cs_ip[VLIB_RX]);
+         clib_spinlock_unlock (&cnat_client_db.throttle_pool_lock
+                               [ctx->thread_index]);
+       }
+    }
+  else
+    {
+      cnat_client_cnt_session (cc);
+    }
+
+create_rsession:
+  /* add the reverse flow: it translates back to the original addresses
+     and ports, directions swapped */
+  ip46_address_copy (&rsession->value.cs_ip[VLIB_RX],
+                    &session->key.cs_ip[VLIB_TX]);
+  ip46_address_copy (&rsession->value.cs_ip[VLIB_TX],
+                    &session->key.cs_ip[VLIB_RX]);
+  rsession->value.cs_ts_index = session->value.cs_ts_index;
+  rsession->value.cs_lbi = INDEX_INVALID;
+  rsession->value.flags = rsession_flags;
+  rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX];
+  rsession->value.cs_port[VLIB_RX] = session->key.cs_port[VLIB_TX];
+
+  clib_bihash_add_del_40_48 (&cnat_session_db, &rkey, 1);
+}
+
+/**
+ * Common frame loop shared by the cnat nodes.
+ *
+ * For every buffer of the frame: build the session key from the packet,
+ * look it up in cnat_session_db and hand the buffer to cnat_sub together
+ * with the lookup result. cnat_sub receives the found session on a hit
+ * (rv == 0) and the searched key otherwise; the value it returns is the
+ * buffer's next index.
+ *
+ * Buffers are handled four at a time while at least eight remain: the
+ * keys and hashes of the following four are computed, and their hash
+ * buckets prefetched, one iteration ahead. The remainder is handled one
+ * buffer at a time.
+ *
+ * @param cnat_sub per-packet handler
+ * @param af       address family of the packets in the frame
+ * @param do_trace stored in the context passed to cnat_sub
+ * @return number of buffers in the frame
+ */
+always_inline uword
+cnat_node_inline (vlib_main_t * vm,
+                  vlib_node_runtime_t * node,
+                  vlib_frame_t * frame,
+                  cnat_node_sub_t cnat_sub,
+                  ip_address_family_t af, u8 do_trace)
+{
+  u32 n_left, *from, thread_index;
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
+  vlib_buffer_t **b = bufs;
+  u16 nexts[VLIB_FRAME_SIZE], *next;
+  f64 now;
+  u64 seed;
+
+  thread_index = vm->thread_index;
+  from = vlib_frame_vector_args (frame);
+  n_left = frame->n_vectors;
+  next = nexts;
+  vlib_get_buffers (vm, from, bufs, n_left);
+  now = vlib_time_now (vm);
+  /* one clock sample per frame is enough: reuse it for the throttle */
+  seed = throttle_seed (&cnat_throttle, thread_index, now);
+  cnat_session_t *session[4];
+  clib_bihash_kv_40_48_t bkey[4], bvalue[4];
+  u64 hash[4];
+  int rv[4];
+
+  cnat_node_ctx_t ctx = { now, seed, thread_index, af, do_trace };
+
+  if (n_left >= 8)
+    {
+      /* Kickstart our state */
+      cnat_session_make_key (b[3], af, &bkey[3]);
+      cnat_session_make_key (b[2], af, &bkey[2]);
+      cnat_session_make_key (b[1], af, &bkey[1]);
+      cnat_session_make_key (b[0], af, &bkey[0]);
+
+      hash[3] = clib_bihash_hash_40_48 (&bkey[3]);
+      hash[2] = clib_bihash_hash_40_48 (&bkey[2]);
+      hash[1] = clib_bihash_hash_40_48 (&bkey[1]);
+      hash[0] = clib_bihash_hash_40_48 (&bkey[0]);
+    }
+
+  while (n_left >= 8)
+    {
+      if (n_left >= 12)
+        {
+          vlib_prefetch_buffer_header (b[11], LOAD);
+          vlib_prefetch_buffer_header (b[10], LOAD);
+          vlib_prefetch_buffer_header (b[9], LOAD);
+          vlib_prefetch_buffer_header (b[8], LOAD);
+        }
+
+      /* look up and process the four buffers whose keys are ready */
+      rv[3] =
+        clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db,
+                                                     hash[3], &bkey[3],
+                                                     &bvalue[3]);
+      session[3] = (cnat_session_t *) (rv[3] ? &bkey[3] : &bvalue[3]);
+      next[3] = cnat_sub (vm, node, b[3], &ctx, rv[3], session[3]);
+
+      rv[2] =
+        clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db,
+                                                     hash[2], &bkey[2],
+                                                     &bvalue[2]);
+      session[2] = (cnat_session_t *) (rv[2] ? &bkey[2] : &bvalue[2]);
+      next[2] = cnat_sub (vm, node, b[2], &ctx, rv[2], session[2]);
+
+      rv[1] =
+        clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db,
+                                                     hash[1], &bkey[1],
+                                                     &bvalue[1]);
+      session[1] = (cnat_session_t *) (rv[1] ? &bkey[1] : &bvalue[1]);
+      next[1] = cnat_sub (vm, node, b[1], &ctx, rv[1], session[1]);
+
+      rv[0] =
+        clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db,
+                                                     hash[0], &bkey[0],
+                                                     &bvalue[0]);
+      session[0] = (cnat_session_t *) (rv[0] ? &bkey[0] : &bvalue[0]);
+      next[0] = cnat_sub (vm, node, b[0], &ctx, rv[0], session[0]);
+
+      /* prepare keys and hashes for the next four buffers */
+      cnat_session_make_key (b[7], af, &bkey[3]);
+      cnat_session_make_key (b[6], af, &bkey[2]);
+      cnat_session_make_key (b[5], af, &bkey[1]);
+      cnat_session_make_key (b[4], af, &bkey[0]);
+
+      hash[3] = clib_bihash_hash_40_48 (&bkey[3]);
+      hash[2] = clib_bihash_hash_40_48 (&bkey[2]);
+      hash[1] = clib_bihash_hash_40_48 (&bkey[1]);
+      hash[0] = clib_bihash_hash_40_48 (&bkey[0]);
+
+      clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[3]);
+      clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[2]);
+      clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[1]);
+      clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[0]);
+
+      clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[3]);
+      clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[2]);
+      clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[1]);
+      clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[0]);
+
+      b += 4;
+      next += 4;
+      n_left -= 4;
+    }
+
+  /* remainder: the keys are rebuilt here, one buffer at a time */
+  while (n_left > 0)
+    {
+      cnat_session_make_key (b[0], af, &bkey[0]);
+      rv[0] = clib_bihash_search_inline_2_40_48 (&cnat_session_db,
+                                                 &bkey[0], &bvalue[0]);
+
+      session[0] = (cnat_session_t *) (rv[0] ? &bkey[0] : &bvalue[0]);
+      next[0] = cnat_sub (vm, node, b[0], &ctx, rv[0], session[0]);
+
+      b++;
+      next++;
+      n_left--;
+    }
+
+  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+
+  return frame->n_vectors;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
+#endif
diff --git a/src/plugins/cnat/cnat_node_snat.c b/src/plugins/cnat/cnat_node_snat.c
new file mode 100644 (file)
index 0000000..cc1421b
--- /dev/null
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlibmemory/api.h>
+#include <cnat/cnat_node.h>
+#include <cnat/cnat_snat.h>
+
+/* Next-node indices of the cnat snat nodes. Packets that are not
+ * dropped continue on the feature arc instead of using one of these. */
+typedef enum cnat_snat_next_
+{
+  CNAT_SNAT_NEXT_DROP,
+  CNAT_SNAT_N_NEXT,
+} cnat_snat_next_t;
+
+/* Per-packet trace record of the cnat snat nodes */
+typedef struct cnat_snat_trace_
+{
+  u32 found;			/* non-zero: the formatter prints the session */
+  cnat_session_t session;	/* copy of the session seen by the node */
+} cnat_snat_trace_t;
+
+vlib_node_registration_t cnat_snat_ip4_node;
+vlib_node_registration_t cnat_snat_ip6_node;
+
+/* Trace formatter of the cnat snat nodes: prints the recorded session
+ * when the record is flagged as found, "not found" otherwise. */
+static u8 *
+format_cnat_snat_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  cnat_snat_trace_t *record = va_arg (*args, cnat_snat_trace_t *);
+
+  if (!record->found)
+    return format (s, "not found");
+
+  return format (s, "found: %U", format_cnat_session, &record->session, 1);
+}
+
+/* CNat sub for source NAT as a feature arc on ip[46]-unicast
+   This node's sub shouldn't apply to the same flows as
+   cnat_vip_inline
+
+   rv is the session lookup result (0: hit) and session the found
+   session on a hit, the searched key otherwise.
+   On a hit the session timestamp is refreshed and the packet is
+   translated. On a miss, TCP/UDP packets whose destination is not in
+   the snat prefix table get a new session (source rewritten to the
+   configured snat address, source port allocated) and are translated.
+   Returns the next feature on the arc, or CNAT_SNAT_NEXT_DROP when no
+   port could be allocated. */
+always_inline uword
+cnat_snat_inline (vlib_main_t * vm,
+                  vlib_node_runtime_t * node,
+                  vlib_buffer_t * b,
+                  cnat_node_ctx_t * ctx, int rv, cnat_session_t * session)
+{
+  cnat_main_t *cm = &cnat_main;
+  /* keep the lookup outcome for the trace: rv is reused further down */
+  const u32 session_found = (0 == rv);
+  ip4_header_t *ip4 = NULL;
+  ip_protocol_t iproto;
+  ip6_header_t *ip6 = NULL;
+  udp_header_t *udp0;
+  u32 arc_next0;
+  u16 next0;
+  u16 sport;
+
+  if (AF_IP4 == ctx->af)
+    {
+      ip4 = vlib_buffer_get_current (b);
+      iproto = ip4->protocol;
+      udp0 = (udp_header_t *) (ip4 + 1);
+    }
+  else
+    {
+      ip6 = vlib_buffer_get_current (b);
+      iproto = ip6->protocol;
+      udp0 = (udp_header_t *) (ip6 + 1);
+    }
+
+  /* By default don't follow previous next0 */
+  vnet_feature_next (&arc_next0, b);
+  next0 = arc_next0;
+
+  if (iproto != IP_PROTOCOL_UDP && iproto != IP_PROTOCOL_TCP)
+    {
+      /* Don't translate */
+      goto trace;
+    }
+
+  if (!rv)
+    {
+      /* session table hit */
+      cnat_timestamp_update (session->value.cs_ts_index, ctx->now);
+    }
+  else
+    {
+      ip46_address_t ip46_dst_address;
+      if (AF_IP4 == ctx->af)
+        ip46_address_set_ip4 (&ip46_dst_address, &ip4->dst_address);
+      else
+        ip46_address_set_ip6 (&ip46_dst_address, &ip6->dst_address);
+      rv = cnat_search_snat_prefix (&ip46_dst_address, ctx->af);
+      if (!rv)
+        {
+          /* Prefix table hit, we shouldn't source NAT */
+          goto trace;
+        }
+      /* New flow, create the sessions if necessary. session will be a snat
+         session, and rsession will be a dnat session
+         Note: packet going through this path are going to the outside,
+         so they will never hit the NAT again (they are not going towards
+         a VIP) */
+      if (AF_IP4 == ctx->af)
+        {
+          ip46_address_set_ip4 (&session->value.cs_ip[VLIB_RX],
+                                &cm->snat_ip4);
+          ip46_address_set_ip4 (&session->value.cs_ip[VLIB_TX],
+                                &ip4->dst_address);
+        }
+      else
+        {
+          ip46_address_set_ip6 (&session->value.cs_ip[VLIB_RX],
+                                &cm->snat_ip6);
+          ip46_address_set_ip6 (&session->value.cs_ip[VLIB_TX],
+                                &ip6->dst_address);
+        }
+
+      /* Port allocation, first try to use the original port, allocate one
+         if it is already used */
+      sport = udp0->src_port;
+      rv = cnat_allocate_port (cm, &sport);
+      if (rv)
+        {
+          /* charge the node that is running, be it the ip4 or ip6 one */
+          vlib_node_increment_counter (vm, node->node_index,
+                                       CNAT_ERROR_EXHAUSTED_PORTS, 1);
+          next0 = CNAT_SNAT_NEXT_DROP;
+          goto trace;
+        }
+
+      session->value.cs_port[VLIB_RX] = sport;
+      session->value.cs_port[VLIB_TX] = udp0->dst_port;
+      session->value.cs_lbi = INDEX_INVALID;
+      session->value.flags =
+        CNAT_SESSION_FLAG_NO_CLIENT | CNAT_SESSION_FLAG_ALLOC_PORT;
+
+      cnat_session_create (session, ctx, CNAT_SESSION_FLAG_HAS_SNAT);
+    }
+
+
+  if (AF_IP4 == ctx->af)
+    cnat_translation_ip4 (session, ip4, udp0);
+  else
+    cnat_translation_ip6 (session, ip6, udp0);
+
+trace:
+  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
+    {
+      cnat_snat_trace_t *t;
+
+      t = vlib_add_trace (vm, node, b, sizeof (*t));
+      /* the formatter keys off this field, so it must always be set */
+      t->found = session_found;
+
+      if (NULL != session)
+        clib_memcpy (&t->session, session, sizeof (t->session));
+    }
+  return next0;
+}
+
+/* ip4 snat node function: run the common loop with the snat sub,
+ * passing a constant do_trace so each variant gets its own inline copy */
+VLIB_NODE_FN (cnat_snat_ip4_node) (vlib_main_t * vm,
+                                   vlib_node_runtime_t * node,
+                                   vlib_frame_t * frame)
+{
+  if (PREDICT_TRUE (!(node->flags & VLIB_NODE_FLAG_TRACE)))
+    return cnat_node_inline (vm, node, frame, cnat_snat_inline, AF_IP4,
+                             0 /* do_trace */ );
+
+  return cnat_node_inline (vm, node, frame, cnat_snat_inline, AF_IP4,
+                           1 /* do_trace */ );
+}
+
+/* ip6 snat node function: run the common loop with the snat sub,
+ * passing a constant do_trace so each variant gets its own inline copy */
+VLIB_NODE_FN (cnat_snat_ip6_node) (vlib_main_t * vm,
+                                   vlib_node_runtime_t * node,
+                                   vlib_frame_t * frame)
+{
+  if (PREDICT_TRUE (!(node->flags & VLIB_NODE_FLAG_TRACE)))
+    return cnat_node_inline (vm, node, frame, cnat_snat_inline, AF_IP6,
+                             0 /* do_trace */ );
+
+  return cnat_node_inline (vm, node, frame, cnat_snat_inline, AF_IP6,
+                           1 /* do_trace */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (cnat_snat_ip4_node) =
+{
+  .name = "ip4-cnat-snat",
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .vector_size = sizeof (u32),
+  .format_trace = format_cnat_snat_trace,
+  .error_strings = cnat_error_strings,
+  .n_errors = CNAT_N_ERROR,
+  .next_nodes =
+  {
+    [CNAT_SNAT_NEXT_DROP] = "ip4-drop",
+  },
+  .n_next_nodes = CNAT_SNAT_N_NEXT,
+};
+
+VLIB_REGISTER_NODE (cnat_snat_ip6_node) =
+{
+  .name = "ip6-cnat-snat",
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .vector_size = sizeof (u32),
+  .format_trace = format_cnat_snat_trace,
+  .error_strings = cnat_error_strings,
+  .n_errors = CNAT_N_ERROR,
+  .next_nodes =
+  {
+    [CNAT_SNAT_NEXT_DROP] = "ip6-drop",
+  },
+  .n_next_nodes = CNAT_SNAT_N_NEXT,
+};
+/* *INDENT-ON* */
+
+
+/* ip4-cnat-snat is a feature on the ip4-unicast arc */
+VNET_FEATURE_INIT (cnat_snat_ip4_node, static) =
+{
+  .arc_name = "ip4-unicast",
+  .node_name = "ip4-cnat-snat",
+};
+
+/* ip6-cnat-snat is a feature on the ip6-unicast arc */
+VNET_FEATURE_INIT (cnat_snat_ip6_node, static) =
+{
+  .arc_name = "ip6-unicast",
+  .node_name = "ip6-cnat-snat",
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/cnat/cnat_node_vip.c b/src/plugins/cnat/cnat_node_vip.c
new file mode 100644 (file)
index 0000000..afabed0
--- /dev/null
@@ -0,0 +1,308 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlibmemory/api.h>
+#include <cnat/cnat_node.h>
+#include <cnat/cnat_translation.h>
+
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
+
+/* Per-packet trace record written by cnat_vip_inline and rendered by
+ * format_cnat_translation_trace */
+typedef struct cnat_translation_trace_t_
+{
+  /* copy of the session; only filled when found_session or
+   * created_session is set */
+  cnat_session_t session;
+  /* copy of the matched translation; only filled when has_tr is set */
+  cnat_translation_t tr;
+  /* non-zero when the session lookup result handed to the node was a hit */
+  u32 found_session;
+  /* non-zero when the node created a new session for this packet */
+  u32 created_session;
+  /* non-zero when a translation matched the packet */
+  u32 has_tr;
+} cnat_translation_trace_t;
+
+/* Next-node slots statically registered by the cnat VIP nodes */
+typedef enum cnat_translation_next_t_
+{
+  /* ip4-drop / ip6-drop */
+  CNAT_TRANSLATION_NEXT_DROP,
+  /* ip4-lookup / ip6-lookup: used when another FIB lookup is needed
+   * after the translation */
+  CNAT_TRANSLATION_NEXT_LOOKUP,
+  /* number of next nodes; keep last */
+  CNAT_TRANSLATION_N_NEXT,
+} cnat_translation_next_t;
+
+vlib_node_registration_t cnat_vip_ip4_node;
+vlib_node_registration_t cnat_vip_ip6_node;
+
+/* Trace formatter for the cnat VIP nodes. Reports, in order of
+ * precedence: a session hit, a newly created session (with the
+ * translation used), a translation match without session, or nothing. */
+static u8 *
+format_cnat_translation_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  cnat_translation_trace_t *trace =
+    va_arg (*args, cnat_translation_trace_t *);
+
+  if (trace->found_session)
+    return format (s, "found: %U", format_cnat_session, &trace->session, 1);
+
+  if (trace->created_session)
+    return format (s, "created: %U\n  tr: %U",
+                  format_cnat_session, &trace->session, 1,
+                  format_cnat_translation, &trace->tr, 0);
+
+  if (trace->has_tr)
+    return format (s, "tr pass: %U", format_cnat_translation,
+                  &trace->tr, 0);
+
+  return format (s, "not found");
+}
+
+/**
+ * CNat sub for NAT behind a fib entry (VIP or interposed real IP).
+ *
+ * Per-buffer worker passed to cnat_node_inline by the VIP node functions.
+ *
+ * @param vm       vlib main
+ * @param node     runtime of the node executing this function
+ * @param b        buffer; its current data is read as the IP header
+ * @param ctx      per-call context (address family, time, thread, trace)
+ * @param rv       0 when a session matching the packet was found
+ * @param session  the session; on a miss it is filled in and created here
+ * @return         the next-node index for the buffer
+ */
+always_inline uword
+cnat_vip_inline (vlib_main_t * vm,
+                  vlib_node_runtime_t * node,
+                  vlib_buffer_t * b,
+                  cnat_node_ctx_t * ctx, int rv, cnat_session_t * session)
+{
+  vlib_combined_counter_main_t *cntm = &cnat_translation_counters;
+  cnat_main_t *cm = &cnat_main;
+  const cnat_translation_t *ct = NULL;
+  ip4_header_t *ip4;
+  ip_protocol_t iproto;
+  ip6_header_t *ip6;
+  udp_header_t *udp0;
+  cnat_client_t *cc;
+  u16 next0;
+  index_t cti;
+  int created_session = 0;
+
+  /* NOTE(review): the L4 header is taken to follow the fixed-size IP
+   * header directly (no IPv4 options / IPv6 extension headers) */
+  if (AF_IP4 == ctx->af)
+    {
+      ip4 = vlib_buffer_get_current (b);
+      iproto = ip4->protocol;
+      udp0 = (udp_header_t *) (ip4 + 1);
+    }
+  else
+    {
+      ip6 = vlib_buffer_get_current (b);
+      iproto = ip6->protocol;
+      udp0 = (udp_header_t *) (ip6 + 1);
+    }
+
+  /* the FIB lookup result identifies the cnat client */
+  cc = cnat_client_get (vnet_buffer (b)->ip.adj_index[VLIB_TX]);
+
+  if (iproto != IP_PROTOCOL_UDP && iproto != IP_PROTOCOL_TCP)
+    {
+      /* Don't translate & follow the fib programming */
+      next0 = cc->cc_parent.dpoi_next_node;
+      vnet_buffer (b)->ip.adj_index[VLIB_TX] = cc->cc_parent.dpoi_index;
+      goto trace;
+    }
+
+  /* the port is byte-swapped before the translation lookup */
+  ct = cnat_find_translation (cc->parent_cci,
+                             clib_host_to_net_u16 (udp0->dst_port),
+                             iproto);
+
+  if (!rv)
+    {
+      /* session table hit */
+      cnat_timestamp_update (session->value.cs_ts_index, ctx->now);
+
+      if (NULL != ct)
+       {
+         /* Translate & follow the translation given LB */
+         next0 = ct->ct_lb.dpoi_next_node;
+         vnet_buffer (b)->ip.adj_index[VLIB_TX] = session->value.cs_lbi;
+       }
+      else if (session->value.flags & CNAT_SESSION_FLAG_HAS_SNAT)
+       {
+         /* The return needs DNAT, so we need an additional
+          * lookup after translation */
+         next0 = CNAT_TRANSLATION_NEXT_LOOKUP;
+       }
+      else
+       {
+         /* Translate & follow the fib programming */
+         next0 = cc->cc_parent.dpoi_next_node;
+         vnet_buffer (b)->ip.adj_index[VLIB_TX] = cc->cc_parent.dpoi_index;
+       }
+    }
+  else
+    {
+      if (NULL == ct)
+       {
+         /* Don't translate & follow the fib programming */
+         vnet_buffer (b)->ip.adj_index[VLIB_TX] = cc->cc_parent.dpoi_index;
+         next0 = cc->cc_parent.dpoi_next_node;
+         goto trace;
+       }
+
+      /* New flow, create the sessions */
+      const load_balance_t *lb0;
+      cnat_ep_trk_t *trk0;
+      u32 hash_c0, bucket0;
+      u32 rsession_flags = 0;
+      const dpo_id_t *dpo0;
+
+      lb0 = load_balance_get (ct->ct_lb.dpoi_index);
+      if (!lb0->lb_n_buckets)
+       {
+         /* No backend available:
+          * don't translate & follow the fib programming */
+         vnet_buffer (b)->ip.adj_index[VLIB_TX] = cc->cc_parent.dpoi_index;
+         next0 = cc->cc_parent.dpoi_next_node;
+         goto trace;
+       }
+
+      /* session table miss: pick a backend from the flow hash */
+      hash_c0 = (AF_IP4 == ctx->af ?
+                ip4_compute_flow_hash (ip4, lb0->lb_hash_config) :
+                ip6_compute_flow_hash (ip6, lb0->lb_hash_config));
+      bucket0 = hash_c0 & lb0->lb_n_buckets_minus_1;
+      dpo0 = load_balance_get_fwd_bucket (lb0, bucket0);
+
+      /* add the session */
+      trk0 = &ct->ct_paths[bucket0];
+
+      ip46_address_copy (&session->value.cs_ip[VLIB_TX],
+                        &trk0->ct_ep[VLIB_TX].ce_ip.ip);
+      if (ip_address_is_zero (&trk0->ct_ep[VLIB_RX].ce_ip))
+       {
+         /* no source address configured on the path: keep the
+          * packet's own source address */
+         if (AF_IP4 == ctx->af)
+           ip46_address_set_ip4 (&session->value.cs_ip[VLIB_RX],
+                                 &ip4->src_address);
+         else
+           ip46_address_set_ip6 (&session->value.cs_ip[VLIB_RX],
+                                 &ip6->src_address);
+       }
+      else
+       {
+         /* We source NAT with the translation */
+         rsession_flags |= CNAT_SESSION_FLAG_HAS_SNAT;
+         ip46_address_copy (&session->value.cs_ip[VLIB_RX],
+                            &trk0->ct_ep[VLIB_RX].ce_ip.ip);
+       }
+      session->value.cs_port[VLIB_TX] =
+       clib_host_to_net_u16 (trk0->ct_ep[VLIB_TX].ce_port);
+      session->value.cs_port[VLIB_RX] =
+       clib_host_to_net_u16 (trk0->ct_ep[VLIB_RX].ce_port);
+
+      session->value.flags = 0;
+      if (!session->value.cs_port[VLIB_RX])
+       {
+         u16 sport;
+         sport = udp0->src_port;
+         /* Allocate a port only if asked and if we actually sNATed */
+         if ((ct->flags & CNAT_TRANSLATION_FLAG_ALLOCATE_PORT)
+             && (rsession_flags & CNAT_SESSION_FLAG_HAS_SNAT))
+           {
+             sport = 0;        /* force allocation */
+             session->value.flags |= CNAT_SESSION_FLAG_ALLOC_PORT;
+             rv = cnat_allocate_port (cm, &sport);
+             if (rv)
+               {
+                 /* charge the error to the node actually running
+                  * (ip4 or ip6), not unconditionally to the ip4 node */
+                 vlib_node_increment_counter (vm, node->node_index,
+                                              CNAT_ERROR_EXHAUSTED_PORTS,
+                                              1);
+                 next0 = CNAT_TRANSLATION_NEXT_DROP;
+                 goto trace;
+               }
+           }
+
+         session->value.cs_port[VLIB_RX] = sport;
+       }
+      session->value.cs_lbi = dpo0->dpoi_index;
+
+      cnat_client_cnt_session (cc);
+      cnat_session_create (session, ctx, rsession_flags);
+      created_session = 1;
+
+      next0 = ct->ct_lb.dpoi_next_node;
+      vnet_buffer (b)->ip.adj_index[VLIB_TX] = session->value.cs_lbi;
+    }
+
+
+  /* rewrite the packet according to the session */
+  if (AF_IP4 == ctx->af)
+    cnat_translation_ip4 (session, ip4, udp0);
+  else
+    cnat_translation_ip6 (session, ip6, udp0);
+
+  if (NULL != ct)
+    {
+      /* per-translation packet/byte counters */
+      cti = ct - cnat_translation_pool;
+      vlib_increment_combined_counter (cntm, ctx->thread_index, cti, 1,
+                                      vlib_buffer_length_in_chain (vm, b));
+    }
+
+trace:
+  if (PREDICT_FALSE (ctx->do_trace))
+    {
+      cnat_translation_trace_t *t;
+
+      t = vlib_add_trace (vm, node, b, sizeof (*t));
+
+      t->found_session = !rv;
+      t->created_session = created_session;
+      if (t->found_session || t->created_session)
+       clib_memcpy (&t->session, session, sizeof (t->session));
+      t->has_tr = (NULL != ct);
+      if (t->has_tr)
+       clib_memcpy (&t->tr, ct, sizeof (cnat_translation_t));
+    }
+  return next0;
+}
+
+/* IPv4 VIP node function: hands the frame to the shared cnat node loop
+ * with cnat_vip_inline as the per-buffer worker. The tracing variant is
+ * selected when the node runtime has its trace flag set. */
+VLIB_NODE_FN (cnat_vip_ip4_node) (vlib_main_t * vm,
+                                   vlib_node_runtime_t * node,
+                                   vlib_frame_t * frame)
+{
+  /* do_trace stays a literal in each call */
+  return PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)) ?
+    cnat_node_inline (vm, node, frame, cnat_vip_inline, AF_IP4,
+                     1 /* do_trace */ ) :
+    cnat_node_inline (vm, node, frame, cnat_vip_inline, AF_IP4,
+                     0 /* do_trace */ );
+}
+
+/* IPv6 VIP node function: hands the frame to the shared cnat node loop
+ * with cnat_vip_inline as the per-buffer worker. The tracing variant is
+ * selected when the node runtime has its trace flag set. */
+VLIB_NODE_FN (cnat_vip_ip6_node) (vlib_main_t * vm,
+                                   vlib_node_runtime_t * node,
+                                   vlib_frame_t * frame)
+{
+  /* do_trace stays a literal in each call */
+  return PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)) ?
+    cnat_node_inline (vm, node, frame, cnat_vip_inline, AF_IP6,
+                     1 /* do_trace */ ) :
+    cnat_node_inline (vm, node, frame, cnat_vip_inline, AF_IP6,
+                     0 /* do_trace */ );
+}
+
+/* *INDENT-OFF* */
+/* Graph node registration for the IPv4 VIP (destination NAT) node */
+VLIB_REGISTER_NODE (cnat_vip_ip4_node) =
+{
+  .name = "ip4-cnat-tx",
+  .vector_size = sizeof (u32),
+  .format_trace = format_cnat_translation_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  /* CNAT_ERROR_EXHAUSTED_PORTS is incremented against this node, so the
+   * cnat error counters must be registered for it */
+  .n_errors = CNAT_N_ERROR,
+  .error_strings = cnat_error_strings,
+  .n_next_nodes = CNAT_TRANSLATION_N_NEXT,
+  .next_nodes =
+  {
+    [CNAT_TRANSLATION_NEXT_DROP] = "ip4-drop",
+    [CNAT_TRANSLATION_NEXT_LOOKUP] = "ip4-lookup",
+  }
+};
+/* Graph node registration for the IPv6 VIP (destination NAT) node */
+VLIB_REGISTER_NODE (cnat_vip_ip6_node) =
+{
+  .name = "ip6-cnat-tx",
+  .vector_size = sizeof (u32),
+  .format_trace = format_cnat_translation_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  /* register the cnat error counters so that errors raised by the VIP
+   * path can be accounted against this node */
+  .n_errors = CNAT_N_ERROR,
+  .error_strings = cnat_error_strings,
+  .n_next_nodes = CNAT_TRANSLATION_N_NEXT,
+  .next_nodes =
+  {
+    [CNAT_TRANSLATION_NEXT_DROP] = "ip6-drop",
+    [CNAT_TRANSLATION_NEXT_LOOKUP] = "ip6-lookup",
+  }
+};
+/* *INDENT-ON* */
+
diff --git a/src/plugins/cnat/cnat_scanner.c b/src/plugins/cnat/cnat_scanner.c
new file mode 100644 (file)
index 0000000..f5af327
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cnat/cnat_session.h>
+#include <cnat/cnat_client.h>
+
+/* Event types signalled to the scanner process by the CLI */
+typedef enum cnat_scanner_cmd_t_
+{
+  /* stop waking up on the scanner timeout */
+  CNAT_SCANNER_OFF,
+  /* wake up on the scanner timeout (the process starts in this state) */
+  CNAT_SCANNER_ON,
+} cnat_scanner_cmd_t;
+
+/* Scanner process: wakes up on an on/off event or, while enabled, on the
+ * scanner timeout; on every wake-up it processes the client throttle pool
+ * and scans the session DB, remembering the bucket index returned by the
+ * previous scan. */
+static uword
+cnat_scanner_process (vlib_main_t * vm,
+                     vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+  cnat_main_t *cm = &cnat_main;
+  uword *event_data = 0;
+  uword event_type;
+  f64 start_time;
+  int enabled = 1;
+  int i = 0;
+
+  for (;;)
+    {
+      if (!enabled)
+       vlib_process_wait_for_event (vm);
+      else
+       vlib_process_wait_for_event_or_clock (vm, cm->scanner_timeout);
+
+      /* event payloads are not used, only the type */
+      event_type = vlib_process_get_events (vm, &event_data);
+      vec_reset_length (event_data);
+
+      start_time = vlib_time_now (vm);
+
+      if (CNAT_SCANNER_OFF == event_type)
+       {
+         enabled = 0;
+       }
+      else if (CNAT_SCANNER_ON == event_type)
+       {
+         enabled = 1;
+       }
+      else if (event_type != ~0)
+       {
+         /* ~0 means the timer expired; anything else is unexpected */
+         ASSERT (0);
+       }
+
+      cnat_client_throttle_pool_process ();
+      i = cnat_session_scan (vm, start_time, i);
+    }
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/* Process node running cnat_scanner_process */
+VLIB_REGISTER_NODE (cnat_scanner_process_node) = {
+  .name = "cnat-scanner-process",
+  .type = VLIB_NODE_TYPE_PROCESS,
+  .function = cnat_scanner_process,
+};
+/* *INDENT-ON* */
+
+/* CLI handler: parses "on" / "off" (default on; the last keyword wins)
+ * and signals the matching event to the scanner process. */
+static clib_error_t *
+cnat_scanner_cmd (vlib_main_t * vm,
+                 unformat_input_t * input, vlib_cli_command_t * c)
+{
+  cnat_scanner_cmd_t requested = CNAT_SCANNER_ON;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "on"))
+       {
+         requested = CNAT_SCANNER_ON;
+         continue;
+       }
+      if (unformat (input, "off"))
+       {
+         requested = CNAT_SCANNER_OFF;
+         continue;
+       }
+      return (clib_error_return (0, "unknown input '%U'",
+                                format_unformat_error, input));
+    }
+
+  vlib_process_signal_event (vm, cnat_scanner_process_node.index,
+                            requested, 0);
+
+  return (NULL);
+}
+
+/* *INDENT-OFF* */
+/* "test cnat scanner [on|off]": enable or disable the periodic scanner */
+VLIB_CLI_COMMAND (cnat_scanner_cmd_node, static) = {
+  .path = "test cnat scanner",
+  .function = cnat_scanner_cmd,
+  /* advertise the keywords the handler actually parses */
+  .short_help = "test cnat scanner [on|off]",
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/cnat/cnat_session.c b/src/plugins/cnat/cnat_session.c
new file mode 100644 (file)
index 0000000..7f95e1b
--- /dev/null
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <cnat/cnat_session.h>
+
+#include <vppinfra/bihash_template.h>
+#include <vppinfra/bihash_template.c>
+
+
+/* The DB of sessions; initialised by cnat_session_init */
+clib_bihash_40_48_t cnat_session_db;
+
+
+/* Context carried through a bihash walk by cnat_session_walk */
+typedef struct cnat_session_walk_ctx_t_
+{
+  /* user callback invoked for every session */
+  cnat_session_walk_cb_t cb;
+  /* opaque user context passed back to cb */
+  void *ctx;
+} cnat_session_walk_ctx_t;
+
+/* bihash walk adapter: presents each key/value pair to the user callback
+ * as a session and honours the callback's request to stop the walk. */
+static int
+cnat_session_walk_cb (BVT (clib_bihash_kv) * kv, void *arg)
+{
+  cnat_session_t *session = (cnat_session_t *) kv;
+  cnat_session_walk_ctx_t *ctx = arg;
+
+  /* the callback's walk_rc_t used to be ignored, so it could never
+   * terminate the walk early */
+  if (WALK_STOP == ctx->cb (session, ctx->ctx))
+    return (BIHASH_WALK_STOP);
+
+  return (BIHASH_WALK_CONTINUE);
+}
+
+/* Visit every session in the DB, invoking cb (session, ctx) on each */
+void
+cnat_session_walk (cnat_session_walk_cb_t cb, void *ctx)
+{
+  cnat_session_walk_ctx_t walk;
+
+  walk.cb = cb;
+  walk.ctx = ctx;
+
+  BV (clib_bihash_foreach_key_value_pair) (&cnat_session_db,
+                                          cnat_session_walk_cb, &walk);
+}
+
+/* Context for the purge walk */
+typedef struct cnat_session_purge_walk_t_
+{
+  /* vector of copies of every key/value pair seen during the walk */
+  clib_bihash_kv_40_48_t *keys;
+} cnat_session_purge_walk_ctx_t;
+
+/* bihash walk callback: append a copy of the visited key/value pair to
+ * the vector held in the purge context. */
+static int
+cnat_session_purge_walk (BVT (clib_bihash_kv) * key, void *arg)
+{
+  cnat_session_purge_walk_ctx_t *collected =
+    (cnat_session_purge_walk_ctx_t *) arg;
+
+  vec_add1 (collected->keys, *key);
+
+  return (BIHASH_WALK_CONTINUE);
+}
+
+/* Format a session as "[key] => value" followed by its load-balance
+ * index and the value of cnat_timestamp_exp for its timestamp (0 when the
+ * timestamp index is free). The verbose argument is consumed but unused. */
+u8 *
+format_cnat_session (u8 * s, va_list * args)
+{
+  cnat_session_t *sess = va_arg (*args, cnat_session_t *);
+  CLIB_UNUSED (int verbose) = va_arg (*args, int);
+  u16 key_rx_port, key_tx_port, val_rx_port, val_tx_port;
+  f64 ts;
+
+  if (pool_is_free_index (cnat_timestamps, sess->value.cs_ts_index))
+    ts = 0;
+  else
+    ts = cnat_timestamp_exp (sess->value.cs_ts_index);
+
+  /* ports are byte-swapped for display */
+  key_rx_port = clib_host_to_net_u16 (sess->key.cs_port[VLIB_RX]);
+  key_tx_port = clib_host_to_net_u16 (sess->key.cs_port[VLIB_TX]);
+  val_rx_port = clib_host_to_net_u16 (sess->value.cs_port[VLIB_RX]);
+  val_tx_port = clib_host_to_net_u16 (sess->value.cs_port[VLIB_TX]);
+
+  return format (s,
+                "session:[%U;%d -> %U;%d, %U] => %U;%d -> %U;%d lb:%d age:%f",
+                format_ip46_address, &sess->key.cs_ip[VLIB_RX],
+                IP46_TYPE_ANY, key_rx_port,
+                format_ip46_address, &sess->key.cs_ip[VLIB_TX],
+                IP46_TYPE_ANY, key_tx_port,
+                format_ip_protocol, sess->key.cs_proto,
+                format_ip46_address, &sess->value.cs_ip[VLIB_RX],
+                IP46_TYPE_ANY, val_rx_port,
+                format_ip46_address, &sess->value.cs_ip[VLIB_TX],
+                IP46_TYPE_ANY, val_tx_port,
+                sess->value.cs_lbi, ts);
+}
+
+/* CLI handler: dump the session bihash, optionally verbosely */
+static clib_error_t *
+cnat_session_show (vlib_main_t * vm,
+                  unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  u8 verbose = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!unformat (input, "verbose"))
+       return (clib_error_return (0, "unknown input '%U'",
+                                  format_unformat_error, input));
+      verbose = 1;
+    }
+
+  vlib_cli_output (vm, "CNat Sessions: now:%f\n%U\n",
+                  vlib_time_now (vm),
+                  BV (format_bihash), &cnat_session_db, verbose);
+
+  return (NULL);
+}
+
+/* *INDENT-OFF* */
+/* "show cnat session [verbose]": dump the session DB */
+VLIB_CLI_COMMAND (cnat_session_show_cmd_node, static) = {
+  .path = "show cnat session",
+  .function = cnat_session_show,
+  /* advertise the optional keyword the handler parses */
+  .short_help = "show cnat session [verbose]",
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/* Release everything a session holds (allocated port, client reference,
+ * timestamp) and then remove it from the session DB. The session fields
+ * are read before the bihash entry is deleted. */
+void
+cnat_session_free (cnat_session_t * session)
+{
+  u32 flags = session->value.flags;
+
+  if (flags & CNAT_SESSION_FLAG_ALLOC_PORT)
+    cnat_free_port (session->value.cs_port[VLIB_RX]);
+
+  if (0 == (flags & CNAT_SESSION_FLAG_NO_CLIENT))
+    cnat_client_free_by_ip (&session->key.cs_ip[VLIB_TX], session->key.cs_af);
+
+  cnat_timestamp_free (session->value.cs_ts_index);
+
+  /* the session overlays the bihash key/value pair */
+  clib_bihash_add_del_40_48 (&cnat_session_db,
+                            (clib_bihash_kv_40_48_t *) session,
+                            0 /* is_add */ );
+}
+
+/* Flush all sessions from the DB: first collect copies of every entry,
+ * then free them one by one once the walk has finished. Always returns 0. */
+int
+cnat_session_purge (void)
+{
+  cnat_session_purge_walk_ctx_t collected = { };
+  clib_bihash_kv_40_48_t *kvp;
+
+  BV (clib_bihash_foreach_key_value_pair) (&cnat_session_db,
+                                          cnat_session_purge_walk,
+                                          &collected);
+
+  vec_foreach (kvp, collected.keys)
+  {
+    cnat_session_free ((cnat_session_t *) kvp);
+  }
+
+  vec_free (collected.keys);
+
+  return (0);
+}
+
+/**
+ * Scan the session DB for expired sessions and free them.
+ *
+ * The scan is time-boxed: when more than 100us have elapsed since
+ * start_time it stops and returns the bucket index it reached, which the
+ * caller passes back as i on the next invocation so the scan resumes
+ * there instead of restarting from the first bucket.
+ *
+ * @param vm          vlib main, used to read the current time
+ * @param start_time  time the scan started; sessions whose
+ *                    cnat_timestamp_exp value is below it are freed
+ * @param i           bucket index to resume from (0 to start over)
+ * @return            bucket index to resume from, or 0 once the whole
+ *                    table has been scanned
+ */
+u64
+cnat_session_scan (vlib_main_t * vm, f64 start_time, int i)
+{
+  BVT (clib_bihash) * h = &cnat_session_db;
+  int j, k;
+
+  /* Don't scan the session DB if it hasn't been instantiated yet */
+  if (alloc_arena (h) == 0)
+    return 0;
+
+  /* caller saves the starting point: do not reset i here, otherwise
+   * buckets beyond the first time slice would never be visited */
+  for (; i < h->nbuckets; i++)
+    {
+      /* allow no more than 100us without a pause */
+      if ((vlib_time_now (vm) - start_time) > 10e-5)
+       return (i);
+
+      /* prefetch the bucket 3 ahead and the values of the next bucket */
+      if (i < (h->nbuckets - 3))
+       {
+         BVT (clib_bihash_bucket) * b =
+           BV (clib_bihash_get_bucket) (h, i + 3);
+         CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
+         b = BV (clib_bihash_get_bucket) (h, i + 1);
+         if (!BV (clib_bihash_bucket_is_empty) (b))
+           {
+             BVT (clib_bihash_value) * v =
+               BV (clib_bihash_get_value) (h, b->offset);
+             CLIB_PREFETCH (v, CLIB_CACHE_LINE_BYTES, LOAD);
+           }
+       }
+
+      BVT (clib_bihash_bucket) * b = BV (clib_bihash_get_bucket) (h, i);
+      if (BV (clib_bihash_bucket_is_empty) (b))
+       continue;
+      BVT (clib_bihash_value) * v = BV (clib_bihash_get_value) (h, b->offset);
+      for (j = 0; j < (1 << b->log2_pages); j++)
+       {
+         for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+           {
+             /* skip unused slots */
+             if (v->kvp[k].key[0] == ~0ULL && v->kvp[k].value[0] == ~0ULL)
+               continue;
+
+             cnat_session_t *session = (cnat_session_t *) & v->kvp[k];
+
+             if (start_time >
+                 cnat_timestamp_exp (session->value.cs_ts_index))
+               {
+                 /* age it */
+                 cnat_session_free (session);
+
+                 /*
+                  * Note: we may have just freed the bucket's backing
+                  * storage, so check right here...
+                  */
+                 if (BV (clib_bihash_bucket_is_empty) (b))
+                   goto doublebreak;
+               }
+           }
+         v++;
+       }
+    doublebreak:
+      ;
+    }
+
+  /* start again */
+  return (0);
+}
+
+/* Init function: create the session bihash with the bucket count and
+ * memory size configured in cnat_main, and install the session formatter
+ * for its key/value pairs. */
+static clib_error_t *
+cnat_session_init (vlib_main_t * vm)
+{
+  BV (clib_bihash_init) (&cnat_session_db, "CNat Session DB",
+                        cnat_main.session_hash_buckets,
+                        cnat_main.session_hash_memory);
+  BV (clib_bihash_set_kvp_format_fn) (&cnat_session_db, format_cnat_session);
+
+  return (NULL);
+}
+
+VLIB_INIT_FUNCTION (cnat_session_init);
+
+/* CLI handler: print every entry of the timestamp pool while holding the
+ * timestamp lock as a reader. */
+static clib_error_t *
+cnat_timestamp_show (vlib_main_t * vm,
+                    unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  cnat_timestamp_t *entry;
+
+  clib_rwlock_reader_lock (&cnat_main.ts_lock);
+  /* *INDENT-OFF* */
+  pool_foreach (entry, cnat_timestamps, ({
+    vlib_cli_output (vm, "[%d] last_seen:%f lifetime:%u ref:%u",
+                    entry - cnat_timestamps,
+                    entry->last_seen, entry->lifetime, entry->refcnt);
+  }));
+  /* *INDENT-ON* */
+  clib_rwlock_reader_unlock (&cnat_main.ts_lock);
+
+  return (NULL);
+}
+
+/* *INDENT-OFF* */
+/* "show cnat timestamp": dump the timestamp pool */
+VLIB_CLI_COMMAND (cnat_timestamp_show_cmd, static) = {
+  .path = "show cnat timestamp",
+  .short_help = "show cnat timestamp",
+  .is_mp_safe = 1,
+  .function = cnat_timestamp_show,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/cnat/cnat_session.h b/src/plugins/cnat/cnat_session.h
new file mode 100644 (file)
index 0000000..9e1e893
--- /dev/null
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CNAT_SESSION_H__
+#define __CNAT_SESSION_H__
+
+#include <vnet/udp/udp.h>
+
+#include <cnat/cnat_types.h>
+#include <cnat/cnat_client.h>
+#include <cnat/bihash_40_48.h>
+
+
+/**
+ * A session represents the memory of a translation.
+ * In the tx direction (from behind to in front of the NAT), the
+ * session is preserved so subsequent packets follow the same path
+ * even if the translation has been updated; there the session
+ * represents the swap from the VIP to the server address.
+ * In the RX direction the swap is from the server address/port to the VIP.
+ *
+ * A session exists only as key and value in the bihash; there is no
+ * pool for this object. If there were a pool, one would need to be
+ * concerned about which worker is using it.
+ */
+typedef struct cnat_session_t_
+{
+  /**
+   * this key sits in the same memory location as 'key' in the bihash kvp
+   */
+  struct
+  {
+    /**
+     * IP 4/6 address in the rx/tx direction (indexed by VLIB_RX/VLIB_TX)
+     */
+    ip46_address_t cs_ip[VLIB_N_DIR];
+
+    /**
+     * ports in rx/tx (indexed by VLIB_RX/VLIB_TX)
+     */
+    u16 cs_port[VLIB_N_DIR];
+
+    /**
+     * The IP protocol; only TCP and UDP are supported
+     */
+    ip_protocol_t cs_proto;
+
+    /**
+     * The address family describing the IP addresses
+     */
+    u8 cs_af;
+
+    /**
+     * spare space
+     */
+    u8 __cs_pad[2];
+  } key;
+  /**
+   * this value sits in the same memory location as 'value' in the bihash kvp
+   */
+  struct
+  {
+    /**
+     * The IP address to translate to (indexed by VLIB_RX/VLIB_TX).
+     */
+    ip46_address_t cs_ip[VLIB_N_DIR];
+
+    /**
+     * the port to translate to (indexed by VLIB_RX/VLIB_TX).
+     */
+    u16 cs_port[VLIB_N_DIR];
+
+    /**
+     * The load balance object to use to forward
+     */
+    index_t cs_lbi;
+
+    /**
+     * Index of the timestamp recording when this session was last used
+     */
+    u32 cs_ts_index;
+    /**
+     * Bitmask of cnat_session_flag_t values, e.g.
+     * CNAT_SESSION_FLAG_HAS_SNAT for a return path session that was
+     * source NATed on the way in.
+     */
+    u32 flags;
+  } value;
+} cnat_session_t;
+
+/**
+ * Bits stored in cnat_session_t value.flags
+ */
+typedef enum cnat_session_flag_t_
+{
+  /* the session was source NATed */
+  CNAT_SESSION_FLAG_HAS_SNAT = (1 << 0),
+  /* a port was allocated for the session; cnat_session_free releases it */
+  CNAT_SESSION_FLAG_ALLOC_PORT = (1 << 1),
+  /* the session holds no client; cnat_session_free skips the client free */
+  CNAT_SESSION_FLAG_NO_CLIENT = (1 << 2),
+} cnat_session_flag_t;
+
+/**
+ * Format function for a session; the va_list carries a cnat_session_t
+ * pointer followed by an int verbose flag
+ */
+extern u8 *format_cnat_session (u8 * s, va_list * args);
+
+/**
+ * Ensure the session object correctly overlays the bihash key/value pair
+ */
+STATIC_ASSERT (STRUCT_OFFSET_OF (cnat_session_t, key) ==
+              STRUCT_OFFSET_OF (clib_bihash_kv_40_48_t, key),
+              "key overlaps");
+STATIC_ASSERT (STRUCT_OFFSET_OF (cnat_session_t, value) ==
+              STRUCT_OFFSET_OF (clib_bihash_kv_40_48_t, value),
+              "value overlaps");
+STATIC_ASSERT (sizeof (cnat_session_t) == sizeof (clib_bihash_kv_40_48_t),
+              "session kvp");
+
+/**
+ * The DB of sessions
+ */
+extern clib_bihash_40_48_t cnat_session_db;
+
+/**
+ * Callback function invoked during a walk of all sessions
+ */
+typedef walk_rc_t (*cnat_session_walk_cb_t) (const cnat_session_t *
+                                            session, void *ctx);
+
+/**
+ * Walk/visit each of the cnat session
+ */
+extern void cnat_session_walk (cnat_session_walk_cb_t cb, void *ctx);
+
+/**
+ * Scan the session DB for expired sessions.
+ * The scan pauses when its time budget since start_time is used up and
+ * returns the bucket index it reached; it returns 0 when the whole table
+ * has been scanned. The caller feeds the returned value back in as i on
+ * the next call.
+ */
+extern u64 cnat_session_scan (vlib_main_t * vm, f64 start_time, int i);
+
+/**
+ * Purge all the sessions
+ */
+extern int cnat_session_purge (void);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
+#endif
diff --git a/src/plugins/cnat/cnat_snat.c b/src/plugins/cnat/cnat_snat.c
new file mode 100644 (file)
index 0000000..2f6a631
--- /dev/null
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <cnat/cnat_snat.h>
+
+/* Rebuild, for one address family, the vector of prefix lengths to try
+ * during lookup from the bitmap of lengths currently in use. */
+static void
+cnat_compute_prefix_lengths_in_search_order (cnat_snat_pfx_table_t *
+                                            table, ip_address_family_t af)
+{
+  int bit;
+
+  vec_reset_length (table->meta[af].prefix_lengths_in_search_order);
+  /* Note: bitmap reversed so this is in fact a longest prefix match */
+  /* *INDENT-OFF* */
+  clib_bitmap_foreach (bit, table->meta[af].non_empty_dst_address_length_bitmap,
+    ({
+      vec_add1 (table->meta[af].prefix_lengths_in_search_order, 128 - bit);
+    }));
+  /* *INDENT-ON* */
+}
+
+/**
+ * Add a prefix to the source-NAT exclusion table.
+ * All packets destined to this prefix won't be source-NAT-ed.
+ *
+ * Adding a prefix that is already present is a no-op, so the per-length
+ * reference counts always match the number of entries in the hash.
+ *
+ * @param pfx  the prefix to add
+ * @return     0 (always)
+ */
+int
+cnat_add_snat_prefix (ip_prefix_t * pfx)
+{
+  cnat_snat_pfx_table_t *table = &cnat_main.snat_pfx_table;
+  clib_bihash_kv_24_8_t kv, val;
+  ip6_address_t *mask;
+  u64 af = ip_prefix_version (pfx);
+
+  /* key = masked address | (af, length) */
+  mask = &table->ip_masks[pfx->len];
+  if (AF_IP4 == af)
+    {
+      kv.key[0] = (u64) ip_prefix_v4 (pfx).as_u32 & mask->as_u64[0];
+      kv.key[1] = 0;
+    }
+  else
+    {
+      kv.key[0] = ip_prefix_v6 (pfx).as_u64[0] & mask->as_u64[0];
+      kv.key[1] = ip_prefix_v6 (pfx).as_u64[1] & mask->as_u64[1];
+    }
+  kv.key[2] = ((u64) af << 32) | pfx->len;
+  /* the value is not used, but don't store stack garbage */
+  kv.value = 0;
+
+  /* already present: taking another reference would leave this length
+   * in the search order after the prefix is deleted */
+  if (0 == clib_bihash_search_24_8 (&table->ip_hash, &kv, &val))
+    return 0;
+
+  clib_bihash_add_del_24_8 (&table->ip_hash, &kv, 1 /* is_add */ );
+
+  table->meta[af].dst_address_length_refcounts[pfx->len]++;
+  table->meta[af].non_empty_dst_address_length_bitmap =
+    clib_bitmap_set (table->meta[af].non_empty_dst_address_length_bitmap,
+                    128 - pfx->len, 1);
+  cnat_compute_prefix_lengths_in_search_order (table, af);
+  return 0;
+}
+
+/* Remove a prefix from the source-NAT exclusion table.
+ * Returns 1 when the prefix is not in the table, 0 once it is removed.
+ * When the last prefix of a given length goes away, that length is
+ * dropped from the lookup search order. */
+int
+cnat_del_snat_prefix (ip_prefix_t * pfx)
+{
+  cnat_snat_pfx_table_t *table = &cnat_main.snat_pfx_table;
+  ip6_address_t *mask = &table->ip_masks[pfx->len];
+  u64 af = ip_prefix_version (pfx);
+  clib_bihash_kv_24_8_t kv, val;
+
+  if (AF_IP4 != af)
+    {
+      kv.key[0] = ip_prefix_v6 (pfx).as_u64[0] & mask->as_u64[0];
+      kv.key[1] = ip_prefix_v6 (pfx).as_u64[1] & mask->as_u64[1];
+    }
+  else
+    {
+      kv.key[0] = (u64) ip_prefix_v4 (pfx).as_u32 & mask->as_u64[0];
+      kv.key[1] = 0;
+    }
+  kv.key[2] = ((u64) af << 32) | pfx->len;
+
+  /* nothing to delete */
+  if (clib_bihash_search_24_8 (&table->ip_hash, &kv, &val))
+    return 1;
+
+  clib_bihash_add_del_24_8 (&table->ip_hash, &kv, 0 /* is_add */ );
+
+  /* refcount accounting */
+  ASSERT (table->meta[af].dst_address_length_refcounts[pfx->len] > 0);
+  table->meta[af].dst_address_length_refcounts[pfx->len]--;
+  if (0 == table->meta[af].dst_address_length_refcounts[pfx->len])
+    {
+      table->meta[af].non_empty_dst_address_length_bitmap =
+       clib_bitmap_set (table->meta[af].non_empty_dst_address_length_bitmap,
+                        128 - pfx->len, 0);
+      cnat_compute_prefix_lengths_in_search_order (table, af);
+    }
+  return 0;
+}
+
+/* Format one entry of the snat prefix hash as "address/length". The
+ * address family and length are unpacked from key[2]; the address starts
+ * at key[0]. The verbose argument is consumed but unused. */
+u8 *
+format_cnat_snat_prefix (u8 * s, va_list * args)
+{
+  clib_bihash_kv_24_8_t *kv = va_arg (*args, clib_bihash_kv_24_8_t *);
+  CLIB_UNUSED (int verbose) = va_arg (*args, int);
+  u32 len = kv->key[2] & 0xffffffff;
+  u32 af = kv->key[2] >> 32;
+
+  return format (s, "%U/%d",
+                AF_IP4 == af ? format_ip4_address : format_ip6_address,
+                &kv->key[0], len);
+}
+
+/* CLI handler: store each address given on the command line as the
+ * source-NAT address of its own address family. */
+static clib_error_t *
+cnat_set_snat (vlib_main_t * vm,
+              unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  ip_address_t addr;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!unformat (input, "%U", unformat_ip_address, &addr))
+       return (clib_error_return (0, "unknown input '%U'",
+                                  format_unformat_error, input));
+
+      if (AF_IP4 == ip_addr_version (&addr))
+       {
+         clib_memcpy (&cnat_main.snat_ip4, &ip_addr_v4 (&addr),
+                      sizeof (ip4_address_t));
+       }
+      else
+       {
+         clib_memcpy (&cnat_main.snat_ip6, &ip_addr_v6 (&addr),
+                      sizeof (ip6_address_t));
+       }
+    }
+
+  return (NULL);
+}
+
+/* *INDENT-OFF* */
+/* "cnat snat with <ip>": set the source-NAT address */
+VLIB_CLI_COMMAND (cnat_set_snat_command, static) =
+{
+  .path = "cnat snat with",
+  .function = cnat_set_snat,
+  .short_help = "cnat snat with [ip]",
+};
+/* *INDENT-ON* */
+
+/* CLI handler: add a prefix to the source-NAT exclusion table, or remove
+ * it when "del" is given. A prefix is mandatory. */
+static clib_error_t *
+cnat_snat_exclude (vlib_main_t * vm,
+                  unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  ip_prefix_t pfx;
+  u8 is_add = 1;
+  u8 have_pfx = 0;
+  int rv;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "%U", unformat_ip_prefix, &pfx))
+       have_pfx = 1;
+      else if (unformat (input, "del"))
+       is_add = 0;
+      else
+       return (clib_error_return (0, "unknown input '%U'",
+                                  format_unformat_error, input));
+    }
+
+  /* without this check an uninitialised prefix would be added/deleted */
+  if (!have_pfx)
+    return (clib_error_return (0, "missing prefix"));
+
+  if (is_add)
+    rv = cnat_add_snat_prefix (&pfx);
+  else
+    rv = cnat_del_snat_prefix (&pfx);
+
+  if (rv)
+    {
+      return (clib_error_return (0, "error %d", rv));
+    }
+
+  return (NULL);
+}
+
+/* *INDENT-OFF* */
+/* "cnat snat exclude <prefix> [del]": manage the snat exclusion table */
+VLIB_CLI_COMMAND (cnat_snat_exclude_command, static) =
+{
+  .path = "cnat snat exclude",
+  /* the handler parses an IP prefix and an optional "del" keyword */
+  .short_help = "cnat snat exclude [ip prefix] [del]",
+  .function = cnat_snat_exclude,
+};
+/* *INDENT-ON* */
+
+/* CLI handler: print the configured source-NAT addresses followed by a
+ * verbose dump of the prefix hash. */
+static clib_error_t *
+cnat_show_snat (vlib_main_t * vm,
+               unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vlib_cli_output (vm, "Source NAT\nip4: %U\nip6: %U\n",
+                  format_ip4_address, &cnat_main.snat_ip4,
+                  format_ip6_address, &cnat_main.snat_ip6);
+
+  vlib_cli_output (vm, "Prefixes:\n%U\n",
+                  format_bihash_24_8, &cnat_main.snat_pfx_table.ip_hash, 1);
+
+  return (NULL);
+}
+
+/* *INDENT-OFF* */
+/* "show cnat snat": display the source-NAT configuration */
+VLIB_CLI_COMMAND (cnat_show_snat_command, static) =
+{
+  .path = "show cnat snat",
+  .function = cnat_show_snat,
+  .short_help = "show cnat snat",
+};
+/* *INDENT-ON* */
+
+/* Init function: fill in the mask for every prefix length in the mask
+ * table, then create the prefix hash and install its entry formatter.
+ * Only the set bits of each mask are written here. */
+static clib_error_t *
+cnat_snat_init (vlib_main_t * vm)
+{
+  cnat_main_t *cm = &cnat_main;
+  cnat_snat_pfx_table_t *table = &cm->snat_pfx_table;
+  int len;
+
+  for (len = 0; len < ARRAY_LEN (table->ip_masks); len++)
+    {
+      u32 n_full_words = len / 32;
+      u32 n_tail_bits = len % 32;
+      u32 w;
+
+      /* whole 32-bit words covered by the prefix */
+      for (w = 0; w < n_full_words; w++)
+       table->ip_masks[len].as_u32[w] = ~0;
+
+      /* partially covered word, stored byte-swapped */
+      if (n_tail_bits)
+       table->ip_masks[len].as_u32[n_full_words] =
+         clib_host_to_net_u32 (pow2_mask (n_tail_bits) <<
+                               (32 - n_tail_bits));
+    }
+
+  clib_bihash_init_24_8 (&table->ip_hash, "snat prefixes",
+                        cm->snat_hash_buckets, cm->snat_hash_memory);
+  clib_bihash_set_kvp_format_fn_24_8 (&table->ip_hash,
+                                     format_cnat_snat_prefix);
+
+  return (NULL);
+}
+
+VLIB_INIT_FUNCTION (cnat_snat_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/cnat/cnat_snat.h b/src/plugins/cnat/cnat_snat.h
new file mode 100644 (file)
index 0000000..97bad8b
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CNAT_SNAT_H__
+#define __CNAT_SNAT_H__
+
+#include <cnat/cnat_types.h>
+
+/**
+ * Look up an address in the source-NAT prefix table.
+ *
+ * @param addr the address to look up
+ * @param af   address family of addr; selects the per-family metadata
+ *
+ * @return 0 if addr matches any of the listed prefixes, -1 otherwise
+ */
+always_inline int
+cnat_search_snat_prefix (ip46_address_t * addr, ip_address_family_t af)
+{
+  /* Returns 0 if addr matches any of the listed prefixes */
+  cnat_snat_pfx_table_t *table = &cnat_main.snat_pfx_table;
+  clib_bihash_kv_24_8_t kv, val;
+  int i, n_p, rv;
+  n_p = vec_len (table->meta[af].prefix_lengths_in_search_order);
+  /* key[0..1] carry the address; IPv4 only fills the low 32 bits of key[0] */
+  if (AF_IP4 == af)
+    {
+      kv.key[0] = addr->ip4.as_u32;
+      kv.key[1] = 0;
+    }
+  else
+    {
+      kv.key[0] = addr->as_u64[0];
+      kv.key[1] = addr->as_u64[1];
+    }
+
+  /*
+   * Try every prefix length recorded for this family, in the table's
+   * search order, and stop at the first hit.
+   */
+  i = 0;
+  for (; i < n_p; i++)
+    {
+      int dst_address_length =
+       table->meta[af].prefix_lengths_in_search_order[i];
+      ip6_address_t *mask = &table->ip_masks[dst_address_length];
+
+      ASSERT (dst_address_length >= 0 && dst_address_length <= 128);
+      /*
+       * The key is masked in place, so masks accumulate across iterations.
+       * NOTE(review): this presumably relies on the lengths being ordered
+       * longest first, so that each mask is a subset of the previous one;
+       * confirm against the code that maintains
+       * prefix_lengths_in_search_order.
+       */
+      kv.key[0] &= mask->as_u64[0];
+      kv.key[1] &= mask->as_u64[1];
+      /* key[2] encodes (address family, prefix length) */
+      kv.key[2] = ((u64) af << 32) | dst_address_length;
+      rv = clib_bihash_search_inline_2_24_8 (&table->ip_hash, &kv, &val);
+      if (rv == 0)
+       return 0;
+    }
+  return -1;
+}
+
+extern int cnat_add_snat_prefix (ip_prefix_t * pfx);
+extern int cnat_del_snat_prefix (ip_prefix_t * pfx);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
+#endif
diff --git a/src/plugins/cnat/cnat_translation.c b/src/plugins/cnat/cnat_translation.c
new file mode 100644 (file)
index 0000000..f680a16
--- /dev/null
@@ -0,0 +1,432 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_source.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry_track.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/drop_dpo.h>
+
+#include <cnat/cnat_translation.h>
+#include <cnat/cnat_session.h>
+#include <cnat/cnat_client.h>
+
+/* Pool of all translations; a translation's pool index is its ID */
+cnat_translation_t *cnat_translation_pool;
+/* Maps a (protocol, port, client index) key to a translation pool index */
+clib_bihash_8_8_t cnat_translation_db;
+
+/* FIB node type for translations, registered in cnat_translation_init */
+static fib_node_type_t cnat_translation_fib_node_type;
+
+/* Per-translation combined counters, indexed by translation pool index */
+vlib_combined_counter_main_t cnat_translation_counters = {
+  .name = "cnat-translation",
+  .stat_segment_name = "/net/cnat-translation",
+};
+
+/**
+ * Stop tracking the FIB entry of a path: removes the sibling that
+ * cnat_tracker_track stored in trk. trk->ct_dpo is not touched here.
+ */
+static void
+cnat_tracker_release (cnat_ep_trk_t * trk)
+{
+  fib_entry_untrack (trk->ct_fei, trk->ct_sibling);
+}
+
+/**
+ * Start tracking the destination endpoint of a path in the FIB.
+ *
+ * @param cti  index of the translation owning the path; registered as the
+ *             child index with the tracked FIB entry
+ * @param path the (source, destination) endpoint tuple to track
+ * @param trk  tracker to fill in: endpoints, FIB entry index, sibling
+ *             index and the forwarding DPO contributed by the entry
+ */
+static void
+cnat_tracker_track (index_t cti,
+                   const cnat_endpoint_tuple_t * path, cnat_ep_trk_t * trk)
+{
+  fib_prefix_t pfx;
+
+  /* the tracked prefix is built from the destination address */
+  ip_address_to_fib_prefix (&path->dst_ep.ce_ip, &pfx);
+
+  /* the TX slot holds the destination endpoint, the RX slot the source */
+  clib_memcpy (&trk->ct_ep[VLIB_TX], &path->dst_ep,
+              sizeof (trk->ct_ep[VLIB_TX]));
+  clib_memcpy (&trk->ct_ep[VLIB_RX], &path->src_ep,
+              sizeof (trk->ct_ep[VLIB_RX]));
+
+  trk->ct_fei = fib_entry_track (CNAT_FIB_TABLE,
+                                &pfx,
+                                cnat_translation_fib_node_type,
+                                cti, &trk->ct_sibling);
+
+  /* fetch the entry's forwarding into trk->ct_dpo for later stacking */
+  fib_entry_contribute_forwarding (trk->ct_fei,
+                                  fib_forw_chain_type_from_fib_proto
+                                  (pfx.fp_proto), &trk->ct_dpo);
+}
+
+/**
+ * Insert a translation into the lookup bihash.
+ *
+ * The 64-bit key packs the protocol and port into the upper 32 bits
+ * ((proto << 16) | port) and the client index into the lower 32 bits;
+ * the stored value is the translation index.
+ */
+void
+cnat_add_translation_to_db (index_t cci, u16 port, ip_protocol_t proto,
+                           index_t cti)
+{
+  clib_bihash_kv_8_8_t kv;
+  u64 proto_port = (proto << 16) | port;
+
+  kv.key = (proto_port << 32) | (u32) cci;
+  kv.value = cti;
+
+  clib_bihash_add_del_8_8 (&cnat_translation_db, &kv, 1 /* is_add */ );
+}
+
+/**
+ * Remove a translation from the lookup bihash.
+ *
+ * The key is built exactly as in cnat_add_translation_to_db:
+ * ((proto << 16) | port) in the upper 32 bits, client index in the lower.
+ * Only the key is filled in; the value is not set for a delete.
+ */
+void
+cnat_remove_translation_from_db (index_t cci, u16 port, ip_protocol_t proto)
+{
+  clib_bihash_kv_8_8_t kv;
+  u64 proto_port = (proto << 16) | port;
+
+  kv.key = (proto_port << 32) | (u32) cci;
+
+  clib_bihash_add_del_8_8 (&cnat_translation_db, &kv, 0 /* is_add */ );
+}
+
+/**
+ * (Re)build the forwarding of a translation.
+ *
+ * Creates a load-balance object with one bucket per tracked path, fills
+ * each bucket with that path's contributed DPO, stores the load-balance
+ * in ct->ct_lb and stacks it via dpo_stack with cnat_client_dpo as the
+ * child type.
+ */
+static void
+cnat_translation_stack (cnat_translation_t * ct)
+{
+  dpo_proto_t dproto;
+  index_t lbi;
+  u32 n_paths, i;
+
+  dproto =
+    fib_proto_to_dpo (ip_address_family_to_fib_proto
+                     (ct->ct_vip.ce_ip.version));
+  n_paths = vec_len (ct->ct_paths);
+
+  lbi = load_balance_create (n_paths, dproto, IP_FLOW_HASH_DEFAULT);
+
+  /* bucket i forwards via path i */
+  for (i = 0; i < n_paths; i++)
+    load_balance_set_bucket (lbi, i, &ct->ct_paths[i].ct_dpo);
+
+  dpo_set (&ct->ct_lb, DPO_LOAD_BALANCE, dproto, lbi);
+  dpo_stack (cnat_client_dpo, dproto, &ct->ct_lb, &ct->ct_lb);
+}
+
+/**
+ * Delete a translation.
+ *
+ * @param id the translation's pool index
+ *
+ * @return 0 on success, VNET_API_ERROR_NO_SUCH_ENTRY if id is not an
+ *         allocated pool index
+ */
+int
+cnat_translation_delete (u32 id)
+{
+  cnat_translation_t *ct;
+  cnat_ep_trk_t *trk;
+
+  if (pool_is_free_index (cnat_translation_pool, id))
+    return (VNET_API_ERROR_NO_SUCH_ENTRY);
+
+  ct = pool_elt_at_index (cnat_translation_pool, id);
+
+  /* drop the load-balance, then stop tracking every path */
+  dpo_reset (&ct->ct_lb);
+
+  vec_foreach (trk, ct->ct_paths) cnat_tracker_release (trk);
+
+  cnat_remove_translation_from_db (ct->ct_cci, ct->ct_vip.ce_port,
+                                  ct->ct_proto);
+  cnat_client_translation_deleted (ct->ct_cci);
+  /*
+   * NOTE(review): the ct_paths vector is not freed before the element is
+   * returned to the pool - confirm whether that is deliberate (e.g. for
+   * copies of the translation kept elsewhere) or a leak.
+   */
+  pool_put (cnat_translation_pool, ct);
+
+  return (0);
+}
+
+/**
+ * Create a translation, or update the paths of an existing one.
+ *
+ * @param vip   the virtual endpoint (address and port) being translated
+ * @param proto the IP protocol of the translation
+ * @param paths vector of endpoint tuples to use as back-ends
+ * @param flags passed to cnat_client_add and stored in the translation
+ *
+ * @return the pool index of the translation
+ */
+u32
+cnat_translation_update (const cnat_endpoint_t * vip,
+                        ip_protocol_t proto,
+                        const cnat_endpoint_tuple_t * paths, u8 flags)
+{
+  const cnat_endpoint_tuple_t *path;
+  const cnat_client_t *cc;
+  cnat_translation_t *ct;
+  cnat_ep_trk_t *trk;
+  index_t cci;
+
+  /* do we know of this ep's vip */
+  cci = cnat_client_add (&vip->ce_ip, flags);
+  cc = cnat_client_get (cci);
+
+  /*
+   * NOTE(review): the lookup is keyed on cc->parent_cci while the insert
+   * below is keyed on cci - confirm the two are expected to be equal here.
+   */
+  ct = cnat_find_translation (cc->parent_cci, vip->ce_port, proto);
+
+  if (NULL == ct)
+    {
+      /* first time this (vip, port, proto) is seen: allocate and register */
+      pool_get_zero (cnat_translation_pool, ct);
+
+      clib_memcpy (&ct->ct_vip, vip, sizeof (*vip));
+      ct->ct_proto = proto;
+      ct->ct_cci = cci;
+      ct->index = ct - cnat_translation_pool;
+
+      cnat_add_translation_to_db (cci, ct->ct_vip.ce_port, ct->ct_proto,
+                                 ct->index);
+      cnat_client_translation_added (cci);
+
+      vlib_validate_combined_counter (&cnat_translation_counters, ct->index);
+      vlib_zero_combined_counter (&cnat_translation_counters, ct->index);
+    }
+  ct->flags = flags;
+
+  /* replace the whole path set: release the old trackers ... */
+  vec_foreach (trk, ct->ct_paths)
+  {
+    cnat_tracker_release (trk);
+  }
+
+  vec_reset_length (ct->ct_paths);
+
+  /* ... and track each of the new paths */
+  vec_foreach (path, paths)
+  {
+    vec_add2 (ct->ct_paths, trk, 1);
+
+    cnat_tracker_track (ct->index, path, trk);
+  }
+
+  /* rebuild the load-balance over the new paths */
+  cnat_translation_stack (ct);
+
+  return (ct->index);
+}
+
+/**
+ * Visit every allocated translation.
+ *
+ * @param cb  invoked with each translation's pool index and ctx; the walk
+ *            stops as soon as it returns 0
+ * @param ctx opaque pointer handed to cb
+ */
+void
+cnat_translation_walk (cnat_translation_walk_cb_t cb, void *ctx)
+{
+  u32 api;
+
+  /* *INDENT-OFF* */
+  pool_foreach_index(api, cnat_translation_pool,
+  ({
+    if (!cb(api, ctx))
+      break;
+  }));
+  /* *INDENT-ON* */
+}
+
+/**
+ * Format a path tracker.
+ *
+ * va_args: cnat_ep_trk_t *, u32 indent.
+ * Prints "<rx endpoint>-><tx endpoint>", then the FIB entry index and the
+ * contributed DPO, each on its own line indented by indent spaces.
+ */
+static u8 *
+format_cnat_ep_trk (u8 * s, va_list * args)
+{
+  cnat_ep_trk_t *ck = va_arg (*args, cnat_ep_trk_t *);
+  u32 indent = va_arg (*args, u32);
+
+  s = format (s, "%U->%U", format_cnat_endpoint, &ck->ct_ep[VLIB_RX],
+             format_cnat_endpoint, &ck->ct_ep[VLIB_TX]);
+  s = format (s, "\n%Ufib-entry:%d", format_white_space, indent, ck->ct_fei);
+  s = format (s, "\n%U%U",
+             format_white_space, indent, format_dpo_id, &ck->ct_dpo, 6);
+
+  return (s);
+}
+
+/**
+ * Format a translation.
+ *
+ * va_args: cnat_translation_t *.
+ * Prints the index, the VIP endpoint and protocol, one block per tracked
+ * path and, when the load-balance index is still allocated, the
+ * load-balance the translation forwards through.
+ */
+u8 *
+format_cnat_translation (u8 * s, va_list * args)
+{
+  cnat_translation_t *ct = va_arg (*args, cnat_translation_t *);
+  cnat_ep_trk_t *ck;
+
+  s = format (s, "[%d] ", ct->index);
+  s = format (s, "%U %U", format_cnat_endpoint, &ct->ct_vip,
+             format_ip_protocol, ct->ct_proto);
+
+  vec_foreach (ck, ct->ct_paths)
+    s = format (s, "\n%U", format_cnat_ep_trk, ck, 2);
+
+  /* If printing a trace, the LB object might be deleted */
+  if (!pool_is_free_index (load_balance_pool, ct->ct_lb.dpoi_index))
+    {
+      s = format (s, "\n via:");
+      s = format (s, "\n%U%U",
+                 format_white_space, 2, format_dpo_id, &ct->ct_lb, 2);
+    }
+
+  return (s);
+}
+
+/**
+ * CLI handler for "show cnat translation".
+ *
+ * With no argument every translation in the pool is printed. With a
+ * numeric argument only the translation at that pool index is printed;
+ * an index that is not allocated is reported as invalid.
+ */
+static clib_error_t *
+cnat_translation_show (vlib_main_t * vm,
+                      unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  index_t cti;
+  cnat_translation_t *ct;
+
+  cti = INDEX_INVALID;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "%d", &cti))
+       ;
+      else
+       return (clib_error_return (0, "unknown input '%U'",
+                                  format_unformat_error, input));
+    }
+
+  if (INDEX_INVALID == cti)
+    {
+      /* *INDENT-OFF* */
+      pool_foreach_index(cti, cnat_translation_pool,
+      ({
+       ct = pool_elt_at_index (cnat_translation_pool, cti);
+        vlib_cli_output(vm, "%U", format_cnat_translation, ct);
+      }));
+      /* *INDENT-ON* */
+    }
+  else if (pool_is_free_index (cnat_translation_pool, cti))
+    {
+      /* only an index that is not in use is an error */
+      vlib_cli_output (vm, "Invalid policy ID:%d", cti);
+    }
+  else
+    {
+      ct = pool_elt_at_index (cnat_translation_pool, cti);
+      vlib_cli_output (vm, "%U", format_cnat_translation, ct);
+    }
+
+  return (NULL);
+}
+
+/**
+ * Delete every translation.
+ *
+ * The indices are collected into a temporary vector first so that the
+ * pool is not modified while it is being iterated.
+ *
+ * @return always 0
+ */
+int
+cnat_translation_purge (void)
+{
+  /* purge all the translations */
+  index_t tri, *trp, *trs = NULL;
+
+  /* *INDENT-OFF* */
+  pool_foreach_index(tri, cnat_translation_pool,
+  ({
+    vec_add1(trs, tri);
+  }));
+  /* *INDENT-ON* */
+
+  vec_foreach (trp, trs) cnat_translation_delete (*trp);
+
+  ASSERT (0 == pool_elts (cnat_translation_pool));
+
+  vec_free (trs);
+
+  return (0);
+}
+
+/* *INDENT-OFF* */
+/*
+ * Registers "show cnat translation". The handler parses an optional
+ * decimal translation index, which is what the help text advertises.
+ */
+VLIB_CLI_COMMAND (cnat_translation_show_cmd_node, static) = {
+  .path = "show cnat translation",
+  .function = cnat_translation_show,
+  .short_help = "show cnat translation [<index>]",
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/**
+ * fib_node_vft_t 'get' callback: return the FIB node embedded in the
+ * translation at the given pool index.
+ */
+static fib_node_t *
+cnat_translation_get_node (fib_node_index_t index)
+{
+  cnat_translation_t *ct = cnat_translation_get (index);
+  return (&(ct->ct_node));
+}
+
+/**
+ * Recover the translation that embeds the given FIB node by subtracting
+ * the offset of ct_node within cnat_translation_t.
+ */
+static cnat_translation_t *
+cnat_translation_get_from_node (fib_node_t * node)
+{
+  return ((cnat_translation_t *) (((char *) node) -
+                                 STRUCT_OFFSET_OF (cnat_translation_t,
+                                                   ct_node)));
+}
+
+/**
+ * fib_node_vft_t 'last lock' callback. The body is empty: no action is
+ * taken when it is invoked.
+ */
+static void
+cnat_translation_last_lock_gone (fib_node_t * node)
+{
+ /**/}
+
+/*
+ * A back walk has reached this translation
+ */
+static fib_node_back_walk_rc_t
+cnat_translation_back_walk_notify (fib_node_t * node,
+                                  fib_node_back_walk_ctx_t * ctx)
+{
+  /*
+   * rebuild the translation's load-balance over its tracked paths
+   */
+  cnat_translation_t *ct = cnat_translation_get_from_node (node);
+
+  cnat_translation_stack (ct);
+
+  return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/*
+ * The translation's FIB graph node virtual function table, registered
+ * with the FIB in cnat_translation_init
+ */
+static const fib_node_vft_t cnat_translation_vft = {
+  .fnv_get = cnat_translation_get_node,
+  .fnv_last_lock = cnat_translation_last_lock_gone,
+  .fnv_back_walk = cnat_translation_back_walk_notify,
+};
+
+/**
+ * CLI handler for "cnat translation".
+ *
+ * "del <index>" deletes the translation at that index. Otherwise the
+ * translation for the given vip/real endpoint and protocol (TCP by
+ * default) is created or updated with the paths given by "to" clauses.
+ *
+ * The paths are kept in a plain vector, which is what
+ * cnat_translation_update iterates over, and the vector is released on
+ * every exit path. An add without a vip/real endpoint is rejected rather
+ * than acting on an uninitialised endpoint.
+ */
+static clib_error_t *
+cnat_translation_cli_add_del (vlib_main_t * vm,
+                             unformat_input_t * input,
+                             vlib_cli_command_t * cmd)
+{
+  u32 del_index = INDEX_INVALID;
+  ip_protocol_t proto = IP_PROTOCOL_TCP;
+  cnat_endpoint_t vip;
+  u8 flags = CNAT_FLAG_EXCLUSIVE;
+  cnat_endpoint_tuple_t tmp, *paths = NULL, *path;
+  clib_error_t *error = NULL;
+  u8 vip_set = 0;
+
+  clib_memset (&vip, 0, sizeof (vip));
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "add"))
+       del_index = INDEX_INVALID;
+      else if (unformat (input, "del %d", &del_index))
+       ;
+      else if (unformat (input, "proto %U", unformat_ip_protocol, &proto))
+       ;
+      else if (unformat (input, "vip %U", unformat_cnat_ep, &vip))
+       {
+         flags = CNAT_FLAG_EXCLUSIVE;
+         vip_set = 1;
+       }
+      else if (unformat (input, "real %U", unformat_cnat_ep, &vip))
+       {
+         flags = 0;
+         vip_set = 1;
+       }
+      else if (unformat (input, "to %U", unformat_cnat_ep_tuple, &tmp))
+       {
+         vec_add2 (paths, path, 1);
+         clib_memcpy (path, &tmp, sizeof (cnat_endpoint_tuple_t));
+       }
+      else
+       {
+         error = clib_error_return (0, "unknown input '%U'",
+                                    format_unformat_error, input);
+         goto done;
+       }
+    }
+
+  if (INDEX_INVALID != del_index)
+    cnat_translation_delete (del_index);
+  else if (!vip_set)
+    error = clib_error_return (0, "a vip or real endpoint is required");
+  else
+    cnat_translation_update (&vip, proto, paths, flags);
+
+done:
+  vec_free (paths);
+  return (error);
+}
+
+/* *INDENT-OFF* */
+/* Registers "cnat translation"; cnat_translation_cli_add_del handles it */
+VLIB_CLI_COMMAND (cnat_translation_cli_add_del_command, static) =
+{
+  .path = "cnat translation",
+  .short_help = "cnat translation [add|del] proto [TCP|UDP] [vip|real] [ip] [port] [to [ip] [port]->[ip] [port]]",
+  .function = cnat_translation_cli_add_del,
+};
+/* *INDENT-ON* */
+
+/**
+ * Init function for translations: registers the translation FIB node
+ * type and creates the translation lookup bihash with the bucket count
+ * and memory size held in cnat_main.
+ */
+static clib_error_t *
+cnat_translation_init (vlib_main_t * vm)
+{
+  cnat_main_t *cm = &cnat_main;
+  cnat_translation_fib_node_type =
+    fib_node_register_new_type (&cnat_translation_vft);
+
+  clib_bihash_init_8_8 (&cnat_translation_db, "CNat translation DB",
+                       cm->translation_hash_buckets,
+                       cm->translation_hash_memory);
+
+  return (NULL);
+}
+
+VLIB_INIT_FUNCTION (cnat_translation_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/cnat/cnat_translation.h b/src/plugins/cnat/cnat_translation.h
new file mode 100644 (file)
index 0000000..748487a
--- /dev/null
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CNAT_TRANSLATION_H__
+#define __CNAT_TRANSLATION_H__
+
+#include <cnat/cnat_types.h>
+#include <vnet/ip/ip_types.h>
+#include <vppinfra/bihash_8_8.h>
+
+/**
+ * Counters for each translation
+ */
+extern vlib_combined_counter_main_t cnat_translation_counters;
+
+/**
+ * Data used to track an EP in the FIB
+ */
+typedef struct cnat_ep_trk_t_
+{
+  /**
+   * The EP being tracked, one endpoint per direction (VLIB_RX / VLIB_TX)
+   */
+  cnat_endpoint_t ct_ep[VLIB_N_DIR];
+
+  /**
+   * The FIB entry for the EP
+   */
+  fib_node_index_t ct_fei;
+
+  /**
+   * The sibling on the entry's child list
+   */
+  u32 ct_sibling;
+
+  /**
+   * The forwarding contributed by the entry
+   */
+  dpo_id_t ct_dpo;
+} cnat_ep_trk_t;
+
+/**
+ * Bit flags for a translation.
+ * NOTE(review): no user of CNAT_TRANSLATION_FLAG_ALLOCATE_PORT is visible
+ * in this part of the patch; presumably it requests source port
+ * allocation - confirm against the data-plane nodes.
+ */
+typedef enum cnat_translation_flag_t_
+{
+  CNAT_TRANSLATION_FLAG_ALLOCATE_PORT = (1 << 0),
+} cnat_translation_flag_t;
+
+/**
+ * A Translation represents the translation of a virtual endpoint (VIP)
+ * to one of a set of real server addresses
+ */
+typedef struct cnat_translation_t_
+{
+  /**
+   * Linkage into the FIB graph
+   */
+  fib_node_t ct_node;
+
+  /**
+   * The LB used to forward to the backends
+   */
+  dpo_id_t ct_lb;
+
+  /**
+   * The Virtual end point
+   */
+  cnat_endpoint_t ct_vip;
+
+  /**
+   * The vector of tracked back-ends
+   */
+  cnat_ep_trk_t *ct_paths;
+
+  /**
+   * The ip protocol for the translation
+   */
+  ip_protocol_t ct_proto;
+
+  /**
+   * The client object this translation belongs on
+   */
+  index_t ct_cci;
+
+  /**
+   * Own index (if copied for trace)
+   */
+  index_t index;
+
+  /**
+   * Translation flags
+   */
+  u8 flags;
+} cnat_translation_t;
+
+extern cnat_translation_t *cnat_translation_pool;
+
+extern u8 *format_cnat_translation (u8 * s, va_list * args);
+
+/**
+ * create or update a translation
+ *
+ * @param vip The Virtual Endpoint
+ * @param ip_proto The ip protocol to translate
+ * @param backends the backends to choose from
+ * @param flags the flags to apply to the translation
+ *
+ * @return the ID of the translation. used to delete and gather stats
+ */
+extern u32 cnat_translation_update (const cnat_endpoint_t * vip,
+                                   ip_protocol_t ip_proto,
+                                   const cnat_endpoint_tuple_t *
+                                   backends, u8 flags);
+
+/**
+ * Add a translation to the bihash
+ *
+ * @param cci the ID of the parent client
+ * @param port the translation port
+ * @param proto the translation proto
+ * @param cti the translation index to be used as value
+ */
+extern void cnat_add_translation_to_db (index_t cci, u16 port,
+                                       ip_protocol_t proto, index_t cti);
+
+/**
+ * Remove a translation from the bihash
+ *
+ * @param cci the ID of the parent client
+ * @param port the translation port
+ * @param proto the translation proto
+ */
+extern void cnat_remove_translation_from_db (index_t cci, u16 port,
+                                            ip_protocol_t proto);
+
+/**
+ * Delete a translation
+ *
+ * @param id the ID as returned from the create
+ */
+extern int cnat_translation_delete (u32 id);
+
+/**
+ * Callback function invoked during a walk of all translations
+ */
+typedef walk_rc_t (*cnat_translation_walk_cb_t) (index_t index, void *ctx);
+
+/**
+ * Walk/visit each of the translations
+ */
+extern void cnat_translation_walk (cnat_translation_walk_cb_t cb, void *ctx);
+
+/**
+ * Purge all the translations
+ */
+extern int cnat_translation_purge (void);
+
+/*
+ * Data plane functions
+ */
+extern clib_bihash_8_8_t cnat_translation_db;
+
+/**
+ * Get the translation at pool index cti. The index must be allocated;
+ * no validity check is made here.
+ */
+static_always_inline cnat_translation_t *
+cnat_translation_get (index_t cti)
+{
+  return (pool_elt_at_index (cnat_translation_pool, cti));
+}
+
+/**
+ * Find a translation in the lookup bihash.
+ *
+ * @param cti   lower 32 bits of the key. NOTE(review): despite its name
+ *              this occupies the slot that cnat_add_translation_to_db
+ *              fills with the client index (cci) - confirm callers pass
+ *              a client index.
+ * @param port  the translation port
+ * @param proto the translation protocol
+ *
+ * @return the translation, or NULL if there is no matching entry
+ */
+static_always_inline cnat_translation_t *
+cnat_find_translation (index_t cti, u16 port, ip_protocol_t proto)
+{
+  clib_bihash_kv_8_8_t bkey, bvalue;
+  u64 key;
+  int rv;
+
+  /* upper 32 bits: (proto << 16) | port; lower 32 bits: cti */
+  key = (proto << 16) | port;
+  key = key << 32 | (u32) cti;
+
+  bkey.key = key;
+  rv = clib_bihash_search_inline_2_8_8 (&cnat_translation_db, &bkey, &bvalue);
+  if (!rv)
+    return (pool_elt_at_index (cnat_translation_pool, bvalue.value));
+
+  return (NULL);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
+#endif
diff --git a/src/plugins/cnat/cnat_types.c b/src/plugins/cnat/cnat_types.c
new file mode 100644 (file)
index 0000000..1f2287e
--- /dev/null
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cnat/cnat_types.h>
+
+/* Plugin-wide configuration and state */
+cnat_main_t cnat_main;
+/* FIB source allocated for cnat in cnat_types_init */
+fib_source_t cnat_fib_source;
+/* Pool of session timestamps, guarded by cnat_main.ts_lock */
+cnat_timestamp_t *cnat_timestamps;
+/* Throttle initialised in cnat_types_init */
+throttle_t cnat_throttle;
+
+/* Error strings, one per cnat_error() entry of cnat_error.def */
+char *cnat_error_strings[] = {
+#define cnat_error(n,s) s,
+#include <cnat/cnat_error.def>
+#undef cnat_error
+};
+
+/**
+ * Unformat an endpoint.
+ *
+ * va_args: cnat_endpoint_t *.
+ * Accepted forms are "<ip> <port>", "<ip>" alone or "<port>" alone. The
+ * endpoint is zeroed first, so whatever is not given stays 0. The port is
+ * cast to u16 without a range check.
+ *
+ * @return 1 if an endpoint was parsed, 0 otherwise
+ */
+uword
+unformat_cnat_ep (unformat_input_t * input, va_list * args)
+{
+  cnat_endpoint_t *a = va_arg (*args, cnat_endpoint_t *);
+  int port = 0;
+
+  clib_memset (a, 0, sizeof (*a));
+  if (unformat (input, "%U %d", unformat_ip_address, &a->ce_ip, &port))
+    ;
+  else if (unformat_user (input, unformat_ip_address, &a->ce_ip))
+    ;
+  else if (unformat (input, "%d", &port))
+    ;
+  else
+    return 0;
+  a->ce_port = (u16) port;
+  return 1;
+}
+
+/**
+ * Unformat an endpoint tuple.
+ *
+ * va_args: cnat_endpoint_tuple_t *.
+ * Accepted forms are "<src>-><dst>", "-><dst>" and "<src>->", where each
+ * side is parsed by unformat_cnat_ep.
+ *
+ * @return 1 if a tuple was parsed, 0 otherwise
+ */
+uword
+unformat_cnat_ep_tuple (unformat_input_t * input, va_list * args)
+{
+  cnat_endpoint_tuple_t *a = va_arg (*args, cnat_endpoint_tuple_t *);
+  if (unformat (input, "%U->%U", unformat_cnat_ep, &a->src_ep,
+               unformat_cnat_ep, &a->dst_ep))
+    ;
+  else if (unformat (input, "->%U", unformat_cnat_ep, &a->dst_ep))
+    ;
+  else if (unformat (input, "%U->", unformat_cnat_ep, &a->src_ep))
+    ;
+  else
+    return 0;
+  return 1;
+}
+
+/**
+ * Format an endpoint as "<ip>;<port>".
+ *
+ * va_args: cnat_endpoint_t *.
+ */
+u8 *
+format_cnat_endpoint (u8 * s, va_list * args)
+{
+  cnat_endpoint_t *cep = va_arg (*args, cnat_endpoint_t *);
+
+  s = format (s, "%U;%d", format_ip_address, &cep->ce_ip, cep->ce_port);
+
+  return (s);
+}
+
+/**
+ * Init function for the shared cnat state: allocates the cnat FIB
+ * source, initialises the timestamp and source-port locks, sizes the
+ * source-port bitmap to cover bit UINT16_MAX and initialises the
+ * throttle with one slot per vlib main.
+ */
+static clib_error_t *
+cnat_types_init (vlib_main_t * vm)
+{
+  vlib_thread_main_t *tm = &vlib_thread_main;
+  u32 n_vlib_mains = tm->n_vlib_mains;
+  cnat_fib_source = fib_source_allocate ("cnat",
+                                        CNAT_FIB_SOURCE_PRIORITY,
+                                        FIB_SOURCE_BH_SIMPLE);
+
+  clib_rwlock_init (&cnat_main.ts_lock);
+  clib_spinlock_init (&cnat_main.src_ports_lock);
+  clib_bitmap_validate (cnat_main.src_ports, UINT16_MAX);
+  throttle_init (&cnat_throttle, n_vlib_mains, 1e-3);
+
+  return (NULL);
+}
+
+/**
+ * Early config function for the "cnat" startup section.
+ *
+ * Loads the compiled-in defaults into cnat_main and then lets the
+ * configuration override the hash sizes, the scanner period and the
+ * session ages. Any unrecognised token is an error.
+ */
+static clib_error_t *
+cnat_config (vlib_main_t * vm, unformat_input_t * input)
+{
+  cnat_main_t *cm = &cnat_main;
+
+  /* defaults, used for every value the configuration does not set */
+  cm->session_hash_memory = CNAT_DEFAULT_SESSION_MEMORY;
+  cm->session_hash_buckets = CNAT_DEFAULT_SESSION_BUCKETS;
+  cm->translation_hash_memory = CNAT_DEFAULT_TRANSLATION_MEMORY;
+  cm->translation_hash_buckets = CNAT_DEFAULT_TRANSLATION_BUCKETS;
+  cm->snat_hash_memory = CNAT_DEFAULT_SNAT_MEMORY;
+  cm->snat_hash_buckets = CNAT_DEFAULT_SNAT_BUCKETS;
+  cm->scanner_timeout = CNAT_DEFAULT_SCANNER_TIMEOUT;
+  cm->session_max_age = CNAT_DEFAULT_SESSION_MAX_AGE;
+  cm->tcp_max_age = CNAT_DEFAULT_TCP_MAX_AGE;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat
+         (input, "session-db-buckets %u", &cm->session_hash_buckets))
+       ;
+      else if (unformat (input, "session-db-memory %U",
+                        unformat_memory_size, &cm->session_hash_memory))
+       ;
+      else if (unformat (input, "translation-db-buckets %u",
+                        &cm->translation_hash_buckets))
+       ;
+      else if (unformat (input, "translation-db-memory %U",
+                        unformat_memory_size, &cm->translation_hash_memory))
+       ;
+      else if (unformat (input, "snat-db-buckets %u", &cm->snat_hash_buckets))
+       ;
+      else if (unformat (input, "snat-db-memory %U",
+                        unformat_memory_size, &cm->snat_hash_memory))
+       ;
+      else if (unformat (input, "session-cleanup-timeout %f",
+                        &cm->scanner_timeout))
+       ;
+      else if (unformat (input, "session-max-age %u", &cm->session_max_age))
+       ;
+      else if (unformat (input, "tcp-max-age %u", &cm->tcp_max_age))
+       ;
+      else
+       return clib_error_return (0, "unknown input '%U'",
+                                 format_unformat_error, input);
+    }
+
+  return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (cnat_config, "cnat");
+VLIB_INIT_FUNCTION (cnat_types_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/cnat/cnat_types.h b/src/plugins/cnat/cnat_types.h
new file mode 100644 (file)
index 0000000..8659aa5
--- /dev/null
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CNAT_TYPES_H__
+#define __CNAT_TYPES_H__
+
+#include <vnet/fib/fib_node.h>
+#include <vnet/fib/fib_source.h>
+#include <vnet/ip/ip_types.h>
+#include <vnet/ip/ip.h>
+
+/* only in the default table for v4 and v6 */
+#define CNAT_FIB_TABLE 0
+
+/* default lifetime of NAT sessions (seconds) */
+#define CNAT_DEFAULT_SESSION_MAX_AGE 30
+/* lifetime of TCP conn NAT sessions after SYNACK (seconds) */
+#define CNAT_DEFAULT_TCP_MAX_AGE 3600
+/* lifetime of TCP conn NAT sessions after RST/FIN (seconds) */
+#define CNAT_DEFAULT_TCP_RST_TIMEOUT 5
+/* default delay between two scans of the session/client tables (seconds) */
+#define CNAT_DEFAULT_SCANNER_TIMEOUT (1.0)
+
+/* default number of buckets of the session/translation/snat bihashes */
+#define CNAT_DEFAULT_SESSION_BUCKETS     1024
+#define CNAT_DEFAULT_TRANSLATION_BUCKETS 1024
+#define CNAT_DEFAULT_SNAT_BUCKETS        1024
+
+/* default memory size (bytes) of the session/translation/snat bihashes */
+#define CNAT_DEFAULT_SESSION_MEMORY      (1 << 20)
+#define CNAT_DEFAULT_TRANSLATION_MEMORY  (256 << 10)
+#define CNAT_DEFAULT_SNAT_MEMORY         (64 << 20)
+
+/* This should be strictly lower than FIB_SOURCE_INTERFACE
+ * from fib_source.h */
+#define CNAT_FIB_SOURCE_PRIORITY  0x02
+
+/* Initial refcnt for timestamps (2 : session & rsession) */
+#define CNAT_TIMESTAMP_INIT_REFCNT 2
+
+/* Port used by cnat_allocate_port when asked for port 0, and the point
+ * from which its search restarts after reaching the top of the range */
+#define MIN_SRC_PORT ((u16) 0xC000)
+
+/* An endpoint: an IP address (v4 or v6) and a port */
+typedef struct cnat_endpoint_t_
+{
+  ip_address_t ce_ip;
+  u16 ce_port;
+} cnat_endpoint_t;
+
+/* A pair of endpoints: destination and source */
+typedef struct cnat_endpoint_tuple_t_
+{
+  cnat_endpoint_t dst_ep;
+  cnat_endpoint_t src_ep;
+} cnat_endpoint_tuple_t;
+
+
+
+/*
+ * Per address family bookkeeping of the prefix lengths present in the
+ * source-NAT prefix table. Only prefix_lengths_in_search_order is read
+ * by the lookup in cnat_snat.h; the refcounts and bitmap are presumably
+ * what it is rebuilt from when prefixes are added or removed - confirm
+ * in cnat_snat.c.
+ */
+typedef struct
+{
+  u32 dst_address_length_refcounts[129];
+  u16 *prefix_lengths_in_search_order;
+  uword *non_empty_dst_address_length_bitmap;
+} cnat_snat_pfx_table_meta_t;
+
+/* Table of the prefixes searched by cnat_search_snat_prefix */
+typedef struct
+{
+  /* Stores (ip family, prefix & mask) */
+  clib_bihash_24_8_t ip_hash;
+  /* family dependent cache */
+  cnat_snat_pfx_table_meta_t meta[2];
+  /* Precomputed ip masks (ip4 & ip6), indexed by prefix length */
+  ip6_address_t ip_masks[129];
+} cnat_snat_pfx_table_t;
+
+/* Plugin-wide configuration and shared state */
+typedef struct cnat_main_
+{
+  /* Memory size of the session bihash */
+  uword session_hash_memory;
+
+  /* Number of buckets of the session bihash */
+  u32 session_hash_buckets;
+
+  /* Memory size of the translation bihash */
+  uword translation_hash_memory;
+
+  /* Number of buckets of the translation bihash */
+  u32 translation_hash_buckets;
+
+  /* Memory size of the source NAT prefix bihash */
+  uword snat_hash_memory;
+
+  /* Number of buckets of the source NAT prefix bihash */
+  u32 snat_hash_buckets;
+
+  /* Timeout after which to clear sessions (in seconds) */
+  u32 session_max_age;
+
+  /* Timeout after which to clear an established TCP
+   * session (in seconds) */
+  u32 tcp_max_age;
+
+  /* delay in seconds between two scans of session/clients tables */
+  f64 scanner_timeout;
+
+  /* Lock for the timestamp pool */
+  clib_rwlock_t ts_lock;
+
+  /* Source ports bitmap for snat */
+  clib_bitmap_t *src_ports;
+
+  /* Lock for src_ports access */
+  clib_spinlock_t src_ports_lock;
+
+  /* Ip4 Address to use for source NATing */
+  ip4_address_t snat_ip4;
+
+  /* Ip6 Address to use for source NATing */
+  ip6_address_t snat_ip6;
+
+  /* Longest prefix Match table for source NATing */
+  cnat_snat_pfx_table_t snat_pfx_table;
+} cnat_main_t;
+
+/* Reference-counted last-seen time and lifetime of a session */
+typedef struct cnat_timestamp_t_
+{
+  /* Last time said session was seen */
+  f64 last_seen;
+  /* expire after N seconds */
+  u16 lifetime;
+  /* Users refcount, initially CNAT_TIMESTAMP_INIT_REFCNT
+   * (session & rsession) */
+  u16 refcnt;
+} cnat_timestamp_t;
+
+/*
+ * Context handed around by the cnat graph nodes.
+ * NOTE(review): the users of this struct are not visible in this part of
+ * the patch; presumably 'now' is the current time, 'thread_index' the
+ * calling worker, 'af' the address family being processed and 'do_trace'
+ * whether tracing is enabled - confirm in cnat_node.h.
+ */
+typedef struct cnat_node_ctx_t_
+{
+  f64 now;
+  u64 seed;
+  u32 thread_index;
+  ip_address_family_t af;
+  u8 do_trace;
+} cnat_node_ctx_t;
+
+extern u8 *format_cnat_endpoint (u8 * s, va_list * args);
+extern uword unformat_cnat_ep_tuple (unformat_input_t * input,
+                                    va_list * args);
+extern uword unformat_cnat_ep (unformat_input_t * input, va_list * args);
+extern cnat_timestamp_t *cnat_timestamps;
+extern fib_source_t cnat_fib_source;
+extern cnat_main_t cnat_main;
+extern throttle_t cnat_throttle;
+
+extern char *cnat_error_strings[];
+
+/*
+ * Error codes, one CNAT_ERROR_<n> per cnat_error() entry of
+ * cnat_error.def, in the same order as cnat_error_strings[].
+ * CNAT_N_ERROR is the number of entries.
+ */
+typedef enum
+{
+#define cnat_error(n,s) CNAT_ERROR_##n,
+#include <cnat/cnat_error.def>
+#undef cnat_error
+  CNAT_N_ERROR,
+} cnat_error_t;
+
+/*
+  Dataplane functions
+*/
+
+/**
+ * Allocate a timestamp from the pool.
+ *
+ * The new timestamp has last_seen = t, lifetime = session_max_age (a u32
+ * stored into a u16 field) and refcnt = CNAT_TIMESTAMP_INIT_REFCNT. The
+ * writer lock is held during the allocation.
+ *
+ * @return the pool index of the new timestamp
+ */
+always_inline u32
+cnat_timestamp_new (f64 t)
+{
+  u32 index;
+  cnat_timestamp_t *ts;
+  clib_rwlock_writer_lock (&cnat_main.ts_lock);
+  pool_get (cnat_timestamps, ts);
+  ts->last_seen = t;
+  ts->lifetime = cnat_main.session_max_age;
+  ts->refcnt = CNAT_TIMESTAMP_INIT_REFCNT;
+  index = ts - cnat_timestamps;
+  clib_rwlock_writer_unlock (&cnat_main.ts_lock);
+  return index;
+}
+
+/**
+ * Take one more reference on the timestamp at the given pool index.
+ *
+ * NOTE(review): refcnt is incremented non-atomically while only the
+ * reader lock is held - confirm that two threads can never do this on
+ * the same timestamp concurrently.
+ */
+always_inline void
+cnat_timestamp_inc_refcnt (u32 index)
+{
+  clib_rwlock_reader_lock (&cnat_main.ts_lock);
+  cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
+  ts->refcnt++;
+  clib_rwlock_reader_unlock (&cnat_main.ts_lock);
+}
+
+/**
+ * Record t as the last time the timestamp at the given pool index was
+ * seen.
+ *
+ * The reader lock is held while the element is written, as in
+ * cnat_timestamp_set_lifetime. (A stray unconditional return used to
+ * make this function a no-op, so last_seen was never refreshed.)
+ */
+always_inline void
+cnat_timestamp_update (u32 index, f64 t)
+{
+  clib_rwlock_reader_lock (&cnat_main.ts_lock);
+  cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
+  ts->last_seen = t;
+  clib_rwlock_reader_unlock (&cnat_main.ts_lock);
+}
+
+/**
+ * Set the lifetime (in seconds) of the timestamp at the given pool index.
+ * The reader lock is held while the element is written.
+ */
+always_inline void
+cnat_timestamp_set_lifetime (u32 index, u16 lifetime)
+{
+  clib_rwlock_reader_lock (&cnat_main.ts_lock);
+  cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
+  ts->lifetime = lifetime;
+  clib_rwlock_reader_unlock (&cnat_main.ts_lock);
+}
+
+/**
+ * Compute the expiry time of the timestamp at the given pool index.
+ *
+ * @return last_seen + lifetime, or -1 if index is INDEX_INVALID
+ */
+always_inline f64
+cnat_timestamp_exp (u32 index)
+{
+  f64 t;
+  if (INDEX_INVALID == index)
+    return -1;
+  clib_rwlock_reader_lock (&cnat_main.ts_lock);
+  cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
+  t = ts->last_seen + (f64) ts->lifetime;
+  clib_rwlock_reader_unlock (&cnat_main.ts_lock);
+  return t;
+}
+
+/**
+ * Drop one reference on the timestamp at the given pool index and return
+ * it to the pool when the count reaches 0. INDEX_INVALID is ignored.
+ * The writer lock is held throughout.
+ */
+always_inline void
+cnat_timestamp_free (u32 index)
+{
+  if (INDEX_INVALID == index)
+    return;
+  clib_rwlock_writer_lock (&cnat_main.ts_lock);
+  cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
+  ts->refcnt--;
+  if (0 == ts->refcnt)
+    pool_put (cnat_timestamps, ts);
+  clib_rwlock_writer_unlock (&cnat_main.ts_lock);
+}
+
+/**
+ * Mark a source port as free in the source-port bitmap.
+ *
+ * NOTE(review): port is used directly as the bit index, while
+ * cnat_allocate_port sets the bit for the host-order value and hands the
+ * port back in network order - confirm callers convert before freeing.
+ */
+always_inline void
+cnat_free_port (u16 port)
+{
+  cnat_main_t *cm = &cnat_main;
+  clib_spinlock_lock (&cm->src_ports_lock);
+  clib_bitmap_set_no_check (cm->src_ports, port, 0);
+  clib_spinlock_unlock (&cm->src_ports_lock);
+}
+
+/**
+ * Allocate a source port.
+ *
+ * @param cm   the cnat main, owner of the source-port bitmap and its lock
+ * @param port in: the preferred port in network order, 0 meaning
+ *             MIN_SRC_PORT; out: the allocated port in network order.
+ *             On failure the value is left in host order, as it always
+ *             has been.
+ *
+ * @return 0 on success, -1 if no free port was found
+ *
+ * The spinlock is released on every exit path, including the failure
+ * path, which used to return with the lock still held.
+ */
+always_inline int
+cnat_allocate_port (cnat_main_t * cm, u16 * port)
+{
+  int rv = 0;
+
+  *port = clib_net_to_host_u16 (*port);
+  if (*port == 0)
+    *port = MIN_SRC_PORT;
+  clib_spinlock_lock (&cm->src_ports_lock);
+  if (clib_bitmap_get_no_check (cm->src_ports, *port))
+    {
+      /* preferred port is taken: search upwards, then from MIN_SRC_PORT */
+      *port = clib_bitmap_next_clear (cm->src_ports, *port);
+      if (PREDICT_FALSE (*port >= UINT16_MAX))
+       *port = clib_bitmap_next_clear (cm->src_ports, MIN_SRC_PORT);
+      if (PREDICT_FALSE (*port >= UINT16_MAX))
+       {
+         rv = -1;
+         goto done;
+       }
+    }
+  clib_bitmap_set_no_check (cm->src_ports, *port, 1);
+  *port = clib_host_to_net_u16 (*port);
+done:
+  clib_spinlock_unlock (&cm->src_ports_lock);
+  return rv;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
+#endif
diff --git a/src/plugins/cnat/test/test_cnat.py b/src/plugins/cnat/test/test_cnat.py
new file mode 100644 (file)
index 0000000..18e3baa
--- /dev/null
@@ -0,0 +1,596 @@
+#!/usr/bin/env python3
+
+import unittest
+
+from framework import VppTestCase, VppTestRunner
+from vpp_ip import DpoProto
+
+from scapy.packet import Raw
+from scapy.layers.l2 import Ether
+from scapy.layers.inet import IP, UDP, TCP
+from scapy.layers.inet6 import IPv6
+
+from ipaddress import ip_address, ip_network, \
+    IPv4Address, IPv6Address, IPv4Network, IPv6Network
+
+from vpp_object import VppObject
+from vpp_papi import VppEnum
+
+# number of copies of each packet sent per flow in the tests below
+N_PKTS = 15
+
+
+def find_cnat_translation(test, id):
+    ts = test.vapi.cnat_translation_dump()
+    for t in ts:
+        if id == t.translation.id:
+            return True
+    return False
+
+
+class Ep(object):
+    """ CNat endpoint """
+
+    def __init__(self, ip, port, l4p=TCP):
+        self.ip = ip
+        self.port = port
+        self.l4p = l4p
+
+    def encode(self):
+        return {'addr': self.ip,
+                'port': self.port}
+
+    def __str__(self):
+        return ("%s:%d" % (self.ip, self.port))
+
+
+class EpTuple(object):
+    """ CNat endpoint """
+
+    def __init__(self, src, dst):
+        self.src = src
+        self.dst = dst
+
+    def encode(self):
+        return {'src_ep': self.src.encode(),
+                'dst_ep': self.dst.encode()}
+
+    def __str__(self):
+        return ("%s->%s" % (self.src, self.dst))
+
+
+class VppCNatTranslation(VppObject):
+
+    def __init__(self, test, iproto, vip, paths):
+        self._test = test
+        self.vip = vip
+        self.iproto = iproto
+        self.paths = paths
+        self.encoded_paths = []
+        for path in self.paths:
+            self.encoded_paths.append(path.encode())
+
+    @property
+    def vl4_proto(self):
+        ip_proto = VppEnum.vl_api_ip_proto_t
+        return {
+            UDP: ip_proto.IP_API_PROTO_UDP,
+            TCP: ip_proto.IP_API_PROTO_TCP,
+        }[self.iproto]
+
+    def delete(self):
+        r = self._test.vapi.cnat_translation_del(id=self.id)
+
+    def add_vpp_config(self):
+        r = self._test.vapi.cnat_translation_update(
+            {'vip': self.vip.encode(),
+             'ip_proto': self.vl4_proto,
+             'n_paths': len(self.paths),
+             'paths': self.encoded_paths})
+        self._test.registry.register(self, self._test.logger)
+        self.id = r.id
+
+    def modify_vpp_config(self, paths):
+        self.paths = paths
+        self.encoded_paths = []
+        for path in self.paths:
+            self.encoded_paths.append(path.encode())
+
+        r = self._test.vapi.cnat_translation_update(
+            {'vip': self.vip.encode(),
+             'ip_proto': self.vl4_proto,
+             'n_paths': len(self.paths),
+             'paths': self.encoded_paths})
+        self._test.registry.register(self, self._test.logger)
+
+    def remove_vpp_config(self):
+        self._test.vapi.cnat_translation_del(self.id)
+
+    def query_vpp_config(self):
+        return find_cnat_translation(self._test, self.id)
+
+    def object_id(self):
+        return ("cnat-translation-%s" % (self.vip))
+
+    def get_stats(self):
+        c = self._test.statistics.get_counter("/net/cnat-translation")
+        return c[0][self.id]
+
+
+class VppCNATSourceNat(VppObject):
+
+    def __init__(self, test, address, exclude_subnets=[]):
+        self._test = test
+        self.address = address
+        self.exclude_subnets = exclude_subnets
+
+    def add_vpp_config(self):
+        a = ip_address(self.address)
+        if 4 == a.version:
+            self._test.vapi.cnat_set_snat_addresses(snat_ip4=self.address)
+        else:
+            self._test.vapi.cnat_set_snat_addresses(snat_ip6=self.address)
+        for subnet in self.exclude_subnets:
+            self.cnat_exclude_subnet(subnet, True)
+
+    def cnat_exclude_subnet(self, exclude_subnet, isAdd=True):
+        add = 1 if isAdd else 0
+        self._test.vapi.cnat_add_del_snat_prefix(
+                prefix=exclude_subnet, is_add=add)
+
+    def query_vpp_config(self):
+        return False
+
+    def remove_vpp_config(self):
+        return False
+
+
+class TestCNatTranslation(VppTestCase):
+    """ CNat Translation """
+    extra_vpp_punt_config = ["cnat", "{",
+                             "session-max-age", "1",
+                             "tcp-max-age", "1", "}"]
+
+    @classmethod
+    def setUpClass(cls):
+        super(TestCNatTranslation, cls).setUpClass()
+
+    @classmethod
+    def tearDownClass(cls):
+        super(TestCNatTranslation, cls).tearDownClass()
+
+    def setUp(self):
+        super(TestCNatTranslation, self).setUp()
+
+        self.create_pg_interfaces(range(3))
+
+        for i in self.pg_interfaces:
+            i.admin_up()
+            i.config_ip4()
+            i.resolve_arp()
+            i.config_ip6()
+            i.resolve_ndp()
+
+    def tearDown(self):
+        for i in self.pg_interfaces:
+            i.unconfig_ip4()
+            i.unconfig_ip6()
+            i.admin_down()
+        super(TestCNatTranslation, self).tearDown()
+
+    def cnat_create_translation(self, vip, nbr, isV6=False):
+        ip_v = "ip6" if isV6 else "ip4"
+        dep = Ep(getattr(self.pg1.remote_hosts[nbr], ip_v), 4000 + nbr)
+        sep = Ep("::", 0) if isV6 else Ep("0.0.0.0", 0)
+        t1 = VppCNatTranslation(
+            self, vip.l4p, vip,
+            [EpTuple(sep, dep), EpTuple(sep, dep)])
+        t1.add_vpp_config()
+        return t1
+
+    def cnat_test_translation(self, t1, nbr, sports, isV6=False):
+        ip_v = "ip6" if isV6 else "ip4"
+        ip_class = IPv6 if isV6 else IP
+        vip = t1.vip
+
+        #
+        # Flows
+        #
+        for src in self.pg0.remote_hosts:
+            for sport in sports:
+                # from client to vip
+                p1 = (Ether(dst=self.pg0.local_mac,
+                            src=src.mac) /
+                      ip_class(src=getattr(src, ip_v), dst=vip.ip) /
+                      vip.l4p(sport=sport, dport=vip.port) /
+                      Raw())
+
+                self.vapi.cli("trace add pg-input 1")
+                rxs = self.send_and_expect(self.pg0,
+                                           p1 * N_PKTS,
+                                           self.pg1)
+
+                for rx in rxs:
+                    self.assert_packet_checksums_valid(rx)
+                    self.assertEqual(
+                        rx[ip_class].dst,
+                        getattr(self.pg1.remote_hosts[nbr], ip_v))
+                    self.assertEqual(rx[vip.l4p].dport, 4000 + nbr)
+                    self.assertEqual(
+                        rx[ip_class].src,
+                        getattr(src, ip_v))
+                    self.assertEqual(rx[vip.l4p].sport, sport)
+
+                # from vip to client
+                p1 = (Ether(dst=self.pg1.local_mac,
+                            src=self.pg1.remote_mac) /
+                      ip_class(src=getattr(
+                          self.pg1.remote_hosts[nbr],
+                          ip_v),
+                          dst=getattr(src, ip_v)) /
+                      vip.l4p(sport=4000 + nbr, dport=sport) /
+                      Raw())
+
+                rxs = self.send_and_expect(self.pg1,
+                                           p1 * N_PKTS,
+                                           self.pg0)
+
+                for rx in rxs:
+                    self.assert_packet_checksums_valid(rx)
+                    self.assertEqual(
+                        rx[ip_class].dst,
+                        getattr(src, ip_v))
+                    self.assertEqual(rx[vip.l4p].dport, sport)
+                    self.assertEqual(rx[ip_class].src, vip.ip)
+                    self.assertEqual(rx[vip.l4p].sport, vip.port)
+
+                #
+                # packets to the VIP that do not match a
+                # translation are dropped
+                #
+                p1 = (Ether(dst=self.pg0.local_mac,
+                            src=src.mac) /
+                      ip_class(src=getattr(src, ip_v), dst=vip.ip) /
+                      vip.l4p(sport=sport, dport=6666) /
+                      Raw())
+
+                self.send_and_assert_no_replies(self.pg0,
+                                                p1 * N_PKTS,
+                                                self.pg1)
+
+                #
+                # packets from the VIP that do not match a
+                # session are forwarded
+                #
+                p1 = (Ether(dst=self.pg1.local_mac,
+                            src=self.pg1.remote_mac) /
+                      ip_class(src=getattr(
+                          self.pg1.remote_hosts[nbr],
+                          ip_v),
+                          dst=getattr(src, ip_v)) /
+                      vip.l4p(sport=6666, dport=sport) /
+                      Raw())
+
+                rxs = self.send_and_expect(self.pg1,
+                                           p1 * N_PKTS,
+                                           self.pg0)
+
+        self.assertEqual(t1.get_stats()['packets'],
+                         N_PKTS *
+                         len(sports) *
+                         len(self.pg0.remote_hosts))
+
+    def cnat_test_translation_update(self, t1, sports, isV6=False):
+        ip_v = "ip6" if isV6 else "ip4"
+        ip_class = IPv6 if isV6 else IP
+        vip = t1.vip
+
+        #
+        # modify the translation to use a different backend
+        #
+        dep = Ep(getattr(self.pg2, 'remote_' + ip_v), 5000)
+        sep = Ep("::", 0) if isV6 else Ep("0.0.0.0", 0)
+        t1.modify_vpp_config([EpTuple(sep, dep)])
+
+        #
+        # existing flows follow the old path
+        #
+        for src in self.pg0.remote_hosts:
+            for sport in sports:
+                # from client to vip
+                p1 = (Ether(dst=self.pg0.local_mac,
+                            src=src.mac) /
+                      ip_class(src=getattr(src, ip_v), dst=vip.ip) /
+                      vip.l4p(sport=sport, dport=vip.port) /
+                      Raw())
+
+                rxs = self.send_and_expect(self.pg0,
+                                           p1 * N_PKTS,
+                                           self.pg1)
+
+        #
+        # new flows go to the new backend
+        #
+        for src in self.pg0.remote_hosts:
+            p1 = (Ether(dst=self.pg0.local_mac,
+                        src=src.mac) /
+                  ip_class(src=getattr(src, ip_v), dst=vip.ip) /
+                  vip.l4p(sport=9999, dport=vip.port) /
+                  Raw())
+
+            rxs = self.send_and_expect(self.pg0,
+                                       p1 * N_PKTS,
+                                       self.pg2)
+
+    def cnat_translation(self, vips, isV6=False):
+        """ CNat Translation """
+
+        ip_class = IPv6 if isV6 else IP
+        ip_v = "ip6" if isV6 else "ip4"
+        sports = [1234, 1233]
+
+        #
+        # turn the scanner off whilst testing otherwise sessions
+        # will time out
+        #
+        self.vapi.cli("test cnat scanner off")
+
+        sessions = self.vapi.cnat_session_dump()
+
+        trs = []
+        for nbr, vip in enumerate(vips):
+            trs.append(self.cnat_create_translation(vip, nbr, isV6=isV6))
+
+        self.logger.info(self.vapi.cli("sh cnat client"))
+        self.logger.info(self.vapi.cli("sh cnat translation"))
+
+        #
+        # translations
+        #
+        for nbr, vip in enumerate(vips):
+            self.cnat_test_translation(trs[nbr], nbr, sports, isV6=isV6)
+            self.cnat_test_translation_update(trs[nbr], sports, isV6=isV6)
+            if isV6:
+                self.logger.info(self.vapi.cli(
+                    "sh ip6 fib %s" % self.pg0.remote_ip6))
+            else:
+                self.logger.info(self.vapi.cli(
+                    "sh ip fib %s" % self.pg0.remote_ip4))
+            self.logger.info(self.vapi.cli("sh cnat session verbose"))
+
+        #
+        # turn the scanner back on and wait untill the sessions
+        # all disapper
+        #
+        self.vapi.cli("test cnat scanner on")
+
+        n_tries = 0
+        sessions = self.vapi.cnat_session_dump()
+        while (len(sessions) and n_tries < 100):
+            n_tries += 1
+            sessions = self.vapi.cnat_session_dump()
+            self.sleep(2)
+
+        self.assertTrue(n_tries < 100)
+
+        #
+        # load some flows again and purge
+        #
+        for vip in vips:
+            for src in self.pg0.remote_hosts:
+                for sport in sports:
+                    # from client to vip
+                    p1 = (Ether(dst=self.pg0.local_mac,
+                                src=src.mac) /
+                          ip_class(src=getattr(src, ip_v), dst=vip.ip) /
+                          vip.l4p(sport=sport, dport=vip.port) /
+                          Raw())
+                    self.send_and_expect(self.pg0,
+                                         p1 * N_PKTS,
+                                         self.pg2)
+
+        for tr in trs:
+            tr.delete()
+
+        self.assertTrue(self.vapi.cnat_session_dump())
+        self.vapi.cnat_session_purge()
+        self.assertFalse(self.vapi.cnat_session_dump())
+
+    def test_cnat6(self):
+        # """ CNat Translation ipv6 """
+        vips = [
+            Ep("30::1", 5555),
+            Ep("30::2", 5554),
+            Ep("30::2", 5553, UDP),
+        ]
+
+        self.pg0.generate_remote_hosts(len(vips))
+        self.pg0.configure_ipv6_neighbors()
+        self.pg1.generate_remote_hosts(len(vips))
+        self.pg1.configure_ipv6_neighbors()
+
+        self.cnat_translation(vips, isV6=True)
+
+    def test_cnat4(self):
+        # """ CNat Translation ipv4 """
+
+        vips = [
+            Ep("30.0.0.1", 5555),
+            Ep("30.0.0.2", 5554),
+            Ep("30.0.0.2", 5553, UDP),
+        ]
+
+        self.pg0.generate_remote_hosts(len(vips))
+        self.pg0.configure_ipv4_neighbors()
+        self.pg1.generate_remote_hosts(len(vips))
+        self.pg1.configure_ipv4_neighbors()
+
+        self.cnat_translation(vips)
+
+
+class TestCNatSourceNAT(VppTestCase):
+    """ CNat Source NAT """
+    extra_vpp_punt_config = ["cnat", "{",
+                             "session-max-age", "1",
+                             "tcp-max-age", "1", "}"]
+
+    @classmethod
+    def setUpClass(cls):
+        super(TestCNatSourceNAT, cls).setUpClass()
+
+    @classmethod
+    def tearDownClass(cls):
+        super(TestCNatSourceNAT, cls).tearDownClass()
+
+    def setUp(self):
+        super(TestCNatSourceNAT, self).setUp()
+
+        self.create_pg_interfaces(range(3))
+
+        for i in self.pg_interfaces:
+            i.admin_up()
+            i.config_ip4()
+            i.resolve_arp()
+            i.config_ip6()
+            i.resolve_ndp()
+
+    def tearDown(self):
+        for i in self.pg_interfaces:
+            i.unconfig_ip4()
+            i.unconfig_ip6()
+            i.admin_down()
+        super(TestCNatSourceNAT, self).tearDown()
+
+    def cnat_create_translation(self, srcNatAddr, interface, isV6=False):
+        t1 = VppCNATSourceNat(self, srcNatAddr)
+        t1.add_vpp_config()
+        cnat_arc_name = "ip6-unicast" if isV6 else "ip4-unicast"
+        cnat_feature_name = "ip6-cnat-snat" if isV6 else "ip4-cnat-snat"
+        self.vapi.feature_enable_disable(
+            enable=1,
+            arc_name=cnat_arc_name,
+            feature_name=cnat_feature_name,
+            sw_if_index=interface.sw_if_index)
+
+        return t1
+
+    def cnat_test_sourcenat(self, srcNatAddr, l4p=TCP, isV6=False):
+        ip_v = "ip6" if isV6 else "ip4"
+        ip_class = IPv6 if isV6 else IP
+        sports = [1234, 1235, 1236]
+        dports = [6661, 6662, 6663]
+
+        self.pg0.generate_remote_hosts(1)
+        self.pg0.configure_ipv4_neighbors()
+        self.pg0.configure_ipv6_neighbors()
+        self.pg1.generate_remote_hosts(len(sports))
+        self.pg1.configure_ipv4_neighbors()
+        self.pg1.configure_ipv6_neighbors()
+
+        self.vapi.cli("test cnat scanner on")
+        t1 = self.cnat_create_translation(srcNatAddr, self.pg0)
+
+        for nbr, remote_host in enumerate(self.pg1.remote_hosts):
+            # from pods to outside network
+            p1 = (
+                Ether(
+                    dst=self.pg0.local_mac,
+                    src=self.pg0.remote_hosts[0].mac) /
+                ip_class(
+                    src=getattr(self.pg0.remote_hosts[0], ip_v),
+                    dst=getattr(remote_host, ip_v)) /
+                l4p(sport=sports[nbr], dport=dports[nbr]) /
+                Raw())
+
+            rxs = self.send_and_expect(
+                                self.pg0,
+                                p1 * N_PKTS,
+                                self.pg1)
+            for rx in rxs:
+                self.assert_packet_checksums_valid(rx)
+                self.assertEqual(
+                    rx[ip_class].dst,
+                    getattr(remote_host, ip_v))
+                self.assertEqual(rx[l4p].dport, dports[nbr])
+                self.assertEqual(
+                    rx[ip_class].src,
+                    srcNatAddr)
+                sport = rx[l4p].sport
+
+            # from outside to pods
+            p2 = (
+                Ether(
+                    dst=self.pg1.local_mac,
+                    src=self.pg1.remote_hosts[nbr].mac) /
+                ip_class(src=getattr(remote_host, ip_v), dst=srcNatAddr) /
+                l4p(sport=dports[nbr], dport=sport) /
+                Raw())
+
+            rxs = self.send_and_expect(
+                                    self.pg1,
+                                    p2 * N_PKTS,
+                                    self.pg0)
+
+            for rx in rxs:
+                self.assert_packet_checksums_valid(rx)
+                self.assertEqual(
+                    rx[ip_class].dst,
+                    getattr(self.pg0.remote_hosts[0], ip_v))
+                self.assertEqual(rx[l4p].dport, sports[nbr])
+                self.assertEqual(rx[l4p].sport, dports[nbr])
+                self.assertEqual(
+                    rx[ip_class].src,
+                    getattr(remote_host, ip_v))
+
+            # add remote host to exclude list
+            subnet_mask = 100 if isV6 else 16
+            subnet = getattr(remote_host, ip_v) + "/" + str(subnet_mask)
+            exclude_subnet = ip_network(subnet, strict=False)
+
+            t1.cnat_exclude_subnet(exclude_subnet)
+            self.vapi.cnat_session_purge()
+
+            rxs = self.send_and_expect(
+                                self.pg0,
+                                p1 * N_PKTS,
+                                self.pg1)
+            for rx in rxs:
+                self.assert_packet_checksums_valid(rx)
+                self.assertEqual(
+                    rx[ip_class].dst,
+                    getattr(remote_host, ip_v))
+                self.assertEqual(rx[l4p].dport, dports[nbr])
+                self.assertEqual(
+                    rx[ip_class].src,
+                    getattr(self.pg0.remote_hosts[0], ip_v))
+
+            # remove remote host from exclude list
+            t1.cnat_exclude_subnet(exclude_subnet, isAdd=False)
+            self.vapi.cnat_session_purge()
+
+            rxs = self.send_and_expect(
+                    self.pg0,
+                    p1 * N_PKTS,
+                    self.pg1)
+
+            for rx in rxs:
+                self.assert_packet_checksums_valid(rx)
+                self.assertEqual(
+                    rx[ip_class].dst,
+                    getattr(remote_host, ip_v))
+                self.assertEqual(rx[l4p].dport, dports[nbr])
+                self.assertEqual(
+                    rx[ip_class].src,
+                    srcNatAddr)
+
+    # def test_cnat6_sourcenat(self):
+    #     # """ CNat Source Nat ipv6 """
+    #     self.cnat_test_sourcenat(self.pg2.remote_hosts[0].ip6, TCP, True)
+    #     self.cnat_test_sourcenat(self.pg2.remote_hosts[0].ip6, UDP, True)
+
+    def test_cnat4_sourcenat(self):
+        # """ CNat Source Nat ipv4 """
+        self.cnat_test_sourcenat(self.pg2.remote_hosts[0].ip4, TCP)
+        self.cnat_test_sourcenat(self.pg2.remote_hosts[0].ip4, UDP)
+
+# when executed directly, run the tests using the VPP test runner
+if __name__ == '__main__':
+    unittest.main(testRunner=VppTestRunner)
index 5041c12..3d489e4 100644 (file)
@@ -261,6 +261,30 @@ ip_address_from_46 (const ip46_address_t * nh,
   ip_addr_version (ip) = ip_address_family_from_fib_proto (fproto);
 }
 
+/**
+ * Convert an IP address to a FIB prefix that covers exactly that
+ * address: a length of 32 for an IPv4 address and of 128 otherwise.
+ */
+void
+ip_address_to_fib_prefix (const ip_address_t * addr, fib_prefix_t * prefix)
+{
+  prefix->___fp___pad = 0;
+
+  if (AF_IP4 != addr->version)
+    {
+      /* any version other than IPv4 is handled as IPv6 */
+      prefix->fp_proto = FIB_PROTOCOL_IP6;
+      prefix->fp_len = 128;
+      memcpy (&prefix->fp_addr.ip6, &addr->ip.ip6,
+             sizeof (prefix->fp_addr.ip6));
+      return;
+    }
+
+  prefix->fp_proto = FIB_PROTOCOL_IP4;
+  prefix->fp_len = 32;
+  /* zero the padding of the address before writing the v4 address */
+  clib_memset (&prefix->fp_addr.pad, 0, sizeof (prefix->fp_addr.pad));
+  memcpy (&prefix->fp_addr.ip4, &addr->ip.ip4,
+         sizeof (prefix->fp_addr.ip4));
+}
+
 static void
 ip_prefix_normalize_ip4 (ip4_address_t * ip4, u8 preflen)
 {
@@ -364,6 +388,17 @@ ip_prefix_cmp (ip_prefix_t * p1, ip_prefix_t * p2)
   return cmp;
 }
 
+/**
+ * Convert an IP prefix to a FIB prefix: the address is converted with
+ * ip_address_to_fib_prefix() and the resulting prefix length is then
+ * replaced by the length of the IP prefix.
+ */
+void
+ip_prefix_to_fib_prefix (const ip_prefix_t * ip_prefix,
+                        fib_prefix_t * fib_prefix)
+{
+  ip_address_to_fib_prefix (&ip_prefix->addr, fib_prefix);
+  fib_prefix->fp_len = ip_prefix->len;
+}
+
 static bool
 ip4_prefix_validate (const ip_prefix_t * ip)
 {
index c159366..7e9d059 100644 (file)
@@ -123,41 +123,6 @@ ip_interface_get_first_ip_address (lisp_cp_main_t * lcm, u32 sw_if_index,
   return 1;
 }
 
-/**
- * convert from a LISP address to a FIB prefix
- */
-void
-ip_address_to_fib_prefix (const ip_address_t * addr, fib_prefix_t * prefix)
-{
-  if (addr->version == AF_IP4)
-    {
-      prefix->fp_len = 32;
-      prefix->fp_proto = FIB_PROTOCOL_IP4;
-      clib_memset (&prefix->fp_addr.pad, 0, sizeof (prefix->fp_addr.pad));
-      memcpy (&prefix->fp_addr.ip4, &addr->ip.ip4,
-             sizeof (prefix->fp_addr.ip4));
-    }
-  else
-    {
-      prefix->fp_len = 128;
-      prefix->fp_proto = FIB_PROTOCOL_IP6;
-      memcpy (&prefix->fp_addr.ip6, &addr->ip.ip6,
-             sizeof (prefix->fp_addr.ip6));
-    }
-  prefix->___fp___pad = 0;
-}
-
-/**
- * convert from a LISP to a FIB prefix
- */
-void
-ip_prefix_to_fib_prefix (const ip_prefix_t * ip_prefix,
-                        fib_prefix_t * fib_prefix)
-{
-  ip_address_to_fib_prefix (&ip_prefix->addr, fib_prefix);
-  fib_prefix->fp_len = ip_prefix->len;
-}
-
 /**
  * Find the sw_if_index of the interface that would be used to egress towards
  * dst.