nat: use correct data types for memory sizes
[vpp.git] / src / plugins / nat / nat.h
index 664af39..187e351 100644 (file)
@@ -23,6 +23,7 @@
 #include <vnet/ethernet/ethernet.h>
 #include <vnet/ip/icmp46_packet.h>
 #include <vnet/api_errno.h>
+#include <vnet/fib/fib_source.h>
 #include <vppinfra/elog.h>
 #include <vppinfra/bihash_8_8.h>
 #include <vppinfra/bihash_16_8.h>
@@ -30,6 +31,7 @@
 #include <vppinfra/error.h>
 #include <vlibapi/api.h>
 #include <vlib/log.h>
+#include <vppinfra/bihash_16_8.h>
 
 /* default session timeouts */
 #define SNAT_UDP_TIMEOUT 300
 /* NAT buffer flags */
 #define SNAT_FLAG_HAIRPINNING (1 << 0)
 
+typedef struct	/* per-buffer NAT metadata; reached through the nat_buffer_opaque() accessor */
+{
+  u32 arc_next;	/* NOTE(review): presumably the saved feature-arc next index -- confirm against users */
+} nat_buffer_opaque_t;
+
+typedef enum	/* NOTE(review): presumably next-node indices shared by the NAT nodes -- confirm */
+{
+  NAT_NEXT_DROP,
+  NAT_NEXT_ICMP_ERROR,
+  NAT_NEXT_IN2OUT_PRE,
+  NAT_NEXT_OUT2IN_PRE,
+  NAT_NEXT_IN2OUT_ED_FAST_PATH,
+  NAT_NEXT_IN2OUT_ED_SLOW_PATH,
+  NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH,
+  NAT_NEXT_OUT2IN_ED_FAST_PATH,
+  NAT_NEXT_OUT2IN_ED_SLOW_PATH,
+  NAT_N_NEXT,	/* last enumerator, so it equals the number of entries above */
+} nat_next_t;
+
+typedef struct	/* NOTE(review): by its name, the trace record of the nat-pre nodes -- confirm */
+{
+  u32 next_index;	/* NOTE(review): presumably the next index chosen for the traced packet -- confirm */
+} nat_pre_trace_t;
+
+/*
+ * Per-buffer NAT metadata accessor: views the __unused2 area of the
+ * buffer's opaque2 as a nat_buffer_opaque_t.  The argument is
+ * parenthesized so that any pointer-valued expression can be passed.
+ */
+#define nat_buffer_opaque(b) \
+  ((nat_buffer_opaque_t *)((vnet_buffer_opaque2_t *)(b)->opaque2)->__unused2)
+
+/*
+ * Disabled alternative: keep nat_buffer_opaque_t in the `unused` area of
+ * vnet_buffer_opaque_t instead of opaque2.
+ *
+STATIC_ASSERT (sizeof (nat_buffer_opaque_t) <=
+               STRUCT_SIZE_OF (vnet_buffer_opaque_t, unused),
+               "Custom meta-data too large for vnet_buffer_opaque_t");
+
+#define nat_buffer_opaque(b) \
+  ((nat_buffer_opaque_t *)((u8 *)((b)->opaque) + \
+    STRUCT_OFFSET_OF (vnet_buffer_opaque_t, unused)))*/
+
 /* session key (4-tuple) */
 typedef struct
 {
@@ -177,6 +215,66 @@ typedef enum
 #undef _
 } snat_session_state_t;
 
+#define foreach_nat_in2out_ed_error /* list of _(symbol, description) */ \
+_(UNSUPPORTED_PROTOCOL, "unsupported protocol")         \
+_(IN2OUT_PACKETS, "good in2out packets processed")      \
+_(OUT_OF_PORTS, "out of ports")                         \
+_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
+_(MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded")   \
+_(MAX_USER_SESS_EXCEEDED, "max user sessions exceeded") \
+_(DROP_FRAGMENT, "drop fragment")                       \
+_(MAX_REASS, "maximum reassemblies exceeded")           \
+_(MAX_FRAG, "maximum fragments per reassembly exceeded")\
+_(CANNOT_CREATE_USER, "cannot create NAT user")         \
+_(NON_SYN, "non-SYN packet try to create session")      \
+_(TCP_PACKETS, "TCP packets")                           \
+_(TCP_CLOSED, "drops due to TCP in transitory timeout") \
+_(UDP_PACKETS, "UDP packets")                           \
+_(ICMP_PACKETS, "ICMP packets")                         \
+_(OTHER_PACKETS, "other protocol packets")              \
+_(FRAGMENTS, "fragments")                               \
+_(CACHED_FRAGMENTS, "cached fragments")                 \
+_(PROCESSED_FRAGMENTS, "processed fragments")
+
+typedef enum	/* one NAT_IN2OUT_ED_ERROR_<symbol> per entry of foreach_nat_in2out_ed_error */
+{
+#define _(sym,str) NAT_IN2OUT_ED_ERROR_##sym,	/* str is ignored by this expansion */
+  foreach_nat_in2out_ed_error
+#undef _
+    NAT_IN2OUT_ED_N_ERROR,	/* last enumerator, so it equals the number of list entries */
+} nat_in2out_ed_error_t;
+
+#define foreach_nat_out2in_ed_error /* list of _(symbol, description) */ \
+_(UNSUPPORTED_PROTOCOL, "unsupported protocol")         \
+_(OUT2IN_PACKETS, "good out2in packets processed")      \
+_(OUT_OF_PORTS, "out of ports")                         \
+_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
+_(NO_TRANSLATION, "no translation")                     \
+_(MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded")   \
+_(MAX_USER_SESS_EXCEEDED, "max user sessions exceeded") \
+_(DROP_FRAGMENT, "drop fragment")                       \
+_(MAX_REASS, "maximum reassemblies exceeded")           \
+_(MAX_FRAG, "maximum fragments per reassembly exceeded")\
+_(CANNOT_CREATE_USER, "cannot create NAT user")         \
+_(NON_SYN, "non-SYN packet try to create session")      \
+_(TCP_PACKETS, "TCP packets")                           \
+_(TCP_CLOSED, "drops due to TCP in transitory timeout") \
+_(UDP_PACKETS, "UDP packets")                           \
+_(ICMP_PACKETS, "ICMP packets")                         \
+_(OTHER_PACKETS, "other protocol packets")              \
+_(FRAGMENTS, "fragments")                               \
+_(CACHED_FRAGMENTS, "cached fragments")                 \
+_(PROCESSED_FRAGMENTS, "processed fragments")
+
+typedef enum	/* one NAT_OUT2IN_ED_ERROR_<symbol> per entry of foreach_nat_out2in_ed_error */
+{
+#define _(sym,str) NAT_OUT2IN_ED_ERROR_##sym,	/* str is ignored by this expansion */
+  foreach_nat_out2in_ed_error
+#undef _
+    NAT_OUT2IN_ED_N_ERROR,	/* last enumerator, so it equals the number of list entries */
+} nat_out2in_ed_error_t;
+
+
 /* Endpoint dependent TCP session state */
 #define NAT44_SES_I2O_FIN 1
 #define NAT44_SES_O2I_FIN 2
@@ -222,6 +320,10 @@ typedef CLIB_PACKED(struct
   u32 per_user_index;
   u32 per_user_list_head_index;
 
+  /* index in global LRU list */
+  u32 global_lru_index;
+  f64 last_lru_update;
+
   /* Last heard timer */
   f64 last_heard;
 
@@ -244,6 +346,7 @@ typedef CLIB_PACKED(struct
   u8 state;
   u32 i2o_fin_seq;
   u32 o2i_fin_seq;
+  u32 tcp_close_timestamp;
 
   /* user index */
   u32 user_index;
@@ -258,6 +361,8 @@ typedef struct
   u32 sessions_per_user_list_head_index;
   u32 nsessions;
   u32 nstaticsessions;
+  /* discovered minimum session timeout time */
+  u64 min_session_timeout;
 } snat_user_t;
 
 typedef struct
@@ -268,7 +373,7 @@ typedef struct
 #define _(N, i, n, s) \
   u16 busy_##n##_ports; \
   u16 * busy_##n##_ports_per_thread; \
-  uword * busy_##n##_port_bitmap;
+  u32 busy_##n##_port_refcounts[65535];
   foreach_snat_protocol
 #undef _
 /* *INDENT-ON* */
@@ -420,8 +525,24 @@ typedef struct
   /* Pool of doubly-linked list elements */
   dlist_elt_t *list_pool;
 
+  /* LRU session list - head is stale, tail is fresh */
+  dlist_elt_t *global_lru_pool;
+  u32 global_lru_head_index;
+
   /* NAT thread index */
   u32 snat_thread_index;
+
+  /* real thread index */
+  u32 thread_index;
+
+  /* discovered minimum session timeout time */
+  u64 min_session_timeout;
+
+  /* session scavenging */
+  u32 cleared;
+  u32 cleanup_runs;
+  f64 cleanup_timeout;
+
 } snat_main_per_thread_data_t;
 
 struct snat_main_s;
@@ -437,8 +558,14 @@ typedef u32 (snat_icmp_match_function_t) (struct snat_main_s * sm,
                                          void *e);
 
 /* Return worker thread index for given packet */
-typedef u32 (snat_get_worker_function_t) (ip4_header_t * ip,
-                                         u32 rx_fib_index);
+typedef u32 (snat_get_worker_in2out_function_t) (ip4_header_t * ip,	/* in2out variant: takes the IPv4 header only */
+                                                u32 rx_fib_index,
+                                                u8 is_output);	/* NOTE(review): presumably set on the output-feature path -- confirm */
+
+typedef u32 (snat_get_worker_out2in_function_t) (vlib_buffer_t * b,	/* out2in variant: additionally receives the buffer */
+                                                ip4_header_t * ip,
+                                                u32 rx_fib_index,
+                                                u8 is_output);
 
 /* NAT address and port allocation function */
 typedef int (nat_alloc_out_addr_and_port_function_t) (snat_address_t *
@@ -449,6 +576,28 @@ typedef int (nat_alloc_out_addr_and_port_function_t) (snat_address_t *
                                                      u16 port_per_thread,
                                                      u32 snat_thread_index);
 
+typedef struct ed_bihash_key_s	/* key fields overlaid on a clib_bihash_kv_16_8_t through ed_bihash_kv_t */
+{
+  u32 src_address;
+  u32 dst_address;
+  u16 src_port;
+  u16 dst_port;
+  u8 protocol;	/* fields total 13 bytes; NOTE(review): any padding after this presumably must be zeroed before the key is hashed -- confirm */
+} ed_bihash_key_t;
+
+typedef struct ed_bihash_kv_s
+{
+  union	/* anonymous union: k and kv share the same storage */
+  {
+    ed_bihash_key_t k;
+    clib_bihash_kv_16_8_t kv;
+  };
+} ed_bihash_kv_t;
+
+STATIC_ASSERT (STRUCT_SIZE_OF (ed_bihash_kv_t, k) <=	/* compile-time guard: k may not be larger than kv.key */
+              STRUCT_SIZE_OF (ed_bihash_kv_t, kv.key),
+              "ed key needs to fit in bihash key");
+
 typedef struct snat_main_s
 {
   /* ICMP session match functions */
@@ -459,8 +608,8 @@ typedef struct snat_main_s
   u32 num_workers;
   u32 first_worker_index;
   u32 *workers;
-  snat_get_worker_function_t *worker_in2out_cb;
-  snat_get_worker_function_t *worker_out2in_cb;
+  snat_get_worker_in2out_function_t *worker_in2out_cb;
+  snat_get_worker_out2in_function_t *worker_out2in_cb;
   u16 port_per_thread;
   u32 num_snat_thread;
 
@@ -518,21 +667,26 @@ typedef struct snat_main_s
   /* node indexes */
   u32 error_node_index;
 
+  /* handoff fq nodes  */
+  u32 handoff_out2in_index;
+  u32 handoff_in2out_index;
+  u32 handoff_in2out_output_index;
+
+  /* respect feature arc nodes */
+  u32 pre_out2in_node_index;
+  u32 pre_in2out_node_index;
+
   u32 in2out_node_index;
   u32 in2out_output_node_index;
   u32 in2out_fast_node_index;
   u32 in2out_slowpath_node_index;
   u32 in2out_slowpath_output_node_index;
-  u32 in2out_reass_node_index;
   u32 ed_in2out_node_index;
   u32 ed_in2out_slowpath_node_index;
-  u32 ed_in2out_reass_node_index;
   u32 out2in_node_index;
   u32 out2in_fast_node_index;
-  u32 out2in_reass_node_index;
   u32 ed_out2in_node_index;
   u32 ed_out2in_slowpath_node_index;
-  u32 ed_out2in_reass_node_index;
   u32 det_in2out_node_index;
   u32 det_out2in_node_index;
 
@@ -557,10 +711,10 @@ typedef struct snat_main_s
   u8 out2in_dpo;
   u8 endpoint_dependent;
   u32 translation_buckets;
-  u32 translation_memory_size;
+  uword translation_memory_size;
   u32 max_translations;
   u32 user_buckets;
-  u32 user_memory_size;
+  uword user_memory_size;
   u32 max_translations_per_user;
   u32 outside_vrf_id;
   u32 outside_fib_index;
@@ -568,9 +722,13 @@ typedef struct snat_main_s
   u32 inside_fib_index;
 
   /* values of various timeouts */
+
+  // min timeout of all proto timeouts
+  u32 min_timeout;
+  // proto timeouts
   u32 udp_timeout;
-  u32 tcp_established_timeout;
   u32 tcp_transitory_timeout;
+  u32 tcp_established_timeout;
   u32 icmp_timeout;
 
   /* TCP MSS clamping */
@@ -595,6 +753,8 @@ typedef struct snat_main_s
   ip4_main_t *ip4_main;
   ip_lookup_main_t *ip4_lookup_main;
   api_main_t *api_main;
+
+  clib_bihash_16_8_t ed_ext_ports;
 } snat_main_t;
 
 typedef struct
@@ -610,29 +770,30 @@ typedef struct
 } snat_runtime_t;
 
 extern snat_main_t snat_main;
+
+// nat pre ed next_node feature classification
+extern vlib_node_registration_t nat_default_node;
+extern vlib_node_registration_t nat_pre_in2out_node;
+extern vlib_node_registration_t nat_pre_out2in_node;
+
 extern vlib_node_registration_t snat_in2out_node;
 extern vlib_node_registration_t snat_in2out_output_node;
 extern vlib_node_registration_t snat_out2in_node;
-extern vlib_node_registration_t snat_in2out_fast_node;
-extern vlib_node_registration_t snat_out2in_fast_node;
 extern vlib_node_registration_t snat_in2out_worker_handoff_node;
 extern vlib_node_registration_t snat_in2out_output_worker_handoff_node;
 extern vlib_node_registration_t snat_out2in_worker_handoff_node;
 extern vlib_node_registration_t snat_det_in2out_node;
 extern vlib_node_registration_t snat_det_out2in_node;
-extern vlib_node_registration_t snat_hairpin_dst_node;
-extern vlib_node_registration_t snat_hairpin_src_node;
 extern vlib_node_registration_t nat44_ed_in2out_node;
 extern vlib_node_registration_t nat44_ed_in2out_output_node;
 extern vlib_node_registration_t nat44_ed_out2in_node;
-extern vlib_node_registration_t nat44_ed_hairpin_dst_node;
-extern vlib_node_registration_t nat44_ed_hairpin_src_node;
-extern vlib_node_registration_t nat44_ed_in2out_worker_handoff_node;
-extern vlib_node_registration_t nat44_ed_in2out_output_worker_handoff_node;
-extern vlib_node_registration_t nat44_ed_out2in_worker_handoff_node;
+
+extern fib_source_t nat_fib_src_hi;
+extern fib_source_t nat_fib_src_low;
 
 /* format functions */
 format_function_t format_snat_user;
+format_function_t format_snat_user_v2;
 format_function_t format_snat_static_mapping;
 format_function_t format_snat_static_map_to_resolve;
 format_function_t format_snat_session;
@@ -641,7 +802,6 @@ format_function_t format_snat_key;
 format_function_t format_static_mapping_key;
 format_function_t format_snat_protocol;
 format_function_t format_nat_addr_and_port_alloc_alg;
-format_function_t format_nat44_reass_trace;
 /* unformat functions */
 unformat_function_t unformat_snat_protocol;
 
@@ -733,7 +893,11 @@ unformat_function_t unformat_snat_protocol;
     @param f TCP flags
     @return 1 if client initiating TCP connection
 */
-#define tcp_is_init(t) ((t->flags & TCP_FLAG_SYN) && !(t->flags & TCP_FLAG_ACK))
+always_inline bool
+tcp_flags_is_init (u8 f)
+{
+  /* Test each flag separately, then combine: SYN present and ACK absent. */
+  u8 syn_set = (f & TCP_FLAG_SYN) != 0;
+  u8 ack_set = (f & TCP_FLAG_ACK) != 0;
+
+  return syn_set && !ack_set;
+}
 
 /* logging */
 #define nat_log_err(...) \
@@ -771,7 +935,7 @@ do                                                \
       {                                           \
         ELOG_TYPE_DECLARE (e) =                   \
           {                                       \
-            .format = "nat-msg" _str,             \
+            .format = "nat-msg " _str,            \
             .format_args = "",                    \
           };                                      \
         ELOG_DATA (&sm->vlib_main->elog_main, e); \
@@ -803,6 +967,86 @@ do                                                       \
     }                                                    \
   } while (0);
 
+#define nat_elog_debug_handoff(_str, _tid, _fib, _src, _dst)                \
+do                                                                          \
+  {                                                                         \
+  if (PREDICT_FALSE (sm->log_level >= SNAT_LOG_DEBUG))                      \
+    {                                                                       \
+      ELOG_TYPE_DECLARE (e) =                                               \
+        {                                                                   \
+          .format = "nat-msg " _str " ip src: %d.%d.%d.%d dst: %d.%d.%d.%d" \
+                                    " tid from: %d to: %d fib: %d",         \
+        .format_args = "i1i1i1i1i1i1i1i1i4i4i4",                            \
+      };                                                                    \
+      CLIB_PACKED(struct                                                    \
+        {                                                                   \
+          u8 src_oct1;                                                      \
+          u8 src_oct2;                                                      \
+          u8 src_oct3;                                                      \
+          u8 src_oct4;                                                      \
+          u8 dst_oct1;                                                      \
+          u8 dst_oct2;                                                      \
+          u8 dst_oct3;                                                      \
+          u8 dst_oct4;                                                      \
+          u32 ftid;                                                         \
+          u32 ttid;                                                         \
+          u32 fib;                                                          \
+        }) *ed;                                                             \
+      ed = ELOG_DATA (&vlib_global_main.elog_main, e);                      \
+      ed->src_oct1 = _src >> 24;                                            \
+      ed->src_oct2 = _src >> 16;                                            \
+      ed->src_oct3 = _src >> 8;                                             \
+      ed->src_oct4 = _src;                                                  \
+      ed->dst_oct1 = _dst >> 24;                                            \
+      ed->dst_oct2 = _dst >> 16;                                            \
+      ed->dst_oct3 = _dst >> 8;                                             \
+      ed->dst_oct4 = _dst;                                                  \
+      ed->ftid = vlib_get_thread_index ();                                  \
+      ed->ttid = _tid;                                                      \
+      ed->fib = _fib;                                                       \
+    }                                                                       \
+  } while (0);
+
+#define nat_elog_debug_handoff_v2(_str, _prt, _fib, _src, _dst)              \
+do                                                                           \
+  {                                                                          \
+  if (PREDICT_FALSE (sm->log_level >= SNAT_LOG_DEBUG))                       \
+    {                                                                        \
+      ELOG_TYPE_DECLARE (e) =                                                \
+        {                                                                    \
+          .format = "nat-msg " _str " ip_src:%d.%d.%d.%d ip_dst:%d.%d.%d.%d" \
+                                    " tid:%d prt:%d fib:%d",                 \
+        .format_args = "i1i1i1i1i1i1i1i1i4i4i4",                             \
+      };                                                                     \
+      CLIB_PACKED(struct                                                     \
+        {                                                                    \
+          u8 src_oct1;                                                       \
+          u8 src_oct2;                                                       \
+          u8 src_oct3;                                                       \
+          u8 src_oct4;                                                       \
+          u8 dst_oct1;                                                       \
+          u8 dst_oct2;                                                       \
+          u8 dst_oct3;                                                       \
+          u8 dst_oct4;                                                       \
+          u32 tid;                                                           \
+          u32 prt;                                                           \
+          u32 fib;                                                           \
+        }) *ed;                                                              \
+      ed = ELOG_DATA (&vlib_global_main.elog_main, e);                       \
+      ed->src_oct1 = _src >> 24;                                             \
+      ed->src_oct2 = _src >> 16;                                             \
+      ed->src_oct3 = _src >> 8;                                              \
+      ed->src_oct4 = _src;                                                   \
+      ed->dst_oct1 = _dst >> 24;                                             \
+      ed->dst_oct2 = _dst >> 16;                                             \
+      ed->dst_oct3 = _dst >> 8;                                              \
+      ed->dst_oct4 = _dst;                                                   \
+      ed->tid = vlib_get_thread_index ();                                    \
+      ed->prt = _prt;                                                        \
+      ed->fib = _fib;                                                        \
+    }                                                                        \
+  } while (0);
+
 #define nat_elog_X1(_level, _fmt, _arg, _val1)         \
 do                                                     \
   {                                                    \
@@ -922,11 +1166,6 @@ int nat44_o2i_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg);
 int nat44_i2o_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void *arg);
 int nat44_o2i_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void *arg);
 
-/**
- * @brief Increment IPv4 address
- */
-void increment_v4_address (ip4_address_t * a);
-
 /**
  * @brief Add external address to NAT44 pool
  *
@@ -1098,6 +1337,16 @@ int nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
 void nat_free_session_data (snat_main_t * sm, snat_session_t * s,
                            u32 thread_index, u8 is_ha);
 
+/**
+ * @brief Free NAT44 ED session data (lookup keys, external address port)
+ *
+ * @param sm           NAT main structure (snat_main_t)
+ * @param s            NAT session
+ * @param thread_index thread index
+ * @param is_ha        is HA event
+ */
+void
+nat44_free_session_data (snat_main_t * sm, snat_session_t * s,
+                        u32 thread_index, u8 is_ha);
 /**
  * @brief Find or create NAT user
  *
@@ -1107,8 +1356,9 @@ void nat_free_session_data (snat_main_t * sm, snat_session_t * s,
  *
  * @return NAT user data structure on success otherwise zero value
  */
-snat_user_t *nat_user_get_or_create (snat_main_t * sm, ip4_address_t * addr,
-                                    u32 fib_index, u32 thread_index);
+snat_user_t *nat_user_get_or_create (snat_main_t * sm,
+                                    ip4_address_t * addr, u32 fib_index,
+                                    u32 thread_index);
 
 /**
  * @brief Allocate new NAT session or recycle last used
@@ -1243,7 +1493,6 @@ typedef struct
 } tcp_udp_header_t;
 
 #endif /* __included_nat_h__ */
-
 /*
  * fd.io coding-style-patch-verification: ON
  *