#include <vppinfra/dlist.h>
#include <vppinfra/error.h>
#include <vlibapi/api.h>
+#include <vlib/log.h>
#define SNAT_UDP_TIMEOUT 300
-#define SNAT_UDP_TIMEOUT_MIN 120
#define SNAT_TCP_TRANSITORY_TIMEOUT 240
#define SNAT_TCP_ESTABLISHED_TIMEOUT 7440
-#define SNAT_TCP_INCOMING_SYN 6
#define SNAT_ICMP_TIMEOUT 60
+#define NAT_FQ_NELTS 64
+
#define SNAT_FLAG_HAIRPINNING (1 << 0)
/* Key */
};
} snat_user_key_t;
+#define foreach_nat_addr_and_port_alloc_alg \
+ _(0, DEFAULT, "default") \
+ _(1, MAPE, "map-e") \
+ _(2, RANGE, "port-range")
+
+typedef enum {
+#define _(v, N, s) NAT_ADDR_AND_PORT_ALLOC_ALG_##N = v,
+ foreach_nat_addr_and_port_alloc_alg
+#undef _
+} nat_addr_and_port_alloc_alg_t;
#define foreach_snat_protocol \
_(UDP, 0, udp, "udp") \
_(3, TCP_ESTABLISHED, "tcp-established") \
_(4, TCP_FIN_WAIT, "tcp-fin-wait") \
_(5, TCP_CLOSE_WAIT, "tcp-close-wait") \
- _(6, TCP_LAST_ACK, "tcp-last-ack") \
- _(7, ICMP_ACTIVE, "icmp-active")
+ _(6, TCP_CLOSING, "tcp-closing") \
+ _(7, TCP_LAST_ACK, "tcp-last-ack") \
+ _(8, TCP_CLOSED, "tcp-closed") \
+ _(9, ICMP_ACTIVE, "icmp-active")
typedef enum {
#define _(v, N, s) SNAT_SESSION_##N = v,
#undef _
} snat_session_state_t;
+#define NAT44_SES_I2O_FIN 1
+#define NAT44_SES_O2I_FIN 2
+#define NAT44_SES_I2O_FIN_ACK 4
+#define NAT44_SES_O2I_FIN_ACK 8
+#define NAT44_SES_I2O_SYN 16
+#define NAT44_SES_O2I_SYN 32
+
+#define nat44_is_ses_closed(s) ((s)->state == 0xf)
-#define SNAT_SESSION_FLAG_STATIC_MAPPING 1
-#define SNAT_SESSION_FLAG_UNKNOWN_PROTO 2
-#define SNAT_SESSION_FLAG_LOAD_BALANCING 4
-#define SNAT_SESSION_FLAG_TWICE_NAT 8
+#define SNAT_SESSION_FLAG_STATIC_MAPPING 1
+#define SNAT_SESSION_FLAG_UNKNOWN_PROTO 2
+#define SNAT_SESSION_FLAG_LOAD_BALANCING 4
+#define SNAT_SESSION_FLAG_TWICE_NAT 8
+#define SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT 16
+#define SNAT_SESSION_FLAG_FWD_BYPASS 32
+#define SNAT_SESSION_FLAG_AFFINITY 64
#define NAT_INTERFACE_FLAG_IS_INSIDE 1
#define NAT_INTERFACE_FLAG_IS_OUTSIDE 2
ip4_address_t ext_host_addr; /* 68-71 */
u16 ext_host_port; /* 72-73 */
- /* External hos address and port after translation */
+ /* External host address and port after translation */
ip4_address_t ext_host_nat_addr; /* 74-77 */
u16 ext_host_nat_port; /* 78-79 */
+
+ /* TCP session state */
+ u8 state;
+ u32 i2o_fin_seq;
+ u32 o2i_fin_seq;
}) snat_session_t;
#undef _
} snat_address_t;
+typedef struct {
+ u32 fib_index;
+ u32 refcount;
+} nat_outside_fib_t;
+
typedef struct {
u16 in_port;
snat_det_out_key_t out;
u16 port;
u8 probability;
u8 prefix;
+ u32 vrf_id;
+ u32 fib_index;
} nat44_lb_addr_port_t;
+typedef enum {
+ TWICE_NAT_DISABLED,
+ TWICE_NAT,
+ TWICE_NAT_SELF,
+} twice_nat_type_t;
+
+typedef enum {
+ NO_LB_NAT,
+ LB_NAT,
+ AFFINITY_LB_NAT,
+} lb_nat_type_t;
+
typedef struct {
ip4_address_t local_addr;
ip4_address_t external_addr;
u16 local_port;
u16 external_port;
u8 addr_only;
- u8 twice_nat;
+ twice_nat_type_t twice_nat;
u8 out2in_only;
u32 vrf_id;
u32 fib_index;
snat_protocol_t proto;
- u32 worker_index;
+ u32 affinity;
+ u32 *workers;
u8 *tag;
nat44_lb_addr_port_t *locals;
+ u32 affinity_per_service_list_head_index;
} snat_static_mapping_t;
typedef struct {
clib_bihash_8_8_t out2in;
clib_bihash_8_8_t in2out;
+ /* Endpoint dependent sessions lookup tables */
+ clib_bihash_16_8_t out2in_ed;
+ clib_bihash_16_8_t in2out_ed;
+
/* Find-a-user => src address lookup */
clib_bihash_8_8_t user_hash;
u32 snat_thread_index);
typedef struct snat_main_s {
- /* Endpoint address dependent sessions lookup tables */
- clib_bihash_16_8_t out2in_ed;
- clib_bihash_16_8_t in2out_ed;
-
snat_icmp_match_function_t * icmp_match_in2out_cb;
snat_icmp_match_function_t * icmp_match_out2in_cb;
u32 num_workers;
u32 first_worker_index;
- u32 next_worker;
u32 * workers;
snat_get_worker_function_t * worker_in2out_cb;
snat_get_worker_function_t * worker_out2in_cb;
/* Vector of outside addresses */
snat_address_t * addresses;
+ /* Address and port allocation function */
nat_alloc_out_addr_and_port_function_t *alloc_addr_and_port;
+ /* Address and port allocation type */
+ nat_addr_and_port_alloc_alg_t addr_and_port_alloc_alg;
+ /* Port set parameters (MAP-E) */
u8 psid_offset;
u8 psid_length;
u16 psid;
+ /* Port range parameters */
+ u16 start_port;
+ u16 end_port;
+
+ /* vector of outside fibs */
+ nat_outside_fib_t * outside_fibs;
/* Vector of twice NAT addresses for external hosts */
snat_address_t * twice_nat_addresses;
u32 in2out_node_index;
u32 in2out_output_node_index;
u32 out2in_node_index;
+ u32 error_node_index;
/* Deterministic NAT */
snat_det_map_t * det_maps;
u8 static_mapping_connection_tracking;
u8 deterministic;
u8 out2in_dpo;
+ u8 endpoint_dependent;
u32 translation_buckets;
u32 translation_memory_size;
u32 max_translations;
u32 tcp_transitory_timeout;
u32 icmp_timeout;
+ /* TCP MSS clamping */
+ u16 mss_clamping;
+ u16 mss_value_net;
+
/* API message ID base */
u16 msg_id_base;
+ /* log class */
+ vlib_log_class_t log_class;
+
/* convenience */
vlib_main_t * vlib_main;
vnet_main_t * vnet_main;
api_main_t * api_main;
} snat_main_t;
+typedef struct {
+ u32 thread_index;
+ f64 now;
+} nat44_is_idle_session_ctx_t;
+
extern snat_main_t snat_main;
extern vlib_node_registration_t snat_in2out_node;
extern vlib_node_registration_t snat_in2out_output_node;
extern vlib_node_registration_t snat_det_out2in_node;
extern vlib_node_registration_t snat_hairpin_dst_node;
extern vlib_node_registration_t snat_hairpin_src_node;
+extern vlib_node_registration_t nat44_ed_in2out_node;
+extern vlib_node_registration_t nat44_ed_in2out_output_node;
+extern vlib_node_registration_t nat44_ed_out2in_node;
+extern vlib_node_registration_t nat44_ed_hairpin_dst_node;
+extern vlib_node_registration_t nat44_ed_hairpin_src_node;
+extern vlib_node_registration_t nat44_ed_in2out_worker_handoff_node;
+extern vlib_node_registration_t nat44_ed_in2out_output_worker_handoff_node;
+extern vlib_node_registration_t nat44_ed_out2in_worker_handoff_node;
void snat_free_outside_address_and_port (snat_address_t * addresses,
u32 thread_index,
- snat_session_key_t * k,
- u32 address_index);
+ snat_session_key_t * k);
int snat_alloc_outside_address_and_port (snat_address_t * addresses,
u32 fib_index,
snat_session_key_t * mapping,
u8 by_external,
u8 *is_addr_only,
- u8 *twice_nat,
- u8 *lb);
+ twice_nat_type_t *twice_nat,
+ lb_nat_type_t *lb,
+ ip4_address_t * ext_host_addr);
void snat_add_del_addr_to_fib (ip4_address_t * addr,
u8 p_len,
format_function_t format_snat_user;
format_function_t format_snat_static_mapping;
format_function_t format_snat_static_map_to_resolve;
+format_function_t format_snat_session;
format_function_t format_det_map_ses;
typedef struct {
*/
#define is_lb_session(s) (s->flags & SNAT_SESSION_FLAG_LOAD_BALANCING)
+/** \brief Check if NAT session is forwarding bypass.
+ @param s NAT session
+ @return 1 if NAT session is forwarding bypass
+*/
+#define is_fwd_bypass_session(s) (s->flags & SNAT_SESSION_FLAG_FWD_BYPASS)
+
/** \brief Check if NAT session is endpoint dependent.
@param s NAT session
@return 1 if NAT session is endpoint dependent
*/
-#define is_ed_session(s) (snat_is_unk_proto_session (s) || is_twice_nat_session (s) || is_lb_session (s))
+#define is_ed_session(s) (s->flags & SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT)
+
+/** \brief Check if NAT session has affinity record.
+ @param s NAT session
+ @return 1 if NAT session has affinity record
+*/
+#define is_affinity_sessions(s) (s->flags & SNAT_SESSION_FLAG_AFFINITY)
/* Test whether the interface record pointed to by i has the inside /
   outside NAT flag set in i->flags.  Each macro yields the masked flag
   bit (non-zero when set, 0 otherwise).  Both the argument and the whole
   expansion are parenthesized so the result composes correctly with
   operators such as !, == and with non-trivial pointer expressions. */
#define nat_interface_is_inside(i) ((i)->flags & NAT_INTERFACE_FLAG_IS_INSIDE)
#define nat_interface_is_outside(i) ((i)->flags & NAT_INTERFACE_FLAG_IS_OUTSIDE)
+#define nat_log_err(...) \
+ vlib_log(VLIB_LOG_LEVEL_ERR, snat_main.log_class, __VA_ARGS__)
+#define nat_log_warn(...) \
+ vlib_log(VLIB_LOG_LEVEL_WARNING, snat_main.log_class, __VA_ARGS__)
+#define nat_log_notice(...) \
+ vlib_log(VLIB_LOG_LEVEL_NOTICE, snat_main.log_class, __VA_ARGS__)
+#define nat_log_info(...) \
+ vlib_log(VLIB_LOG_LEVEL_INFO, snat_main.log_class, __VA_ARGS__)
+#define nat_log_debug(...)\
+ vlib_log(VLIB_LOG_LEVEL_DEBUG, snat_main.log_class, __VA_ARGS__)
+
/*
* Why is this here? Because we don't need to touch this layer to
* simply reply to an icmp. We need to change id to a unique
u16 sequence;
} icmp_echo_header_t;
-always_inline u32
-ip_proto_to_snat_proto (u8 ip_proto)
-{
- u32 snat_proto = ~0;
-
- snat_proto = (ip_proto == IP_PROTOCOL_UDP) ? SNAT_PROTOCOL_UDP : snat_proto;
- snat_proto = (ip_proto == IP_PROTOCOL_TCP) ? SNAT_PROTOCOL_TCP : snat_proto;
- snat_proto = (ip_proto == IP_PROTOCOL_ICMP) ? SNAT_PROTOCOL_ICMP : snat_proto;
- snat_proto = (ip_proto == IP_PROTOCOL_ICMP6) ? SNAT_PROTOCOL_ICMP : snat_proto;
-
- return snat_proto;
-}
-
-always_inline u8
-snat_proto_to_ip_proto (snat_protocol_t snat_proto)
-{
- u8 ip_proto = ~0;
-
- ip_proto = (snat_proto == SNAT_PROTOCOL_UDP) ? IP_PROTOCOL_UDP : ip_proto;
- ip_proto = (snat_proto == SNAT_PROTOCOL_TCP) ? IP_PROTOCOL_TCP : ip_proto;
- ip_proto = (snat_proto == SNAT_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP : ip_proto;
-
- return ip_proto;
-}
-
/* Pair of 16-bit port fields, source first and destination second.
   The type name indicates it is meant for both TCP and UDP headers. */
typedef struct
{
  u16 src_port;
  u16 dst_port;
} tcp_udp_header_t;
ip4_header_t *ip0, u8 *p_proto,
snat_session_key_t *p_value,
u8 *p_dont_translate, void *d, void *e);
+u32 icmp_match_in2out_ed(snat_main_t *sm, vlib_node_runtime_t *node,
+ u32 thread_index, vlib_buffer_t *b0,
+ ip4_header_t *ip0, u8 *p_proto,
+ snat_session_key_t *p_value,
+ u8 *p_dont_translate, void *d, void *e);
u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
u32 thread_index, vlib_buffer_t *b0,
ip4_header_t *ip0, u8 *p_proto,
ip4_header_t *ip0, u8 *p_proto,
snat_session_key_t *p_value,
u8 *p_dont_translate, void *d, void *e);
+u32 icmp_match_out2in_ed(snat_main_t *sm, vlib_node_runtime_t *node,
+ u32 thread_index, vlib_buffer_t *b0,
+ ip4_header_t *ip0, u8 *p_proto,
+ snat_session_key_t *p_value,
+ u8 *p_dont_translate, void *d, void *e);
void increment_v4_address(ip4_address_t * a);
-void snat_add_address(snat_main_t *sm, ip4_address_t *addr, u32 vrf_id,
- u8 twice_nat);
+int snat_add_address(snat_main_t *sm, ip4_address_t *addr, u32 vrf_id,
+ u8 twice_nat);
int snat_del_address(snat_main_t *sm, ip4_address_t addr, u8 delete_sm,
u8 twice_nat);
void nat44_add_del_address_dpo (ip4_address_t addr, u8 is_add);
int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
u32 sw_if_index, snat_protocol_t proto, int is_add,
- u8 twice_nat, u8 out2in_only, u8 *tag);
+ twice_nat_type_t twice_nat, u8 out2in_only,
+ u8 *tag);
clib_error_t * snat_api_init(vlib_main_t * vm, snat_main_t * sm);
int snat_set_workers (uword * bitmap);
int snat_interface_add_del(u32 sw_if_index, u8 is_inside, int is_del);
u8 twice_nat);
uword unformat_snat_protocol(unformat_input_t * input, va_list * args);
u8 * format_snat_protocol(u8 * s, va_list * args);
+u8 * format_nat_addr_and_port_alloc_alg(u8 * s, va_list * args);
int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
- snat_protocol_t proto, u32 vrf_id,
+ snat_protocol_t proto,
nat44_lb_addr_port_t *locals, u8 is_add,
- u8 twice_nat, u8 out2in_only, u8 *tag);
+ twice_nat_type_t twice_nat, u8 out2in_only,
+ u8 *tag, u32 affinity);
int nat44_del_session (snat_main_t *sm, ip4_address_t *addr, u16 port,
snat_protocol_t proto, u32 vrf_id, int is_in);
+int nat44_del_ed_session (snat_main_t *sm, ip4_address_t *addr, u16 port,
+ ip4_address_t *eh_addr, u16 eh_port, u8 proto,
+ u32 vrf_id, int is_in);
void nat_free_session_data (snat_main_t * sm, snat_session_t * s,
u32 thread_index);
snat_user_t * nat_user_get_or_create (snat_main_t *sm, ip4_address_t *addr,
u32 fib_index, u32 thread_index);
snat_session_t * nat_session_alloc_or_recycle (snat_main_t *sm, snat_user_t *u,
u32 thread_index);
+snat_session_t * nat_ed_session_alloc (snat_main_t *sm, snat_user_t *u,
+ u32 thread_index);
void nat_set_alloc_addr_and_port_mape (u16 psid, u16 psid_offset,
u16 psid_length);
+void nat_set_alloc_addr_and_port_range (u16 start_port, u16 end_port);
void nat_set_alloc_addr_and_port_default (void);
-
-static_always_inline u8
-icmp_is_error_message (icmp46_header_t * icmp)
-{
- switch(icmp->type)
- {
- case ICMP4_destination_unreachable:
- case ICMP4_time_exceeded:
- case ICMP4_parameter_problem:
- case ICMP4_source_quench:
- case ICMP4_redirect:
- case ICMP4_alternate_host_address:
- return 1;
- }
- return 0;
-}
-
-static_always_inline u8
-is_interface_addr(snat_main_t *sm, vlib_node_runtime_t *node, u32 sw_if_index0,
- u32 ip4_addr)
-{
- snat_runtime_t *rt = (snat_runtime_t *) node->runtime_data;
- ip4_address_t * first_int_addr;
-
- if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
- {
- first_int_addr =
- ip4_interface_first_address (sm->ip4_main, sw_if_index0,
- 0 /* just want the address */);
- rt->cached_sw_if_index = sw_if_index0;
- if (first_int_addr)
- rt->cached_ip4_address = first_int_addr->as_u32;
- else
- rt->cached_ip4_address = 0;
- }
-
- if (PREDICT_FALSE(ip4_addr == rt->cached_ip4_address))
- return 1;
- else
- return 0;
-}
-
-always_inline u8
-maximum_sessions_exceeded (snat_main_t *sm, u32 thread_index)
-{
- if (pool_elts (sm->per_thread_data[thread_index].sessions) >= sm->max_translations)
- return 1;
-
- return 0;
-}
-
-static_always_inline void
-nat_send_all_to_node(vlib_main_t *vm, u32 *bi_vector,
- vlib_node_runtime_t *node, vlib_error_t *error, u32 next)
-{
- u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
-
- from = bi_vector;
- n_left_from = vec_len(bi_vector);
- next_index = node->cached_next_index;
- while (n_left_from > 0) {
- vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
- while (n_left_from > 0 && n_left_to_next > 0) {
- u32 bi0 = to_next[0] = from[0];
- from += 1;
- n_left_from -= 1;
- to_next += 1;
- n_left_to_next -= 1;
- vlib_buffer_t *p0 = vlib_get_buffer(vm, bi0);
- p0->error = *error;
- vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
- n_left_to_next, bi0, next);
- }
- vlib_put_next_frame(vm, node, next_index, n_left_to_next);
- }
-}
-
-always_inline void
-user_session_increment(snat_main_t *sm, snat_user_t *u, u8 is_static)
-{
- if (u->nsessions + u->nstaticsessions < sm->max_translations_per_user)
- {
- if (is_static)
- u->nstaticsessions++;
- else
- u->nsessions++;
- }
-}
+int nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t *kv, void *arg);
+int nat44_o2i_ed_is_idle_session_cb (clib_bihash_kv_16_8_t *kv, void *arg);
+int nat44_i2o_is_idle_session_cb (clib_bihash_kv_8_8_t *kv, void *arg);
+int nat44_o2i_is_idle_session_cb (clib_bihash_kv_8_8_t *kv, void *arg);
#endif /* __included_snat_h__ */