X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Fnat%2Fnat44-ed%2Fnat44_ed_in2out.c;h=6e0a21d890a4e80914e54d1ad144ff772eb38e83;hb=caf4e754ea635b0ed028e9e8ecba29b32ff4a8a7;hp=784dea0faf6687ee0efa899d8b2504d8f4bbfd1e;hpb=0eaf4e6784efb2d058fe2f031578251b6bcc0aa8;p=vpp.git diff --git a/src/plugins/nat/nat44-ed/nat44_ed_in2out.c b/src/plugins/nat/nat44-ed/nat44_ed_in2out.c index 784dea0faf6..6e0a21d890a 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed_in2out.c +++ b/src/plugins/nat/nat44-ed/nat44_ed_in2out.c @@ -25,18 +25,12 @@ #include #include -#include #include #include #include #include -/* number of attempts to get a port for ED overloading algorithm, if rolling - * a dice this many times doesn't produce a free port, it's treated - * as if there were no free ports available to conserve resources */ -#define ED_PORT_ALLOC_ATTEMPTS (10) - static char *nat_in2out_ed_error_strings[] = { #define _(sym,string) string, foreach_nat_in2out_ed_error @@ -55,6 +49,7 @@ typedef struct u8 is_slow_path; u8 translation_via_i2of; u8 lookup_skipped; + u8 tcp_state; } nat_in2out_ed_trace_t; static u8 * @@ -69,12 +64,13 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args) t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" : "NAT44_IN2OUT_ED_FAST_PATH"; - s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag, - t->sw_if_index, t->next_index, t->session_index); + s = format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index, + t->next_index); if (~0 != t->session_index) { - s = format (s, ", translation result '%U' via %s", - format_nat_ed_translation_error, t->translation_error, + s = format (s, ", session %d, translation result '%U' via %s", + t->session_index, format_nat_ed_translation_error, + t->translation_error, t->translation_via_i2of ? "i2of" : "o2if"); s = format (s, "\n i2of %U", format_nat_6t_flow, &t->i2of); s = format (s, "\n o2if %U", format_nat_6t_flow, &t->o2if); @@ -83,7 +79,7 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args) { if (t->lookup_skipped) { - s = format (s, "\n lookup skipped - cached session index used"); + s = format (s, "\n lookup skipped - cached session index used"); } else { @@ -91,6 +87,11 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args) &t->search_key); } } + if (IP_PROTOCOL_TCP == t->i2of.match.proto) + { + s = format (s, "\n TCP state: %U", format_nat44_ed_tcp_state, + t->tcp_state); + } return s; } @@ -105,14 +106,13 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args) * @param rt NAT runtime data * @param sw_if_index0 index of the inside interface * @param ip0 IPv4 header - * @param proto0 NAT protocol * @param rx_fib_index0 RX FIB index * * @returns 0 if packet should be translated otherwise 1 */ static inline int snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node, - u32 sw_if_index0, ip4_header_t *ip0, u32 proto0, + u32 sw_if_index0, ip4_header_t *ip0, u32 rx_fib_index0) { fib_node_index_t fei = FIB_NODE_INDEX_INVALID; @@ -155,7 +155,7 @@ snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node, pool_foreach (i, sm->interfaces) { /* NAT packet aimed at outside interface */ - if ((nat_interface_is_outside (i)) && + if ((nat44_ed_is_interface_outside (i)) && (sw_if_index == i->sw_if_index)) return 0; } @@ -165,91 +165,176 @@ snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node, } static int -nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto, - u32 thread_index, ip4_address_t r_addr, u16 r_port, - u8 proto, u16 port_per_thread, - u32 snat_thread_index, snat_session_t *s, - ip4_address_t *outside_addr, u16 *outside_port) +nat_ed_alloc_addr_and_port_with_snat_address ( + snat_main_t *sm, u8 proto, u32 thread_index, snat_address_t *a, + u16 port_per_thread, u32 snat_thread_index, snat_session_t *s, + ip4_address_t *outside_addr, u16 *outside_port) { - int i; - snat_address_t *a, *ga = 0; - const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024; - for (i = 0; i < vec_len (sm->addresses); i++) + s->o2i.match.daddr = a->addr; + /* first try port suggested by caller */ + u16 port = clib_net_to_host_u16 (*outside_port); + u16 port_offset = port - port_thread_offset; + if (port <= port_thread_offset || + port > port_thread_offset + port_per_thread) { - a = sm->addresses + i; - switch (nat_proto) + /* need to pick a different port, suggested port doesn't fit in + * this thread's port range */ + port_offset = snat_random_port (0, port_per_thread - 1); + port = port_thread_offset + port_offset; + } + u16 attempts = ED_PORT_ALLOC_ATTEMPTS; + do + { + if (IP_PROTOCOL_ICMP == proto) { -#define _(N, j, n, unused) \ - case NAT_PROTOCOL_##N: \ - if (a->fib_index == rx_fib_index) \ - { \ - s->o2i.match.daddr = a->addr; \ - /* first try port suggested by caller */ \ - u16 port = clib_net_to_host_u16 (*outside_port); \ - u16 port_offset = port - port_thread_offset; \ - if (port <= port_thread_offset || \ - port > port_thread_offset + port_per_thread) \ - { \ - /* need to pick a different port, suggested port doesn't fit in \ - * this thread's port range */ \ - port_offset = snat_random_port (0, port_per_thread - 1); \ - port = port_thread_offset + port_offset; \ - } \ - u16 attempts = ED_PORT_ALLOC_ATTEMPTS; \ - do \ - { \ - if (NAT_PROTOCOL_ICMP == nat_proto) \ - { \ - s->o2i.match.sport = clib_host_to_net_u16 (port); \ - } \ - s->o2i.match.dport = clib_host_to_net_u16 (port); \ - if (0 == \ - nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2)) \ - { \ - ++a->busy_##n##_port_refcounts[port]; \ - a->busy_##n##_ports_per_thread[thread_index]++; \ - a->busy_##n##_ports++; \ - *outside_addr = a->addr; \ - *outside_port = clib_host_to_net_u16 (port); \ - return 0; \ - } \ - port_offset = snat_random_port (0, port_per_thread - 1); \ - port = port_thread_offset + port_offset; \ - --attempts; \ - } \ - while (attempts > 0); \ - } \ - else if (a->fib_index == ~0) \ - { \ - ga = a; \ - } \ - break; - - foreach_nat_protocol; - default: - nat_elog_info (sm, "unknown protocol"); - return 1; + s->o2i.match.sport = clib_host_to_net_u16 (port); } + s->o2i.match.dport = clib_host_to_net_u16 (port); + if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2)) + { + *outside_addr = a->addr; + *outside_port = clib_host_to_net_u16 (port); + return 0; + } + port_offset = snat_random_port (0, port_per_thread - 1); + port = port_thread_offset + port_offset; + --attempts; } + while (attempts > 0); + return 1; +} - if (ga) +static int +nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, + u32 tx_sw_if_index, u32 nat_proto, + u32 thread_index, ip4_address_t s_addr, + ip4_address_t d_addr, u32 snat_thread_index, + snat_session_t *s, ip4_address_t *outside_addr, + u16 *outside_port) +{ + if (vec_len (sm->addresses) > 0) { - /* fake fib_index to reuse macro */ - rx_fib_index = ~0; - a = ga; - switch (nat_proto) + u32 s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses); + snat_address_t *a, *ja = 0, *ra = 0, *ba = 0; + int i; + + // output feature + if (tx_sw_if_index != ~0) { - foreach_nat_protocol; - default: - nat_elog_info (sm, "unknown protocol"); - return 1; + for (i = s_addr_offset; i < vec_len (sm->addresses); ++i) + { + a = sm->addresses + i; + if (a->fib_index == rx_fib_index) + { + if (a->sw_if_index == tx_sw_if_index) + { + if ((a->addr_len != ~0) && + (a->net.as_u32 == + (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len]))) + + { + return nat_ed_alloc_addr_and_port_with_snat_address ( + sm, nat_proto, thread_index, a, + sm->port_per_thread, snat_thread_index, s, + outside_addr, outside_port); + } + ra = a; + } + ja = a; + } + else if (a->fib_index == ~0) + { + ba = a; + } + } + for (i = 0; i < s_addr_offset; ++i) + { + a = sm->addresses + i; + if (a->fib_index == rx_fib_index) + { + if (a->sw_if_index == tx_sw_if_index) + { + if ((a->addr_len != ~0) && + (a->net.as_u32 == + (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len]))) + + { + return nat_ed_alloc_addr_and_port_with_snat_address ( + sm, nat_proto, thread_index, a, + sm->port_per_thread, snat_thread_index, s, + outside_addr, outside_port); + } + ra = a; + } + ja = a; + } + else if (a->fib_index == ~0) + { + ba = a; + } + } + if (ra) + { + return nat_ed_alloc_addr_and_port_with_snat_address ( + sm, nat_proto, thread_index, ra, sm->port_per_thread, + snat_thread_index, s, outside_addr, outside_port); + } + } + else + { + // first try nat pool addresses to sw interface addreses mappings + for (i = s_addr_offset; i < vec_len (sm->addresses); ++i) + { + a = sm->addresses + i; + if (a->fib_index == rx_fib_index) + { + if ((a->addr_len != ~0) && + (a->net.as_u32 == + (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len]))) + { + return nat_ed_alloc_addr_and_port_with_snat_address ( + sm, nat_proto, thread_index, a, sm->port_per_thread, + snat_thread_index, s, outside_addr, outside_port); + } + ja = a; + } + else if (a->fib_index == ~0) + { + ba = a; + } + } + for (i = 0; i < s_addr_offset; ++i) + { + a = sm->addresses + i; + if (a->fib_index == rx_fib_index) + { + if ((a->addr_len != ~0) && + (a->net.as_u32 == + (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len]))) + { + return nat_ed_alloc_addr_and_port_with_snat_address ( + sm, nat_proto, thread_index, a, sm->port_per_thread, + snat_thread_index, s, outside_addr, outside_port); + } + ja = a; + } + else if (a->fib_index == ~0) + { + ba = a; + } + } } - } - -#undef _ + if (ja || ba) + { + a = ja ? ja : ba; + return nat_ed_alloc_addr_and_port_with_snat_address ( + sm, nat_proto, thread_index, a, sm->port_per_thread, + snat_thread_index, s, outside_addr, outside_port); + } + } /* Totally out of translations to use... */ nat_ipfix_logging_addresses_exhausted (thread_index, 0); return 1; @@ -266,7 +351,6 @@ nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr) .fp_addr = {.ip4.as_u32 = addr.as_u32,} , }; - // TODO: multiple vrfs none can resolve addr vec_foreach (outside_fib, sm->outside_fibs) { fei = fib_table_lookup (outside_fib->fib_index, &pfx); @@ -283,61 +367,45 @@ nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr) static_always_inline int nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr, - u16 match_port, nat_protocol_t match_protocol, - u32 match_fib_index, ip4_address_t *daddr, - u16 *dport) + u16 match_port, ip_protocol_t match_protocol, + ip4_address_t *daddr, u16 *dport) { - clib_bihash_kv_8_8_t kv, value; - init_nat_k (&kv, match_addr, match_port, match_fib_index, match_protocol); - if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + snat_static_mapping_t *m = + nat44_ed_sm_o2i_lookup (sm, match_addr, match_port, 0, match_protocol); + if (!m) { /* Try address only mapping */ - init_nat_k (&kv, match_addr, 0, 0, 0); - if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, - &value)) + m = nat44_ed_sm_o2i_lookup (sm, match_addr, 0, 0, 0); + if (!m) return 0; } - - snat_static_mapping_t *m = - pool_elt_at_index (sm->static_mappings, value.value); *daddr = m->local_addr; if (dport) { /* Address only mapping doesn't change port */ - *dport = is_addr_only_static_mapping (m) ? match_port : m->local_port; + *dport = is_sm_addr_only (m->flags) ? match_port : m->local_port; } return 1; } static u32 -slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr, - ip4_address_t r_addr, u16 l_port, u16 r_port, u8 proto, - u32 rx_fib_index, snat_session_t **sessionp, - vlib_node_runtime_t *node, u32 next, u32 thread_index, f64 now) +slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b, + ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port, + u16 r_port, u8 proto, u32 rx_fib_index, u32 tx_sw_if_index, + snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next, + u32 thread_index, f64 now) { snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; ip4_address_t outside_addr; u16 outside_port; u32 outside_fib_index; - u8 is_identity_nat; + u8 is_identity_nat = 0; - u32 nat_proto = ip_proto_to_nat_proto (proto); snat_session_t *s = NULL; lb_nat_type_t lb = 0; ip4_address_t daddr = r_addr; u16 dport = r_port; - if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP)) - { - if (PREDICT_FALSE - (!tcp_flags_is_init - (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))) - { - b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN]; - return NAT_NEXT_DROP; - } - } - if (PREDICT_FALSE (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index))) { @@ -371,7 +439,7 @@ slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr, u32 sm_fib_index; /* First try to match static mapping by local address and port */ int is_sm; - if (snat_static_mapping_match (sm, l_addr, l_port, rx_fib_index, nat_proto, + if (snat_static_mapping_match (vm, sm, l_addr, l_port, rx_fib_index, proto, &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0, &lb, 0, &is_identity_nat, 0)) { @@ -379,13 +447,22 @@ slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr, } else { + if (PREDICT_FALSE (is_identity_nat)) + { + *sessionp = NULL; + return next; + } is_sm = 1; } - if (PREDICT_FALSE (is_sm && is_identity_nat)) + if (PREDICT_TRUE (proto == IP_PROTOCOL_TCP)) { - *sessionp = NULL; - return next; + if (PREDICT_FALSE (!tcp_flags_is_init ( + vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))) + { + b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN]; + return NAT_NEXT_DROP; + } } s = nat_ed_session_alloc (sm, thread_index, now, proto); @@ -395,7 +472,7 @@ slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr, { s->in2out.addr = l_addr; s->in2out.port = l_port; - s->nat_proto = nat_proto; + s->proto = proto; s->in2out.fib_index = rx_fib_index; s->out2in.fib_index = outside_fib_index; @@ -403,8 +480,8 @@ slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr, outside_port = l_port; // hairpinning? - int is_hairpinning = nat44_ed_external_sm_lookup ( - sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport); + int is_hairpinning = nat44_ed_external_sm_lookup (sm, r_addr, r_port, + proto, &daddr, &dport); s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING; // destination addr/port updated with real values in @@ -412,7 +489,7 @@ slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr, nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0, s->out2in.fib_index, proto); nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32); - if (NAT_PROTOCOL_ICMP == nat_proto) + if (IP_PROTOCOL_ICMP == proto) { nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port); } @@ -423,9 +500,8 @@ slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr, nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index); if (nat_ed_alloc_addr_and_port ( - sm, rx_fib_index, nat_proto, thread_index, daddr, dport, proto, - sm->port_per_thread, tsm->snat_thread_index, s, &outside_addr, - &outside_port)) + sm, rx_fib_index, tx_sw_if_index, proto, thread_index, l_addr, + r_addr, tsm->snat_thread_index, s, &outside_addr, &outside_port)) { nat_elog_notice (sm, "addresses exhausted"); b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS]; @@ -442,17 +518,17 @@ slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr, s->out2in.port = outside_port = sm_port; s->in2out.addr = l_addr; s->in2out.port = l_port; - s->nat_proto = nat_proto; + s->proto = proto; s->in2out.fib_index = rx_fib_index; s->out2in.fib_index = outside_fib_index; s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; // hairpinning? - int is_hairpinning = nat44_ed_external_sm_lookup ( - sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport); + int is_hairpinning = nat44_ed_external_sm_lookup (sm, r_addr, r_port, + proto, &daddr, &dport); s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING; - if (NAT_PROTOCOL_ICMP == nat_proto) + if (IP_PROTOCOL_ICMP == proto) { nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr, sm_port, s->out2in.fib_index, proto); @@ -475,7 +551,6 @@ slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr, if (lb) s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING; - s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT; s->ext_host_addr = r_addr; s->ext_host_port = r_port; @@ -483,7 +558,8 @@ slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr, rx_fib_index, proto); nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32); nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32); - if (NAT_PROTOCOL_ICMP == nat_proto) + + if (IP_PROTOCOL_ICMP == proto) { nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port); } @@ -501,17 +577,14 @@ slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr, } /* log NAT event */ - nat_ipfix_logging_nat44_ses_create (thread_index, - s->in2out.addr.as_u32, - s->out2in.addr.as_u32, - s->nat_proto, - s->in2out.port, - s->out2in.port, s->in2out.fib_index); + nat_ipfix_logging_nat44_ses_create ( + thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto, + s->in2out.port, s->out2in.port, s->in2out.fib_index); nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr, s->in2out.port, &s->ext_host_nat_addr, s->ext_host_nat_port, &s->out2in.addr, s->out2in.port, - &s->ext_host_addr, s->ext_host_port, s->nat_proto, 0); + &s->ext_host_addr, s->ext_host_port, s->proto, 0); per_vrf_sessions_register_session (s, thread_index); @@ -520,12 +593,6 @@ slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr, error: if (s) { - if (!is_sm) - { - snat_free_outside_address_and_port (sm->addresses, thread_index, - &outside_addr, outside_port, - nat_proto); - } nat_ed_session_delete (sm, s, thread_index, 1); } *sessionp = s = NULL; @@ -533,15 +600,17 @@ error: } static_always_inline int -nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t * node, - u32 sw_if_index, ip4_header_t * ip, u32 proto, - u32 rx_fib_index, u32 thread_index) +nat44_ed_not_translate (vlib_main_t *vm, snat_main_t *sm, + vlib_node_runtime_t *node, u32 sw_if_index, + vlib_buffer_t *b, ip4_header_t *ip, u32 proto, + u32 rx_fib_index) { - udp_header_t *udp = ip4_next_header (ip); clib_bihash_kv_16_8_t kv, value; - init_ed_k (&kv, ip->dst_address, udp->dst_port, ip->src_address, - udp->src_port, sm->outside_fib_index, ip->protocol); + init_ed_k (&kv, ip->dst_address.as_u32, + vnet_buffer (b)->ip.reass.l4_dst_port, ip->src_address.as_u32, + vnet_buffer (b)->ip.reass.l4_src_port, sm->outside_fib_index, + ip->protocol); /* NAT packet aimed at external address if has active sessions */ if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) @@ -550,10 +619,10 @@ nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t * node, ip4_address_t placeholder_addr; u16 placeholder_port; u32 placeholder_fib_index; - if (!snat_static_mapping_match - (sm, ip->dst_address, udp->dst_port, sm->outside_fib_index, proto, - &placeholder_addr, &placeholder_port, &placeholder_fib_index, 1, 0, - 0, 0, 0, 0, 0)) + if (!snat_static_mapping_match ( + vm, sm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port, + sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port, + &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0)) return 0; } else @@ -562,8 +631,7 @@ nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t * node, if (sm->forwarding_enabled) return 1; - return snat_not_translate_fast (sm, node, sw_if_index, ip, proto, - rx_fib_index); + return snat_not_translate_fast (sm, node, sw_if_index, ip, rx_fib_index); } static_always_inline int @@ -587,18 +655,18 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip, &lookup_sport, &lookup_daddr, &lookup_dport, &lookup_protocol)) return 0; - init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport, - 0, lookup_protocol); + init_ed_k (&kv, lookup_saddr.as_u32, lookup_sport, lookup_daddr.as_u32, + lookup_dport, 0, lookup_protocol); } else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP) { - init_ed_k (&kv, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port, - ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port, 0, - ip->protocol); + init_ed_k (&kv, ip->src_address.as_u32, + vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32, + vnet_buffer (b)->ip.reass.l4_dst_port, 0, ip->protocol); } else { - init_ed_k (&kv, ip->src_address, 0, ip->dst_address, 0, 0, + init_ed_k (&kv, ip->src_address.as_u32, 0, ip->dst_address.as_u32, 0, 0, ip->protocol); } @@ -609,11 +677,13 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip, pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value)); - if (is_fwd_bypass_session (s)) + if (na44_ed_is_fwd_bypass_session (s)) { if (ip->protocol == IP_PROTOCOL_TCP) { - nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index); + nat44_set_tcp_session_state_i2o ( + sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags, + thread_index); } /* Accounting */ nat44_session_update_counters (s, now, @@ -631,10 +701,11 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip, } static_always_inline int -nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip, - u16 src_port, u16 dst_port, - u32 thread_index, u32 rx_sw_if_index, - u32 tx_sw_if_index, f64 now) +nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b, + ip4_header_t *ip, u16 src_port, + u16 dst_port, u32 thread_index, + u32 rx_sw_if_index, u32 tx_sw_if_index, + int is_multi_worker) { clib_bihash_kv_16_8_t kv, value; snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; @@ -644,26 +715,40 @@ nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip, u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index); /* src NAT check */ - init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port, - tx_fib_index, ip->protocol); + init_ed_k (&kv, ip->src_address.as_u32, src_port, ip->dst_address.as_u32, + dst_port, tx_fib_index, ip->protocol); if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { ASSERT (thread_index == ed_value_get_thread_index (&value)); s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value)); - if (nat44_is_ses_closed (s) - && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp)) - { - nat_free_session_data (sm, s, thread_index, 0); - nat_ed_session_delete (sm, s, thread_index, 1); - } return 1; } /* dst NAT check */ - init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port, - rx_fib_index, ip->protocol); + if (is_multi_worker && + PREDICT_TRUE (!pool_is_free_index ( + tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index))) + { + nat_6t_t lookup; + lookup.fib_index = rx_fib_index; + lookup.proto = ip->protocol; + lookup.daddr.as_u32 = ip->src_address.as_u32; + lookup.dport = src_port; + lookup.saddr.as_u32 = ip->dst_address.as_u32; + lookup.sport = dst_port; + s = pool_elt_at_index ( + tsm->sessions, vnet_buffer2 (b)->nat.cached_dst_nat_session_index); + if (PREDICT_TRUE (nat_6t_t_eq (&s->i2o.match, &lookup))) + { + goto skip_dst_nat_lookup; + } + s = NULL; + } + + init_ed_k (&kv, ip->dst_address.as_u32, dst_port, ip->src_address.as_u32, + src_port, rx_fib_index, ip->protocol); if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { ASSERT (thread_index == ed_value_get_thread_index (&value)); @@ -671,15 +756,17 @@ nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip, pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value)); - if (is_fwd_bypass_session (s)) + skip_dst_nat_lookup: + if (na44_ed_is_fwd_bypass_session (s)) return 0; /* hairpinning */ pool_foreach (i, sm->output_feature_interfaces) - { - if ((nat_interface_is_inside (i)) && (rx_sw_if_index == i->sw_if_index)) - return 0; - } + { + if ((nat44_ed_is_interface_inside (i)) && + (rx_sw_if_index == i->sw_if_index)) + return 0; + } return 1; } @@ -689,9 +776,10 @@ nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip, static inline u32 icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, icmp46_header_t *icmp, u32 sw_if_index, - u32 rx_fib_index, vlib_node_runtime_t *node, - u32 next, f64 now, u32 thread_index, - nat_protocol_t nat_proto, snat_session_t **s_p) + u32 tx_sw_if_index, u32 rx_fib_index, + vlib_node_runtime_t *node, u32 next, f64 now, + u32 thread_index, snat_session_t **s_p, + int is_multi_worker) { vlib_main_t *vm = vlib_get_main (); u16 checksum; @@ -710,20 +798,19 @@ icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, return NAT_NEXT_DROP; } - if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0) + if (tx_sw_if_index != ~0) { if (PREDICT_FALSE (nat44_ed_not_translate_output_feature ( - sm, ip, lookup_sport, lookup_dport, thread_index, sw_if_index, - vnet_buffer (b)->sw_if_index[VLIB_TX], now))) + sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index, + tx_sw_if_index, is_multi_worker))) { return next; } } else { - if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index, ip, - NAT_PROTOCOL_ICMP, - rx_fib_index, thread_index))) + if (PREDICT_FALSE (nat44_ed_not_translate ( + vm, sm, node, sw_if_index, b, ip, IP_PROTOCOL_ICMP, rx_fib_index))) { return next; } @@ -736,9 +823,10 @@ icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, return NAT_NEXT_DROP; } - next = slow_path_ed (sm, b, ip->src_address, ip->dst_address, lookup_sport, - lookup_dport, ip->protocol, rx_fib_index, &s, node, - next, thread_index, vlib_time_now (vm)); + next = + slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address, lookup_sport, + lookup_dport, ip->protocol, rx_fib_index, tx_sw_if_index, &s, + node, next, thread_index, vlib_time_now (vm)); if (NAT_NEXT_DROP == next) goto out; @@ -776,7 +864,6 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b, vlib_main_t *vm, vlib_node_runtime_t *node) { - clib_bihash_kv_8_8_t kv, value; clib_bihash_kv_16_8_t s_kv, s_value; snat_static_mapping_t *m = NULL; snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; @@ -809,12 +896,11 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b, break; } - init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0); - /* Try to find static mapping first */ - if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value)) + m = nat44_ed_sm_i2o_lookup (sm, ip->src_address, 0, rx_fib_index, + ip->protocol); + if (m) { - m = pool_elt_at_index (sm->static_mappings, value.value); new_src_addr = m->external_addr; } else @@ -823,8 +909,9 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b, { if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32) { - init_ed_k (&s_kv, s->out2in.addr, 0, ip->dst_address, 0, - outside_fib_index, ip->protocol); + init_ed_k (&s_kv, s->out2in.addr.as_u32, 0, + ip->dst_address.as_u32, 0, outside_fib_index, + ip->protocol); if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value)) { new_src_addr = s->out2in.addr; @@ -837,8 +924,9 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b, { for (i = 0; i < vec_len (sm->addresses); i++) { - init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0, - outside_fib_index, ip->protocol); + init_ed_k (&s_kv, sm->addresses[i].addr.as_u32, 0, + ip->dst_address.as_u32, 0, outside_fib_index, + ip->protocol); if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value)) { new_src_addr = sm->addresses[i].addr; @@ -867,9 +955,8 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b, nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index); // hairpinning? - int is_hairpinning = - nat44_ed_external_sm_lookup (sm, ip->dst_address, 0, NAT_PROTOCOL_OTHER, - outside_fib_index, &new_dst_addr, NULL); + int is_hairpinning = nat44_ed_external_sm_lookup ( + sm, ip->dst_address, 0, ip->protocol, &new_dst_addr, NULL); s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING; nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32); @@ -882,8 +969,6 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b, nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index); s->ext_host_addr.as_u32 = ip->dst_address.as_u32; - s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO; - s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT; s->out2in.addr.as_u32 = new_src_addr.as_u32; s->out2in.fib_index = outside_fib_index; s->in2out.addr.as_u32 = ip->src_address.as_u32; @@ -918,10 +1003,11 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b, } static inline uword -nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, - int is_output_feature) +nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame, + int is_output_feature, + int is_multi_worker) { u32 n_left_from, *from; snat_main_t *sm = &snat_main; @@ -941,16 +1027,16 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, while (n_left_from > 0) { vlib_buffer_t *b0; - u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0; - nat_protocol_t proto0; + u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0; + u32 tx_sw_if_index0; + u32 cntr_sw_if_index0; + ip_protocol_t proto0; ip4_header_t *ip0; snat_session_t *s0 = 0; clib_bihash_kv_16_8_t kv0, value0; nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS; nat_6t_flow_t *f = 0; - ip4_address_t lookup_saddr, lookup_daddr; - u16 lookup_sport, lookup_dport; - u8 lookup_protocol; + nat_6t_t lookup; int lookup_skipped = 0; b0 = *b; @@ -965,7 +1051,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, vlib_prefetch_buffer_header (p2, LOAD); - CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD); + clib_prefetch_load (p2->data); } if (is_output_feature) @@ -978,11 +1064,15 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0); - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - rx_fib_index0 = - fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0); + rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + cntr_sw_if_index0 = + is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0; + rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, + rx_sw_if_index0); + lookup.fib_index = rx_fib_index0; - if (PREDICT_FALSE (ip0->ttl == 1)) + if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1)) { vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, @@ -992,7 +1082,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, goto trace0; } - proto0 = ip_proto_to_nat_proto (ip0->protocol); + proto0 = ip0->protocol; if (is_output_feature) { @@ -1002,7 +1092,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, goto trace0; } - if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) + if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP)) { if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != ICMP4_echo_request && @@ -1016,8 +1106,8 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, goto trace0; } int err = nat_get_icmp_session_lookup_values ( - b0, ip0, &lookup_saddr, &lookup_sport, &lookup_daddr, - &lookup_dport, &lookup_protocol); + b0, ip0, &lookup.saddr, &lookup.sport, &lookup.daddr, + &lookup.dport, &lookup.proto); if (err != 0) { b0->error = node->errors[err]; @@ -1027,30 +1117,27 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, } else { - lookup_protocol = ip0->protocol; - lookup_saddr = ip0->src_address; - lookup_daddr = ip0->dst_address; - lookup_sport = vnet_buffer (b0)->ip.reass.l4_src_port; - lookup_dport = vnet_buffer (b0)->ip.reass.l4_dst_port; + lookup.proto = ip0->protocol; + lookup.saddr.as_u32 = ip0->src_address.as_u32; + lookup.daddr.as_u32 = ip0->dst_address.as_u32; + lookup.sport = vnet_buffer (b0)->ip.reass.l4_src_port; + lookup.dport = vnet_buffer (b0)->ip.reass.l4_dst_port; } /* there might be a stashed index in vnet_buffer2 from handoff or * classify node, see if it can be used */ - if (!pool_is_free_index (tsm->sessions, + if (is_multi_worker && + !pool_is_free_index (tsm->sessions, vnet_buffer2 (b0)->nat.cached_session_index)) { s0 = pool_elt_at_index (tsm->sessions, vnet_buffer2 (b0)->nat.cached_session_index); if (PREDICT_TRUE ( - nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, - lookup_daddr, lookup_dport, lookup_protocol, - rx_fib_index0) + nat_6t_t_eq (&s0->i2o.match, &lookup) // for some hairpinning cases there are two "i2i" flows instead // of i2o and o2i as both hosts are on inside || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING && - nat_6t_flow_match ( - &s0->o2i, b0, lookup_saddr, lookup_sport, lookup_daddr, - lookup_dport, lookup_protocol, rx_fib_index0)))) + nat_6t_t_eq (&s0->o2i.match, &lookup)))) { /* yes, this is the droid we're looking for */ lookup_skipped = 1; @@ -1059,8 +1146,8 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, s0 = NULL; } - init_ed_k (&kv0, ip0->src_address, lookup_sport, ip0->dst_address, - lookup_dport, rx_fib_index0, lookup_protocol); + init_ed_k (&kv0, lookup.saddr.as_u32, lookup.sport, lookup.daddr.as_u32, + lookup.dport, lookup.fib_index, lookup.proto); // lookup flow if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0)) @@ -1077,38 +1164,24 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, skip_lookup: + ASSERT (thread_index == s0->thread_index); + if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index))) { // session is closed, go slow path - nat_free_session_data (sm, s0, thread_index, 0); + nat44_ed_free_session_data (sm, s0, thread_index, 0); nat_ed_session_delete (sm, s0, thread_index, 1); next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH; goto trace0; } - if (s0->tcp_closed_timestamp) - { - if (now >= s0->tcp_closed_timestamp) - { - // session is closed, go slow path, freed in slow path - next[0] = def_slow; - } - else - { - // session in transitory timeout, drop - b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED]; - next[0] = NAT_NEXT_DROP; - } - goto trace0; - } - // drop if session expired u64 sess_timeout_time; sess_timeout_time = s0->last_heard + (f64) nat44_session_get_timeout (sm, s0); if (now >= sess_timeout_time) { - nat_free_session_data (sm, s0, thread_index, 0); + nat44_ed_free_session_data (sm, s0, thread_index, 0); nat_ed_session_delete (sm, s0, thread_index, 1); // session is closed, go slow path next[0] = def_slow; @@ -1117,56 +1190,56 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, b0->flags |= VNET_BUFFER_F_IS_NATED; - if (nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, - lookup_daddr, lookup_dport, lookup_protocol, - rx_fib_index0)) + if (nat_6t_t_eq (&s0->i2o.match, &lookup)) { f = &s0->i2o; } else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING && - nat_6t_flow_match (&s0->o2i, b0, lookup_saddr, lookup_sport, - lookup_daddr, lookup_dport, lookup_protocol, - rx_fib_index0)) + nat_6t_t_eq (&s0->o2i.match, &lookup)) { f = &s0->o2i; } else { translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH; - nat_free_session_data (sm, s0, thread_index, 0); + nat44_ed_free_session_data (sm, s0, thread_index, 0); nat_ed_session_delete (sm, s0, thread_index, 1); next[0] = NAT_NEXT_DROP; + b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED]; goto trace0; } if (NAT_ED_TRNSL_ERR_SUCCESS != - (translation_error = nat_6t_flow_buf_translate ( - sm, b0, ip0, f, proto0, is_output_feature))) + (translation_error = nat_6t_flow_buf_translate_i2o ( + vm, sm, b0, ip0, f, proto0, is_output_feature))) { - nat_free_session_data (sm, s0, thread_index, 0); + nat44_ed_free_session_data (sm, s0, thread_index, 0); nat_ed_session_delete (sm, s0, thread_index, 1); next[0] = NAT_NEXT_DROP; + b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED]; goto trace0; } switch (proto0) { - case NAT_PROTOCOL_TCP: + case IP_PROTOCOL_TCP: vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp, - thread_index, sw_if_index0, 1); - nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index); + thread_index, cntr_sw_if_index0, 1); + nat44_set_tcp_session_state_i2o ( + sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags, + thread_index); break; - case NAT_PROTOCOL_UDP: + case IP_PROTOCOL_UDP: vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp, - thread_index, sw_if_index0, 1); + thread_index, cntr_sw_if_index0, 1); break; - case NAT_PROTOCOL_ICMP: + case IP_PROTOCOL_ICMP: vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp, - thread_index, sw_if_index0, 1); + thread_index, cntr_sw_if_index0, 1); break; - case NAT_PROTOCOL_OTHER: + default: vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other, - thread_index, sw_if_index0, 1); + thread_index, cntr_sw_if_index0, 1); break; } @@ -1184,7 +1257,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, { nat_in2out_ed_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); - t->sw_if_index = sw_if_index0; + t->sw_if_index = rx_sw_if_index0; t->next_index = next[0]; t->is_slow_path = 0; t->translation_error = translation_error; @@ -1197,6 +1270,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); t->translation_via_i2of = (&s0->i2o == f); + t->tcp_state = s0->tcp_state; } else { @@ -1207,7 +1281,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, if (next[0] == NAT_NEXT_DROP) { vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops, - thread_index, sw_if_index0, 1); + thread_index, cntr_sw_if_index0, 1); } n_left_from--; @@ -1220,10 +1294,11 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, } static inline uword -nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, - int is_output_feature) +nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame, + int is_output_feature, + int is_multi_worker) { u32 n_left_from, *from; snat_main_t *sm = &snat_main; @@ -1241,8 +1316,10 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, while (n_left_from > 0) { vlib_buffer_t *b0; - u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0; - nat_protocol_t proto0; + u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0; + u32 tx_sw_if_index0; + u32 cntr_sw_if_index0; + ip_protocol_t proto0; ip4_header_t *ip0; udp_header_t *udp0; icmp46_header_t *icmp0; @@ -1260,11 +1337,14 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0); - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - rx_fib_index0 = - fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0); + rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + cntr_sw_if_index0 = + is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0; + rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, + rx_sw_if_index0); - if (PREDICT_FALSE (ip0->ttl == 1)) + if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1)) { vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, @@ -1276,102 +1356,106 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, udp0 = ip4_next_header (ip0); icmp0 = (icmp46_header_t *) udp0; - proto0 = ip_proto_to_nat_proto (ip0->protocol); + proto0 = ip0->protocol; - if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) + if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto0))) { s0 = nat44_ed_in2out_slowpath_unknown_proto ( sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node); if (!s0) next[0] = NAT_NEXT_DROP; - if (NAT_ED_TRNSL_ERR_SUCCESS != - (translation_error = nat_6t_flow_buf_translate ( - sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) + if (NAT_NEXT_DROP != next[0] && s0 && + NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate_i2o ( + vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) { + nat44_ed_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + next[0] = NAT_NEXT_DROP; + b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED]; goto trace0; } vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other, - thread_index, sw_if_index0, 1); + thread_index, cntr_sw_if_index0, 1); goto trace0; } - if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) + if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP)) { - next[0] = icmp_in2out_ed_slow_path (sm, b0, ip0, icmp0, sw_if_index0, - rx_fib_index0, node, next[0], - now, thread_index, proto0, &s0); + next[0] = icmp_in2out_ed_slow_path ( + sm, b0, ip0, icmp0, rx_sw_if_index0, tx_sw_if_index0, + rx_fib_index0, node, next[0], now, thread_index, &s0, + is_multi_worker); if (NAT_NEXT_DROP != next[0] && s0 && NAT_ED_TRNSL_ERR_SUCCESS != - (translation_error = nat_6t_flow_buf_translate ( - sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) + (translation_error = nat_6t_flow_buf_translate_i2o ( + vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) { + nat44_ed_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + next[0] = NAT_NEXT_DROP; + b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED]; goto trace0; } - vlib_increment_simple_counter (&sm->counters.slowpath.in2out.icmp, - thread_index, sw_if_index0, 1); + if (NAT_NEXT_DROP != next[0]) + { + vlib_increment_simple_counter ( + &sm->counters.slowpath.in2out.icmp, thread_index, + cntr_sw_if_index0, 1); + } goto trace0; } - init_ed_k (&kv0, ip0->src_address, - vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address, - vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, - ip0->protocol); + init_ed_k ( + &kv0, ip0->src_address.as_u32, vnet_buffer (b0)->ip.reass.l4_src_port, + ip0->dst_address.as_u32, vnet_buffer (b0)->ip.reass.l4_dst_port, + rx_fib_index0, ip0->protocol); if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0)) { ASSERT (thread_index == ed_value_get_thread_index (&value0)); s0 = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value0)); - - if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp) - { - nat_free_session_data (sm, s0, thread_index, 0); - nat_ed_session_delete (sm, s0, thread_index, 1); - s0 = NULL; - } } if (!s0) { if (is_output_feature) { - if (PREDICT_FALSE - (nat44_ed_not_translate_output_feature - (sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port, + if (PREDICT_FALSE (nat44_ed_not_translate_output_feature ( + sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port, vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index, - sw_if_index0, vnet_buffer (b0)->sw_if_index[VLIB_TX], - now))) + rx_sw_if_index0, tx_sw_if_index0, is_multi_worker))) goto trace0; /* * Send DHCP packets to the ipv4 stack, or we won't * be able to use dhcp client on the outside interface */ - if (PREDICT_FALSE - (proto0 == NAT_PROTOCOL_UDP - && (vnet_buffer (b0)->ip.reass.l4_dst_port == - clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)) - && ip0->dst_address.as_u32 == 0xffffffff)) + if (PREDICT_FALSE ( + proto0 == IP_PROTOCOL_UDP && + (vnet_buffer (b0)->ip.reass.l4_dst_port == + clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)) && + ip0->dst_address.as_u32 == 0xffffffff)) goto trace0; } else { - if (PREDICT_FALSE - (nat44_ed_not_translate - (sm, node, sw_if_index0, ip0, proto0, rx_fib_index0, - thread_index))) + if (PREDICT_FALSE ( + nat44_ed_not_translate (vm, sm, node, rx_sw_if_index0, b0, + ip0, proto0, rx_fib_index0))) goto trace0; } next[0] = - slow_path_ed (sm, b0, ip0->src_address, ip0->dst_address, + slow_path_ed (vm, sm, b0, ip0->src_address, ip0->dst_address, vnet_buffer (b0)->ip.reass.l4_src_port, vnet_buffer (b0)->ip.reass.l4_dst_port, - ip0->protocol, rx_fib_index0, &s0, node, next[0], - thread_index, now); + ip0->protocol, rx_fib_index0, tx_sw_if_index0, &s0, + node, next[0], thread_index, now); if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP)) goto trace0; @@ -1384,25 +1468,28 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, b0->flags |= VNET_BUFFER_F_IS_NATED; if (NAT_ED_TRNSL_ERR_SUCCESS != - (translation_error = nat_6t_flow_buf_translate ( - sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) + (translation_error = nat_6t_flow_buf_translate_i2o ( + vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) { - nat_free_session_data (sm, s0, thread_index, 0); + nat44_ed_free_session_data (sm, s0, thread_index, 0); nat_ed_session_delete (sm, s0, thread_index, 1); - s0 = NULL; + next[0] = NAT_NEXT_DROP; + b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED]; goto trace0; } - if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) + if (PREDICT_TRUE (proto0 == IP_PROTOCOL_TCP)) { vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp, - thread_index, sw_if_index0, 1); - nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index); + thread_index, cntr_sw_if_index0, 1); + nat44_set_tcp_session_state_i2o ( + sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags, + thread_index); } else { vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp, - thread_index, sw_if_index0, 1); + thread_index, cntr_sw_if_index0, 1); } /* Accounting */ @@ -1418,7 +1505,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, { nat_in2out_ed_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); - t->sw_if_index = sw_if_index0; + t->sw_if_index = rx_sw_if_index0; t->next_index = next[0]; t->is_slow_path = 1; t->translation_error = translation_error; @@ -1430,6 +1517,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); t->translation_via_i2of = 1; + t->tcp_state = s0->tcp_state; } else @@ -1441,7 +1529,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, if (next[0] == NAT_NEXT_DROP) { vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops, - thread_index, sw_if_index0, 1); + thread_index, cntr_sw_if_index0, 1); } n_left_from--; @@ -1459,7 +1547,14 @@ VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0); + if (snat_main.num_workers > 1) + { + return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 1); + } + else + { + return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0, 0); + } } VLIB_REGISTER_NODE (nat44_ed_in2out_node) = { @@ -1477,7 +1572,14 @@ VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1); + if (snat_main.num_workers > 1) + { + return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 1); + } + else + { + return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1, 0); + } } VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = { @@ -1495,7 +1597,14 @@ VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0); + if (snat_main.num_workers > 1) + { + return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 1); + } + else + { + return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0, 0); + } } VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = { @@ -1514,7 +1623,14 @@ VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm, * node, vlib_frame_t * frame) { - return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1); + if (snat_main.num_workers > 1) + { + return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 1); + } + else + { + return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1, 0); + } } VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = {