2 * Copyright (c) 2018 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
17 * @brief NAT44 endpoint-dependent outside to inside network translation
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/pg/pg.h>
23 #include <vnet/ip/ip.h>
24 #include <vnet/ethernet/ethernet.h>
25 #include <vnet/fib/ip4_fib.h>
26 #include <vnet/udp/udp.h>
27 #include <vppinfra/error.h>
29 #include <nat/nat_ipfix_logging.h>
30 #include <nat/nat_inlines.h>
31 #include <nat/nat44/inlines.h>
32 #include <nat/nat_syslog.h>
33 #include <nat/nat_ha.h>
34 #include <nat/nat44/ed_inlines.h>
/* Error strings for the out2in-ed nodes: each _(sym,string) entry of
 * foreach_nat_out2in_ed_error expands to just its string.  The
 * nat44-ed-out2in and nat44-ed-out2in-slowpath registrations reference
 * this table through .error_strings / .n_errors. */
36 static char *nat_out2in_ed_error_strings[] = {
37 #define _(sym,string) string,
38 foreach_nat_out2in_ed_error
48 } nat44_ed_out2in_trace_t;
/* Trace formatter for nat44_ed_out2in_trace_t records.  Consumes the vm,
 * node and trace-record pointers from args (vm and node are unused),
 * selects a slow-path/fast-path tag from t->is_slow_path and appends
 * "<tag>: sw_if_index, next index, session" to s. */
51 format_nat44_ed_out2in_trace (u8 * s, va_list * args)
53 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
54 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
55 nat44_ed_out2in_trace_t *t = va_arg (*args, nat44_ed_out2in_trace_t *);
59 t->is_slow_path ? "NAT44_OUT2IN_ED_SLOW_PATH" :
60 "NAT44_OUT2IN_ED_FAST_PATH";
62 s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
63 t->sw_if_index, t->next_index, t->session_index);
/* Slow-path handling of one ICMP packet: delegates the translation to
 * icmp_out2in() and, when the packet is not dropped and a session was
 * reported through p_s0, accounts the packet against that session and
 * refreshes its LRU position.
 * The slow-path node assigns this function's result to next[0];
 * presumably the (possibly updated) next0 is returned - the return
 * statement is not visible here, confirm. */
69 icmp_out2in_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0,
70 ip4_header_t * ip0, icmp46_header_t * icmp0,
71 u32 sw_if_index0, u32 rx_fib_index0,
72 vlib_node_runtime_t * node, u32 next0, f64 now,
73 u32 thread_index, snat_session_t ** p_s0)
75 vlib_main_t *vm = vlib_get_main ();
77 next0 = icmp_out2in (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
78 next0, thread_index, p_s0, 0);
/* icmp_out2in() publishes the matched/created session via p_s0. */
79 snat_session_t *s0 = *p_s0;
80 if (PREDICT_TRUE (next0 != NAT_NEXT_DROP && s0))
/* Byte count is the full buffer-chain length. */
83 nat44_session_update_counters (s0, now,
84 vlib_buffer_length_in_chain
85 (vm, b0), thread_index);
86 /* Per-user LRU list maintenance */
87 nat44_session_update_lru (sm, s0, thread_index);
92 #ifndef CLIB_MARCH_VARIANT
/* Staleness callback handed to clib_bihash_add_or_overwrite_stale_16_8()
 * for the out2in_ed table.  arg is a nat44_is_idle_session_ctx_t carrying
 * the current time and thread index.  When the session referenced by kv
 * has timed out, the callback removes the matching in2out_ed key, emits
 * IPFIX / syslog / HA delete notifications, releases the outside
 * address/port and deletes the session.
 * Return values are not visible in this view; presumably non-zero means
 * "entry is stale, may be overwritten" - confirm against the bihash API. */
94 nat44_o2i_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg)
96 snat_main_t *sm = &snat_main;
97 nat44_is_idle_session_ctx_t *ctx = arg;
/* NOTE(review): declared u64 but assigned an f64 sum below, so the
 * fractional part of the deadline is truncated - confirm this is intended. */
99 u64 sess_timeout_time;
102 ip4_address_t *l_addr, *r_addr;
104 clib_bihash_kv_16_8_t ed_kv;
107 snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
/* NOTE(review): kv->value is used directly as the pool index here, while
 * other lookups in this file decode the value with
 * ed_value_get_session_index() - confirm the encoding matches. */
110 s = pool_elt_at_index (tsm->sessions, kv->value);
111 sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s);
112 if (ctx->now >= sess_timeout_time)
/* Rebuild the in2out key of the expired session so it can be deleted. */
114 l_addr = &s->in2out.addr;
115 r_addr = &s->ext_host_addr;
116 fib_index = s->in2out.fib_index;
117 if (snat_is_unk_proto_session (s))
/* Unknown-protocol sessions keep the IP protocol number in in2out.port. */
119 proto = s->in2out.port;
125 proto = nat_proto_to_ip_proto (s->nat_proto);
126 l_port = s->in2out.port;
127 r_port = s->ext_host_port;
/* Twice-NAT sessions are keyed on the translated external host. */
129 if (is_twice_nat_session (s))
131 r_addr = &s->ext_host_nat_addr;
132 r_port = s->ext_host_nat_port;
134 init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto);
135 if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0))
136 nat_elog_warn ("in2out_ed key del failed");
138 if (snat_is_unk_proto_session (s))
141 snat_ipfix_logging_nat44_ses_delete (ctx->thread_index,
142 s->in2out.addr.as_u32,
143 s->out2in.addr.as_u32,
147 s->in2out.fib_index);
149 nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index,
150 &s->in2out.addr, s->in2out.port,
151 &s->ext_host_nat_addr, s->ext_host_nat_port,
152 &s->out2in.addr, s->out2in.port,
153 &s->ext_host_addr, s->ext_host_port,
154 s->nat_proto, is_twice_nat_session (s));
156 nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
157 s->ext_host_port, s->nat_proto, s->out2in.fib_index,
/* NOTE(review): the twice-NAT address/port release below is commented
 * out (see the FIXME), so in the visible code this loop frees nothing. */
160 if (is_twice_nat_session (s))
162 for (i = 0; i < vec_len (sm->twice_nat_addresses); i++)
164 // FIXME TODO this is obviously wrong code ... needs fix!
165 // key.protocol = s->nat_proto;
166 // key.port = s->ext_host_nat_port;
167 // a = sm->twice_nat_addresses + i;
168 // if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32)
170 // snat_free_outside_address_and_port (sm->twice_nat_addresses,
171 // ctx->thread_index,
178 if (snat_is_session_static (s))
181 snat_free_outside_address_and_port (sm->addresses, ctx->thread_index,
182 &s->out2in.addr, s->out2in.port,
185 nat_ed_session_delete (sm, s, ctx->thread_index, 1);
/* Allocates a port for proto on the single, caller-chosen address a.
 * For each NAT protocol the macro below checks that the thread still has
 * head-room (busy ports per thread < port_per_thread), draws a random
 * port from this thread's slice
 *   port_per_thread * snat_thread_index + random(0..port_per_thread-1) + 1024
 * and, once a candidate passes the refcount test, bumps the busy counters
 * and stores the port in network byte order through *port.  If nothing is
 * allocated, an IPFIX "addresses exhausted" event is logged.
 *
 * NOTE(review): the refcount is tested as non-zero (busy) and then
 * DECREMENTED ("--...port_refcounts[portnum]") on the allocation path.
 * Taking a reference would normally be an increment; decrementing a zero
 * count on an unsigned type wraps, which would leave the port looking busy
 * after it is freed.  Looks like it should be "++" - confirm against the
 * sibling default allocator before changing. */
193 // allocate exact address based on preference
194 static_always_inline int
195 nat_alloc_addr_and_port_exact (snat_address_t * a,
197 nat_protocol_t proto,
198 ip4_address_t * addr,
200 u16 port_per_thread, u32 snat_thread_index)
206 #define _(N, j, n, s) \
207 case NAT_PROTOCOL_##N: \
208 if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
212 portnum = (port_per_thread * \
213 snat_thread_index) + \
214 snat_random_port(0, port_per_thread - 1) + 1024; \
215 if (a->busy_##n##_port_refcounts[portnum]) \
217 --a->busy_##n##_port_refcounts[portnum]; \
218 a->busy_##n##_ports_per_thread[thread_index]++; \
219 a->busy_##n##_ports++; \
221 *port = clib_host_to_net_u16(portnum); \
229 nat_elog_info ("unknown protocol");
233 /* Totally out of translations to use... */
234 snat_ipfix_logging_addresses_exhausted (thread_index, 0);
/* Creates an endpoint-dependent session for a packet from the outside
 * that matched a static mapping.
 *
 * Steps visible here:
 *  - refuse when nat44_ed_maximum_sessions_exceeded() reports the limit;
 *  - allocate the session and fill in external host, out2in (o2i_*) and
 *    in2out (i2o_*) tuples plus the static/LB/affinity/ED flags;
 *  - insert the out2in key into sm->out2in_ed, letting stale entries be
 *    overwritten via nat44_o2i_ed_is_idle_session_cb;
 *  - for twice-NAT, allocate a translated source address/port (from the
 *    mapping's exact pool address when one is configured, otherwise from
 *    sm->twice_nat_addresses) and key the in2out entry on it;
 *  - insert the in2out key into the per-thread in2out_ed table;
 *  - emit IPFIX / syslog / HA "session add" and register the session per
 *    VRF.
 * On the failure paths b->error is set to the matching node error.
 * Returns a snat_session_t pointer; the caller treats a null result as
 * "drop" - the return statements themselves are not visible in this view. */
239 static snat_session_t *
240 create_session_for_static_mapping_ed (snat_main_t * sm,
242 ip4_address_t i2o_addr,
245 ip4_address_t o2i_addr,
248 nat_protocol_t nat_proto,
249 vlib_node_runtime_t * node,
252 twice_nat_type_t twice_nat,
253 lb_nat_type_t lb_nat, f64 now,
254 snat_static_mapping_t * mapping)
259 snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
260 clib_bihash_kv_16_8_t kv;
261 nat44_is_idle_session_ctx_t ctx;
264 (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
266 b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED];
267 nat_elog_notice ("maximum sessions exceeded");
271 s = nat_ed_session_alloc (sm, thread_index, now, nat_proto);
/* Allocation failure is reported with the per-user session-limit error. */
274 b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_USER_SESS_EXCEEDED];
275 nat_elog_warn ("create NAT session failed");
279 ip = vlib_buffer_get_current (b);
280 udp = ip4_next_header (ip);
282 s->ext_host_addr.as_u32 = ip->src_address.as_u32;
/* ICMP sessions carry no external port. */
283 s->ext_host_port = nat_proto == NAT_PROTOCOL_ICMP ? 0 : udp->src_port;
284 s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
286 s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
287 if (lb_nat == AFFINITY_LB_NAT)
288 s->flags |= SNAT_SESSION_FLAG_AFFINITY;
289 s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
290 s->out2in.addr = o2i_addr;
291 s->out2in.port = o2i_port;
292 s->out2in.fib_index = o2i_fib_index;
293 s->in2out.addr = i2o_addr;
294 s->in2out.port = i2o_port;
295 s->in2out.fib_index = i2o_fib_index;
296 s->nat_proto = nat_proto;
298 /* Add to lookup tables */
/* The hash value encodes both the owning thread and the session index. */
299 init_ed_kv (&kv, o2i_addr, o2i_port, s->ext_host_addr, s->ext_host_port,
300 o2i_fib_index, ip->protocol, thread_index, s - tsm->sessions);
302 ctx.thread_index = thread_index;
303 if (clib_bihash_add_or_overwrite_stale_16_8 (&sm->out2in_ed, &kv,
304 nat44_o2i_ed_is_idle_session_cb,
306 nat_elog_notice ("out2in-ed key add failed");
/* Twice-NAT applies always for TWICE_NAT, and for TWICE_NAT_SELF only
 * when the packet's source equals the mapping's local address. */
308 if (twice_nat == TWICE_NAT || (twice_nat == TWICE_NAT_SELF &&
309 ip->src_address.as_u32 == i2o_addr.as_u32))
312 snat_address_t *filter = 0;
314 // if exact address is specified use this address
315 if (is_exact_address (mapping))
/* Find the twice-NAT pool entry equal to the mapping's pool address. */
318 vec_foreach (ap, sm->twice_nat_addresses)
320 if (mapping->pool_addr.as_u32 == ap->addr.as_u32)
330 rc = nat_alloc_addr_and_port_exact (filter,
333 &s->ext_host_nat_addr,
334 &s->ext_host_nat_port,
336 tsm->snat_thread_index);
337 s->flags |= SNAT_SESSION_FLAG_EXACT_ADDRESS;
342 snat_alloc_outside_address_and_port (sm->twice_nat_addresses, 0,
343 thread_index, nat_proto,
344 &s->ext_host_nat_addr,
345 &s->ext_host_nat_port,
347 tsm->snat_thread_index);
/* Port allocation failed: undo the session and its out2in key. */
352 b->error = node->errors[NAT_OUT2IN_ED_ERROR_OUT_OF_PORTS];
353 nat_ed_session_delete (sm, s, thread_index, 1);
354 if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &kv, 0))
355 nat_elog_notice ("out2in-ed key del failed");
359 s->flags |= SNAT_SESSION_FLAG_TWICE_NAT;
/* Twice-NAT: in2out key uses the translated external host. */
360 init_ed_kv (&kv, i2o_addr, i2o_port, s->ext_host_nat_addr,
361 s->ext_host_nat_port, i2o_fib_index, ip->protocol,
362 thread_index, s - tsm->sessions);
/* No twice-NAT: in2out key uses the real external host. */
366 init_ed_kv (&kv, i2o_addr, i2o_port, s->ext_host_addr,
367 s->ext_host_port, i2o_fib_index, ip->protocol,
368 thread_index, s - tsm->sessions);
370 if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, &kv,
371 nat44_i2o_ed_is_idle_session_cb,
373 nat_elog_notice ("in2out-ed key add failed");
375 snat_ipfix_logging_nat44_ses_create (thread_index,
376 s->in2out.addr.as_u32,
377 s->out2in.addr.as_u32,
380 s->out2in.port, s->in2out.fib_index);
382 nat_syslog_nat44_sadd (s->user_index, s->in2out.fib_index,
383 &s->in2out.addr, s->in2out.port,
384 &s->ext_host_nat_addr, s->ext_host_nat_port,
385 &s->out2in.addr, s->out2in.port,
386 &s->ext_host_addr, s->ext_host_port,
387 s->nat_proto, is_twice_nat_session (s));
389 nat_ha_sadd (&s->in2out.addr, s->in2out.port, &s->out2in.addr,
390 s->out2in.port, &s->ext_host_addr, s->ext_host_port,
391 &s->ext_host_nat_addr, s->ext_host_nat_port,
392 s->nat_proto, s->in2out.fib_index, s->flags, thread_index, 0);
394 per_vrf_sessions_register_session (s, thread_index);
/* Checks whether the packet, read as an in2out flow (source first,
 * destination second), already has an entry in this thread's in2out_ed
 * table.  Callers send the packet to the in2out fast path when the result
 * is true; the return statements are not visible in this view. */
400 next_src_nat (snat_main_t * sm, ip4_header_t * ip, u16 src_port,
401 u16 dst_port, u32 thread_index, u32 rx_fib_index)
403 clib_bihash_kv_16_8_t kv, value;
404 snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
406 init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
407 rx_fib_index, ip->protocol);
/* clib_bihash_search returns 0 on a hit, hence the negation. */
408 if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
/* Finds or creates a "forwarding bypass" session for a packet that is
 * forwarded untranslated.  The key is built from the packet's destination
 * (local side) and source (remote side) and looked up in the per-thread
 * in2out_ed table; on a miss a new session flagged
 * SNAT_SESSION_FLAG_FWD_BYPASS is allocated with identical in2out and
 * out2in tuples and inserted.  TCP state, counters and LRU are then
 * updated for the session. */
415 create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip,
416 u32 rx_fib_index, u32 thread_index)
418 clib_bihash_kv_16_8_t kv, value;
420 snat_session_t *s = 0;
421 snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
422 vlib_main_t *vm = vlib_get_main ();
423 f64 now = vlib_time_now (vm);
/* ICMP: the helper derives the key (and ports) from the ICMP payload. */
426 if (ip->protocol == IP_PROTOCOL_ICMP)
428 if (get_icmp_o2i_ed_key
429 (b, ip, rx_fib_index, ~0, ~0, 0, &l_port, &r_port, &kv))
/* TCP/UDP: local port is the destination port, remote is the source. */
434 if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
436 udp = ip4_next_header (ip);
437 l_port = udp->dst_port;
438 r_port = udp->src_port;
445 init_ed_k (&kv, ip->dst_address, l_port, ip->src_address, r_port,
446 rx_fib_index, ip->protocol);
/* Existing bypass session: reuse it. */
449 if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value))
451 ASSERT (thread_index == ed_value_get_thread_index (&value));
453 pool_elt_at_index (tsm->sessions,
454 ed_value_get_session_index (&value));
461 (nat44_ed_maximum_sessions_exceeded
462 (sm, rx_fib_index, thread_index)))
/* NOTE(review): ip->protocol (an IP protocol number) is passed here,
 * whereas create_session_for_static_mapping_ed passes a nat_protocol_t
 * to the same allocator - confirm which one it expects. */
465 s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
468 nat_elog_warn ("create NAT session failed");
472 proto = ip_proto_to_nat_proto (ip->protocol);
474 s->ext_host_addr = ip->src_address;
475 s->ext_host_port = r_port;
476 s->flags |= SNAT_SESSION_FLAG_FWD_BYPASS;
477 s->out2in.addr = ip->dst_address;
478 s->out2in.port = l_port;
479 s->nat_proto = proto;
480 if (proto == NAT_PROTOCOL_OTHER)
/* Unknown protocols store the IP protocol number in the port field. */
482 s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
483 s->out2in.port = ip->protocol;
485 s->out2in.fib_index = 0;
/* No translation: the in2out tuple mirrors the out2in tuple. */
486 s->in2out.addr = s->out2in.addr;
487 s->in2out.port = s->out2in.port;
488 s->in2out.fib_index = s->out2in.fib_index;
/* NOTE(review): the value stored is the bare session index, but the
 * lookup above decodes values with ed_value_get_thread_index() /
 * ed_value_get_session_index() and other inserts use init_ed_kv() with
 * a thread index - confirm the encodings agree for non-zero threads. */
490 kv.value = s - tsm->sessions;
491 if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &kv, 1))
492 nat_elog_notice ("in2out_ed key add failed");
494 per_vrf_sessions_register_session (s, thread_index);
497 if (ip->protocol == IP_PROTOCOL_TCP)
499 tcp_header_t *tcp = ip4_next_header (ip);
500 if (nat44_set_tcp_session_state_o2i
501 (sm, now, s, tcp->flags, tcp->ack_number, tcp->seq_number,
/* Counters are touched with a byte count of 0 for bypass sessions. */
507 nat44_session_update_counters (s, now, 0, thread_index);
508 /* Per-user LRU list maintenance */
509 nat44_session_update_lru (sm, s, thread_index);
/* Multi-worker variant of create_bypass_for_fwd(): builds a temporary IP
 * header whose source is the packet's destination, asks
 * sm->worker_in2out_cb which thread would own that in2out flow, and
 * creates the bypass session on that thread. */
513 create_bypass_for_fwd_worker (snat_main_t * sm, vlib_buffer_t * b,
514 ip4_header_t * ip, u32 rx_fib_index)
516 ip4_header_t ip_wkr = {
517 .src_address = ip->dst_address,
519 u32 thread_index = sm->worker_in2out_cb (&ip_wkr, rx_fib_index, 0);
521 create_bypass_for_fwd (sm, b, ip, rx_fib_index, thread_index);
524 #ifndef CLIB_MARCH_VARIANT
/* Session matcher for ICMP packets travelling outside -> inside.
 *
 * Builds the endpoint-dependent key from the ICMP packet and looks it up
 * in sm->out2in_ed.
 *  - Miss, no static mapping, forwarding disabled: the packet is dropped
 *    with NO_TRANSLATION; packets aimed at an interface address get
 *    separate handling that is not visible in this view.
 *  - Miss, no static mapping, forwarding enabled: hand the packet to the
 *    in2out fast path when next_src_nat() matches, otherwise create a
 *    forwarding-bypass session.
 *  - Miss, static mapping found: validate the ICMP type, handle identity
 *    NAT, then create a session via create_session_for_static_mapping_ed().
 *  - Hit: validate the ICMP type and fetch the session from the pool.
 * Outputs: *addr / *port / *fib_index receive the session's in2out tuple
 * and the session pointer is stored through d.  next carries the chosen
 * next node (~0 initially); presumably it is the return value - the
 * return statement is not visible here, confirm. */
526 icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node,
527 u32 thread_index, vlib_buffer_t * b,
528 ip4_header_t * ip, ip4_address_t * addr,
529 u16 * port, u32 * fib_index, nat_protocol_t * proto,
530 void *d, void *e, u8 * dont_translate)
532 u32 next = ~0, sw_if_index, rx_fib_index;
533 clib_bihash_kv_16_8_t kv, value;
534 snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
535 snat_session_t *s = 0;
536 u8 is_addr_only, identity_nat;
538 vlib_main_t *vm = vlib_get_main ();
539 ip4_address_t sm_addr;
543 snat_static_mapping_t *m;
545 sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
546 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
/* A non-zero result means no key could be derived from the packet. */
548 if (get_icmp_o2i_ed_key
549 (b, ip, rx_fib_index, ~0, ~0, proto, &l_port, &r_port, &kv))
551 b->error = node->errors[NAT_OUT2IN_ED_ERROR_UNSUPPORTED_PROTOCOL];
552 next = NAT_NEXT_DROP;
/* Non-zero search result == no existing session. */
556 if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value))
558 if (snat_static_mapping_match
559 (sm, ip->dst_address, l_port, rx_fib_index,
560 ip_proto_to_nat_proto (ip->protocol), &sm_addr, &sm_port,
561 &sm_fib_index, 1, &is_addr_only, 0, 0, 0, &identity_nat, &m))
563 // static mapping not matched
564 if (!sm->forwarding_enabled)
566 /* Don't NAT packet aimed at the intfc address */
567 if (PREDICT_FALSE (is_interface_addr (sm, node, sw_if_index,
568 ip->dst_address.as_u32)))
574 b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION];
575 next = NAT_NEXT_DROP;
/* Forwarding enabled: in2out session exists -> in2out fast path. */
581 if (next_src_nat (sm, ip, l_port, r_port,
582 thread_index, rx_fib_index))
584 next = NAT_NEXT_IN2OUT_ED_FAST_PATH;
588 if (sm->num_workers > 1)
589 create_bypass_for_fwd_worker (sm, b, ip, rx_fib_index);
591 create_bypass_for_fwd (sm, b, ip, rx_fib_index,
/* Static mapping matched: only selected ICMP types may open a session
 * (echo request additionally requires an address-only mapping). */
599 (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
601 && (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
602 ICMP4_echo_request || !is_addr_only)))
604 b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE];
605 next = NAT_NEXT_DROP;
609 if (PREDICT_FALSE (identity_nat))
615 /* Create session initiated by host from external network */
617 create_session_for_static_mapping_ed (sm, b, sm_addr, sm_port,
618 sm_fib_index, ip->dst_address,
619 l_port, rx_fib_index, *proto,
622 vlib_time_now (vm), m);
624 next = NAT_NEXT_DROP;
/* Existing session: accept only the listed types or ICMP error messages. */
629 (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
631 && vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags !=
633 && !icmp_type_is_error_message (vnet_buffer (b)->ip.
634 reass.icmp_type_or_tcp_flags)))
636 b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE];
637 next = NAT_NEXT_DROP;
641 ASSERT (thread_index == ed_value_get_thread_index (&value));
643 pool_elt_at_index (tsm->sessions,
644 ed_value_get_session_index (&value));
/* Report the inside tuple the packet must be rewritten to. */
649 *addr = s->in2out.addr;
650 *port = s->in2out.port;
651 *fib_index = s->in2out.fib_index;
654 *(snat_session_t **) d = s;
/* Handles an outside -> inside packet whose IP protocol is none of the
 * ones NAT tracks by port.
 *
 * Looks up a port-less key (dst, src, fib, protocol) in sm->out2in_ed.
 * On a hit the destination is rewritten to the session's inside address.
 * On a miss the function enforces the session limit, looks for an
 * address-only static mapping in sm->static_mapping_by_external,
 * rewrites the destination to the mapping's local address, and creates a
 * session flagged UNKNOWN_PROTO | STATIC_MAPPING | ENDPOINT_DEPENDENT
 * that is inserted into both lookup tables.  Finally the IP checksum is
 * fixed up, the TX fib index is set and counters / LRU are updated.
 * Returns a snat_session_t pointer (return statements not visible here);
 * failure paths set b->error. */
659 static snat_session_t *
660 nat44_ed_out2in_unknown_proto (snat_main_t * sm,
666 vlib_main_t * vm, vlib_node_runtime_t * node)
668 clib_bihash_kv_8_8_t kv, value;
669 clib_bihash_kv_16_8_t s_kv, s_value;
670 snat_static_mapping_t *m;
671 u32 old_addr, new_addr;
674 snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
676 old_addr = ip->dst_address.as_u32;
/* Ports are zero in the key: these sessions are matched on addresses only. */
678 init_ed_k (&s_kv, ip->dst_address, 0, ip->src_address, 0, rx_fib_index,
681 if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
683 ASSERT (thread_index == ed_value_get_thread_index (&s_value));
685 pool_elt_at_index (tsm->sessions,
686 ed_value_get_session_index (&s_value));
687 new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
692 (nat44_ed_maximum_sessions_exceeded
693 (sm, rx_fib_index, thread_index)))
695 b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED];
696 nat_elog_notice ("maximum sessions exceeded");
/* Address-only static mapping lookup (port, fib and protocol all zero). */
700 init_nat_k (&kv, ip->dst_address, 0, 0, 0);
701 if (clib_bihash_search_8_8
702 (&sm->static_mapping_by_external, &kv, &value))
704 b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION];
708 m = pool_elt_at_index (sm->static_mappings, value.value);
710 new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
712 /* Create a new session */
713 s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
716 b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_USER_SESS_EXCEEDED];
717 nat_elog_warn ("create NAT session failed");
721 s->ext_host_addr.as_u32 = ip->src_address.as_u32;
722 s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
723 s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
724 s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
725 s->out2in.addr.as_u32 = old_addr;
726 s->out2in.fib_index = rx_fib_index;
727 s->in2out.addr.as_u32 = new_addr;
728 s->in2out.fib_index = m->fib_index;
/* The port fields hold the IP protocol number for these sessions. */
729 s->in2out.port = s->out2in.port = ip->protocol;
731 /* Add to lookup tables */
/* NOTE(review): the out2in value is the bare session index, while the
 * lookup above decodes values with ed_value_get_thread_index() and the
 * in2out insert below uses init_ed_kv() with thread_index - confirm the
 * two encodings agree for non-zero threads. */
732 s_kv.value = s - tsm->sessions;
733 if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
734 nat_elog_notice ("out2in key add failed");
/* ip->dst_address already holds the rewritten (inside) address here. */
736 init_ed_kv (&s_kv, ip->dst_address, 0, ip->src_address, 0, m->fib_index,
737 ip->protocol, thread_index, s - tsm->sessions);
738 if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1))
739 nat_elog_notice ("in2out key add failed");
741 per_vrf_sessions_register_session (s, thread_index);
744 /* Update IP checksum */
746 sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
747 ip->checksum = ip_csum_fold (sum);
749 vnet_buffer (b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
752 nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b),
754 /* Per-user LRU list maintenance */
755 nat44_session_update_lru (sm, s, thread_index);
/* Fast-path worker for the nat44-ed-out2in node.
 *
 * For every buffer in the frame:
 *  - start from the next index stashed in vnet_buffer2()->nat.arc_next;
 *  - TTL == 1 -> ICMP time-exceeded;
 *  - unknown protocols and ICMP -> slow path;
 *  - locate the session, first via the index stashed by the handoff node
 *    (validated against the packet's 6-tuple), then via sm->out2in_ed;
 *    no session -> slow path;
 *  - sessions expired per VRF or by timeout are freed and the packet is
 *    sent to the slow path; TCP sessions inside their closed/transitory
 *    window are dropped or sent to the slow path;
 *  - otherwise rewrite destination (and, for twice-NAT, source)
 *    address/port, patch IP and L4 checksums incrementally, bump the
 *    fast-path counters, and update session counters and LRU.
 * Trace records and drop counters are emitted per packet.
 * Returns frame->n_vectors.  The fourth parameter is not visible in this
 * view. */
761 nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
762 vlib_node_runtime_t * node,
763 vlib_frame_t * frame,
766 u32 n_left_from, *from;
767 snat_main_t *sm = &snat_main;
768 f64 now = vlib_time_now (vm);
769 u32 thread_index = vm->thread_index;
770 snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
772 from = vlib_frame_vector_args (frame);
773 n_left_from = frame->n_vectors;
/* Resolve all buffer indices up front; nexts[] is filled per packet. */
775 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
776 u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
777 vlib_get_buffers (vm, from, b, n_left_from);
779 while (n_left_from > 0)
782 u32 sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0;
783 u16 old_port0, new_port0;
787 snat_session_t *s0 = 0;
788 clib_bihash_kv_16_8_t kv0, value0;
794 /* Prefetch next iteration. */
795 if (PREDICT_TRUE (n_left_from >= 2))
801 vlib_prefetch_buffer_header (p2, LOAD);
803 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
/* Default disposition: continue along the feature arc. */
806 next[0] = vnet_buffer2 (b0)->nat.arc_next;
808 vnet_buffer (b0)->snat.flags = 0;
809 ip0 = vlib_buffer_get_current (b0);
811 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
813 fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
/* TTL would expire on forwarding: answer with ICMP time exceeded. */
815 if (PREDICT_FALSE (ip0->ttl == 1))
817 vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
818 icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
819 ICMP4_time_exceeded_ttl_exceeded_in_transit,
821 next[0] = NAT_NEXT_ICMP_ERROR;
/* udp0/tcp0 alias the same L4 header; ports share offsets in both. */
825 udp0 = ip4_next_header (ip0);
826 tcp0 = (tcp_header_t *) udp0;
827 proto0 = ip_proto_to_nat_proto (ip0->protocol);
/* Only TCP and UDP are translated here; everything else is slow path. */
829 if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
831 next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
835 if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
837 next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
/* Ports come from the reassembly metadata, not from the L4 header. */
841 init_ed_k (&kv0, ip0->dst_address,
842 vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->src_address,
843 vnet_buffer (b0)->ip.reass.l4_src_port, rx_fib_index0,
846 /* there is a stashed index in vnet_buffer2 from handoff node,
847 * see if we can use it */
850 PREDICT_TRUE (!pool_is_free_index
852 vnet_buffer2 (b0)->nat.ed_out2in_nat_session_index)))
854 s0 = pool_elt_at_index (tsm->sessions,
856 nat.ed_out2in_nat_session_index);
/* The stashed session is trusted only if its full tuple matches. */
858 (s0->out2in.addr.as_u32 == ip0->dst_address.as_u32
859 && s0->out2in.port == vnet_buffer (b0)->ip.reass.l4_dst_port
860 && s0->nat_proto == ip_proto_to_nat_proto (ip0->protocol)
861 && s0->out2in.fib_index == rx_fib_index0
862 && s0->ext_host_addr.as_u32 == ip0->src_address.as_u32
863 && s0->ext_host_port ==
864 vnet_buffer (b0)->ip.reass.l4_src_port))
866 /* yes, this is the droid we're looking for */
871 // lookup for session
872 if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv0, &value0))
874 // session does not exist go slow path
875 next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
878 ASSERT (thread_index == ed_value_get_thread_index (&value0));
880 pool_elt_at_index (tsm->sessions,
881 ed_value_get_session_index (&value0));
885 if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
887 // session is closed, go slow path
888 nat_free_session_data (sm, s0, thread_index, 0);
889 nat_ed_session_delete (sm, s0, thread_index, 1);
890 next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
894 if (s0->tcp_closed_timestamp)
896 if (now >= s0->tcp_closed_timestamp)
898 // session is closed, go slow path, freed in slow path
899 next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
903 // session in transitory timeout, drop
904 b0->error = node->errors[NAT_OUT2IN_ED_ERROR_TCP_CLOSED];
905 next[0] = NAT_NEXT_DROP;
910 // drop if session expired
/* NOTE(review): u64 receives an f64 sum below, truncating the
 * fractional part of the deadline - confirm this is intended. */
911 u64 sess_timeout_time;
913 s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
914 if (now >= sess_timeout_time)
916 // session is closed, go slow path
917 nat_free_session_data (sm, s0, thread_index, 0);
918 nat_ed_session_delete (sm, s0, thread_index, 1);
919 next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
/* Translate: destination becomes the session's inside address and the
 * packet is steered into the inside FIB. */
923 old_addr0 = ip0->dst_address.as_u32;
924 new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32;
925 vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
/* Incremental IP header checksum update. */
927 sum0 = ip0->checksum;
928 sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
930 if (PREDICT_FALSE (is_twice_nat_session (s0)))
931 sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
932 s0->ext_host_nat_addr.as_u32, ip4_header_t,
934 ip0->checksum = ip_csum_fold (sum0);
936 old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port;
938 if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
/* Only the first fragment carries an L4 header to rewrite. */
940 if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
942 new_port0 = udp0->dst_port = s0->in2out.port;
943 sum0 = tcp0->checksum;
945 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
948 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
/* Twice-NAT also rewrites the source address and port. */
950 if (is_twice_nat_session (s0))
952 sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
953 s0->ext_host_nat_addr.as_u32,
954 ip4_header_t, dst_address);
956 ip_csum_update (sum0,
957 vnet_buffer (b0)->ip.reass.l4_src_port,
958 s0->ext_host_nat_port, ip4_header_t,
960 tcp0->src_port = s0->ext_host_nat_port;
961 ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
963 tcp0->checksum = ip_csum_fold (sum0);
965 vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.tcp,
966 thread_index, sw_if_index0, 1);
967 if (nat44_set_tcp_session_state_o2i
968 (sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
969 vnet_buffer (b0)->ip.reass.tcp_ack_number,
970 vnet_buffer (b0)->ip.reass.tcp_seq_number, thread_index))
/* Non-TCP first fragment whose L4 checksum gets patched; the rest of
 * this condition is not visible in this view. */
973 else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
976 new_port0 = udp0->dst_port = s0->in2out.port;
977 sum0 = udp0->checksum;
979 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
982 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, length);
983 if (PREDICT_FALSE (is_twice_nat_session (s0)))
986 ip_csum_update (sum0, ip0->src_address.as_u32,
987 s0->ext_host_nat_addr.as_u32, ip4_header_t,
990 ip_csum_update (sum0, vnet_buffer (b0)->ip.reass.l4_src_port,
991 s0->ext_host_nat_port, ip4_header_t, length);
992 udp0->src_port = s0->ext_host_nat_port;
993 ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
995 udp0->checksum = ip_csum_fold (sum0);
996 vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.udp,
997 thread_index, sw_if_index0, 1);
/* Remaining case: ports/addresses are rewritten without touching the
 * L4 checksum. */
1001 if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1003 new_port0 = udp0->dst_port = s0->in2out.port;
1004 if (PREDICT_FALSE (is_twice_nat_session (s0)))
1006 udp0->src_port = s0->ext_host_nat_port;
1007 ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
1010 vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.udp,
1011 thread_index, sw_if_index0, 1);
1015 nat44_session_update_counters (s0, now,
1016 vlib_buffer_length_in_chain (vm, b0),
1018 /* Per-user LRU list maintenance */
1019 nat44_session_update_lru (sm, s0, thread_index);
/* Record a trace entry only when both node and buffer are traced. */
1022 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1023 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1025 nat44_ed_out2in_trace_t *t =
1026 vlib_add_trace (vm, node, b0, sizeof (*t));
1027 t->sw_if_index = sw_if_index0;
1028 t->next_index = next[0];
1029 t->is_slow_path = 0;
1032 t->session_index = s0 - tsm->sessions;
/* ~0 marks "no session" in the trace. */
1034 t->session_index = ~0;
1037 if (next[0] == NAT_NEXT_DROP)
1039 vlib_increment_simple_counter (&sm->counters.fastpath.
1040 out2in_ed.drops, thread_index,
1048 vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1050 return frame->n_vectors;
/* Slow-path worker for the nat44-ed-out2in-slowpath node.
 *
 * For every buffer in the frame:
 *  - start from the next index stashed in vnet_buffer2()->nat.arc_next;
 *  - TTL == 1 -> ICMP time-exceeded;
 *  - unknown protocols -> nat44_ed_out2in_unknown_proto();
 *  - ICMP -> icmp_out2in_ed_slow_path();
 *  - TCP/UDP: look up sm->out2in_ed; a TCP session past its closed
 *    timestamp is freed.  Without a usable session, try a static mapping:
 *      * no mapping: DHCP-to-client packets get special handling, then
 *        drop when forwarding is disabled, otherwise go to the in2out
 *        fast path or create a forwarding-bypass session;
 *      * mapping found: handle identity NAT, reject non-SYN TCP, and
 *        create a session via create_session_for_static_mapping_ed().
 *  - with a session: rewrite addresses/ports, patch checksums, bump the
 *    slow-path counters, update session counters and LRU.
 * Trace records and drop counters are emitted per packet.
 * Returns frame->n_vectors. */
1054 nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
1055 vlib_node_runtime_t * node,
1056 vlib_frame_t * frame)
1058 u32 n_left_from, *from;
1059 snat_main_t *sm = &snat_main;
1060 f64 now = vlib_time_now (vm);
1061 u32 thread_index = vm->thread_index;
1062 snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1063 snat_static_mapping_t *m;
1065 from = vlib_frame_vector_args (frame);
1066 n_left_from = frame->n_vectors;
/* Resolve all buffer indices up front; nexts[] is filled per packet. */
1068 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
1069 u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
1070 vlib_get_buffers (vm, from, b, n_left_from);
1072 while (n_left_from > 0)
1075 u32 sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0;
1076 u16 old_port0, new_port0;
1080 icmp46_header_t *icmp0;
1081 snat_session_t *s0 = 0;
1082 clib_bihash_kv_16_8_t kv0, value0;
1084 lb_nat_type_t lb_nat0;
1085 twice_nat_type_t twice_nat0;
1087 ip4_address_t sm_addr;
/* Default disposition: continue along the feature arc. */
1092 next[0] = vnet_buffer2 (b0)->nat.arc_next;
1094 vnet_buffer (b0)->snat.flags = 0;
1095 ip0 = vlib_buffer_get_current (b0);
1097 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1099 fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
/* TTL would expire on forwarding: answer with ICMP time exceeded. */
1101 if (PREDICT_FALSE (ip0->ttl == 1))
1103 vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1104 icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1105 ICMP4_time_exceeded_ttl_exceeded_in_transit,
1107 next[0] = NAT_NEXT_ICMP_ERROR;
/* udp0/tcp0/icmp0 are three views of the same L4 header. */
1111 udp0 = ip4_next_header (ip0);
1112 tcp0 = (tcp_header_t *) udp0;
1113 icmp0 = (icmp46_header_t *) udp0;
1114 proto0 = ip_proto_to_nat_proto (ip0->protocol);
1116 if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
1119 nat44_ed_out2in_unknown_proto (sm, b0, ip0, rx_fib_index0,
1120 thread_index, now, vm, node);
/* Untranslated unknown-protocol packets are dropped unless forwarding
 * is enabled. */
1121 if (!sm->forwarding_enabled)
1124 next[0] = NAT_NEXT_DROP;
1126 vlib_increment_simple_counter (&sm->counters.slowpath.
1127 out2in_ed.other, thread_index,
1132 if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
1134 next[0] = icmp_out2in_ed_slow_path
1135 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1136 next[0], now, thread_index, &s0);
1137 vlib_increment_simple_counter (&sm->counters.slowpath.
1138 out2in_ed.icmp, thread_index,
/* Ports come from the reassembly metadata, not from the L4 header. */
1143 init_ed_k (&kv0, ip0->dst_address,
1144 vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->src_address,
1145 vnet_buffer (b0)->ip.reass.l4_src_port, rx_fib_index0,
1149 if (!clib_bihash_search_16_8 (&sm->out2in_ed, &kv0, &value0))
1151 ASSERT (thread_index == ed_value_get_thread_index (&value0));
1153 pool_elt_at_index (tsm->sessions,
1154 ed_value_get_session_index (&value0));
/* A TCP session past its closed timestamp is torn down here (the fast
 * path defers this to the slow path). */
1156 if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
1158 nat_free_session_data (sm, s0, thread_index, 0);
1159 nat_ed_session_delete (sm, s0, thread_index, 1);
1166 /* Try to match static mapping by external address and port,
1167 destination address and port in packet */
1169 if (snat_static_mapping_match
1170 (sm, ip0->dst_address,
1171 vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
1172 proto0, &sm_addr, &sm_port, &sm_fib_index, 1, 0,
1173 &twice_nat0, &lb_nat0, &ip0->src_address, &identity_nat0, &m))
1176 * Send DHCP packets to the ipv4 stack, or we won't
1177 * be able to use dhcp client on the outside interface
1179 if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_UDP
1180 && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
1181 clib_host_to_net_u16
1182 (UDP_DST_PORT_dhcp_to_client))))
1187 if (!sm->forwarding_enabled)
1190 node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION];
1191 next[0] = NAT_NEXT_DROP;
/* Forwarding enabled: source/destination ports are passed in in2out
 * order to the session check. */
1196 (sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
1197 vnet_buffer (b0)->ip.reass.l4_dst_port,
1198 thread_index, rx_fib_index0))
1200 next[0] = NAT_NEXT_IN2OUT_ED_FAST_PATH;
1204 if (sm->num_workers > 1)
1205 create_bypass_for_fwd_worker (sm, b0, ip0,
1208 create_bypass_for_fwd (sm, b0, ip0, rx_fib_index0,
1215 if (PREDICT_FALSE (identity_nat0))
/* New TCP sessions may only be opened by a connection-initiating
 * segment. */
1218 if ((proto0 == NAT_PROTOCOL_TCP)
1219 && !tcp_flags_is_init (vnet_buffer (b0)->ip.
1220 reass.icmp_type_or_tcp_flags))
1222 b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN];
1223 next[0] = NAT_NEXT_DROP;
1227 /* Create session initiated by host from external network */
1228 s0 = create_session_for_static_mapping_ed (sm, b0,
1233 ip.reass.l4_dst_port,
1234 rx_fib_index0, proto0,
1235 node, rx_fib_index0,
1236 thread_index, twice_nat0,
1240 next[0] = NAT_NEXT_DROP;
/* Translate: destination becomes the session's inside address and the
 * packet is steered into the inside FIB. */
1245 old_addr0 = ip0->dst_address.as_u32;
1246 new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32;
1247 vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
/* Incremental IP header checksum update. */
1249 sum0 = ip0->checksum;
1250 sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1252 if (PREDICT_FALSE (is_twice_nat_session (s0)))
1253 sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
1254 s0->ext_host_nat_addr.as_u32, ip4_header_t,
1256 ip0->checksum = ip_csum_fold (sum0);
1258 old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port;
1260 if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
/* Only the first fragment carries an L4 header to rewrite. */
1262 if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1264 new_port0 = udp0->dst_port = s0->in2out.port;
1265 sum0 = tcp0->checksum;
1267 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1270 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
/* Twice-NAT also rewrites the source address and port. */
1272 if (is_twice_nat_session (s0))
1274 sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
1275 s0->ext_host_nat_addr.as_u32,
1276 ip4_header_t, dst_address);
1278 ip_csum_update (sum0,
1279 vnet_buffer (b0)->ip.reass.l4_src_port,
1280 s0->ext_host_nat_port, ip4_header_t,
1282 tcp0->src_port = s0->ext_host_nat_port;
1283 ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
1285 tcp0->checksum = ip_csum_fold (sum0);
1287 vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.tcp,
1288 thread_index, sw_if_index0, 1);
1289 if (nat44_set_tcp_session_state_o2i
1290 (sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
1291 vnet_buffer (b0)->ip.reass.tcp_ack_number,
1292 vnet_buffer (b0)->ip.reass.tcp_seq_number, thread_index))
/* Non-TCP first fragment whose L4 checksum gets patched; the rest of
 * this condition is not visible in this view. */
1295 else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
1298 new_port0 = udp0->dst_port = s0->in2out.port;
1299 sum0 = udp0->checksum;
1300 sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
1302 sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
1304 if (PREDICT_FALSE (is_twice_nat_session (s0)))
1306 sum0 = ip_csum_update (sum0, ip0->src_address.as_u32,
1307 s0->ext_host_nat_addr.as_u32,
1308 ip4_header_t, dst_address);
1310 ip_csum_update (sum0,
1311 vnet_buffer (b0)->ip.reass.l4_src_port,
1312 s0->ext_host_nat_port, ip4_header_t, length);
1313 udp0->src_port = s0->ext_host_nat_port;
1314 ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
1316 udp0->checksum = ip_csum_fold (sum0);
1317 vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.udp,
1318 thread_index, sw_if_index0, 1);
/* Remaining case: ports/addresses are rewritten without touching the
 * L4 checksum. */
1322 if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment)
1324 new_port0 = udp0->dst_port = s0->in2out.port;
1325 if (PREDICT_FALSE (is_twice_nat_session (s0)))
1327 udp0->src_port = s0->ext_host_nat_port;
1328 ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32;
1331 vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.udp,
1332 thread_index, sw_if_index0, 1);
1336 nat44_session_update_counters (s0, now,
1337 vlib_buffer_length_in_chain (vm, b0),
1339 /* Per-user LRU list maintenance */
1340 nat44_session_update_lru (sm, s0, thread_index);
/* Record a trace entry only when both node and buffer are traced. */
1343 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1344 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1346 nat44_ed_out2in_trace_t *t =
1347 vlib_add_trace (vm, node, b0, sizeof (*t));
1348 t->sw_if_index = sw_if_index0;
1349 t->next_index = next[0];
1350 t->is_slow_path = 1;
1353 t->session_index = s0 - tsm->sessions;
/* ~0 marks "no session" in the trace. */
1355 t->session_index = ~0;
1358 if (next[0] == NAT_NEXT_DROP)
1360 vlib_increment_simple_counter (&sm->counters.slowpath.
1361 out2in_ed.drops, thread_index,
1370 vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
1373 return frame->n_vectors;
/* Node function of nat44-ed-out2in: runs the fast-path worker, passing 1
 * as its last argument when more than one NAT worker is configured and 0
 * otherwise.  (The meaning of that argument is declared on a line not
 * visible here.) */
1376 VLIB_NODE_FN (nat44_ed_out2in_node) (vlib_main_t * vm,
1377 vlib_node_runtime_t * node,
1378 vlib_frame_t * frame)
1380 if (snat_main.num_workers > 1)
1382 return nat44_ed_out2in_fast_path_node_fn_inline (vm, node, frame, 1);
1386 return nat44_ed_out2in_fast_path_node_fn_inline (vm, node, frame, 0);
/* Registration of the fast-path node "nat44-ed-out2in": an internal node
 * that shares its next nodes with "nat-default" (sibling_of), uses the
 * out2in-ed trace formatter and error strings, and reserves
 * sizeof (snat_runtime_t) bytes of runtime data. */
1391 VLIB_REGISTER_NODE (nat44_ed_out2in_node) = {
1392 .name = "nat44-ed-out2in",
1393 .vector_size = sizeof (u32),
1394 .sibling_of = "nat-default",
1395 .format_trace = format_nat44_ed_out2in_trace,
1396 .type = VLIB_NODE_TYPE_INTERNAL,
1397 .n_errors = ARRAY_LEN(nat_out2in_ed_error_strings),
1398 .error_strings = nat_out2in_ed_error_strings,
1399 .runtime_data_bytes = sizeof (snat_runtime_t),
/* Node function of nat44-ed-out2in-slowpath: thin wrapper around the
 * slow-path worker. */
1403 VLIB_NODE_FN (nat44_ed_out2in_slowpath_node) (vlib_main_t * vm,
1404 vlib_node_runtime_t * node,
1405 vlib_frame_t * frame)
1407 return nat44_ed_out2in_slow_path_node_fn_inline (vm, node, frame);
/* Registration of the slow-path node "nat44-ed-out2in-slowpath"; same
 * settings as the fast-path node (sibling of "nat-default", shared trace
 * formatter and error strings, snat_runtime_t runtime data). */
1411 VLIB_REGISTER_NODE (nat44_ed_out2in_slowpath_node) = {
1412 .name = "nat44-ed-out2in-slowpath",
1413 .vector_size = sizeof (u32),
1414 .sibling_of = "nat-default",
1415 .format_trace = format_nat44_ed_out2in_trace,
1416 .type = VLIB_NODE_TYPE_INTERNAL,
1417 .n_errors = ARRAY_LEN(nat_out2in_ed_error_strings),
1418 .error_strings = nat_out2in_ed_error_strings,
1419 .runtime_data_bytes = sizeof (snat_runtime_t),
/* Trace formatter for nat_pre_trace_t records of the nat-pre-out2in
 * node: consumes vm, node and the trace record from args (vm and node
 * are unused) and appends the next index and the arc next index to s. */
1424 format_nat_pre_trace (u8 * s, va_list * args)
1426 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1427 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1428 nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *);
1429 return format (s, "out2in next_index %d arc_next_index %d", t->next_index,
/* Node function of nat-pre-out2in: runs the shared pre-NAT worker with
 * the out2in-ed fast path as the next node to hand packets to. */
1433 VLIB_NODE_FN (nat_pre_out2in_node) (vlib_main_t * vm,
1434 vlib_node_runtime_t * node,
1435 vlib_frame_t * frame)
1437 return nat_pre_node_fn_inline (vm, node, frame,
1438 NAT_NEXT_OUT2IN_ED_FAST_PATH);
/* Registration of the "nat-pre-out2in" node: an internal node, sibling of
 * "nat-default", traced with format_nat_pre_trace. */
1442 VLIB_REGISTER_NODE (nat_pre_out2in_node) = {
1443 .name = "nat-pre-out2in",
1444 .vector_size = sizeof (u32),
1445 .sibling_of = "nat-default",
1446 .format_trace = format_nat_pre_trace,
1447 .type = VLIB_NODE_TYPE_INTERNAL,
1453 * fd.io coding-style-patch-verification: ON
1456 * eval: (c-set-style "gnu")