2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
21 #include <vnet/ip/ip.h>
22 #include <vnet/ethernet/ethernet.h>
23 #include <vnet/fib/ip4_fib.h>
25 #include <nat/nat_ipfix_logging.h>
26 #include <nat/nat_det.h>
27 #include <nat/nat_reass.h>
29 #include <vppinfra/hash.h>
30 #include <vppinfra/error.h>
31 #include <vppinfra/elog.h>
/* Trace record read by format_snat_in2out_trace() and
 * format_snat_in2out_fast_trace(). */
38 } snat_in2out_trace_t;
/* Worker index printed by format_snat_in2out_worker_handoff_trace(). */
41 u32 next_worker_index;
43 } snat_in2out_worker_handoff_trace_t;
45 /* Packet trace format function for snat_in2out_trace_t records: appends the
 * slow/fast path tag, sw_if_index, next index and session index to s. */
46 static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
/* vm and node are consumed from the va_list only to reach the trace record. */
48 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
49 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
50 snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
/* The tag tells which path recorded the trace. */
53 tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
55 s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
56 t->sw_if_index, t->next_index, t->session_index);
/* Trace format function for the "NAT44_IN2OUT_FAST" tag: prints only the
 * sw_if_index and next index of a snat_in2out_trace_t record. */
61 static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
/* vm and node are consumed from the va_list only to reach the trace record. */
63 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
64 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
65 snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
67 s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
68 t->sw_if_index, t->next_index);
/* Trace format function for worker handoff records: prints whether the packet
 * was handed to another worker and the worker index involved. */
73 static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
/* vm and node are consumed from the va_list only to reach the trace record. */
75 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
76 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
77 snat_in2out_worker_handoff_trace_t * t =
78 va_arg (*args, snat_in2out_worker_handoff_trace_t *);
/* do_handoff selects the wording; the index is printed in both cases. */
81 m = t->do_handoff ? "next worker" : "same worker";
82 s = format (s, "NAT44_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
/* Trace record consumed by format_nat44_in2out_reass_trace(). */
91 } nat44_in2out_reass_trace_t;
/* Trace format function for reassembly records: prints sw_if_index, next
 * index and whether the fragment was cached or translated. */
93 static u8 * format_nat44_in2out_reass_trace (u8 * s, va_list * args)
/* vm and node are consumed from the va_list only to reach the trace record. */
95 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
96 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
97 nat44_in2out_reass_trace_t * t = va_arg (*args, nat44_in2out_reass_trace_t *);
99 s = format (s, "NAT44_IN2OUT_REASS: sw_if_index %d, next index %d, status %s",
100 t->sw_if_index, t->next_index,
101 t->cached ? "cached" : "translated");
/* vlib node registration objects for the NAT44 in2out, hairpinning,
 * worker-handoff, deterministic and reassembly nodes. */
106 vlib_node_registration_t snat_in2out_node;
107 vlib_node_registration_t snat_in2out_slowpath_node;
108 vlib_node_registration_t snat_in2out_fast_node;
109 vlib_node_registration_t snat_in2out_worker_handoff_node;
110 vlib_node_registration_t snat_det_in2out_node;
111 vlib_node_registration_t snat_in2out_output_node;
112 vlib_node_registration_t snat_in2out_output_slowpath_node;
113 vlib_node_registration_t snat_in2out_output_worker_handoff_node;
114 vlib_node_registration_t snat_hairpin_dst_node;
115 vlib_node_registration_t snat_hairpin_src_node;
116 vlib_node_registration_t nat44_hairpinning_node;
117 vlib_node_registration_t nat44_in2out_reass_node;
/* X-macro list of in2out errors: each _(sym, str) entry is expanded below
 * into an enum member SNAT_IN2OUT_ERROR_<sym> and into its description
 * string in snat_in2out_error_strings[]. */
120 #define foreach_snat_in2out_error \
121 _(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \
122 _(IN2OUT_PACKETS, "Good in2out packets processed") \
123 _(OUT_OF_PORTS, "Out of ports") \
124 _(BAD_OUTSIDE_FIB, "Outside VRF ID not found") \
125 _(BAD_ICMP_TYPE, "unsupported ICMP type") \
126 _(NO_TRANSLATION, "No translation") \
127 _(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded") \
128 _(DROP_FRAGMENT, "Drop fragment") \
129 _(MAX_REASS, "Maximum reassemblies exceeded") \
130 _(MAX_FRAG, "Maximum fragments per reassembly exceeded")
/* Expansion producing the enum members. */
133 #define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
134 foreach_snat_in2out_error
137 } snat_in2out_error_t;
/* Expansion producing the description strings, in the same order as the
 * enum members. */
139 static char * snat_in2out_error_strings[] = {
140 #define _(sym,string) string,
141 foreach_snat_in2out_error
/* Dispositions assigned to next0 / returned by the in2out helpers in this
 * file (e.g. slow_path() returns SNAT_IN2OUT_NEXT_DROP on failure). */
146 SNAT_IN2OUT_NEXT_LOOKUP,
147 SNAT_IN2OUT_NEXT_DROP,
148 SNAT_IN2OUT_NEXT_ICMP_ERROR,
149 SNAT_IN2OUT_NEXT_SLOW_PATH,
150 SNAT_IN2OUT_NEXT_REASS,
152 } snat_in2out_next_t;
/* Dispositions for the hairpin source node; SNAT_HAIRPIN_SRC_N_NEXT is the
 * last member and therefore equals the number of dispositions above it. */
155 SNAT_HAIRPIN_SRC_NEXT_DROP,
156 SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
157 SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
158 SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
159 SNAT_HAIRPIN_SRC_N_NEXT,
160 } snat_hairpin_next_t;
163 * @brief Check if packet should be translated
165 * Packets aimed at outside interface and external address with active session
166 * should be translated.
169 * @param node NAT node runtime data
170 * @param sw_if_index0 index of the inside interface
171 * @param ip0 IPv4 header
172 * @param proto0 NAT protocol
173 * @param rx_fib_index0 RX FIB index
175 * @returns 0 if packet should be translated otherwise 1
178 snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
179 u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
185 fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
/* Lookup prefix built from the packet's destination address. */
187 .fp_proto = FIB_PROTOCOL_IP4,
190 .ip4.as_u32 = ip0->dst_address.as_u32,
194 /* Don't NAT packets aimed at the interface address */
195 if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
196 ip0->dst_address.as_u32)))
/* Resolve the destination in the RX FIB first. */
199 fei = fib_table_lookup (rx_fib_index0, &pfx);
200 if (FIB_NODE_INDEX_INVALID != fei)
202 u32 sw_if_index = fib_entry_get_resolving_interface (fei);
/* No resolving interface from the RX FIB entry: retry in the outside FIB. */
203 if (sw_if_index == ~0)
205 fei = fib_table_lookup (sm->outside_fib_index, &pfx);
206 if (FIB_NODE_INDEX_INVALID != fei)
207 sw_if_index = fib_entry_get_resolving_interface (fei);
/* Compare the resolving interface against every configured NAT interface. */
210 pool_foreach (i, sm->interfaces,
212 /* NAT packet aimed at outside interface */
213 if ((nat_interface_is_outside(i)) && (sw_if_index == i->sw_if_index))
/* Translate check that consults the per-thread out2in session table and the
 * static mappings for the packet's destination, then the forwarding flag,
 * before deferring to snat_not_translate_fast(). */
222 snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
223 u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
224 u32 rx_fib_index0, u32 thread_index)
226 udp_header_t * udp0 = ip4_next_header (ip0);
227 snat_session_key_t key0, sm0;
228 clib_bihash_kv_8_8_t kv0, value0;
/* Key on the packet's destination address/port in the outside FIB. */
230 key0.addr = ip0->dst_address;
231 key0.port = udp0->dst_port;
232 key0.protocol = proto0;
233 key0.fib_index = sm->outside_fib_index;
234 kv0.key = key0.as_u64;
236 /* NAT packets aimed at an external address if it */
237 /* has active sessions */
238 if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
241 /* or matches a static mapping */
242 if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
248 if (sm->forwarding_enabled)
/* Otherwise decide from the interface/FIB checks. */
251 return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
/* Output-feature translate check: looks up the packet's source address and
 * src_port, keyed in the outside FIB, in the per-thread out2in table. */
256 nat_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip0,
257 u32 proto0, u16 src_port, u32 thread_index)
259 snat_session_key_t key0;
260 clib_bihash_kv_8_8_t kv0, value0;
262 key0.addr = ip0->src_address;
263 key0.port = src_port;
264 key0.protocol = proto0;
265 key0.fib_index = sm->outside_fib_index;
266 kv0.key = key0.as_u64;
/* A zero result from clib_bihash_search_8_8() means a matching entry exists. */
268 if (!clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
/* Create a new session for an in2out packet that has no translation yet:
 * picks the outside address/port (static mapping first, dynamic allocation
 * otherwise), allocates the session and installs it in the per-thread in2out
 * and out2in hashes. Returns SNAT_IN2OUT_NEXT_DROP on every failure path. */
275 static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
278 snat_session_key_t * key0,
279 snat_session_t ** sessionp,
280 vlib_node_runtime_t * node,
286 clib_bihash_kv_8_8_t kv0;
287 snat_session_key_t key1;
288 u32 address_index = ~0;
289 u32 outside_fib_index;
291 udp_header_t * udp0 = ip4_next_header (ip0);
/* Refuse to create a session when maximum_sessions_exceeded() reports the
 * limit was reached; the event is also reported through IPFIX logging. */
293 if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
295 b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
296 nat_ipfix_logging_max_sessions(sm->max_translations);
297 return SNAT_IN2OUT_NEXT_DROP;
/* Map the configured outside VRF id to its FIB index. */
300 p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
303 b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
304 return SNAT_IN2OUT_NEXT_DROP;
306 outside_fib_index = p[0];
308 key1.protocol = key0->protocol;
/* The NAT user is looked up (or created) by the packet's source address. */
310 u = nat_user_get_or_create (sm, &ip0->src_address, rx_fib_index0,
314 clib_warning ("create NAT user failed");
315 return SNAT_IN2OUT_NEXT_DROP;
318 /* First try to match static mapping by local address and port */
319 if (snat_static_mapping_match (sm, *key0, &key1, 0, 0, 0))
321 /* Try to create dynamic translation */
322 if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0,
326 sm->per_thread_data[thread_index].snat_thread_index))
328 b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
329 return SNAT_IN2OUT_NEXT_DROP;
335 u->nstaticsessions++;
338 s = nat_session_alloc_or_recycle (sm, u, thread_index);
341 clib_warning ("create NAT session failed");
342 return SNAT_IN2OUT_NEXT_DROP;
/* address_index still at its initial ~0 marks the session as a static
 * mapping. */
345 if (address_index == ~0)
346 s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
347 s->outside_address_index = address_index;
350 s->out2in.protocol = key0->protocol;
351 s->out2in.fib_index = outside_fib_index;
/* Remember the external host: the packet's destination address and port. */
352 s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
353 s->ext_host_port = udp0->dst_port;
356 /* Add to translation hashes */
/* Both hash values are the session's index in the per-thread session pool. */
357 kv0.key = s->in2out.as_u64;
358 kv0.value = s - sm->per_thread_data[thread_index].sessions;
359 if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
361 clib_warning ("in2out key add failed");
363 kv0.key = s->out2in.as_u64;
364 kv0.value = s - sm->per_thread_data[thread_index].sessions;
366 if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0,
368 clib_warning ("out2in key add failed");
/* Report the session creation through IPFIX logging. */
371 snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
372 s->out2in.addr.as_u32,
376 s->in2out.fib_index);
/* Build the in2out session lookup key (address, port, protocol) for an ICMP
 * packet. Returns SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL when the packet
 * embedded in an ICMP error is neither ICMP, UDP nor TCP; -1 on success. */
381 snat_in2out_error_t icmp_get_key(ip4_header_t *ip0,
382 snat_session_key_t *p_key0)
384 icmp46_header_t *icmp0;
385 snat_session_key_t key0;
386 icmp_echo_header_t *echo0, *inner_echo0 = 0;
387 ip4_header_t *inner_ip0 = 0;
389 icmp46_header_t *inner_icmp0;
391 icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
/* The echo header is taken to start right after the ICMP header. */
392 echo0 = (icmp_echo_header_t *)(icmp0+1);
394 if (!icmp_is_error_message (icmp0))
/* Not an ICMP error: key on the source address and the echo identifier. */
396 key0.protocol = SNAT_PROTOCOL_ICMP;
397 key0.addr = ip0->src_address;
398 key0.port = echo0->identifier;
/* ICMP error: key on the embedded packet, using its destination address and
 * either its echo identifier (ICMP) or its destination port (UDP/TCP). */
402 inner_ip0 = (ip4_header_t *)(echo0+1);
403 l4_header = ip4_next_header (inner_ip0);
404 key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
405 key0.addr = inner_ip0->dst_address;
406 switch (key0.protocol)
408 case SNAT_PROTOCOL_ICMP:
409 inner_icmp0 = (icmp46_header_t*)l4_header;
410 inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
411 key0.port = inner_echo0->identifier;
413 case SNAT_PROTOCOL_UDP:
414 case SNAT_PROTOCOL_TCP:
415 key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
418 return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
422 return -1; /* success */
/* Build a nat_ed_ses_key_t (local/remote address and port, protocol) for an
 * ICMP packet. Returns SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL when the packet
 * embedded in an ICMP error is neither ICMP, UDP nor TCP. */
425 static_always_inline int
426 icmp_get_ed_key(ip4_header_t *ip0, nat_ed_ses_key_t *p_key0)
428 icmp46_header_t *icmp0;
429 nat_ed_ses_key_t key0;
430 icmp_echo_header_t *echo0, *inner_echo0 = 0;
431 ip4_header_t *inner_ip0 = 0;
433 icmp46_header_t *inner_icmp0;
435 icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
436 echo0 = (icmp_echo_header_t *)(icmp0+1);
438 if (!icmp_is_error_message (icmp0))
/* Not an ICMP error: local = source, remote = destination; the echo
 * identifier is used for both ports. */
440 key0.proto = IP_PROTOCOL_ICMP;
441 key0.l_addr = ip0->src_address;
442 key0.r_addr = ip0->dst_address;
443 key0.l_port = key0.r_port = echo0->identifier;
/* ICMP error: the key comes from the embedded packet with the roles swapped
 * (its source becomes remote, its destination becomes local). */
447 inner_ip0 = (ip4_header_t *)(echo0+1);
448 l4_header = ip4_next_header (inner_ip0);
449 key0.proto = inner_ip0->protocol;
450 key0.r_addr = inner_ip0->src_address;
451 key0.l_addr = inner_ip0->dst_address;
452 switch (ip_proto_to_snat_proto (inner_ip0->protocol))
454 case SNAT_PROTOCOL_ICMP:
455 inner_icmp0 = (icmp46_header_t*)l4_header;
456 inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
457 key0.r_port = key0.l_port = inner_echo0->identifier;
459 case SNAT_PROTOCOL_UDP:
460 case SNAT_PROTOCOL_TCP:
/* Ports are swapped the same way as the addresses. */
461 key0.l_port = ((tcp_udp_header_t*)l4_header)->dst_port;
462 key0.r_port = ((tcp_udp_header_t*)l4_header)->src_port;
465 return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
/* Output-feature check used with forwarding: builds a nat_ed_ses_key_t from
 * the packet and looks it up in sm->in2out_ed. When an entry is found the
 * result is whether its value equals ~0ULL. */
473 nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip)
475 nat_ed_ses_key_t key;
476 clib_bihash_kv_16_8_t kv, value;
/* Only relevant when forwarding is enabled. */
479 if (!sm->forwarding_enabled)
482 if (ip->protocol == IP_PROTOCOL_ICMP)
484 if (icmp_get_ed_key (ip, &key))
487 else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
/* UDP/TCP: addresses and ports taken directly from the packet. */
489 udp = ip4_next_header(ip);
490 key.l_addr = ip->src_address;
491 key.r_addr = ip->dst_address;
492 key.proto = ip->protocol;
493 key.r_port = udp->dst_port;
494 key.l_port = udp->src_port;
/* Any other protocol: addresses and protocol only, both ports zero. */
498 key.l_addr = ip->src_address;
499 key.r_addr = ip->dst_address;
500 key.proto = ip->protocol;
501 key.l_port = key.r_port = 0;
504 kv.key[0] = key.as_u64[0];
505 kv.key[1] = key.as_u64[1];
507 if (!clib_bihash_search_16_8 (&sm->in2out_ed, &kv, &value))
508 return value.value == ~0ULL;
514 * Get address and port values to be used for ICMP packet translation
515 * and create session if needed
517 * @param[in,out] sm NAT main
518 * @param[in,out] node NAT node runtime
519 * @param[in] thread_index thread index
520 * @param[in,out] b0 buffer containing packet to be translated
521 * @param[out] p_proto protocol used for matching
522 * @param[out] p_value address and port after NAT translation
523 * @param[out] p_dont_translate if packet should not be translated
524 * @param d optional parameter
525 * @param e optional parameter
527 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
528 u32 thread_index, vlib_buffer_t *b0,
529 ip4_header_t *ip0, u8 *p_proto,
530 snat_session_key_t *p_value,
531 u8 *p_dont_translate, void *d, void *e)
533 icmp46_header_t *icmp0;
536 snat_session_key_t key0;
537 snat_session_t *s0 = 0;
538 u8 dont_translate = 0;
539 clib_bihash_kv_8_8_t kv0, value0;
543 icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
544 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
545 rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
/* Derive the session key from the ICMP packet; err indexes node->errors
 * on the failure path below. */
547 err = icmp_get_key (ip0, &key0);
550 b0->error = node->errors[err];
551 next0 = SNAT_IN2OUT_NEXT_DROP;
554 key0.fib_index = rx_fib_index0;
556 kv0.key = key0.as_u64;
/* Non-zero search result: no in2out session exists for this key yet. */
558 if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0,
/* A TX sw_if_index other than ~0 selects the output-feature check;
 * snat_not_translate() is the other check used in this branch. */
561 if (vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0)
563 if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
564 ip0, SNAT_PROTOCOL_ICMP, key0.port, thread_index)))
572 if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
573 ip0, SNAT_PROTOCOL_ICMP, rx_fib_index0, thread_index)))
/* ICMP error messages never create a new session. */
580 if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
582 b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
583 next0 = SNAT_IN2OUT_NEXT_DROP;
/* Create the session; s0 receives it. */
587 next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
588 &s0, node, next0, thread_index);
590 if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
/* Only echo request, echo reply and ICMP error messages are accepted. */
595 if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
596 icmp0->type != ICMP4_echo_reply &&
597 !icmp_is_error_message (icmp0)))
599 b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
600 next0 = SNAT_IN2OUT_NEXT_DROP;
/* A stored value of ~0ULL redirects the lookup to the in2out_ed table,
 * which is keyed by nat_ed_ses_key_t. */
604 if (PREDICT_FALSE (value0.value == ~0ULL))
606 nat_ed_ses_key_t key;
607 clib_bihash_kv_16_8_t s_kv, s_value;
611 if (icmp_get_ed_key (ip0, &key))
613 b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
614 next0 = SNAT_IN2OUT_NEXT_DROP;
617 key.fib_index = rx_fib_index0;
618 s_kv.key[0] = key.as_u64[0];
619 s_kv.key[1] = key.as_u64[1];
620 if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
621 s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
625 next0 = SNAT_IN2OUT_NEXT_DROP;
630 s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
/* Publish the results through the output parameters. */
635 *p_proto = key0.protocol;
637 *p_value = s0->out2in;
638 *p_dont_translate = dont_translate;
/* d is treated as a snat_session_t ** and receives the matched session. */
640 *(snat_session_t**)d = s0;
645 * Get address and port values to be used for ICMP packet translation
647 * @param[in] sm NAT main
648 * @param[in,out] node NAT node runtime
649 * @param[in] thread_index thread index
650 * @param[in,out] b0 buffer containing packet to be translated
651 * @param[out] p_proto protocol used for matching
652 * @param[out] p_value address and port after NAT translation
653 * @param[out] p_dont_translate if packet should not be translated
654 * @param d optional parameter
655 * @param e optional parameter
657 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
658 u32 thread_index, vlib_buffer_t *b0,
659 ip4_header_t *ip0, u8 *p_proto,
660 snat_session_key_t *p_value,
661 u8 *p_dont_translate, void *d, void *e)
663 icmp46_header_t *icmp0;
666 snat_session_key_t key0;
667 snat_session_key_t sm0;
668 u8 dont_translate = 0;
673 icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
674 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
675 rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
/* Derive the lookup key from the ICMP packet; err indexes node->errors
 * on the failure path below. */
677 err = icmp_get_key (ip0, &key0);
680 b0->error = node->errors[err];
681 next0 = SNAT_IN2OUT_NEXT_DROP;
684 key0.fib_index = rx_fib_index0;
/* A non-zero result means no static mapping matched the key. */
686 if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only, 0))
688 if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
689 IP_PROTOCOL_ICMP, rx_fib_index0)))
695 if (icmp_is_error_message (icmp0))
697 next0 = SNAT_IN2OUT_NEXT_DROP;
701 b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
702 next0 = SNAT_IN2OUT_NEXT_DROP;
/* Accepted types: echo request, ICMP error messages, and echo reply only
 * when the matched mapping is address-only. */
706 if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
707 (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
708 !icmp_is_error_message (icmp0)))
710 b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
711 next0 = SNAT_IN2OUT_NEXT_DROP;
/* Publish the results through the output parameters. */
718 *p_proto = key0.protocol;
719 *p_dont_translate = dont_translate;
/* Translate an in2out ICMP packet: obtains the outside address/port through
 * sm->icmp_match_in2out_cb, rewrites the outer source address and, depending
 * on the ICMP type, the echo identifier or the embedded packet, patching the
 * IP and ICMP checksums incrementally. */
723 static inline u32 icmp_in2out (snat_main_t *sm,
726 icmp46_header_t * icmp0,
729 vlib_node_runtime_t * node,
735 snat_session_key_t sm0;
737 icmp_echo_header_t *echo0, *inner_echo0 = 0;
738 ip4_header_t *inner_ip0;
740 icmp46_header_t *inner_icmp0;
742 u32 new_addr0, old_addr0;
743 u16 old_id0, new_id0;
748 echo0 = (icmp_echo_header_t *)(icmp0+1);
/* The callback fills protocol, sm0 (the translated key) and dont_translate. */
750 next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, ip0,
751 &protocol, &sm0, &dont_translate, d, e);
754 if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate)
/* Verify the checksum over the whole ICMP message before rewriting it. */
757 sum0 = ip_incremental_checksum (0, icmp0,
758 ntohs(ip0->length) - ip4_header_bytes (ip0));
759 checksum0 = ~ip_csum_fold (sum0);
760 if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff))
762 next0 = SNAT_IN2OUT_NEXT_DROP;
/* Rewrite the outer source address and patch the IP header checksum. */
766 old_addr0 = ip0->src_address.as_u32;
767 new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
/* TX is set from the translated key only if nothing assigned it before. */
768 if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
769 vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
771 sum0 = ip0->checksum;
772 sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
773 src_address /* changed member */);
774 ip0->checksum = ip_csum_fold (sum0);
776 if (!icmp_is_error_message (icmp0))
/* Not an ICMP error: replace the identifier only when it changes. */
779 if (PREDICT_FALSE(new_id0 != echo0->identifier))
781 old_id0 = echo0->identifier;
783 echo0->identifier = new_id0;
785 sum0 = icmp0->checksum;
786 sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
788 icmp0->checksum = ip_csum_fold (sum0);
/* ICMP error: the embedded packet is rewritten instead, after validating
 * its IP header checksum. */
793 inner_ip0 = (ip4_header_t *)(echo0+1);
794 l4_header = ip4_next_header (inner_ip0);
796 if (!ip4_header_checksum_is_valid (inner_ip0))
798 next0 = SNAT_IN2OUT_NEXT_DROP;
/* The embedded destination address is replaced; the outer ICMP checksum
 * is patched because it covers the embedded packet. */
802 old_addr0 = inner_ip0->dst_address.as_u32;
803 inner_ip0->dst_address = sm0.addr;
804 new_addr0 = inner_ip0->dst_address.as_u32;
806 sum0 = icmp0->checksum;
807 sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
808 dst_address /* changed member */);
809 icmp0->checksum = ip_csum_fold (sum0);
813 case SNAT_PROTOCOL_ICMP:
814 inner_icmp0 = (icmp46_header_t*)l4_header;
815 inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
817 old_id0 = inner_echo0->identifier;
819 inner_echo0->identifier = new_id0;
821 sum0 = icmp0->checksum;
822 sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
824 icmp0->checksum = ip_csum_fold (sum0);
826 case SNAT_PROTOCOL_UDP:
827 case SNAT_PROTOCOL_TCP:
828 old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port;
830 ((tcp_udp_header_t*)l4_header)->dst_port = new_id0;
832 sum0 = icmp0->checksum;
833 sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
835 icmp0->checksum = ip_csum_fold (sum0);
849 * Hairpinning allows two endpoints on the internal side of the NAT to
850 * communicate even if they only use each other's external IP addresses
853 * @param sm NAT main.
854 * @param b0 Vlib buffer.
855 * @param ip0 IP header.
856 * @param udp0 UDP header.
857 * @param tcp0 TCP header.
858 * @param proto0 NAT protocol.
861 snat_hairpinning (snat_main_t *sm,
868 snat_session_key_t key0, sm0;
870 clib_bihash_kv_8_8_t kv0, value0;
872 u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
873 u16 new_dst_port0, old_dst_port0;
/* Key on the packet's destination address/port in the outside FIB. */
875 key0.addr = ip0->dst_address;
876 key0.port = udp0->dst_port;
877 key0.protocol = proto0;
878 key0.fib_index = sm->outside_fib_index;
879 kv0.key = key0.as_u64;
881 /* Check if destination matches a static mapping */
882 if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
884 new_dst_addr0 = sm0.addr.as_u32;
885 new_dst_port0 = sm0.port;
886 vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
888 /* or active session */
/* Choose which thread's out2in table to search: with several workers the
 * index is derived from the destination port and port_per_thread,
 * otherwise sm->num_workers is used as the index. */
891 if (sm->num_workers > 1)
892 ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
894 ti = sm->num_workers;
896 if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
900 s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
901 new_dst_addr0 = s0->in2out.addr.as_u32;
902 new_dst_port0 = s0->in2out.port;
903 vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
907 /* Destination is behind the same NAT, use internal address and port */
/* Rewrite the destination address and patch the IP header checksum. */
910 old_dst_addr0 = ip0->dst_address.as_u32;
911 ip0->dst_address.as_u32 = new_dst_addr0;
912 sum0 = ip0->checksum;
913 sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
914 ip4_header_t, dst_address);
915 ip0->checksum = ip_csum_fold (sum0);
917 old_dst_port0 = tcp0->dst;
918 if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
920 if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
/* TCP with a changed port: the TCP checksum is patched for both the
 * address change and the port change. */
922 tcp0->dst = new_dst_port0;
923 sum0 = tcp0->checksum;
924 sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
925 ip4_header_t, dst_address);
926 sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
927 ip4_header_t /* cheat */, length);
928 tcp0->checksum = ip_csum_fold(sum0);
/* Not TCP: the UDP destination port is rewritten. */
932 udp0->dst_port = new_dst_port0;
/* Port unchanged: for TCP the checksum is still patched for the address
 * change. */
938 if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
940 sum0 = tcp0->checksum;
941 sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
942 ip4_header_t, dst_address);
943 tcp0->checksum = ip_csum_fold(sum0);
/* ICMP counterpart of snat_hairpinning(): when the destination of a
 * non-error ICMP packet matches an active out2in session or a static
 * mapping, the destination is rewritten to the internal address. */
952 snat_icmp_hairpinning (snat_main_t *sm,
955 icmp46_header_t * icmp0)
957 snat_session_key_t key0, sm0;
958 clib_bihash_kv_8_8_t kv0, value0;
959 u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
963 if (!icmp_is_error_message (icmp0))
/* Key on the destination address and the echo identifier in the
 * outside FIB. */
965 icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
966 u16 icmp_id0 = echo0->identifier;
967 key0.addr = ip0->dst_address;
968 key0.port = icmp_id0;
969 key0.protocol = SNAT_PROTOCOL_ICMP;
970 key0.fib_index = sm->outside_fib_index;
971 kv0.key = key0.as_u64;
/* The thread index is derived from the identifier the same way
 * snat_hairpinning() derives it from the destination port. */
973 if (sm->num_workers > 1)
974 ti = (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread;
976 ti = sm->num_workers;
978 /* Check if destination is in active sessions */
979 if (clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0,
982 /* or static mappings */
983 if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
985 new_dst_addr0 = sm0.addr.as_u32;
986 vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
/* Active session: the identifier is also rewritten to the session's
 * inside port and the ICMP checksum patched accordingly. */
993 s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
994 new_dst_addr0 = s0->in2out.addr.as_u32;
995 vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
996 echo0->identifier = s0->in2out.port;
997 sum0 = icmp0->checksum;
998 sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
999 icmp_echo_header_t, identifier);
1000 icmp0->checksum = ip_csum_fold (sum0);
1003 /* Destination is behind the same NAT, use internal address and port */
1006 old_dst_addr0 = ip0->dst_address.as_u32;
1007 ip0->dst_address.as_u32 = new_dst_addr0;
1008 sum0 = ip0->checksum;
1009 sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
1010 ip4_header_t, dst_address);
1011 ip0->checksum = ip_csum_fold (sum0);
/* Slow-path wrapper around icmp_in2out(): after a successful translation
 * with a session it may attempt ICMP hairpinning, then updates the session's
 * last_heard, byte count and position in the per-user list. */
1017 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
1020 icmp46_header_t * icmp0,
1023 vlib_node_runtime_t * node,
1027 snat_session_t ** p_s0)
/* p_s0 is passed as the callback's d argument and receives the session. */
1029 next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1030 next0, thread_index, p_s0, 0);
1031 snat_session_t * s0 = *p_s0;
1032 if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
/* NOTE(review): this compares the TX sw_if_index against 0, whereas the
 * other TX checks in this file compare against ~0 - confirm intended. */
1035 if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
1036 snat_icmp_hairpinning(sm, b0, ip0, icmp0);
1038 s0->last_heard = now;
1040 s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
1041 /* Per-user LRU list maintenance */
/* The session is moved to the tail of its user's list. */
1042 clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1043 s0->per_user_index);
1044 clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1045 s0->per_user_list_head_index,
1046 s0->per_user_index);
/* Hairpinning for packets handled by address only: looks the destination up
 * in sm->out2in_ed, then in the static mappings by external address, and
 * rewrites the destination address to the internal one. */
1051 snat_hairpinning_unknown_proto (snat_main_t *sm,
1055 u32 old_addr, new_addr = 0, ti = 0;
1056 clib_bihash_kv_8_8_t kv, value;
1057 clib_bihash_kv_16_8_t s_kv, s_value;
1058 nat_ed_ses_key_t key;
1059 snat_session_key_t m_key;
1060 snat_static_mapping_t *m;
/* out2in key: local = packet destination, remote = packet source. */
1064 old_addr = ip->dst_address.as_u32;
1065 key.l_addr.as_u32 = ip->dst_address.as_u32;
1066 key.r_addr.as_u32 = ip->src_address.as_u32;
1067 key.fib_index = sm->outside_fib_index;
1068 key.proto = ip->protocol;
1071 s_kv.key[0] = key.as_u64[0];
1072 s_kv.key[1] = key.as_u64[1];
/* Non-zero result: no out2in_ed session, fall back to static mappings. */
1073 if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1075 m_key.addr = ip->dst_address;
1076 m_key.fib_index = sm->outside_fib_index;
1079 kv.key = m_key.as_u64;
1080 if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
1083 m = pool_elt_at_index (sm->static_mappings, value.value);
1084 if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1085 vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index;
1086 new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
/* Session found: pick the thread whose session pool holds it. */
1090 if (sm->num_workers > 1)
1091 ti = sm->worker_out2in_cb (ip, sm->outside_fib_index);
1093 ti = sm->num_workers;
1095 s = pool_elt_at_index (sm->per_thread_data[ti].sessions, s_value.value);
1096 if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1097 vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
1098 new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
/* Patch the IP header checksum for the destination address change. */
1101 sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
1102 ip->checksum = ip_csum_fold (sum);
/* Translate the source address of an in2out packet using the in2out_ed /
 * out2in_ed tables. An existing session is reused; otherwise an outside
 * address is chosen (static mapping, then an address already used by this
 * user towards the same destination, then the first usable pool address)
 * and a session flagged SNAT_SESSION_FLAG_UNKNOWN_PROTO is created. */
1105 static snat_session_t *
1106 snat_in2out_unknown_proto (snat_main_t *sm,
1113 vlib_node_runtime_t * node)
1115 clib_bihash_kv_8_8_t kv, value;
1116 clib_bihash_kv_16_8_t s_kv, s_value;
1117 snat_static_mapping_t *m;
1118 snat_session_key_t m_key;
1119 u32 old_addr, new_addr = 0;
1122 dlist_elt_t *head, *elt;
1123 snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1124 u32 elt_index, head_index, ses_index;
1126 nat_ed_ses_key_t key;
1127 u32 address_index = ~0;
1131 old_addr = ip->src_address.as_u32;
/* in2out key: local = packet source, remote = packet destination. */
1133 key.l_addr = ip->src_address;
1134 key.r_addr = ip->dst_address;
1135 key.fib_index = rx_fib_index;
1136 key.proto = ip->protocol;
1139 s_kv.key[0] = key.as_u64[0];
1140 s_kv.key[1] = key.as_u64[1];
/* Existing session: reuse its outside address. */
1142 if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
1144 s = pool_elt_at_index (tsm->sessions, s_value.value);
1145 new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
/* No session yet: check the session limit before creating one. */
1149 if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
1151 b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
1152 nat_ipfix_logging_max_sessions(sm->max_translations);
1156 u = nat_user_get_or_create (sm, &ip->src_address, rx_fib_index,
1160 clib_warning ("create NAT user failed");
1164 m_key.addr = ip->src_address;
1167 m_key.fib_index = rx_fib_index;
1168 kv.key = m_key.as_u64;
1170 /* Try to find static mapping first */
1171 if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
1173 m = pool_elt_at_index (sm->static_mappings, value.value);
1174 new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
1178 /* Fallback to 3-tuple key */
1181 /* Choose same out address as for TCP/UDP session to same destination */
1182 if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
/* Walk the user's session list until an element whose value is ~0. */
1184 head_index = u->sessions_per_user_list_head_index;
1185 head = pool_elt_at_index (tsm->list_pool, head_index);
1186 elt_index = head->next;
1187 elt = pool_elt_at_index (tsm->list_pool, elt_index);
1188 ses_index = elt->value;
1189 while (ses_index != ~0)
1191 s = pool_elt_at_index (tsm->sessions, ses_index);
1192 elt_index = elt->next;
1193 elt = pool_elt_at_index (tsm->list_pool, elt_index);
1194 ses_index = elt->value;
1196 if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
1198 new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1199 address_index = s->outside_address_index;
/* Probe out2in_ed for an entry with this outside address. */
1201 key.fib_index = sm->outside_fib_index;
1202 key.l_addr.as_u32 = new_addr;
1203 s_kv.key[0] = key.as_u64[0];
1204 s_kv.key[1] = key.as_u64[1];
1205 if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
/* Otherwise probe every address in sm->addresses for one that has no
 * out2in_ed entry for this key. */
1212 key.fib_index = sm->outside_fib_index;
1213 for (i = 0; i < vec_len (sm->addresses); i++)
1215 key.l_addr.as_u32 = sm->addresses[i].addr.as_u32;
1216 s_kv.key[0] = key.as_u64[0];
1217 s_kv.key[1] = key.as_u64[1];
1218 if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
1220 new_addr = ip->src_address.as_u32 = key.l_addr.as_u32;
1229 s = nat_session_alloc_or_recycle (sm, u, thread_index);
1232 clib_warning ("create NAT session failed");
/* Fill in the new session. */
1236 s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1237 s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
1238 s->outside_address_index = address_index;
1239 s->out2in.addr.as_u32 = new_addr;
1240 s->out2in.fib_index = sm->outside_fib_index;
1241 s->in2out.addr.as_u32 = old_addr;
1242 s->in2out.fib_index = rx_fib_index;
/* The IP protocol number is stored in both port fields. */
1243 s->in2out.port = s->out2in.port = ip->protocol;
1246 u->nstaticsessions++;
1247 s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1254 /* Add to lookup tables */
/* in2out_ed is keyed on the original source address in the RX FIB,
 * out2in_ed on the translated address in the outside FIB; both store
 * the session's pool index. */
1255 key.l_addr.as_u32 = old_addr;
1256 key.r_addr = ip->dst_address;
1257 key.proto = ip->protocol;
1258 key.fib_index = rx_fib_index;
1259 s_kv.key[0] = key.as_u64[0];
1260 s_kv.key[1] = key.as_u64[1];
1261 s_kv.value = s - tsm->sessions;
1262 if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1263 clib_warning ("in2out key add failed");
1265 key.l_addr.as_u32 = new_addr;
1266 key.fib_index = sm->outside_fib_index;
1267 s_kv.key[0] = key.as_u64[0];
1268 s_kv.key[1] = key.as_u64[1];
1269 if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1270 clib_warning ("out2in key add failed");
1273 /* Update IP checksum */
1275 sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1276 ip->checksum = ip_csum_fold (sum);
/* Session accounting. */
1279 s->last_heard = now;
1281 s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1282 /* Per-user LRU list maintenance */
1283 clib_dlist_remove (tsm->list_pool, s->per_user_index);
1284 clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
/* Try hairpinning while TX is still unset; if it remains unset afterwards,
 * default it to the outside FIB index. */
1288 if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1289 snat_hairpinning_unknown_proto(sm, b, ip);
1291 if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1292 vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
/*
 * In2out translation through the endpoint-dependent session tables
 * (sm->in2out_ed / sm->out2in_ed).
 *
 * The packet's source/destination address, ports, protocol and RX fib index
 * form the lookup key.  On a hit the session is taken from the per-thread
 * pool; on a miss a new session is created from a matching static mapping and
 * flagged STATIC_MAPPING | LOAD_BALANCING.  The packet's source address/port
 * (and, for twice-NAT sessions, destination address/port) are then rewritten
 * and the checksums patched incrementally.
 */
1297 static snat_session_t *
1298 snat_in2out_lb (snat_main_t *sm,
1305 vlib_node_runtime_t * node)
1307 nat_ed_ses_key_t key;
1308 clib_bihash_kv_16_8_t s_kv, s_value;
/* tcp aliases udp: both views start at the L4 header right after the IP header. */
1309 udp_header_t *udp = ip4_next_header (ip);
1310 tcp_header_t *tcp = (tcp_header_t *) udp;
1311 snat_session_t *s = 0;
1312 snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
1313 u32 old_addr, new_addr;
1314 u16 new_port, old_port;
1316 u32 proto = ip_proto_to_snat_proto (ip->protocol);
1317 snat_session_key_t e_key, l_key;
1320 old_addr = ip->src_address.as_u32;
/* Build the endpoint-dependent key from the packet as received. */
1322 key.l_addr = ip->src_address;
1323 key.r_addr = ip->dst_address;
1324 key.fib_index = rx_fib_index;
1325 key.proto = ip->protocol;
1326 key.r_port = udp->dst_port;
1327 key.l_port = udp->src_port;
1328 s_kv.key[0] = key.as_u64[0];
1329 s_kv.key[1] = key.as_u64[1];
/* clib_bihash_search_16_8 returning 0 is treated as "existing session found". */
1331 if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
/* A stored value of ~0ULL is special-cased before the pool lookup. */
1333 if (s_value.value == ~0ULL)
1335 s = pool_elt_at_index (tsm->sessions, s_value.value);
/* No session yet: refuse to create one when the session limit is exceeded. */
1339 if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index)))
1341 b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
1342 nat_ipfix_logging_max_sessions(sm->max_translations);
/* Look for a static mapping keyed on the local (inside) address and port. */
1346 l_key.addr = ip->src_address;
1347 l_key.port = udp->src_port;
1348 l_key.protocol = proto;
1349 l_key.fib_index = rx_fib_index;
1350 if (snat_static_mapping_match(sm, l_key, &e_key, 0, 0, 0))
1353 u = nat_user_get_or_create (sm, &ip->src_address, rx_fib_index,
1357 clib_warning ("create NAT user failed");
1361 s = nat_session_alloc_or_recycle (sm, u, thread_index);
1364 clib_warning ("create NAT session failed");
1368 s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
1369 s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
1370 s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
1371 s->outside_address_index = ~0;
1374 s->out2in.protocol = l_key.protocol;
1375 u->nstaticsessions++;
1377 /* Add to lookup tables */
/* s_kv still holds the in2out key built above; the value is the pool index. */
1378 s_kv.value = s - tsm->sessions;
1379 if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
1380 clib_warning ("in2out-ed key add failed");
/* Re-key with the external side of the mapping for the reverse table. */
1382 key.l_addr = e_key.addr;
1383 key.fib_index = e_key.fib_index;
1384 key.l_port = e_key.port;
1385 s_kv.key[0] = key.as_u64[0];
1386 s_kv.key[1] = key.as_u64[1];
1387 if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
1388 clib_warning ("out2in-ed key add failed");
1391 new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
1393 /* Update IP checksum */
1395 sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1396 if (is_twice_nat_session (s))
1397 sum = ip_csum_update (sum, ip->dst_address.as_u32,
1398 s->ext_host_addr.as_u32, ip4_header_t, dst_address);
1399 ip->checksum = ip_csum_fold (sum);
1401 if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
1403 old_port = tcp->src_port;
1404 tcp->src_port = s->out2in.port;
1405 new_port = tcp->src_port;
/* The TCP checksum is patched for the address change and the port change. */
1407 sum = tcp->checksum;
1408 sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
1409 sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
1410 if (is_twice_nat_session (s))
1412 sum = ip_csum_update (sum, ip->dst_address.as_u32,
1413 s->ext_host_addr.as_u32, ip4_header_t,
1415 sum = ip_csum_update (sum, tcp->dst_port, s->ext_host_port,
1416 ip4_header_t, length);
1417 tcp->dst_port = s->ext_host_port;
1418 ip->dst_address.as_u32 = s->ext_host_addr.as_u32;
1420 tcp->checksum = ip_csum_fold(sum);
/* Non-TCP path: only the ports/addresses are rewritten in the lines shown here. */
1424 udp->src_port = s->out2in.port;
1425 if (is_twice_nat_session (s))
1427 udp->dst_port = s->ext_host_port;
1428 ip->dst_address.as_u32 = s->ext_host_addr.as_u32;
/* Only set the TX fib index when nothing earlier has chosen one. */
1433 if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
1434 vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
1437 s->last_heard = now;
1439 s->total_bytes += vlib_buffer_length_in_chain (vm, b);
1440 /* Per-user LRU list maintenance */
/* Remove and re-append so the most recently used session sits at the tail. */
1441 clib_dlist_remove (tsm->list_pool, s->per_user_index);
1442 clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
/*
 * Shared worker behind the nat44-in2out node functions.
 *
 * is_slow_path is recorded in packet traces and selects which node's
 * IN2OUT_PACKETS counter is incremented (snat_in2out_slowpath_node vs
 * snat_in2out_node).  is_output_feature reads an extra header offset from
 * ip.save_rewrite_length and suppresses the VLIB_TX fib-index override after
 * translation.
 *
 * Packets are processed two at a time while at least four remain, then one
 * at a time.  Returns frame->n_vectors.
 */
1448 snat_in2out_node_fn_inline (vlib_main_t * vm,
1449 vlib_node_runtime_t * node,
1450 vlib_frame_t * frame, int is_slow_path,
1451 int is_output_feature)
1453 u32 n_left_from, * from, * to_next;
1454 snat_in2out_next_t next_index;
1455 u32 pkts_processed = 0;
1456 snat_main_t * sm = &snat_main;
1457 f64 now = vlib_time_now (vm);
1458 u32 stats_node_index;
1459 u32 thread_index = vlib_get_thread_index ();
1461 stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
1462 snat_in2out_node.index;
1464 from = vlib_frame_vector_args (frame);
1465 n_left_from = frame->n_vectors;
1466 next_index = node->cached_next_index;
1468 while (n_left_from > 0)
1472 vlib_get_next_frame (vm, node, next_index,
1473 to_next, n_left_to_next);
/* Dual loop: needs 4 buffers available because from[2] and from[3] are prefetched. */
1475 while (n_left_from >= 4 && n_left_to_next >= 2)
1478 vlib_buffer_t * b0, * b1;
1480 u32 sw_if_index0, sw_if_index1;
1481 ip4_header_t * ip0, * ip1;
1482 ip_csum_t sum0, sum1;
1483 u32 new_addr0, old_addr0, new_addr1, old_addr1;
1484 u16 old_port0, new_port0, old_port1, new_port1;
1485 udp_header_t * udp0, * udp1;
1486 tcp_header_t * tcp0, * tcp1;
1487 icmp46_header_t * icmp0, * icmp1;
1488 snat_session_key_t key0, key1;
1489 u32 rx_fib_index0, rx_fib_index1;
1491 snat_session_t * s0 = 0, * s1 = 0;
1492 clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
1493 u32 iph_offset0 = 0, iph_offset1 = 0;
1495 /* Prefetch next iteration. */
1497 vlib_buffer_t * p2, * p3;
1499 p2 = vlib_get_buffer (vm, from[2]);
1500 p3 = vlib_get_buffer (vm, from[3]);
1502 vlib_prefetch_buffer_header (p2, LOAD);
1503 vlib_prefetch_buffer_header (p3, LOAD);
1505 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
1506 CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
1509 /* speculatively enqueue b0 and b1 to the current next frame */
1510 to_next[0] = bi0 = from[0];
1511 to_next[1] = bi1 = from[1];
1515 n_left_to_next -= 2;
1517 b0 = vlib_get_buffer (vm, bi0);
1518 b1 = vlib_get_buffer (vm, bi1);
/* ---- first packet of the pair (b0) ---- */
1520 if (is_output_feature)
1521 iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1523 ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
/* udp0, tcp0 and icmp0 are three typed views of the same L4 header. */
1526 udp0 = ip4_next_header (ip0);
1527 tcp0 = (tcp_header_t *) udp0;
1528 icmp0 = (icmp46_header_t *) udp0;
1530 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1531 rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1534 next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
/* TTL of 1: hand the packet to the ICMP error node as time-exceeded. */
1536 if (PREDICT_FALSE(ip0->ttl == 1))
1538 vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1539 icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1540 ICMP4_time_exceeded_ttl_exceeded_in_transit,
1542 next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1546 proto0 = ip_proto_to_snat_proto (ip0->protocol);
1548 /* Next configured feature, probably ip4-lookup */
/* proto0 == ~0 means the IP protocol has no SNAT protocol mapping. */
1551 if (PREDICT_FALSE (proto0 == ~0))
1553 s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1554 thread_index, now, vm, node);
1556 next0 = SNAT_IN2OUT_NEXT_DROP;
1560 if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1562 next0 = icmp_in2out_slow_path
1563 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
1564 node, next0, now, thread_index, &s0);
1570 if (is_output_feature)
1572 if (PREDICT_FALSE(nat_not_translate_output_feature_fwd(sm, ip0)))
/* Unknown protocols and ICMP are redirected to the slow-path node here. */
1576 if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1578 next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1582 if (ip4_is_fragment (ip0))
1584 next0 = SNAT_IN2OUT_NEXT_REASS;
/* Session lookup key: inside address, inside port, protocol, RX fib. */
1589 key0.addr = ip0->src_address;
1590 key0.port = udp0->src_port;
1591 key0.protocol = proto0;
1592 key0.fib_index = rx_fib_index0;
1594 kv0.key = key0.as_u64;
/* Non-zero search result: no session for this key in the per-thread table. */
1596 if (PREDICT_FALSE (clib_bihash_search_8_8 (
1597 &sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0))
1601 if (is_output_feature)
1603 if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
1604 ip0, proto0, udp0->src_port, thread_index)))
1609 if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
1610 ip0, proto0, rx_fib_index0, thread_index)))
1614 next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
1615 &s0, node, next0, thread_index);
1616 if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
1621 next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
/* A stored value of ~0ULL routes the packet through snat_in2out_lb instead. */
1627 if (PREDICT_FALSE (value0.value == ~0ULL))
1631 s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
1632 thread_index, now, vm, node);
1633 if (!s0 && !sm->forwarding_enabled)
1634 next0 = SNAT_IN2OUT_NEXT_DROP;
1639 next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1645 s0 = pool_elt_at_index (
1646 sm->per_thread_data[thread_index].sessions,
1651 b0->flags |= VNET_BUFFER_F_IS_NATED;
/* Rewrite the source address to the session's outside address. */
1653 old_addr0 = ip0->src_address.as_u32;
1654 ip0->src_address = s0->out2in.addr;
1655 new_addr0 = ip0->src_address.as_u32;
1656 if (!is_output_feature)
1657 vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
1659 sum0 = ip0->checksum;
1660 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1662 src_address /* changed member */);
1663 ip0->checksum = ip_csum_fold (sum0);
1665 if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1667 old_port0 = tcp0->src_port;
1668 tcp0->src_port = s0->out2in.port;
1669 new_port0 = tcp0->src_port;
1671 sum0 = tcp0->checksum;
1672 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1674 dst_address /* changed member */);
1675 sum0 = ip_csum_update (sum0, old_port0, new_port0,
1676 ip4_header_t /* cheat */,
1677 length /* changed member */);
1678 tcp0->checksum = ip_csum_fold(sum0);
1682 old_port0 = udp0->src_port;
1683 udp0->src_port = s0->out2in.port;
1688 s0->last_heard = now;
1690 s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
1691 /* Per-user LRU list maintenance */
1692 clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1693 s0->per_user_index);
1694 clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1695 s0->per_user_list_head_index,
1696 s0->per_user_index);
1699 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1700 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1702 snat_in2out_trace_t *t =
1703 vlib_add_trace (vm, node, b0, sizeof (*t));
1704 t->is_slow_path = is_slow_path;
1705 t->sw_if_index = sw_if_index0;
1706 t->next_index = next0;
/* session_index stays ~0 unless a session was resolved for this packet. */
1707 t->session_index = ~0;
1709 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
/* Every packet not sent to the drop node counts as processed. */
1712 pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
/* ---- second packet of the pair (b1): same steps as for b0 ---- */
1714 if (is_output_feature)
1715 iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
1717 ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
1720 udp1 = ip4_next_header (ip1);
1721 tcp1 = (tcp_header_t *) udp1;
1722 icmp1 = (icmp46_header_t *) udp1;
1724 sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
1725 rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1728 if (PREDICT_FALSE(ip1->ttl == 1))
1730 vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1731 icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
1732 ICMP4_time_exceeded_ttl_exceeded_in_transit,
1734 next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1738 proto1 = ip_proto_to_snat_proto (ip1->protocol);
1740 /* Next configured feature, probably ip4-lookup */
1743 if (PREDICT_FALSE (proto1 == ~0))
1745 s1 = snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1,
1746 thread_index, now, vm, node);
1748 next1 = SNAT_IN2OUT_NEXT_DROP;
1752 if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
1754 next1 = icmp_in2out_slow_path
1755 (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
1756 next1, now, thread_index, &s1);
1762 if (is_output_feature)
1764 if (PREDICT_FALSE(nat_not_translate_output_feature_fwd(sm, ip1)))
1768 if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
1770 next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1774 if (ip4_is_fragment (ip1))
1776 next1 = SNAT_IN2OUT_NEXT_REASS;
1781 key1.addr = ip1->src_address;
1782 key1.port = udp1->src_port;
1783 key1.protocol = proto1;
1784 key1.fib_index = rx_fib_index1;
1786 kv1.key = key1.as_u64;
1788 if (PREDICT_FALSE(clib_bihash_search_8_8 (
1789 &sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0))
1793 if (is_output_feature)
1795 if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
1796 ip1, proto1, udp1->src_port, thread_index)))
1801 if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
1802 ip1, proto1, rx_fib_index1, thread_index)))
1806 next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
1807 &s1, node, next1, thread_index);
1808 if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
1813 next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1819 if (PREDICT_FALSE (value1.value == ~0ULL))
1823 s1 = snat_in2out_lb(sm, b1, ip1, rx_fib_index1,
1824 thread_index, now, vm, node);
1825 if (!s1 && !sm->forwarding_enabled)
1826 next1 = SNAT_IN2OUT_NEXT_DROP;
1831 next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
1837 s1 = pool_elt_at_index (
1838 sm->per_thread_data[thread_index].sessions,
1843 b1->flags |= VNET_BUFFER_F_IS_NATED;
1845 old_addr1 = ip1->src_address.as_u32;
1846 ip1->src_address = s1->out2in.addr;
1847 new_addr1 = ip1->src_address.as_u32;
1848 if (!is_output_feature)
1849 vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
1851 sum1 = ip1->checksum;
1852 sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1854 src_address /* changed member */);
1855 ip1->checksum = ip_csum_fold (sum1);
1857 if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
1859 old_port1 = tcp1->src_port;
1860 tcp1->src_port = s1->out2in.port;
1861 new_port1 = tcp1->src_port;
1863 sum1 = tcp1->checksum;
1864 sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
1866 dst_address /* changed member */);
1867 sum1 = ip_csum_update (sum1, old_port1, new_port1,
1868 ip4_header_t /* cheat */,
1869 length /* changed member */);
1870 tcp1->checksum = ip_csum_fold(sum1);
1874 old_port1 = udp1->src_port;
1875 udp1->src_port = s1->out2in.port;
1880 s1->last_heard = now;
1882 s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
1883 /* Per-user LRU list maintenance */
1884 clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
1885 s1->per_user_index);
1886 clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
1887 s1->per_user_list_head_index,
1888 s1->per_user_index);
1891 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1892 && (b1->flags & VLIB_BUFFER_IS_TRACED)))
/*
 * NOTE(review): unlike the b0 trace above and the single-packet loop below,
 * this record does not appear to set t->is_slow_path, yet
 * format_snat_in2out_trace reads that field to pick the SLOW_PATH/FAST_PATH
 * tag.  Confirm whether vlib_add_trace zeroes the record; if not, b1 traces
 * can print the wrong tag and t->is_slow_path should be assigned here too.
 */
1894 snat_in2out_trace_t *t =
1895 vlib_add_trace (vm, node, b1, sizeof (*t));
1896 t->sw_if_index = sw_if_index1;
1897 t->next_index = next1;
1898 t->session_index = ~0;
1900 t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
1903 pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
1905 /* verify speculative enqueues, maybe switch current next frame */
1906 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
1907 to_next, n_left_to_next,
1908 bi0, bi1, next0, next1);
/* Single loop: drains whatever the dual loop could not take. */
1911 while (n_left_from > 0 && n_left_to_next > 0)
1919 u32 new_addr0, old_addr0;
1920 u16 old_port0, new_port0;
1921 udp_header_t * udp0;
1922 tcp_header_t * tcp0;
1923 icmp46_header_t * icmp0;
1924 snat_session_key_t key0;
1927 snat_session_t * s0 = 0;
1928 clib_bihash_kv_8_8_t kv0, value0;
1929 u32 iph_offset0 = 0;
1931 /* speculatively enqueue b0 to the current next frame */
1937 n_left_to_next -= 1;
1939 b0 = vlib_get_buffer (vm, bi0);
1940 next0 = SNAT_IN2OUT_NEXT_LOOKUP;
1942 if (is_output_feature)
1943 iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
1945 ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
1948 udp0 = ip4_next_header (ip0);
1949 tcp0 = (tcp_header_t *) udp0;
1950 icmp0 = (icmp46_header_t *) udp0;
1952 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1953 rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
1956 if (PREDICT_FALSE(ip0->ttl == 1))
1958 vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1959 icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
1960 ICMP4_time_exceeded_ttl_exceeded_in_transit,
1962 next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
1966 proto0 = ip_proto_to_snat_proto (ip0->protocol);
1968 /* Next configured feature, probably ip4-lookup */
1971 if (PREDICT_FALSE (proto0 == ~0))
1973 s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
1974 thread_index, now, vm, node);
1976 next0 = SNAT_IN2OUT_NEXT_DROP;
1980 if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1982 next0 = icmp_in2out_slow_path
1983 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
1984 next0, now, thread_index, &s0);
1990 if (is_output_feature)
1992 if (PREDICT_FALSE(nat_not_translate_output_feature_fwd(sm, ip0)))
1996 if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
1998 next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2002 if (ip4_is_fragment (ip0))
2004 next0 = SNAT_IN2OUT_NEXT_REASS;
2009 key0.addr = ip0->src_address;
2010 key0.port = udp0->src_port;
2011 key0.protocol = proto0;
2012 key0.fib_index = rx_fib_index0;
2014 kv0.key = key0.as_u64;
2016 if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out,
2021 if (is_output_feature)
2023 if (PREDICT_FALSE(nat_not_translate_output_feature(sm,
2024 ip0, proto0, udp0->src_port, thread_index)))
2029 if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
2030 ip0, proto0, rx_fib_index0, thread_index)))
2034 next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
2035 &s0, node, next0, thread_index);
2037 if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
2042 next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2048 if (PREDICT_FALSE (value0.value == ~0ULL))
2052 s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0,
2053 thread_index, now, vm, node);
2054 if (!s0 && !sm->forwarding_enabled)
2055 next0 = SNAT_IN2OUT_NEXT_DROP;
2060 next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
2066 s0 = pool_elt_at_index (
2067 sm->per_thread_data[thread_index].sessions,
2072 b0->flags |= VNET_BUFFER_F_IS_NATED;
2074 old_addr0 = ip0->src_address.as_u32;
2075 ip0->src_address = s0->out2in.addr;
2076 new_addr0 = ip0->src_address.as_u32;
2077 if (!is_output_feature)
2078 vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
2080 sum0 = ip0->checksum;
2081 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2083 src_address /* changed member */);
2084 ip0->checksum = ip_csum_fold (sum0);
2086 if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2088 old_port0 = tcp0->src_port;
2089 tcp0->src_port = s0->out2in.port;
2090 new_port0 = tcp0->src_port;
2092 sum0 = tcp0->checksum;
2093 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2095 dst_address /* changed member */);
2096 sum0 = ip_csum_update (sum0, old_port0, new_port0,
2097 ip4_header_t /* cheat */,
2098 length /* changed member */);
2099 tcp0->checksum = ip_csum_fold(sum0);
2103 old_port0 = udp0->src_port;
2104 udp0->src_port = s0->out2in.port;
2109 s0->last_heard = now;
2111 s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
2112 /* Per-user LRU list maintenance */
2113 clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
2114 s0->per_user_index);
2115 clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
2116 s0->per_user_list_head_index,
2117 s0->per_user_index);
2120 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2121 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2123 snat_in2out_trace_t *t =
2124 vlib_add_trace (vm, node, b0, sizeof (*t));
2125 t->is_slow_path = is_slow_path;
2126 t->sw_if_index = sw_if_index0;
2127 t->next_index = next0;
2128 t->session_index = ~0;
2130 t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
2133 pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2135 /* verify speculative enqueue, maybe switch current next frame */
2136 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2137 to_next, n_left_to_next,
2141 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* The counter is charged to the fast- or slow-path node chosen at entry. */
2144 vlib_node_increment_counter (vm, stats_node_index,
2145 SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2147 return frame->n_vectors;
/*
 * Fast-path node function: runs the shared in2out worker with
 * is_slow_path = 0 and is_output_feature = 0.
 */
2151 snat_in2out_fast_path_fn (vlib_main_t * vm,
2152 vlib_node_runtime_t * node,
2153 vlib_frame_t * frame)
2155 return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0);
/*
 * Graph node "nat44-in2out": dispatches snat_in2out_fast_path_fn and reuses
 * the shared in2out error strings and trace formatter.  Translated packets
 * continue to ip4-lookup.
 */
2158 VLIB_REGISTER_NODE (snat_in2out_node) = {
2159 .function = snat_in2out_fast_path_fn,
2160 .name = "nat44-in2out",
2161 .vector_size = sizeof (u32),
2162 .format_trace = format_snat_in2out_trace,
2163 .type = VLIB_NODE_TYPE_INTERNAL,
2165 .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2166 .error_strings = snat_in2out_error_strings,
2168 .runtime_data_bytes = sizeof (snat_runtime_t),
2170 .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2172 /* edit / add dispositions here */
2174 [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2175 [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2176 [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2177 [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2178 [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2182 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
/*
 * Output-feature fast-path node function: runs the shared in2out worker with
 * is_slow_path = 0 and is_output_feature = 1.
 */
2185 snat_in2out_output_fast_path_fn (vlib_main_t * vm,
2186 vlib_node_runtime_t * node,
2187 vlib_frame_t * frame)
2189 return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1);
/*
 * Graph node "nat44-in2out-output": dispatches
 * snat_in2out_output_fast_path_fn.  Here the LOOKUP disposition leads to
 * interface-output rather than ip4-lookup, and the slow path is the
 * output-specific slow-path node.
 */
2192 VLIB_REGISTER_NODE (snat_in2out_output_node) = {
2193 .function = snat_in2out_output_fast_path_fn,
2194 .name = "nat44-in2out-output",
2195 .vector_size = sizeof (u32),
2196 .format_trace = format_snat_in2out_trace,
2197 .type = VLIB_NODE_TYPE_INTERNAL,
2199 .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2200 .error_strings = snat_in2out_error_strings,
2202 .runtime_data_bytes = sizeof (snat_runtime_t),
2204 .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2206 /* edit / add dispositions here */
2208 [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2209 [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2210 [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2211 [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2212 [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2216 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
2217 snat_in2out_output_fast_path_fn);
/*
 * Slow-path node function: runs the shared in2out worker with
 * is_slow_path = 1 and is_output_feature = 0.
 */
2220 snat_in2out_slow_path_fn (vlib_main_t * vm,
2221 vlib_node_runtime_t * node,
2222 vlib_frame_t * frame)
2224 return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0);
/*
 * Graph node "nat44-in2out-slowpath": dispatches snat_in2out_slow_path_fn.
 * Its SLOW_PATH disposition points back at this same node name.
 */
2227 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
2228 .function = snat_in2out_slow_path_fn,
2229 .name = "nat44-in2out-slowpath",
2230 .vector_size = sizeof (u32),
2231 .format_trace = format_snat_in2out_trace,
2232 .type = VLIB_NODE_TYPE_INTERNAL,
2234 .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2235 .error_strings = snat_in2out_error_strings,
2237 .runtime_data_bytes = sizeof (snat_runtime_t),
2239 .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2241 /* edit / add dispositions here */
2243 [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2244 [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2245 [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2246 [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2247 [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2251 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
2252 snat_in2out_slow_path_fn);
/*
 * Output-feature slow-path node function: runs the shared in2out worker with
 * is_slow_path = 1 and is_output_feature = 1.
 */
2255 snat_in2out_output_slow_path_fn (vlib_main_t * vm,
2256 vlib_node_runtime_t * node,
2257 vlib_frame_t * frame)
2259 return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1);
/*
 * Graph node "nat44-in2out-output-slowpath": dispatches
 * snat_in2out_output_slow_path_fn.  The LOOKUP disposition leads to
 * interface-output, and SLOW_PATH points back at this same node name.
 */
2262 VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
2263 .function = snat_in2out_output_slow_path_fn,
2264 .name = "nat44-in2out-output-slowpath",
2265 .vector_size = sizeof (u32),
2266 .format_trace = format_snat_in2out_trace,
2267 .type = VLIB_NODE_TYPE_INTERNAL,
2269 .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2270 .error_strings = snat_in2out_error_strings,
2272 .runtime_data_bytes = sizeof (snat_runtime_t),
2274 .n_next_nodes = SNAT_IN2OUT_N_NEXT,
2276 /* edit / add dispositions here */
2278 [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2279 [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
2280 [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath",
2281 [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2282 [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2286 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
2287 snat_in2out_output_slow_path_fn);
/* ip4-local feature arc registration, defined elsewhere; used to look up the arc index. */
2289 extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local;
/*
 * Node function for "nat44-hairpinning".
 *
 * For every buffer: locate the IP and L4 headers, classify the protocol,
 * fetch this feature's config data on the ip4-local arc, then call
 * snat_hairpinning().  When that call returns non-zero the packet is steered
 * to SNAT_IN2OUT_NEXT_LOOKUP.  Returns frame->n_vectors.
 */
2292 nat44_hairpinning_fn (vlib_main_t * vm,
2293 vlib_node_runtime_t * node,
2294 vlib_frame_t * frame)
2296 u32 n_left_from, * from, * to_next;
2297 snat_in2out_next_t next_index;
2298 u32 pkts_processed = 0;
2299 snat_main_t * sm = &snat_main;
/* Feature config for the ip4-local arc, looked up once per frame. */
2300 vnet_feature_main_t *fm = &feature_main;
2301 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
2302 vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
2304 from = vlib_frame_vector_args (frame);
2305 n_left_from = frame->n_vectors;
2306 next_index = node->cached_next_index;
2308 while (n_left_from > 0)
2312 vlib_get_next_frame (vm, node, next_index,
2313 to_next, n_left_to_next);
2315 while (n_left_from > 0 && n_left_to_next > 0)
2322 udp_header_t * udp0;
2323 tcp_header_t * tcp0;
2325 /* speculatively enqueue b0 to the current next frame */
2331 n_left_to_next -= 1;
2333 b0 = vlib_get_buffer (vm, bi0);
2334 ip0 = vlib_buffer_get_current (b0);
/* udp0 and tcp0 are two typed views of the same L4 header. */
2335 udp0 = ip4_next_header (ip0);
2336 tcp0 = (tcp_header_t *) udp0;
2338 proto0 = ip_proto_to_snat_proto (ip0->protocol);
/* presumably this also yields the default next0 for the arc — remaining arguments are not shown here; TODO confirm */
2340 vnet_get_config_data (&cm->config_main, &b0->current_config_index,
/* A non-zero return from snat_hairpinning overrides the next node. */
2343 if (snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0))
2344 next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2346 pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2348 /* verify speculative enqueue, maybe switch current next frame */
2349 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2350 to_next, n_left_to_next,
2354 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2357 vlib_node_increment_counter (vm, nat44_hairpinning_node.index,
2358 SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
2360 return frame->n_vectors;
/*
 * Graph node "nat44-hairpinning": dispatches nat44_hairpinning_fn and reuses
 * the in2out error strings.  No trace formatter is registered in the fields
 * shown; the visible dispositions are error-drop and ip4-lookup.
 */
2363 VLIB_REGISTER_NODE (nat44_hairpinning_node) = {
2364 .function = nat44_hairpinning_fn,
2365 .name = "nat44-hairpinning",
2366 .vector_size = sizeof (u32),
2367 .type = VLIB_NODE_TYPE_INTERNAL,
2368 .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2369 .error_strings = snat_in2out_error_strings,
2372 [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2373 [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2377 VLIB_NODE_FUNCTION_MULTIARCH (nat44_hairpinning_node,
2378 nat44_hairpinning_fn);
/*
 * Hairpinning for fragments handled by the reassembly node.
 *
 * Resolves the packet's destination (outside fib) either through a static
 * mapping or through an existing out2in session, then rewrites the
 * destination address to the internal one and patches the IP checksum.  The
 * L4 destination port is rewritten only on the first fragment and only when
 * it actually changes; otherwise just the TCP checksum is patched for the
 * address change.
 */
2381 nat44_reass_hairpinning (snat_main_t *sm,
2388 snat_session_key_t key0, sm0;
2389 snat_session_t * s0;
2390 clib_bihash_kv_8_8_t kv0, value0;
2392 u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
2393 u16 new_dst_port0, old_dst_port0;
2394 udp_header_t * udp0;
2395 tcp_header_t * tcp0;
2397 key0.addr = ip0->dst_address;
2399 key0.protocol = proto0;
2400 key0.fib_index = sm->outside_fib_index;
2401 kv0.key = key0.as_u64;
2403 udp0 = ip4_next_header (ip0);
2405 /* Check if destination is static mappings */
/* snat_static_mapping_match returning 0 is treated as a match. */
2406 if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0))
2408 new_dst_addr0 = sm0.addr.as_u32;
2409 new_dst_port0 = sm0.port;
2410 vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
2412 /* or active sessions */
/*
 * NOTE(review): the owning worker is derived from udp0->dst_port read from
 * the packet, while the rest of this function uses the dport argument for
 * the old destination port.  For non-first fragments the bytes after the IP
 * header are presumably payload rather than an L4 header, so this index may
 * be wrong or out of range — confirm and consider using dport here.
 */
2415 if (sm->num_workers > 1)
2416 ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread;
2418 ti = sm->num_workers;
2420 if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0))
2423 s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
2424 new_dst_addr0 = s0->in2out.addr.as_u32;
2425 new_dst_port0 = s0->in2out.port;
2426 vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
2430 /* Destination is behind the same NAT, use internal address and port */
2433 old_dst_addr0 = ip0->dst_address.as_u32;
2434 ip0->dst_address.as_u32 = new_dst_addr0;
2435 sum0 = ip0->checksum;
2436 sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2437 ip4_header_t, dst_address);
2438 ip0->checksum = ip_csum_fold (sum0);
/* The old port comes from the caller, not from the packet. */
2440 old_dst_port0 = dport;
2441 if (PREDICT_TRUE(new_dst_port0 != old_dst_port0 &&
2442 ip4_is_first_fragment (ip0)))
2444 if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2446 tcp0 = ip4_next_header (ip0);
2447 tcp0->dst = new_dst_port0;
2448 sum0 = tcp0->checksum;
2449 sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2450 ip4_header_t, dst_address);
2451 sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
2452 ip4_header_t /* cheat */, length);
2453 tcp0->checksum = ip_csum_fold(sum0);
2457 udp0->dst_port = new_dst_port0;
/* Port unchanged (or not a first fragment): patch the TCP checksum for the address only. */
2463 if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2465 tcp0 = ip4_next_header (ip0);
2466 sum0 = tcp0->checksum;
2467 sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
2468 ip4_header_t, dst_address);
2469 tcp0->checksum = ip_csum_fold(sum0);
/*
 * Node function for "nat44-in2out-reass": in2out translation of IPv4
 * fragments.
 *
 * Each fragment is attached to a reassembly context.  The first fragment
 * resolves (or creates, via slow_path) the NAT session and stores its index
 * in the context; fragments that arrive before the session is known are
 * cached in the context and fed back through this loop once it is.
 * Fragments collected in fragments_to_drop are sent to the drop node at the
 * end.  Returns frame->n_vectors.
 */
2476 nat44_in2out_reass_node_fn (vlib_main_t * vm,
2477 vlib_node_runtime_t * node,
2478 vlib_frame_t * frame)
2480 u32 n_left_from, *from, *to_next;
2481 snat_in2out_next_t next_index;
2482 u32 pkts_processed = 0;
2483 snat_main_t *sm = &snat_main;
2484 f64 now = vlib_time_now (vm);
2485 u32 thread_index = vlib_get_thread_index ();
2486 snat_main_per_thread_data_t *per_thread_data =
2487 &sm->per_thread_data[thread_index];
/* Both vectors hold buffer indices and are freed before returning. */
2488 u32 *fragments_to_drop = 0;
2489 u32 *fragments_to_loopback = 0;
2491 from = vlib_frame_vector_args (frame);
2492 n_left_from = frame->n_vectors;
2493 next_index = node->cached_next_index;
2495 while (n_left_from > 0)
2499 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2501 while (n_left_from > 0 && n_left_to_next > 0)
2503 u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0;
2508 nat_reass_ip4_t *reass0;
2509 udp_header_t * udp0;
2510 tcp_header_t * tcp0;
2511 snat_session_key_t key0;
2512 clib_bihash_kv_8_8_t kv0, value0;
2513 snat_session_t * s0 = 0;
2514 u16 old_port0, new_port0;
2517 /* speculatively enqueue b0 to the current next frame */
2523 n_left_to_next -= 1;
2525 b0 = vlib_get_buffer (vm, bi0);
2526 next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2528 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2529 rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
/* When nat_reass_is_drop_frag(0) is true, the fragment is dropped outright. */
2532 if (PREDICT_FALSE (nat_reass_is_drop_frag(0)))
2534 next0 = SNAT_IN2OUT_NEXT_DROP;
2535 b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT];
2539 ip0 = (ip4_header_t *) vlib_buffer_get_current (b0);
2540 udp0 = ip4_next_header (ip0);
2541 tcp0 = (tcp_header_t *) udp0;
2542 proto0 = ip_proto_to_snat_proto (ip0->protocol);
2544 reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
2549 &fragments_to_drop);
/* No reassembly context available: drop with MAX_REASS. */
2551 if (PREDICT_FALSE (!reass0))
2553 next0 = SNAT_IN2OUT_NEXT_DROP;
2554 b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_REASS];
/* First fragment: the L4 ports are read here to resolve the session. */
2558 if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
2560 key0.addr = ip0->src_address;
2561 key0.port = udp0->src_port;
2562 key0.protocol = proto0;
2563 key0.fib_index = rx_fib_index0;
2564 kv0.key = key0.as_u64;
2566 if (clib_bihash_search_8_8 (&per_thread_data->in2out, &kv0, &value0))
2568 if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
2569 ip0, proto0, rx_fib_index0, thread_index)))
2572 next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
2573 &s0, node, next0, thread_index);
2575 if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
2578 reass0->sess_index = s0 - per_thread_data->sessions;
2582 s0 = pool_elt_at_index (per_thread_data->sessions,
2584 reass0->sess_index = value0.value;
/* Session now known: collect fragments cached earlier for re-processing. */
2586 nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
/* Later fragment with no session yet: cache it in the reassembly context. */
2590 if (PREDICT_FALSE (reass0->sess_index == (u32) ~0))
2592 if (nat_ip4_reass_add_fragment (reass0, bi0))
2594 b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_FRAG];
2595 next0 = SNAT_IN2OUT_NEXT_DROP;
2601 s0 = pool_elt_at_index (per_thread_data->sessions,
2602 reass0->sess_index);
2605 old_addr0 = ip0->src_address.as_u32;
2606 ip0->src_address = s0->out2in.addr;
2607 new_addr0 = ip0->src_address.as_u32;
2608 vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
2610 sum0 = ip0->checksum;
2611 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2613 src_address /* changed member */);
2614 ip0->checksum = ip_csum_fold (sum0);
/* Port rewrite and L4 checksum fix-up happen on the first fragment only. */
2616 if (PREDICT_FALSE (ip4_is_first_fragment (ip0)))
2618 if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
2620 old_port0 = tcp0->src_port;
2621 tcp0->src_port = s0->out2in.port;
2622 new_port0 = tcp0->src_port;
2624 sum0 = tcp0->checksum;
2625 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
2627 dst_address /* changed member */);
2628 sum0 = ip_csum_update (sum0, old_port0, new_port0,
2629 ip4_header_t /* cheat */,
2630 length /* changed member */);
2631 tcp0->checksum = ip_csum_fold(sum0);
2635 old_port0 = udp0->src_port;
2636 udp0->src_port = s0->out2in.port;
/* Ports are passed from the session because later fragments carry none. */
2642 nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port,
2643 s0->ext_host_port, proto0);
2646 s0->last_heard = now;
2648 s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
2649 /* Per-user LRU list maintenance */
2650 clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
2651 s0->per_user_index);
2652 clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
2653 s0->per_user_list_head_index,
2654 s0->per_user_index);
2657 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2658 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2660 nat44_in2out_reass_trace_t *t =
2661 vlib_add_trace (vm, node, b0, sizeof (*t));
2662 t->cached = cached0;
2663 t->sw_if_index = sw_if_index0;
2664 t->next_index = next0;
2674 pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
2676 /* verify speculative enqueue, maybe switch current next frame */
2677 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2678 to_next, n_left_to_next,
/*
 * Input exhausted but cached fragments are pending: reuse the frame's
 * argument vector as input again, at most VLIB_FRAME_SIZE indices per pass,
 * taking them from the tail of fragments_to_loopback.
 */
2682 if (n_left_from == 0 && vec_len (fragments_to_loopback))
2684 from = vlib_frame_vector_args (frame);
2685 u32 len = vec_len (fragments_to_loopback);
2686 if (len <= VLIB_FRAME_SIZE)
2688 clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len);
2690 vec_reset_length (fragments_to_loopback);
2695 fragments_to_loopback + (len - VLIB_FRAME_SIZE),
2696 sizeof (u32) * VLIB_FRAME_SIZE);
2697 n_left_from = VLIB_FRAME_SIZE;
2698 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
2703 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2706 vlib_node_increment_counter (vm, nat44_in2out_reass_node.index,
2707 SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
/* Send the collected fragments_to_drop to the drop node with DROP_FRAGMENT. */
2710 nat_send_all_to_node (vm, fragments_to_drop, node,
2711 &node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT],
2712 SNAT_IN2OUT_NEXT_DROP);
2714 vec_free (fragments_to_drop);
2715 vec_free (fragments_to_loopback);
2716 return frame->n_vectors;
/*
 * Registration of the "nat44-in2out-reass" graph node: an internal node run
 * by nat44_in2out_reass_node_fn whose traces are rendered by
 * format_nat44_in2out_reass_trace.  Error counters reuse the common in2out
 * error string table.
 * NOTE(review): some lines of this initializer (the next-node table opener
 * and the closing braces) are not visible in this view of the file.
 */
2719 VLIB_REGISTER_NODE (nat44_in2out_reass_node) = {
2720 .function = nat44_in2out_reass_node_fn,
2721 .name = "nat44-in2out-reass",
2722 .vector_size = sizeof (u32),
2723 .format_trace = format_nat44_in2out_reass_trace,
2724 .type = VLIB_NODE_TYPE_INTERNAL,
2726 .n_errors = ARRAY_LEN(snat_in2out_error_strings),
2727 .error_strings = snat_in2out_error_strings,
2729 .n_next_nodes = SNAT_IN2OUT_N_NEXT,
/* Next-node dispositions; the REASS entry names this node itself. */
2731 [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
2732 [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
2733 [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
2734 [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2735 [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
2739 VLIB_NODE_FUNCTION_MULTIARCH (nat44_in2out_reass_node,
2740 nat44_in2out_reass_node_fn);
2742 /**************************/
2743 /*** deterministic mode ***/
2744 /**************************/
/*
 * Deterministic-mode NAT44 in2out node function.
 *
 * For every buffer of the frame:
 *  - a packet whose TTL is 1 is set up for an ICMP time-exceeded error and
 *    sent to the ICMP error next node;
 *  - ICMP packets are handed to icmp_in2out();
 *  - other packets are matched to a deterministic map by source address
 *    (snat_det_map_by_user), an existing session is looked up or a new one
 *    is created on an outside port from the host's port range, the source
 *    address/port are rewritten with incremental checksum updates, and the
 *    session expiry is refreshed according to its state.
 * Two buffers are handled per iteration while at least four remain in the
 * frame, then the remainder one at a time.  Buffers whose next index is not
 * SNAT_IN2OUT_NEXT_DROP are counted in pkts_processed.
 *
 * @param vm    vlib main
 * @param node  node runtime (error counters, trace flag, cached next index)
 * @param frame frame holding the buffer indices to process
 * @return frame->n_vectors
 *
 * NOTE(review): this view of the file omits some lines (braces, else/goto,
 * labels); the comments describe the visible statements only.
 */
2746 snat_det_in2out_node_fn (vlib_main_t * vm,
2747 vlib_node_runtime_t * node,
2748 vlib_frame_t * frame)
2750 u32 n_left_from, * from, * to_next;
2751 snat_in2out_next_t next_index;
2752 u32 pkts_processed = 0;
2753 snat_main_t * sm = &snat_main;
2754 u32 now = (u32) vlib_time_now (vm);
2755 u32 thread_index = vlib_get_thread_index ();
2757 from = vlib_frame_vector_args (frame);
2758 n_left_from = frame->n_vectors;
2759 next_index = node->cached_next_index;
2761 while (n_left_from > 0)
2765 vlib_get_next_frame (vm, node, next_index,
2766 to_next, n_left_to_next);
/* Dual loop: process b0 and b1 together, prefetching the following pair. */
2768 while (n_left_from >= 4 && n_left_to_next >= 2)
2771 vlib_buffer_t * b0, * b1;
2773 u32 sw_if_index0, sw_if_index1;
2774 ip4_header_t * ip0, * ip1;
2775 ip_csum_t sum0, sum1;
2776 ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1;
2777 u16 old_port0, new_port0, lo_port0, i0;
2778 u16 old_port1, new_port1, lo_port1, i1;
2779 udp_header_t * udp0, * udp1;
2780 tcp_header_t * tcp0, * tcp1;
2782 snat_det_out_key_t key0, key1;
2783 snat_det_map_t * dm0, * dm1;
2784 snat_det_session_t * ses0 = 0, * ses1 = 0;
2785 u32 rx_fib_index0, rx_fib_index1;
2786 icmp46_header_t * icmp0, * icmp1;
2788 /* Prefetch next iteration. */
2790 vlib_buffer_t * p2, * p3;
2792 p2 = vlib_get_buffer (vm, from[2]);
2793 p3 = vlib_get_buffer (vm, from[3]);
2795 vlib_prefetch_buffer_header (p2, LOAD);
2796 vlib_prefetch_buffer_header (p3, LOAD);
2798 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
2799 CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
2802 /* speculatively enqueue b0 and b1 to the current next frame */
2803 to_next[0] = bi0 = from[0];
2804 to_next[1] = bi1 = from[1];
2808 n_left_to_next -= 2;
2810 b0 = vlib_get_buffer (vm, bi0);
2811 b1 = vlib_get_buffer (vm, bi1);
2813 next0 = SNAT_IN2OUT_NEXT_LOOKUP;
2814 next1 = SNAT_IN2OUT_NEXT_LOOKUP;
/* ---- first buffer (b0) ---- */
/* udp0 and tcp0 are two typed views of the same L4 header. */
2816 ip0 = vlib_buffer_get_current (b0);
2817 udp0 = ip4_next_header (ip0);
2818 tcp0 = (tcp_header_t *) udp0;
2820 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
2822 if (PREDICT_FALSE(ip0->ttl == 1))
2824 vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2825 icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
2826 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2828 next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2832 proto0 = ip_proto_to_snat_proto (ip0->protocol);
2834 if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
2836 rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
2837 icmp0 = (icmp46_header_t *) udp0;
2839 next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
2840 rx_fib_index0, node, next0, thread_index,
2845 dm0 = snat_det_map_by_user(sm, &ip0->src_address);
2846 if (PREDICT_FALSE(!dm0))
2848 clib_warning("no match for internal host %U",
2849 format_ip4_address, &ip0->src_address);
2850 next0 = SNAT_IN2OUT_NEXT_DROP;
2851 b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
2855 snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
2857 key0.ext_host_addr = ip0->dst_address;
2858 key0.ext_host_port = tcp0->dst;
2860 ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
2861 if (PREDICT_FALSE(!ses0))
/* No session yet: probe the host's outside ports, starting at an offset
 * derived from the inside source port, and create a session on one of them
 * (presumably the first not already in use - the skip line is not visible). */
2863 for (i0 = 0; i0 < dm0->ports_per_host; i0++)
2865 key0.out_port = clib_host_to_net_u16 (lo_port0 +
2866 ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
2868 if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
2871 ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
2874 if (PREDICT_FALSE(!ses0))
2876 /* too many sessions for user, send ICMP error packet */
2878 vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2879 icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
2880 ICMP4_destination_unreachable_destination_unreachable_host,
2882 next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2887 new_port0 = ses0->out.out_port;
/* Rewrite the source address and patch the IP header checksum. */
2889 old_addr0.as_u32 = ip0->src_address.as_u32;
2890 ip0->src_address.as_u32 = new_addr0.as_u32;
2891 vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
2893 sum0 = ip0->checksum;
2894 sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2896 src_address /* changed member */);
2897 ip0->checksum = ip_csum_fold (sum0);
2899 if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
/* Track the TCP session state from the flags seen in this direction. */
2901 if (tcp0->flags & TCP_FLAG_SYN)
2902 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
2903 else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
2904 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2905 else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
2906 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
2907 else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
2908 snat_det_ses_close(dm0, ses0);
2909 else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
2910 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
2911 else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
2912 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
2914 old_port0 = tcp0->src;
2915 tcp0->src = new_port0;
2917 sum0 = tcp0->checksum;
2918 sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
2920 dst_address /* changed member */);
2921 sum0 = ip_csum_update (sum0, old_port0, new_port0,
2922 ip4_header_t /* cheat */,
2923 length /* changed member */);
2924 tcp0->checksum = ip_csum_fold(sum0);
2928 ses0->state = SNAT_SESSION_UDP_ACTIVE;
2929 old_port0 = udp0->src_port;
2930 udp0->src_port = new_port0;
/* Refresh the session expiry with the timeout matching its state. */
2936 case SNAT_SESSION_UDP_ACTIVE:
2937 ses0->expire = now + sm->udp_timeout;
2939 case SNAT_SESSION_TCP_SYN_SENT:
2940 case SNAT_SESSION_TCP_FIN_WAIT:
2941 case SNAT_SESSION_TCP_CLOSE_WAIT:
2942 case SNAT_SESSION_TCP_LAST_ACK:
2943 ses0->expire = now + sm->tcp_transitory_timeout;
2945 case SNAT_SESSION_TCP_ESTABLISHED:
2946 ses0->expire = now + sm->tcp_established_timeout;
2951 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
2952 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
2954 snat_in2out_trace_t *t =
2955 vlib_add_trace (vm, node, b0, sizeof (*t));
2956 t->is_slow_path = 0;
2957 t->sw_if_index = sw_if_index0;
2958 t->next_index = next0;
2959 t->session_index = ~0;
2961 t->session_index = ses0 - dm0->sessions;
2964 pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
/* ---- second buffer (b1): same steps as for b0 ---- */
2966 ip1 = vlib_buffer_get_current (b1);
2967 udp1 = ip4_next_header (ip1);
2968 tcp1 = (tcp_header_t *) udp1;
2970 sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
2972 if (PREDICT_FALSE(ip1->ttl == 1))
2974 vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2975 icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
2976 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2978 next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
2982 proto1 = ip_proto_to_snat_proto (ip1->protocol);
2984 if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP))
2986 rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1);
2987 icmp1 = (icmp46_header_t *) udp1;
2989 next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1,
2990 rx_fib_index1, node, next1, thread_index,
2995 dm1 = snat_det_map_by_user(sm, &ip1->src_address);
2996 if (PREDICT_FALSE(!dm1))
/* Log the source address of b1, the packet that failed the lookup. */
2998 clib_warning("no match for internal host %U",
2999 format_ip4_address, &ip1->src_address);
3000 next1 = SNAT_IN2OUT_NEXT_DROP;
3001 b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3005 snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
3007 key1.ext_host_addr = ip1->dst_address;
3008 key1.ext_host_port = tcp1->dst;
3010 ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
3011 if (PREDICT_FALSE(!ses1))
3013 for (i1 = 0; i1 < dm1->ports_per_host; i1++)
3015 key1.out_port = clib_host_to_net_u16 (lo_port1 +
3016 ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));
3018 if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64))
3021 ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1);
3024 if (PREDICT_FALSE(!ses1))
3026 /* too many sessions for user, send ICMP error packet */
3028 vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3029 icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
3030 ICMP4_destination_unreachable_destination_unreachable_host,
3032 next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3037 new_port1 = ses1->out.out_port;
3039 old_addr1.as_u32 = ip1->src_address.as_u32;
3040 ip1->src_address.as_u32 = new_addr1.as_u32;
3041 vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
3043 sum1 = ip1->checksum;
3044 sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
3046 src_address /* changed member */);
3047 ip1->checksum = ip_csum_fold (sum1);
3049 if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
3051 if (tcp1->flags & TCP_FLAG_SYN)
3052 ses1->state = SNAT_SESSION_TCP_SYN_SENT;
3053 else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
3054 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
3055 else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
3056 ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
3057 else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
3058 snat_det_ses_close(dm1, ses1);
3059 else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
3060 ses1->state = SNAT_SESSION_TCP_LAST_ACK;
3061 else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN)
3062 ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
3064 old_port1 = tcp1->src;
3065 tcp1->src = new_port1;
3067 sum1 = tcp1->checksum;
3068 sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
3070 dst_address /* changed member */);
3071 sum1 = ip_csum_update (sum1, old_port1, new_port1,
3072 ip4_header_t /* cheat */,
3073 length /* changed member */);
3074 tcp1->checksum = ip_csum_fold(sum1);
3078 ses1->state = SNAT_SESSION_UDP_ACTIVE;
3079 old_port1 = udp1->src_port;
3080 udp1->src_port = new_port1;
3086 case SNAT_SESSION_UDP_ACTIVE:
3087 ses1->expire = now + sm->udp_timeout;
3089 case SNAT_SESSION_TCP_SYN_SENT:
3090 case SNAT_SESSION_TCP_FIN_WAIT:
3091 case SNAT_SESSION_TCP_CLOSE_WAIT:
3092 case SNAT_SESSION_TCP_LAST_ACK:
3093 ses1->expire = now + sm->tcp_transitory_timeout;
3095 case SNAT_SESSION_TCP_ESTABLISHED:
3096 ses1->expire = now + sm->tcp_established_timeout;
3101 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3102 && (b1->flags & VLIB_BUFFER_IS_TRACED)))
3104 snat_in2out_trace_t *t =
3105 vlib_add_trace (vm, node, b1, sizeof (*t));
3106 t->is_slow_path = 0;
3107 t->sw_if_index = sw_if_index1;
3108 t->next_index = next1;
3109 t->session_index = ~0;
3111 t->session_index = ses1 - dm1->sessions;
3114 pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
3116 /* verify speculative enqueues, maybe switch current next frame */
3117 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
3118 to_next, n_left_to_next,
3119 bi0, bi1, next0, next1);
/* Single loop: remaining buffers, one per iteration, same steps as above. */
3122 while (n_left_from > 0 && n_left_to_next > 0)
3130 ip4_address_t new_addr0, old_addr0;
3131 u16 old_port0, new_port0, lo_port0, i0;
3132 udp_header_t * udp0;
3133 tcp_header_t * tcp0;
3135 snat_det_out_key_t key0;
3136 snat_det_map_t * dm0;
3137 snat_det_session_t * ses0 = 0;
3139 icmp46_header_t * icmp0;
3141 /* speculatively enqueue b0 to the current next frame */
3147 n_left_to_next -= 1;
3149 b0 = vlib_get_buffer (vm, bi0);
3150 next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3152 ip0 = vlib_buffer_get_current (b0);
3153 udp0 = ip4_next_header (ip0);
3154 tcp0 = (tcp_header_t *) udp0;
3156 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3158 if (PREDICT_FALSE(ip0->ttl == 1))
3160 vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3161 icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3162 ICMP4_time_exceeded_ttl_exceeded_in_transit,
3164 next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3168 proto0 = ip_proto_to_snat_proto (ip0->protocol);
3170 if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
3172 rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3173 icmp0 = (icmp46_header_t *) udp0;
3175 next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
3176 rx_fib_index0, node, next0, thread_index,
3181 dm0 = snat_det_map_by_user(sm, &ip0->src_address);
3182 if (PREDICT_FALSE(!dm0))
3184 clib_warning("no match for internal host %U",
3185 format_ip4_address, &ip0->src_address);
3186 next0 = SNAT_IN2OUT_NEXT_DROP;
3187 b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3191 snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
3193 key0.ext_host_addr = ip0->dst_address;
3194 key0.ext_host_port = tcp0->dst;
3196 ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
3197 if (PREDICT_FALSE(!ses0))
3199 for (i0 = 0; i0 < dm0->ports_per_host; i0++)
3201 key0.out_port = clib_host_to_net_u16 (lo_port0 +
3202 ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));
3204 if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64))
3207 ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0);
3210 if (PREDICT_FALSE(!ses0))
3212 /* too many sessions for user, send ICMP error packet */
3214 vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3215 icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
3216 ICMP4_destination_unreachable_destination_unreachable_host,
3218 next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3223 new_port0 = ses0->out.out_port;
3225 old_addr0.as_u32 = ip0->src_address.as_u32;
3226 ip0->src_address.as_u32 = new_addr0.as_u32;
3227 vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
3229 sum0 = ip0->checksum;
3230 sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3232 src_address /* changed member */);
3233 ip0->checksum = ip_csum_fold (sum0);
3235 if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
3237 if (tcp0->flags & TCP_FLAG_SYN)
3238 ses0->state = SNAT_SESSION_TCP_SYN_SENT;
3239 else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
3240 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
3241 else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
3242 ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
3243 else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
3244 snat_det_ses_close(dm0, ses0);
3245 else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
3246 ses0->state = SNAT_SESSION_TCP_LAST_ACK;
3247 else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN)
3248 ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
3250 old_port0 = tcp0->src;
3251 tcp0->src = new_port0;
3253 sum0 = tcp0->checksum;
3254 sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
3256 dst_address /* changed member */);
3257 sum0 = ip_csum_update (sum0, old_port0, new_port0,
3258 ip4_header_t /* cheat */,
3259 length /* changed member */);
3260 tcp0->checksum = ip_csum_fold(sum0);
3264 ses0->state = SNAT_SESSION_UDP_ACTIVE;
3265 old_port0 = udp0->src_port;
3266 udp0->src_port = new_port0;
3272 case SNAT_SESSION_UDP_ACTIVE:
3273 ses0->expire = now + sm->udp_timeout;
3275 case SNAT_SESSION_TCP_SYN_SENT:
3276 case SNAT_SESSION_TCP_FIN_WAIT:
3277 case SNAT_SESSION_TCP_CLOSE_WAIT:
3278 case SNAT_SESSION_TCP_LAST_ACK:
3279 ses0->expire = now + sm->tcp_transitory_timeout;
3281 case SNAT_SESSION_TCP_ESTABLISHED:
3282 ses0->expire = now + sm->tcp_established_timeout;
3287 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
3288 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3290 snat_in2out_trace_t *t =
3291 vlib_add_trace (vm, node, b0, sizeof (*t));
3292 t->is_slow_path = 0;
3293 t->sw_if_index = sw_if_index0;
3294 t->next_index = next0;
3295 t->session_index = ~0;
3297 t->session_index = ses0 - dm0->sessions;
3300 pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3302 /* verify speculative enqueue, maybe switch current next frame */
3303 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3304 to_next, n_left_to_next,
3308 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* Account the frame against this node's IN2OUT_PACKETS counter. */
3311 vlib_node_increment_counter (vm, snat_det_in2out_node.index,
3312 SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3314 return frame->n_vectors;
/*
 * Registration of the "nat44-det-in2out" graph node: an internal node run by
 * snat_det_in2out_node_fn, traced with format_snat_in2out_trace, using the
 * common in2out error strings and reserving sizeof (snat_runtime_t) bytes of
 * per-node runtime data.
 * NOTE(review): the next-node count/table opener and closing braces are not
 * visible in this view of the file.
 */
3317 VLIB_REGISTER_NODE (snat_det_in2out_node) = {
3318 .function = snat_det_in2out_node_fn,
3319 .name = "nat44-det-in2out",
3320 .vector_size = sizeof (u32),
3321 .format_trace = format_snat_in2out_trace,
3322 .type = VLIB_NODE_TYPE_INTERNAL,
3324 .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3325 .error_strings = snat_in2out_error_strings,
3327 .runtime_data_bytes = sizeof (snat_runtime_t),
3331 /* edit / add dispositions here */
3333 [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3334 [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3335 [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3339 VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn);
3342 * Get address and port values to be used for ICMP packet translation
3343 * and create session if needed
3345 * @param[in,out] sm NAT main
3346 * @param[in,out] node NAT node runtime
3347 * @param[in] thread_index thread index
3348 * @param[in,out] b0 buffer containing packet to be translated
3349 * @param[out] p_proto protocol used for matching
3350 * @param[out] p_value address and port after NAT translation
3351 * @param[out] p_dont_translate if packet should not be translated
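3352 * @param d optional parameter; used as snat_det_session_t ** to hand back the session
3353 * @param e optional parameter; used as snat_det_map_t ** to hand back the deterministic map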
/*
 * Deterministic-mode ICMP match for the in2out direction.
 *
 * Picks the inside address/port to match on (from the packet itself for
 * non-error ICMP, from the embedded inner packet for ICMP error messages),
 * finds the deterministic map and session for it, creating a session for
 * echo requests when none exists, and reports the translated address, FIB
 * index and port through p_value.
 *
 * ip0 is the IPv4 header of the packet held in b0.
 * NOTE(review): the return statement is not visible in this view; the
 * function presumably returns next0 (a SNAT_IN2OUT_NEXT_* value).
 */
3355 u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
3356 u32 thread_index, vlib_buffer_t *b0,
3357 ip4_header_t *ip0, u8 *p_proto,
3358 snat_session_key_t *p_value,
3359 u8 *p_dont_translate, void *d, void *e)
3361 icmp46_header_t *icmp0;
3365 snat_det_out_key_t key0;
3366 u8 dont_translate = 0;
3368 icmp_echo_header_t *echo0, *inner_echo0 = 0;
3369 ip4_header_t *inner_ip0;
3370 void *l4_header = 0;
3371 icmp46_header_t *inner_icmp0;
3372 snat_det_map_t * dm0 = 0;
3373 ip4_address_t new_addr0;
3375 snat_det_session_t * ses0 = 0;
3376 ip4_address_t in_addr;
/* The echo header is taken to start right after the ICMP header. */
3379 icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
3380 echo0 = (icmp_echo_header_t *)(icmp0+1);
3381 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3382 rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
/* Non-error ICMP: match on the packet's own source address and echo id. */
3384 if (!icmp_is_error_message (icmp0))
3386 protocol = SNAT_PROTOCOL_ICMP;
3387 in_addr = ip0->src_address;
3388 in_port = echo0->identifier;
/* ICMP error: match on the inner packet's destination address and on its
 * echo identifier (ICMP) or destination port (UDP/TCP). */
3392 inner_ip0 = (ip4_header_t *)(echo0+1);
3393 l4_header = ip4_next_header (inner_ip0);
3394 protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
3395 in_addr = inner_ip0->dst_address;
3398 case SNAT_PROTOCOL_ICMP:
3399 inner_icmp0 = (icmp46_header_t*)l4_header;
3400 inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
3401 in_port = inner_echo0->identifier;
3403 case SNAT_PROTOCOL_UDP:
3404 case SNAT_PROTOCOL_TCP:
3405 in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
3408 b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
3409 next0 = SNAT_IN2OUT_NEXT_DROP;
3414 dm0 = snat_det_map_by_user(sm, &in_addr);
3415 if (PREDICT_FALSE(!dm0))
/* No deterministic map for this host: warn, then consult
 * snat_not_translate_fast(); the lines acting on its result are not visible
 * here, the visible fall-through drops with NO_TRANSLATION. */
3417 clib_warning("no match for internal host %U",
3418 format_ip4_address, &in_addr);
3419 if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
3420 IP_PROTOCOL_ICMP, rx_fib_index0)))
3425 next0 = SNAT_IN2OUT_NEXT_DROP;
3426 b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
3430 snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
/* The session key uses the outer destination address and external port 0. */
3432 key0.ext_host_addr = ip0->dst_address;
3433 key0.ext_host_port = 0;
3435 ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
3436 if (PREDICT_FALSE(!ses0))
3438 if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
3439 IP_PROTOCOL_ICMP, rx_fib_index0)))
/* Without an existing session, any type other than echo request is flagged
 * BAD_ICMP_TYPE and dropped. */
3444 if (icmp0->type != ICMP4_echo_request)
3446 b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3447 next0 = SNAT_IN2OUT_NEXT_DROP;
/* Probe the host's outside ports, offset by the echo identifier, and create
 * the session on one of them. */
3450 for (i0 = 0; i0 < dm0->ports_per_host; i0++)
3452 key0.out_port = clib_host_to_net_u16 (lo_port0 +
3453 ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));
3455 if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
3458 ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
3461 if (PREDICT_FALSE(!ses0))
3463 next0 = SNAT_IN2OUT_NEXT_DROP;
3464 b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
3469 if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
3470 !icmp_is_error_message (icmp0)))
3472 b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
3473 next0 = SNAT_IN2OUT_NEXT_DROP;
/* Mark the session active for ICMP and push its expiry out. */
3477 u32 now = (u32) vlib_time_now (sm->vlib_main);
3479 ses0->state = SNAT_SESSION_ICMP_ACTIVE;
3480 ses0->expire = now + sm->icmp_timeout;
/* Report results to the caller. */
3483 *p_proto = protocol;
3486 p_value->addr = new_addr0;
3487 p_value->fib_index = sm->outside_fib_index;
3488 p_value->port = ses0->out.out_port;
3490 *p_dont_translate = dont_translate;
3492 *(snat_det_session_t**)d = ses0;
3494 *(snat_det_map_t**)e = dm0;
3498 /**********************/
3499 /*** worker handoff ***/
3500 /**********************/
/*
 * Common body of the in2out worker-handoff nodes.
 *
 * For each buffer the owning worker is obtained from sm->worker_in2out_cb
 * (IP header + RX FIB index).  Buffers owned by another thread are appended
 * to that worker's handoff queue element; buffers owned by the current
 * thread are placed in a frame for to_node_index.  At the end the local
 * frame is handed to its node and all pending queue elements are shipped.
 *
 * NOTE(review): the fourth parameter is on a line not visible in this view;
 * the two wrappers pass 0 and 1, and it presumably selects between the
 * in2out and in2out-output queue/node pair below - confirm.
 *
 * @return frame->n_vectors
 */
3502 snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
3503 vlib_node_runtime_t * node,
3504 vlib_frame_t * frame,
3507 snat_main_t *sm = &snat_main;
3508 vlib_thread_main_t *tm = vlib_get_thread_main ();
3509 u32 n_left_from, *from, *to_next = 0;
/* Thread-local vectors indexed by worker, kept across calls. */
3510 static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
3511 static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
3513 vlib_frame_queue_elt_t *hf = 0;
3514 vlib_frame_t *f = 0;
3516 u32 n_left_to_next_worker = 0, *to_next_worker = 0;
3517 u32 next_worker_index = 0;
3518 u32 current_worker_index = ~0;
3519 u32 thread_index = vlib_get_thread_index ();
3523 ASSERT (vec_len (sm->workers));
3527 fq_index = sm->fq_in2out_output_index;
3528 to_node_index = sm->in2out_output_node_index;
3532 fq_index = sm->fq_in2out_index;
3533 to_node_index = sm->in2out_node_index;
/* First call on this thread: size the per-worker vectors. */
3536 if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
3538 vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
3540 vec_validate_init_empty (congested_handoff_queue_by_worker_index,
3541 sm->first_worker_index + sm->num_workers - 1,
3542 (vlib_frame_queue_t *) (~0));
3545 from = vlib_frame_vector_args (frame);
3546 n_left_from = frame->n_vectors;
3548 while (n_left_from > 0)
3561 b0 = vlib_get_buffer (vm, bi0);
3563 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
3564 rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
3566 ip0 = vlib_buffer_get_current (b0);
3568 next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
3570 if (PREDICT_FALSE (next_worker_index != thread_index))
/* Target worker changed: record the fill level of the element being left,
 * then fetch the queue element for the new worker. */
3574 if (next_worker_index != current_worker_index)
3577 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3579 hf = vlib_get_worker_handoff_queue_elt (fq_index,
3581 handoff_queue_elt_by_worker_index);
3583 n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
3584 to_next_worker = &hf->buffer_index[hf->n_vectors];
3585 current_worker_index = next_worker_index;
3588 /* enqueue to correct worker thread */
3589 to_next_worker[0] = bi0;
3591 n_left_to_next_worker--;
/* Element full: ship it now and forget it. */
3593 if (n_left_to_next_worker == 0)
3595 hf->n_vectors = VLIB_FRAME_SIZE;
3596 vlib_put_frame_queue_elt (hf);
3597 current_worker_index = ~0;
3598 handoff_queue_elt_by_worker_index[next_worker_index] = 0;
3605 /* if this is 1st frame */
3608 f = vlib_get_frame_to_node (vm, to_node_index);
3609 to_next = vlib_frame_vector_args (f);
3617 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
3618 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
3620 snat_in2out_worker_handoff_trace_t *t =
3621 vlib_add_trace (vm, node, b0, sizeof (*t));
3622 t->next_worker_index = next_worker_index;
3623 t->do_handoff = do_handoff;
3628 vlib_put_frame_to_node (vm, to_node_index, f);
3631 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
3633 /* Ship frames to the worker nodes */
3634 for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
3636 if (handoff_queue_elt_by_worker_index[i])
3638 hf = handoff_queue_elt_by_worker_index[i];
3640 * It works better to let the handoff node
3641 * rate-adapt, always ship the handoff queue element.
/* The leading "1 ||" makes this condition always true. */
3643 if (1 || hf->n_vectors == hf->last_n_vectors)
3645 vlib_put_frame_queue_elt (hf);
3646 handoff_queue_elt_by_worker_index[i] = 0;
3649 hf->last_n_vectors = hf->n_vectors;
3651 congested_handoff_queue_by_worker_index[i] =
3652 (vlib_frame_queue_t *) (~0);
3655 current_worker_index = ~0;
3656 return frame->n_vectors;
/*
 * Node function of "nat44-in2out-worker-handoff": forwards to the shared
 * handoff body with 0 as the final argument.
 */
3660 snat_in2out_worker_handoff_fn (vlib_main_t * vm,
3661 vlib_node_runtime_t * node,
3662 vlib_frame_t * frame)
3664 return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
/*
 * Registration of the "nat44-in2out-worker-handoff" internal graph node,
 * run by snat_in2out_worker_handoff_fn and traced with
 * format_snat_in2out_worker_handoff_trace.
 * NOTE(review): the remaining initializer lines are not visible in this view.
 */
3667 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
3668 .function = snat_in2out_worker_handoff_fn,
3669 .name = "nat44-in2out-worker-handoff",
3670 .vector_size = sizeof (u32),
3671 .format_trace = format_snat_in2out_worker_handoff_trace,
3672 .type = VLIB_NODE_TYPE_INTERNAL,
3681 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
3682 snat_in2out_worker_handoff_fn);
/*
 * Node function of "nat44-in2out-output-worker-handoff": forwards to the
 * shared handoff body with 1 as the final argument.
 */
3685 snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
3686 vlib_node_runtime_t * node,
3687 vlib_frame_t * frame)
3689 return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
/*
 * Registration of the "nat44-in2out-output-worker-handoff" internal graph
 * node, run by snat_in2out_output_worker_handoff_fn and traced with
 * format_snat_in2out_worker_handoff_trace.
 * NOTE(review): the remaining initializer lines are not visible in this view.
 */
3692 VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
3693 .function = snat_in2out_output_worker_handoff_fn,
3694 .name = "nat44-in2out-output-worker-handoff",
3695 .vector_size = sizeof (u32),
3696 .format_trace = format_snat_in2out_worker_handoff_trace,
3697 .type = VLIB_NODE_TYPE_INTERNAL,
3706 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
3707 snat_in2out_output_worker_handoff_fn);
/*
 * Test whether dst_addr is one of the NAT's own addresses: first by scanning
 * sm->addresses for an equal address, then by looking up a static mapping
 * keyed on dst_addr and the outside FIB index in
 * sm->static_mapping_by_external.
 * NOTE(review): the return statements and the remaining key fields are on
 * lines not visible in this view; presumably non-zero is returned on a match
 * and zero otherwise - confirm.
 */
3709 static_always_inline int
3710 is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
3712 snat_address_t * ap;
3713 clib_bihash_kv_8_8_t kv, value;
3714 snat_session_key_t m_key;
3716 vec_foreach (ap, sm->addresses)
3718 if (ap->addr.as_u32 == dst_addr->as_u32)
3722 m_key.addr.as_u32 = dst_addr->as_u32;
3723 m_key.fib_index = sm->outside_fib_index;
3726 kv.key = m_key.as_u64;
3727 if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
/*
 * Node function of "nat44-hairpin-dst".
 *
 * For each buffer the NAT flags are cleared; when the destination address
 * passes is_hairpinning(), the protocol-specific hairpinning helper is run
 * (TCP/UDP, ICMP, or the unknown-protocol variant) and the buffer is marked
 * with SNAT_FLAG_HAIRPINNING.  The initial next index is
 * SNAT_IN2OUT_NEXT_LOOKUP.
 *
 * @return frame->n_vectors
 */
3734 snat_hairpin_dst_fn (vlib_main_t * vm,
3735 vlib_node_runtime_t * node,
3736 vlib_frame_t * frame)
3738 u32 n_left_from, * from, * to_next;
3739 snat_in2out_next_t next_index;
3740 u32 pkts_processed = 0;
3741 snat_main_t * sm = &snat_main;
3743 from = vlib_frame_vector_args (frame);
3744 n_left_from = frame->n_vectors;
3745 next_index = node->cached_next_index;
3747 while (n_left_from > 0)
3751 vlib_get_next_frame (vm, node, next_index,
3752 to_next, n_left_to_next);
3754 while (n_left_from > 0 && n_left_to_next > 0)
3762 /* speculatively enqueue b0 to the current next frame */
3768 n_left_to_next -= 1;
3770 b0 = vlib_get_buffer (vm, bi0);
3771 next0 = SNAT_IN2OUT_NEXT_LOOKUP;
3772 ip0 = vlib_buffer_get_current (b0);
3774 proto0 = ip_proto_to_snat_proto (ip0->protocol);
/* Start from a clean flag word; the hairpinning flag is set further down. */
3776 vnet_buffer (b0)->snat.flags = 0;
3777 if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
3779 if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
3781 udp_header_t * udp0 = ip4_next_header (ip0);
3782 tcp_header_t * tcp0 = (tcp_header_t *) udp0;
3784 snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
3786 else if (proto0 == SNAT_PROTOCOL_ICMP)
3788 icmp46_header_t * icmp0 = ip4_next_header (ip0);
3790 snat_icmp_hairpinning (sm, b0, ip0, icmp0);
3794 snat_hairpinning_unknown_proto (sm, b0, ip0);
3797 vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
3800 pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3802 /* verify speculative enqueue, maybe switch current next frame */
3803 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3804 to_next, n_left_to_next,
3808 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3811 vlib_node_increment_counter (vm, snat_hairpin_dst_node.index,
3812 SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3814 return frame->n_vectors;
/*
 * Registration of the "nat44-hairpin-dst" internal graph node, run by
 * snat_hairpin_dst_fn, with the common in2out error strings and next nodes
 * for drop and IPv4 lookup.  No trace formatter is set in the visible lines.
 * NOTE(review): the next-node count/table opener and closing braces are not
 * visible in this view of the file.
 */
3817 VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
3818 .function = snat_hairpin_dst_fn,
3819 .name = "nat44-hairpin-dst",
3820 .vector_size = sizeof (u32),
3821 .type = VLIB_NODE_TYPE_INTERNAL,
3822 .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3823 .error_strings = snat_in2out_error_strings,
3826 [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
3827 [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
3831 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
3832 snat_hairpin_dst_fn);
/*
 * Node function of "nat44-hairpin-src".
 *
 * Every buffer defaults to SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT.  If the
 * buffer arrived on an output-feature interface that is a NAT inside
 * interface and carries SNAT_FLAG_HAIRPINNING, it is redirected to the
 * in2out-output path: through the worker-handoff node when more than one
 * worker is configured, directly otherwise.
 *
 * @return frame->n_vectors
 */
3835 snat_hairpin_src_fn (vlib_main_t * vm,
3836 vlib_node_runtime_t * node,
3837 vlib_frame_t * frame)
3839 u32 n_left_from, * from, * to_next;
3840 snat_in2out_next_t next_index;
3841 u32 pkts_processed = 0;
3842 snat_main_t *sm = &snat_main;
3844 from = vlib_frame_vector_args (frame);
3845 n_left_from = frame->n_vectors;
3846 next_index = node->cached_next_index;
3848 while (n_left_from > 0)
3852 vlib_get_next_frame (vm, node, next_index,
3853 to_next, n_left_to_next);
3855 while (n_left_from > 0 && n_left_to_next > 0)
3860 snat_interface_t *i;
3863 /* speculatively enqueue b0 to the current next frame */
3869 n_left_to_next -= 1;
3871 b0 = vlib_get_buffer (vm, bi0);
3872 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3873 next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
3875 pool_foreach (i, sm->output_feature_interfaces,
3877 /* Only packets from NAT inside interface */
3878 if ((nat_interface_is_inside(i)) && (sw_if_index0 == i->sw_if_index))
3880 if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
3881 SNAT_FLAG_HAIRPINNING))
3883 if (PREDICT_TRUE (sm->num_workers > 1))
3884 next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
3886 next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
/* NOTE(review): next0 holds SNAT_HAIRPIN_SRC_NEXT_* values here but is
 * compared against SNAT_IN2OUT_NEXT_DROP; this is only equivalent if both
 * drop constants have the same value - confirm against the enums. */
3892 pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
3894 /* verify speculative enqueue, maybe switch current next frame */
3895 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3896 to_next, n_left_to_next,
3900 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3903 vlib_node_increment_counter (vm, snat_hairpin_src_node.index,
3904 SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
3906 return frame->n_vectors;
/*
 * Graph node registration for "nat44-hairpin-src".
 *
 * The node is an internal node driven by snat_hairpin_src_fn, operates on
 * vectors of u32 buffer indices and reuses the in2out error strings for
 * its counters.  It has SNAT_HAIRPIN_SRC_N_NEXT next nodes, indexed by the
 * SNAT_HAIRPIN_SRC_NEXT_* constants listed below.
 */
3909 VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
3910 .function = snat_hairpin_src_fn,
3911 .name = "nat44-hairpin-src",
3912 .vector_size = sizeof (u32),
3913 .type = VLIB_NODE_TYPE_INTERNAL,
3914 .n_errors = ARRAY_LEN(snat_in2out_error_strings),
3915 .error_strings = snat_in2out_error_strings,
3916 .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
/* Next-node name for each SNAT_HAIRPIN_SRC_NEXT_* disposition. */
3918 [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
3919 [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output",
3920 [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
3921 [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
/* Pair snat_hairpin_src_fn with snat_hairpin_src_node through the
 * VLIB_NODE_FUNCTION_MULTIARCH macro (presumably VPP's per-CPU-architecture
 * function variant selection -- confirm against the vlib macro definition). */
3925 VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
3926 snat_hairpin_src_fn);
/*
 * Node function for "nat44-in2out-fast": in2out translation driven only by
 * static mappings.
 *
 * Per buffer (default next node is SNAT_IN2OUT_NEXT_LOOKUP):
 *  - TTL == 1: prepare an ICMP time-exceeded error and send the buffer to
 *    SNAT_IN2OUT_NEXT_ICMP_ERROR.
 *  - ICMP: the next node is whatever icmp_in2out() returns.
 *  - Otherwise: build a key from the source address, source port, protocol
 *    and RX FIB index and call snat_static_mapping_match().  A non-zero
 *    return marks the buffer with SNAT_IN2OUT_ERROR_NO_TRANSLATION and
 *    drops it.  On a match the source address (and the source port, when
 *    the mapped port differs) is rewritten, the IP checksum and, for TCP,
 *    the TCP checksum are updated incrementally, and snat_hairpinning()
 *    is invoked.
 *
 * vm    - vlib main
 * node  - runtime of this node (errors, trace flag, cached_next_index)
 * frame - frame of buffer indices to process
 *
 * Returns frame->n_vectors.  The number of non-dropped packets is added to
 * the SNAT_IN2OUT_ERROR_IN2OUT_PACKETS counter of snat_in2out_fast_node.
 */
3929 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
3930 vlib_node_runtime_t * node,
3931 vlib_frame_t * frame)
3933 u32 n_left_from, * from, * to_next;
3934 snat_in2out_next_t next_index;
3935 u32 pkts_processed = 0;
3936 snat_main_t * sm = &snat_main;
3937 u32 stats_node_index;
3939 stats_node_index = snat_in2out_fast_node.index;
3941 from = vlib_frame_vector_args (frame);
3942 n_left_from = frame->n_vectors;
3943 next_index = node->cached_next_index;
3945 while (n_left_from > 0)
3949 vlib_get_next_frame (vm, node, next_index,
3950 to_next, n_left_to_next);
3952 while (n_left_from > 0 && n_left_to_next > 0)
3960 u32 new_addr0, old_addr0;
3961 u16 old_port0, new_port0;
3962 udp_header_t * udp0;
3963 tcp_header_t * tcp0;
3964 icmp46_header_t * icmp0;
3965 snat_session_key_t key0, sm0;
3969 /* speculatively enqueue b0 to the current next frame */
3975 n_left_to_next -= 1;
3977 b0 = vlib_get_buffer (vm, bi0);
3978 next0 = SNAT_IN2OUT_NEXT_LOOKUP;
/* udp0, tcp0 and icmp0 are three typed views of the same L4 header that
 * follows the IPv4 header. */
3980 ip0 = vlib_buffer_get_current (b0);
3981 udp0 = ip4_next_header (ip0);
3982 tcp0 = (tcp_header_t *) udp0;
3983 icmp0 = (icmp46_header_t *) udp0;
3985 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
3986 rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
/* TTL would expire on forwarding: hand the packet to the ICMP error node
 * as "time exceeded in transit". */
3988 if (PREDICT_FALSE(ip0->ttl == 1))
3990 vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
3991 icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
3992 ICMP4_time_exceeded_ttl_exceeded_in_transit,
3994 next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
3998 proto0 = ip_proto_to_snat_proto (ip0->protocol);
4000 if (PREDICT_FALSE (proto0 == ~0))
4003 if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
4005 next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
4006 rx_fib_index0, node, next0, ~0, 0, 0);
/* Static-mapping lookup key: the packet's source endpoint in the RX FIB. */
4010 key0.addr = ip0->src_address;
4011 key0.protocol = proto0;
4012 key0.port = udp0->src_port;
4013 key0.fib_index = rx_fib_index0;
/* A non-zero return is treated as "no static mapping": drop the packet. */
4015 if (snat_static_mapping_match(sm, key0, &sm0, 0, 0, 0))
4017 b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
4018 next0= SNAT_IN2OUT_NEXT_DROP;
/* Rewrite the source address with the mapped one and patch the IP header
 * checksum incrementally. */
4022 new_addr0 = sm0.addr.as_u32;
4023 new_port0 = sm0.port;
4024 vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
4025 old_addr0 = ip0->src_address.as_u32;
4026 ip0->src_address.as_u32 = new_addr0;
4028 sum0 = ip0->checksum;
4029 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
4031 src_address /* changed member */);
4032 ip0->checksum = ip_csum_fold (sum0);
/* in2out rewrites the SOURCE port (see the src_port assignments below), so
 * the "does the port change?" test must look at the current source port.
 * Comparing against dst_port would skip the rewrite whenever the mapped
 * port happened to equal the destination port. */
4034 if (PREDICT_FALSE(new_port0 != udp0->src_port))
4036 if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
4038 old_port0 = tcp0->src_port;
4039 tcp0->src_port = new_port0;
/* TCP checksum covers the pseudo-header address and the port: fold in both
 * changes. */
4041 sum0 = tcp0->checksum;
4042 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
4044 dst_address /* changed member */);
4045 sum0 = ip_csum_update (sum0, old_port0, new_port0,
4046 ip4_header_t /* cheat */,
4047 length /* changed member */);
4048 tcp0->checksum = ip_csum_fold(sum0);
4052 old_port0 = udp0->src_port;
4053 udp0->src_port = new_port0;
/* Port unchanged: for TCP only the address change needs to be reflected in
 * the checksum. */
4059 if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
4061 sum0 = tcp0->checksum;
4062 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
4064 dst_address /* changed member */);
4065 tcp0->checksum = ip_csum_fold(sum0);
4070 snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
/* Record RX interface and chosen next index when tracing is enabled for
 * both the node and this buffer. */
4073 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
4074 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
4076 snat_in2out_trace_t *t =
4077 vlib_add_trace (vm, node, b0, sizeof (*t));
4078 t->sw_if_index = sw_if_index0;
4079 t->next_index = next0;
4082 pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
4084 /* verify speculative enqueue, maybe switch current next frame */
4085 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
4086 to_next, n_left_to_next,
4090 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
4093 vlib_node_increment_counter (vm, stats_node_index,
4094 SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
4096 return frame->n_vectors;
/*
 * Graph node registration for "nat44-in2out-fast".
 *
 * The node is an internal node driven by snat_in2out_fast_static_map_fn,
 * operates on vectors of u32 buffer indices, formats its traces with
 * format_snat_in2out_fast_trace and reserves sizeof (snat_runtime_t) bytes
 * of per-node runtime data.  It has SNAT_IN2OUT_N_NEXT next nodes, indexed
 * by the SNAT_IN2OUT_NEXT_* constants listed below.
 */
4100 VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
4101 .function = snat_in2out_fast_static_map_fn,
4102 .name = "nat44-in2out-fast",
4103 .vector_size = sizeof (u32),
4104 .format_trace = format_snat_in2out_fast_trace,
4105 .type = VLIB_NODE_TYPE_INTERNAL,
4107 .n_errors = ARRAY_LEN(snat_in2out_error_strings),
4108 .error_strings = snat_in2out_error_strings,
4110 .runtime_data_bytes = sizeof (snat_runtime_t),
4112 .n_next_nodes = SNAT_IN2OUT_N_NEXT,
4114 /* edit / add dispositions here */
4116 [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
4117 [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
4118 [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath",
4119 [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
4120 [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass",
/* Pair snat_in2out_fast_static_map_fn with snat_in2out_fast_node through the
 * VLIB_NODE_FUNCTION_MULTIARCH macro (presumably VPP's per-CPU-architecture
 * function variant selection -- confirm against the vlib macro definition). */
4124 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);