2 * Copyright (c) 2018 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
17 * @brief NAT44 hairpinning
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/fib/ip4_fib.h>
24 #include <nat/nat_inlines.h>
/* Next-node indices of the nat44-hairpin-src node.  The node name bound to
   each index is listed in that node's VLIB_REGISTER_NODE block;
   SNAT_HAIRPIN_SRC_N_NEXT is used there as the number of next nodes. */
28 SNAT_HAIRPIN_SRC_NEXT_DROP,
29 SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
30 SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
31 SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
32 SNAT_HAIRPIN_SRC_N_NEXT,
33 } snat_hairpin_src_next_t;
/* Next-node indices used by the nat44-hairpinning and nat44-hairpin-dst
   nodes; their registrations bind LOOKUP to "ip4-lookup" and DROP to
   "error-drop". */
37 NAT_HAIRPIN_NEXT_LOOKUP,
38 NAT_HAIRPIN_NEXT_DROP,
48 } nat_hairpin_trace_t;
51 format_nat_hairpin_trace (u8 * s, va_list * args)
53 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
54 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
55 nat_hairpin_trace_t *t = va_arg (*args, nat_hairpin_trace_t *);
/* Trace formatter: vm and node are consumed from args but unused; the
   nat_hairpin_trace_t record supplies the rewritten destination address,
   the port (converted from network to host byte order for printing) and
   the fib index. */
58 format (s, "new dst addr %U port %u fib-index %u", format_ip4_address,
59 &t->addr, clib_net_to_host_u16 (t->port), t->fib_index);
/* A session_index of ~0 is printed as a static mapping; any other value is
   printed as the session index. */
60 if (~0 == t->session_index)
62 s = format (s, " is-static-mapping");
66 s = format (s, " session-index %u", t->session_index);
72 extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local;
/*
 * is_hairpinning: test whether dst_addr is an address handled by this NAT.
 *
 * The visible checks compare dst_addr against every entry of sm->addresses
 * and then search sm->static_mapping_by_external with a key built from the
 * address alone (the remaining init_nat_k arguments are 0).
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * nat44-hairpin-dst treats a non-zero result as "hairpin this packet".
 */
74 static_always_inline int
75 is_hairpinning (snat_main_t * sm, ip4_address_t * dst_addr)
78 clib_bihash_kv_8_8_t kv, value;
/* first: is the destination one of the addresses in sm->addresses? */
81 vec_foreach (ap, sm->addresses)
83 if (ap->addr.as_u32 == dst_addr->as_u32)
/* second: is there a static mapping keyed by this external address? */
88 init_nat_k (&kv, *dst_addr, 0, 0, 0);
89 if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
/*
 * snat_hairpinning: redirect a TCP/UDP packet whose destination is behind
 * the same NAT to the internal address and port.
 *
 * The destination is resolved with snat_static_mapping_match () or with a
 * lookup in sm->out2in; the result supplies the new destination address and
 * port, and the fib index stored in vnet_buffer (b0)->sw_if_index[VLIB_TX].
 * The IP header checksum and, for TCP, the TCP checksum are patched
 * incrementally.  When do_trace is set and both the node and the buffer are
 * traced, a nat_hairpin_trace_t record is added.
 *
 * NOTE(review): the return statements are not visible in this excerpt; the
 * in-body comment states that 0 is returned when nothing changed, and
 * nat44_hairpinning_fn_inline selects ip4-lookup on a non-zero result.
 */
95 #ifndef CLIB_MARCH_VARIANT
97 snat_hairpinning (vlib_main_t *vm, vlib_node_runtime_t *node, snat_main_t *sm,
98 vlib_buffer_t *b0, ip4_header_t *ip0, udp_header_t *udp0,
99 tcp_header_t *tcp0, u32 proto0, int do_trace)
101 snat_session_t *s0 = NULL;
102 clib_bihash_kv_8_8_t kv0, value0;
104 u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si = ~0;
105 u16 new_dst_port0 = ~0, old_dst_port0;
107 ip4_address_t sm0_addr;
/* remember the TX fib index so the "nothing changed" check below can
   compare against it */
110 u32 old_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
111 /* Check if destination is a static mapping */
112 if (!snat_static_mapping_match
113 (sm, ip0->dst_address, udp0->dst_port, sm->outside_fib_index, proto0,
114 &sm0_addr, &sm0_port, &sm0_fib_index, 1, 0, 0, 0, 0, 0, 0))
116 new_dst_addr0 = sm0_addr.as_u32;
117 new_dst_port0 = sm0_port;
118 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sm0_fib_index;
120 /* or active session */
/* thread index owning the session: derived from the destination port as
   (port - 1024) / port_per_thread when there is more than one worker,
   sm->num_workers otherwise */
123 if (sm->num_workers > 1)
125 (clib_net_to_host_u16 (udp0->dst_port) -
126 1024) / sm->port_per_thread;
128 ti = sm->num_workers;
130 init_nat_k (&kv0, ip0->dst_address, udp0->dst_port,
131 sm->outside_fib_index, proto0);
132 rv = clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0);
/* session found: take the new destination from its in2out side */
139 si = nat_value_get_session_index (&value0);
140 s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
141 new_dst_addr0 = s0->in2out.addr.as_u32;
142 new_dst_port0 = s0->in2out.port;
143 vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
146 /* Check if anything has changed and if not, then return 0. This
147 helps avoid infinite loop, repeating the three nodes
148 nat44-hairpinning-->ip4-lookup-->ip4-local, in case nothing has
150 old_dst_addr0 = ip0->dst_address.as_u32;
151 old_dst_port0 = tcp0->dst;
152 if (new_dst_addr0 == old_dst_addr0
153 && new_dst_port0 == old_dst_port0
154 && vnet_buffer (b0)->sw_if_index[VLIB_TX] == old_sw_if_index)
157 /* Destination is behind the same NAT, use internal address and port */
/* rewrite the IP destination and patch the IP header checksum */
160 old_dst_addr0 = ip0->dst_address.as_u32;
161 ip0->dst_address.as_u32 = new_dst_addr0;
162 sum0 = ip0->checksum;
163 sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
164 ip4_header_t, dst_address);
165 ip0->checksum = ip_csum_fold (sum0);
/* the destination port is read through tcp0 for both protocols; the
   callers in this file pass tcp0 as a cast of udp0 */
167 old_dst_port0 = tcp0->dst;
168 if (PREDICT_TRUE (new_dst_port0 != old_dst_port0))
170 if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
/* TCP, port changed: checksum is patched for both the address and the
   port change */
172 tcp0->dst = new_dst_port0;
173 sum0 = tcp0->checksum;
174 sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
175 ip4_header_t, dst_address);
/* NOTE(review): "cheat" - presumably the ip4_header_t length field is
   named only to give ip_csum_update a 16-bit field for the port; confirm
   against the macro definition */
176 sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
177 ip4_header_t /* cheat */ , length);
178 tcp0->checksum = ip_csum_fold (sum0);
182 udp0->dst_port = new_dst_port0;
/* TCP, port unchanged: only the address change is folded into the TCP
   checksum */
188 if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
190 sum0 = tcp0->checksum;
191 sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
192 ip4_header_t, dst_address);
193 tcp0->checksum = ip_csum_fold (sum0);
/* optional tracing: record the new destination and the chosen fib index;
   session_index is either si or ~0 (the selecting condition is not visible
   in this excerpt) */
201 if (do_trace && PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
202 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
204 nat_hairpin_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
205 t->addr.as_u32 = new_dst_addr0;
206 t->port = new_dst_port0;
207 t->fib_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
210 t->session_index = si;
214 t->session_index = ~0;
/*
 * snat_icmp_hairpinning: ICMP counterpart of snat_hairpinning.
 *
 * For ICMP error messages the embedded (inner) IP packet is edited: its
 * source address and source port are replaced with the in2out address and
 * port of the session found in sm->out2in, and the ICMP checksum and inner
 * IP header checksum are patched to match.
 * The remaining code searches sm->static_mapping_by_external by destination
 * address, and sm->out2in with the ICMP echo identifier in the port slot of
 * the key; a session hit also rewrites the echo identifier.
 * Finally the outer IP destination address and checksum are rewritten to
 * new_dst_addr0.
 *
 * NOTE(review): branch structure (else/return/goto lines) and the return
 * value are not visible in this excerpt.
 */
221 #ifndef CLIB_MARCH_VARIANT
223 snat_icmp_hairpinning (snat_main_t *sm, vlib_buffer_t *b0, ip4_header_t *ip0,
224 icmp46_header_t *icmp0)
226 clib_bihash_kv_8_8_t kv0, value0;
227 u32 old_dst_addr0, new_dst_addr0;
228 u32 old_addr0, new_addr0;
229 u16 old_port0, new_port0;
230 u16 old_checksum0, new_checksum0;
234 snat_static_mapping_t *m0;
/* ICMP type is taken from the buffer metadata rather than from icmp0 */
236 if (icmp_type_is_error_message
237 (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
239 ip4_header_t *inner_ip0 = 0;
240 tcp_udp_header_t *l4_header = 0;
/* the inner IP header sits after the icmp46 header plus one
   icmp_echo_header_t worth of bytes */
242 inner_ip0 = (ip4_header_t *) ((icmp_echo_header_t *) (icmp0 + 1) + 1);
243 l4_header = ip4_next_header (inner_ip0);
244 u32 protocol = ip_proto_to_nat_proto (inner_ip0->protocol);
/* only inner TCP and UDP packets are processed further */
246 if (protocol != NAT_PROTOCOL_TCP && protocol != NAT_PROTOCOL_UDP)
/* session key: outer destination address + inner source port */
249 init_nat_k (&kv0, ip0->dst_address, l4_header->src_port,
250 sm->outside_fib_index, protocol);
251 if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
253 si = nat_value_get_session_index (&value0);
/* NOTE(review): no assignment to ti is visible in this branch; confirm the
   declaration initializes it and that the index is correct when
   sm->num_workers > 1 */
254 s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
255 new_dst_addr0 = s0->in2out.addr.as_u32;
256 vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
258 /* update inner source IP address */
259 old_addr0 = inner_ip0->src_address.as_u32;
260 inner_ip0->src_address.as_u32 = new_dst_addr0;
261 new_addr0 = inner_ip0->src_address.as_u32;
262 sum0 = icmp0->checksum;
263 sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
265 icmp0->checksum = ip_csum_fold (sum0);
267 /* update inner IP header checksum */
268 old_checksum0 = inner_ip0->checksum;
269 sum0 = inner_ip0->checksum;
270 sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
272 inner_ip0->checksum = ip_csum_fold (sum0);
/* the inner checksum field itself changed, so the ICMP checksum is patched
   once more for the old -> new inner checksum value */
273 new_checksum0 = inner_ip0->checksum;
274 sum0 = icmp0->checksum;
275 sum0 = ip_csum_update (sum0, old_checksum0, new_checksum0, ip4_header_t,
277 icmp0->checksum = ip_csum_fold (sum0);
279 /* update inner source port */
280 old_port0 = l4_header->src_port;
281 l4_header->src_port = s0->in2out.port;
282 new_port0 = l4_header->src_port;
283 sum0 = icmp0->checksum;
284 sum0 = ip_csum_update (sum0, old_port0, new_port0, tcp_udp_header_t,
286 icmp0->checksum = ip_csum_fold (sum0);
/* static mapping lookup keyed by destination address and outside fib only */
290 init_nat_k (&kv0, ip0->dst_address, 0, sm->outside_fib_index, 0);
291 if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv0,
/* session lookup using the ICMP echo identifier in place of a port */
294 icmp_echo_header_t *echo0 = (icmp_echo_header_t *) (icmp0 + 1);
295 u16 icmp_id0 = echo0->identifier;
296 init_nat_k (&kv0, ip0->dst_address, icmp_id0, sm->outside_fib_index,
/* thread index derived from the identifier the same way snat_hairpinning
   derives it from the destination port */
298 if (sm->num_workers > 1)
300 (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread;
302 ti = sm->num_workers;
303 int rv = clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0);
306 si = nat_value_get_session_index (&value0);
307 s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
308 new_dst_addr0 = s0->in2out.addr.as_u32;
309 vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
/* rewrite the echo identifier to the session's in2out port and patch the
   ICMP checksum accordingly */
310 echo0->identifier = s0->in2out.port;
311 sum0 = icmp0->checksum;
312 sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
313 icmp_echo_header_t, identifier);
314 icmp0->checksum = ip_csum_fold (sum0);
/* static mapping: use its local address; its fib index is applied only when
   no TX fib index has been set (~0) */
321 m0 = pool_elt_at_index (sm->static_mappings, value0.value);
323 new_dst_addr0 = m0->local_addr.as_u32;
324 if (vnet_buffer (b0)->sw_if_index[VLIB_TX] == ~0)
325 vnet_buffer (b0)->sw_if_index[VLIB_TX] = m0->fib_index;
328 /* Destination is behind the same NAT, use internal address and port */
331 old_dst_addr0 = ip0->dst_address.as_u32;
332 ip0->dst_address.as_u32 = new_dst_addr0;
333 sum0 = ip0->checksum;
334 sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
335 ip4_header_t, dst_address);
336 ip0->checksum = ip_csum_fold (sum0);
/*
 * nat_hairpinning_sm_unknown_proto: hairpinning for packets that
 * nat44-hairpin-dst classifies as neither TCP, UDP nor ICMP.
 *
 * Searches sm->static_mapping_by_external with a key built from the
 * destination address alone, rewrites the IP destination to the mapping's
 * local address, patches the IP header checksum, and sets the TX fib index
 * to the mapping's fib index when none has been set (~0).
 *
 * NOTE(review): what happens when the search finds nothing is not visible
 * in this excerpt.
 */
342 #ifndef CLIB_MARCH_VARIANT
344 nat_hairpinning_sm_unknown_proto (snat_main_t * sm,
345 vlib_buffer_t * b, ip4_header_t * ip)
347 clib_bihash_kv_8_8_t kv, value;
348 snat_static_mapping_t *m;
349 u32 old_addr, new_addr;
/* address-only key: the remaining init_nat_k arguments are 0 */
352 init_nat_k (&kv, ip->dst_address, 0, 0, 0);
353 if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
356 m = pool_elt_at_index (sm->static_mappings, value.value);
/* swap in the local address and keep the old one for the checksum delta */
358 old_addr = ip->dst_address.as_u32;
359 new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
361 sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
362 ip->checksum = ip_csum_fold (sum);
/* do not override a TX fib index that is already set */
364 if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
365 vnet_buffer (b)->sw_if_index[VLIB_TX] = m->fib_index;
370 nat44_hairpinning_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
373 u32 n_left_from, *from, *to_next;
374 nat_hairpin_next_t next_index;
375 snat_main_t *sm = &snat_main;
376 vnet_feature_main_t *fm = &feature_main;
377 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
378 vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
/* Worker body of the nat44-hairpinning node: processes the frame one buffer
   at a time, runs snat_hairpinning () on each packet and returns the number
   of vectors in the frame.  cm is the feature config of the ip4-local arc
   (vnet_feat_arc_ip4_local). */
380 from = vlib_frame_vector_args (frame);
381 n_left_from = frame->n_vectors;
382 next_index = node->cached_next_index;
384 while (n_left_from > 0)
388 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
390 while (n_left_from > 0 && n_left_to_next > 0)
401 /* speculatively enqueue b0 to the current next frame */
/* udp0 and tcp0 point at the same L4 header */
409 b0 = vlib_get_buffer (vm, bi0);
410 ip0 = vlib_buffer_get_current (b0);
411 udp0 = ip4_next_header (ip0);
412 tcp0 = (tcp_header_t *) udp0;
413 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
415 proto0 = ip_proto_to_nat_proto (ip0->protocol);
/* feature-arc config lookup; the remaining arguments are not visible in
   this excerpt */
417 vnet_get_config_data (&cm->config_main, &b0->current_config_index,
/* a non-zero result from snat_hairpinning sends the packet to ip4-lookup */
420 if (snat_hairpinning (vm, node, sm, b0, ip0, udp0, tcp0, proto0,
422 next0 = NAT_HAIRPIN_NEXT_LOOKUP;
/* the hairpinning counter is bumped only for packets that are not dropped */
424 if (next0 != NAT_HAIRPIN_NEXT_DROP)
426 vlib_increment_simple_counter (&sm->counters.hairpinning,
427 vm->thread_index, sw_if_index0,
431 /* verify speculative enqueue, maybe switch current next frame */
432 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
433 to_next, n_left_to_next,
437 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
440 return frame->n_vectors;
/* Node function of nat44_hairpinning_node: forwards directly to
   nat44_hairpinning_fn_inline () and returns its result. */
443 VLIB_NODE_FN (nat44_hairpinning_node) (vlib_main_t * vm,
444 vlib_node_runtime_t * node,
445 vlib_frame_t * frame)
447 return nat44_hairpinning_fn_inline (vm, node, frame);
/* Registration of the "nat44-hairpinning" internal node: u32 vector
   elements, traces printed by format_nat_hairpin_trace, next nodes
   error-drop and ip4-lookup. */
451 VLIB_REGISTER_NODE (nat44_hairpinning_node) = {
452 .name = "nat44-hairpinning",
453 .vector_size = sizeof (u32),
454 .type = VLIB_NODE_TYPE_INTERNAL,
455 .format_trace = format_nat_hairpin_trace,
456 .n_next_nodes = NAT_HAIRPIN_N_NEXT,
458 [NAT_HAIRPIN_NEXT_DROP] = "error-drop",
459 [NAT_HAIRPIN_NEXT_LOOKUP] = "ip4-lookup",
465 snat_hairpin_dst_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
468 u32 n_left_from, *from, *to_next;
469 nat_hairpin_next_t next_index;
470 snat_main_t *sm = &snat_main;
/* Worker body of the nat44-hairpin-dst node: for every buffer whose
   destination passes is_hairpinning (), rewrites the destination with the
   protocol-specific helper and marks the buffer with SNAT_FLAG_HAIRPINNING.
   Returns the number of vectors in the frame. */
472 from = vlib_frame_vector_args (frame);
473 n_left_from = frame->n_vectors;
474 next_index = node->cached_next_index;
476 while (n_left_from > 0)
480 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
482 while (n_left_from > 0 && n_left_to_next > 0)
491 /* speculatively enqueue b0 to the current next frame */
/* default disposition is ip4-lookup */
499 b0 = vlib_get_buffer (vm, bi0);
500 next0 = NAT_HAIRPIN_NEXT_LOOKUP;
501 ip0 = vlib_buffer_get_current (b0);
502 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
504 proto0 = ip_proto_to_nat_proto (ip0->protocol);
/* flags are cleared first and set again only for hairpinned packets */
506 vnet_buffer (b0)->snat.flags = 0;
507 if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
/* dispatch on protocol: TCP/UDP, ICMP, everything else */
509 if (proto0 == NAT_PROTOCOL_TCP || proto0 == NAT_PROTOCOL_UDP)
511 udp_header_t *udp0 = ip4_next_header (ip0);
512 tcp_header_t *tcp0 = (tcp_header_t *) udp0;
514 snat_hairpinning (vm, node, sm, b0, ip0, udp0, tcp0, proto0,
517 else if (proto0 == NAT_PROTOCOL_ICMP)
519 icmp46_header_t *icmp0 = ip4_next_header (ip0);
521 snat_icmp_hairpinning (sm, b0, ip0, icmp0);
525 nat_hairpinning_sm_unknown_proto (sm, b0, ip0);
528 vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
/* the hairpinning counter is bumped only for packets that are not dropped */
532 if (next0 != NAT_HAIRPIN_NEXT_DROP)
534 vlib_increment_simple_counter (&sm->counters.hairpinning,
535 vm->thread_index, sw_if_index0,
539 /* verify speculative enqueue, maybe switch current next frame */
540 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
541 to_next, n_left_to_next,
545 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
548 return frame->n_vectors;
/* Node function of snat_hairpin_dst_node: forwards directly to
   snat_hairpin_dst_fn_inline () and returns its result. */
551 VLIB_NODE_FN (snat_hairpin_dst_node) (vlib_main_t * vm,
552 vlib_node_runtime_t * node,
553 vlib_frame_t * frame)
555 return snat_hairpin_dst_fn_inline (vm, node, frame);
/* Registration of the "nat44-hairpin-dst" internal node: u32 vector
   elements, traces printed by format_nat_hairpin_trace, next nodes
   error-drop and ip4-lookup. */
559 VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
560 .name = "nat44-hairpin-dst",
561 .vector_size = sizeof (u32),
562 .type = VLIB_NODE_TYPE_INTERNAL,
563 .format_trace = format_nat_hairpin_trace,
564 .n_next_nodes = NAT_HAIRPIN_N_NEXT,
566 [NAT_HAIRPIN_NEXT_DROP] = "error-drop",
567 [NAT_HAIRPIN_NEXT_LOOKUP] = "ip4-lookup",
573 snat_hairpin_src_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
576 u32 n_left_from, *from, *to_next;
577 snat_hairpin_src_next_t next_index;
578 snat_main_t *sm = &snat_main;
/* Worker body of the nat44-hairpin-src node: packets received on a NAT
   inside output-feature interface that carry SNAT_FLAG_HAIRPINNING are
   steered to one of the SNAT_IN2OUT next nodes; the starting next0 comes
   from vnet_feature_next ().  Returns the number of vectors in the frame. */
580 from = vlib_frame_vector_args (frame);
581 n_left_from = frame->n_vectors;
582 next_index = node->cached_next_index;
584 while (n_left_from > 0)
588 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
590 while (n_left_from > 0 && n_left_to_next > 0)
598 /* speculatively enqueue b0 to the current next frame */
606 b0 = vlib_get_buffer (vm, bi0);
607 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
608 vnet_feature_next (&next0, b0);
/* scan the output-feature interfaces for the RX interface of this packet */
611 pool_foreach (i, sm->output_feature_interfaces)
613 /* Only packets from NAT inside interface */
614 if ((nat_interface_is_inside(i)) && (sw_if_index0 == i->sw_if_index))
616 if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
617 SNAT_FLAG_HAIRPINNING))
/* with more than one worker the worker-handoff variant of the next node is
   chosen */
619 if (PREDICT_TRUE (sm->num_workers > 1))
620 next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
622 next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
/* the hairpinning counter is bumped only for packets that are not dropped */
629 if (next0 != SNAT_HAIRPIN_SRC_NEXT_DROP)
631 vlib_increment_simple_counter (&sm->counters.hairpinning,
632 vm->thread_index, sw_if_index0,
636 /* verify speculative enqueue, maybe switch current next frame */
637 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
638 to_next, n_left_to_next,
642 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
645 return frame->n_vectors;
/* Node function of snat_hairpin_src_node: forwards directly to
   snat_hairpin_src_fn_inline () and returns its result. */
648 VLIB_NODE_FN (snat_hairpin_src_node) (vlib_main_t * vm,
649 vlib_node_runtime_t * node,
650 vlib_frame_t * frame)
652 return snat_hairpin_src_fn_inline (vm, node, frame);
/* Registration of the "nat44-hairpin-src" internal node: u32 vector
   elements, no trace formatter among the visible fields, and four next
   nodes indexed by snat_hairpin_src_next_t. */
656 VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
657 .name = "nat44-hairpin-src",
658 .vector_size = sizeof (u32),
659 .type = VLIB_NODE_TYPE_INTERNAL,
660 .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
662 [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
663 [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output",
664 [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
665 [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff",
671 * fd.io coding-style-patch-verification: ON
674 * eval: (c-set-style "gnu")