2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <vlib/vlib.h>
17 #include <vnet/vnet.h>
18 #include <vnet/pg/pg.h>
19 #include <vnet/handoff.h>
21 #include <vnet/ip/ip.h>
22 #include <vnet/ip/udp.h>
23 #include <vnet/ethernet/ethernet.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <snat/snat.h>
26 #include <snat/snat_ipfix_logging.h>
28 #include <vppinfra/hash.h>
29 #include <vppinfra/error.h>
30 #include <vppinfra/elog.h>
/* Per-packet trace record of the out2in nodes. The format functions below
 * read its sw_if_index, next_index and session_index members. */
36 } snat_out2in_trace_t;
/* Trace record of the worker hand-off node. */
/* Worker index chosen for the traced packet. */
39 u32 next_worker_index;
41 } snat_out2in_worker_handoff_trace_t;
43 /* packet trace format function */
/*
 * Appends a one-line description of a snat-out2in trace record to s.
 * The va_list supplies, in order, a vlib_main_t *, a vlib_node_t * (both
 * unused) and the snat_out2in_trace_t * to print.
 */
44 static u8 * format_snat_out2in_trace (u8 * s, va_list * args)
46 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
47 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
48 snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
50 s = format (s, "SNAT_OUT2IN: sw_if_index %d, next index %d, session index %d",
51 t->sw_if_index, t->next_index, t->session_index);
/*
 * Trace formatter of the fast out2in node. Same argument layout as
 * format_snat_out2in_trace, but only the RX interface and the next index
 * are printed; the session index is not.
 */
55 static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args)
57 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
58 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
59 snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
61 s = format (s, "SNAT_OUT2IN_FAST: sw_if_index %d, next index %d",
62 t->sw_if_index, t->next_index);
/*
 * Trace formatter of the worker hand-off node. Prints whether the packet
 * was handed to another worker or kept on the same one, followed by the
 * selected worker index.
 */
66 static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args)
68 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
69 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
70 snat_out2in_worker_handoff_trace_t * t =
71 va_arg (*args, snat_out2in_worker_handoff_trace_t *);
/* Label derived from the do_handoff flag stored in the trace record. */
74 m = t->do_handoff ? "next worker" : "same worker";
75 s = format (s, "SNAT_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
/* Forward declarations of the node registrations defined further down.
 * The node functions use snat_out2in_node.index and
 * snat_out2in_fast_node.index before those registrations appear. */
80 vlib_node_registration_t snat_out2in_node;
81 vlib_node_registration_t snat_out2in_fast_node;
82 vlib_node_registration_t snat_out2in_worker_handoff_node;
/* X-macro list of the node error counters, one _(symbol, description)
 * entry per counter. */
84 #define foreach_snat_out2in_error \
85 _(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \
86 _(OUT2IN_PACKETS, "Good out2in packets processed") \
87 _(BAD_ICMP_TYPE, "icmp type not echo-reply") \
88 _(NO_TRANSLATION, "No translation")
/* First expansion: each entry becomes a SNAT_OUT2IN_ERROR_ enumerator. */
91 #define _(sym,str) SNAT_OUT2IN_ERROR_##sym,
92 foreach_snat_out2in_error
95 } snat_out2in_error_t;
/* Second expansion: the description strings, in the same order as the
 * enumerators, so an enumerator indexes its own string. */
97 static char * snat_out2in_error_strings[] = {
98 #define _(sym,string) string,
99 foreach_snat_out2in_error
/* Next-node dispositions of the out2in nodes. The node registrations map
 * them to error-drop, ip4-lookup and ip4-icmp-error respectively. */
104 SNAT_OUT2IN_NEXT_DROP,
105 SNAT_OUT2IN_NEXT_LOOKUP,
106 SNAT_OUT2IN_NEXT_ICMP_ERROR,
108 } snat_out2in_next_t;
111 * @brief Create session for static mapping.
113 * Create NAT session initiated by host from external network with static
116 * @param sm SNAT main.
117 * @param b0 Vlib buffer.
118 * @param in2out In2out SNAT session key.
119 * @param out2in Out2in SNAT session key.
120 * @param node Vlib node.
122 * @returns SNAT session if successfully created otherwise 0.
/*
 * Creates a session for traffic that matched a static mapping.
 *
 * Steps visible here: find or create the per-thread user entry keyed by the
 * inside address and FIB index, allocate a session flagged as a static
 * mapping, link it onto the tail of the per-user session list, insert it
 * into the in2out and out2in hashes, and report the creation through IPFIX
 * logging. All pools are the per-thread ones selected by cpu_index.
 */
124 static inline snat_session_t *
125 create_session_for_static_mapping (snat_main_t *sm,
127 snat_session_key_t in2out,
128 snat_session_key_t out2in,
129 vlib_node_runtime_t * node,
133 snat_user_key_t user_key;
135 clib_bihash_kv_8_8_t kv0, value0;
136 dlist_elt_t * per_user_translation_list_elt;
137 dlist_elt_t * per_user_list_head_elt;
/* The user is identified by inside address plus inside FIB index. */
139 user_key.addr = in2out.addr;
140 user_key.fib_index = in2out.fib_index;
141 kv0.key = user_key.as_u64;
143 /* Ever heard of the "user" = inside ip4 address before? */
144 if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
146 /* no, make a new one */
147 pool_get (sm->per_thread_data[cpu_index].users, u);
148 memset (u, 0, sizeof (*u));
149 u->addr = in2out.addr;
/* Every user owns a list head element anchoring its session list. */
151 pool_get (sm->per_thread_data[cpu_index].list_pool,
152 per_user_list_head_elt);
154 u->sessions_per_user_list_head_index = per_user_list_head_elt -
155 sm->per_thread_data[cpu_index].list_pool;
157 clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
158 u->sessions_per_user_list_head_index);
/* The user hash stores the pool index of the user entry. */
160 kv0.value = u - sm->per_thread_data[cpu_index].users;
163 clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
165 /* add non-translated packets worker lookup */
166 kv0.value = cpu_index;
167 clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
171 u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
/* Allocate and zero the session itself. */
175 pool_get (sm->per_thread_data[cpu_index].sessions, s);
176 memset (s, 0, sizeof (*s));
/* ~0 marks that no outside address pool entry is attached. */
178 s->outside_address_index = ~0;
179 s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
180 u->nstaticsessions++;
182 /* Create list elts */
183 pool_get (sm->per_thread_data[cpu_index].list_pool,
184 per_user_translation_list_elt);
185 clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
186 per_user_translation_list_elt -
187 sm->per_thread_data[cpu_index].list_pool);
/* The list element carries the pool index of the session. */
189 per_user_translation_list_elt->value =
190 s - sm->per_thread_data[cpu_index].sessions;
192 per_user_translation_list_elt - sm->per_thread_data[cpu_index].list_pool;
193 s->per_user_list_head_index = u->sessions_per_user_list_head_index;
195 clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
196 s->per_user_list_head_index,
197 per_user_translation_list_elt -
198 sm->per_thread_data[cpu_index].list_pool);
202 s->in2out.protocol = out2in.protocol;
204 /* Add to translation hashes */
205 kv0.key = s->in2out.as_u64;
206 kv0.value = s - sm->per_thread_data[cpu_index].sessions;
/* A failed insert is only logged; the session is not rolled back here. */
207 if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
208 clib_warning ("in2out key add failed");
210 kv0.key = s->out2in.as_u64;
211 kv0.value = s - sm->per_thread_data[cpu_index].sessions;
213 if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
214 clib_warning ("out2in key add failed");
/* Report the new session through IPFIX logging. */
217 snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
218 s->out2in.addr.as_u32,
222 s->in2out.fib_index);
/* Source and destination port pair. NOTE(review): presumably the members
 * of the tcp_udp_header_t used by the casts in icmp_out2in_slow_path; the
 * enclosing typedef is not visible here - confirm. */
227 u16 src_port, dst_port;
/*
 * Slow-path out2in translation of one ICMP packet.
 *
 * The lookup key is built from the outer destination address and the RX FIB
 * index. For echo replies the key port is the echo identifier. For the ICMP
 * error types listed below, protocol and port come from the packet embedded
 * behind the ICMP header (inner echo identifier, or inner TCP/UDP source
 * port). The key is searched in sm->out2in; on a miss a static mapping is
 * tried and a session is created for it, except for error messages, which
 * are marked for drop instead.
 *
 * With a session in hand the ICMP checksum is verified, the outer
 * destination address is rewritten, and either the echo identifier or the
 * embedded source address and port are rewritten, each with an incremental
 * checksum update. Session accounting is refreshed at the end.
 *
 * Only part of the parameter list is visible here. The callers pass
 * (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next0, now,
 * cpu_index, &s0) and use the return value as the next-node index.
 */
230 static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
233 icmp46_header_t * icmp0,
236 vlib_node_runtime_t * node,
239 snat_session_t ** p_s0)
241 snat_session_key_t key0, sm0;
242 icmp_echo_header_t *echo0, *inner_echo0 = 0;
243 ip4_header_t *inner_ip0 = 0;
245 icmp46_header_t *inner_icmp0;
246 clib_bihash_kv_8_8_t kv0, value0;
247 snat_session_t * s0 = 0;
248 u32 new_addr0, old_addr0;
249 u16 old_id0, new_id0;
252 snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
253 u8 is_error_message = 0;
/* The echo header directly follows the generic ICMP header. */
255 echo0 = (icmp_echo_header_t *)(icmp0+1);
257 key0.addr = ip0->dst_address;
258 key0.fib_index = rx_fib_index0;
/* ICMP types handled as error messages that embed another packet. */
262 case ICMP4_destination_unreachable:
263 case ICMP4_time_exceeded:
264 case ICMP4_parameter_problem:
265 case ICMP4_source_quench:
267 case ICMP4_alternate_host_address:
268 is_error_message = 1;
271 if (!is_error_message)
/* Of the non-error types only echo reply is translated. */
273 if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply))
275 b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE];
276 next0 = SNAT_OUT2IN_NEXT_DROP;
279 key0.protocol = SNAT_PROTOCOL_ICMP;
280 key0.port = echo0->identifier;
/* Error message: key on the embedded packet found behind the echo header. */
284 inner_ip0 = (ip4_header_t *)(echo0+1);
285 l4_header = ip4_next_header (inner_ip0);
286 key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
287 switch (key0.protocol)
289 case SNAT_PROTOCOL_ICMP:
290 inner_icmp0 = (icmp46_header_t*)l4_header;
291 inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
292 key0.port = inner_echo0->identifier;
294 case SNAT_PROTOCOL_UDP:
295 case SNAT_PROTOCOL_TCP:
/* The embedded packet is keyed by its source port. */
296 key0.port = ((tcp_udp_header_t*)l4_header)->src_port;
299 b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL];
300 next0 = SNAT_OUT2IN_NEXT_DROP;
305 kv0.key = key0.as_u64;
/* A non-zero return from the search means no session exists for the key. */
307 if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
309 /* Try to match static mapping by external address and port,
310 destination address and port in packet */
311 if (snat_static_mapping_match(sm, key0, &sm0, 1))
313 ip4_address_t * first_int_addr;
/* Refresh the cached first address of the RX interface when the
 * interface differs from the cached one. */
315 if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
318 ip4_interface_first_address (sm->ip4_main, sw_if_index0,
319 0 /* just want the address */);
320 rt->cached_sw_if_index = sw_if_index0;
322 rt->cached_ip4_address = first_int_addr->as_u32;
324 rt->cached_ip4_address = 0;
327 /* Don't NAT packet aimed at the intfc address */
328 if (PREDICT_FALSE(ip0->dst_address.as_u32 == rt->cached_ip4_address))
331 b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
332 next0 = SNAT_OUT2IN_NEXT_DROP;
/* An error message never creates a session. */
336 if (is_error_message)
338 next0 = SNAT_OUT2IN_NEXT_DROP;
342 /* Create session initiated by host from external network */
343 s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
348 next0 = SNAT_OUT2IN_NEXT_DROP;
353 s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
/* Verify the ICMP checksum over the whole ICMP message before rewriting;
 * a folded result other than 0 or 0xffff marks the packet for drop. */
356 sum0 = ip_incremental_checksum (0, icmp0,
357 ntohs(ip0->length) - ip4_header_bytes (ip0));
358 checksum0 = ~ip_csum_fold (sum0);
359 if (checksum0 != 0 && checksum0 != 0xffff)
361 next0 = SNAT_OUT2IN_NEXT_DROP;
/* Rewrite the outer destination address to the inside address. */
365 old_addr0 = ip0->dst_address.as_u32;
366 ip0->dst_address = s0->in2out.addr;
367 new_addr0 = ip0->dst_address.as_u32;
368 vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
370 sum0 = ip0->checksum;
371 sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
372 dst_address /* changed member */);
373 ip0->checksum = ip_csum_fold (sum0);
375 if (!is_error_message)
/* Echo reply: restore the inside identifier. */
377 old_id0 = echo0->identifier;
378 new_id0 = s0->in2out.port;
379 echo0->identifier = new_id0;
381 sum0 = icmp0->checksum;
382 sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
383 identifier /* changed member */);
384 icmp0->checksum = ip_csum_fold (sum0);
/* Error message: the embedded IP header must carry a valid checksum. */
388 if (!ip4_header_checksum_is_valid (inner_ip0))
390 next0 = SNAT_OUT2IN_NEXT_DROP;
/* Rewrite the embedded source address; the outer ICMP checksum covers
 * the embedded packet, so it is patched as well. */
394 old_addr0 = inner_ip0->src_address.as_u32;
395 inner_ip0->src_address = s0->in2out.addr;
396 new_addr0 = inner_ip0->src_address.as_u32;
398 sum0 = icmp0->checksum;
399 sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
400 src_address /* changed member */);
401 icmp0->checksum = ip_csum_fold (sum0);
403 switch (key0.protocol)
405 case SNAT_PROTOCOL_ICMP:
406 old_id0 = inner_echo0->identifier;
407 new_id0 = s0->in2out.port;
408 inner_echo0->identifier = new_id0;
410 sum0 = icmp0->checksum;
411 sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
413 icmp0->checksum = ip_csum_fold (sum0);
415 case SNAT_PROTOCOL_UDP:
416 case SNAT_PROTOCOL_TCP:
417 old_id0 = ((tcp_udp_header_t*)l4_header)->src_port;
418 new_id0 = s0->in2out.port;
419 ((tcp_udp_header_t*)l4_header)->src_port = new_id0;
421 sum0 = icmp0->checksum;
422 sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t,
424 icmp0->checksum = ip_csum_fold (sum0);
/* Session accounting: last activity time and byte count. */
432 s0->last_heard = now;
434 s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
435 /* Per-user LRU list maintenance for dynamic translation */
436 if (!snat_is_session_static (s0))
438 clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
440 clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
441 s0->per_user_list_head_index,
/*
 * snat-out2in node function: translates packets arriving from the outside
 * network back to their inside address and port.
 *
 * Per packet, the protocol is classified first (what happens for an
 * unrecognised protocol, proto == ~0, is not visible here). A packet with
 * TTL == 1 gets an ICMP time-exceeded error set on its buffer and is
 * steered to the ICMP error next node. ICMP goes through
 * icmp_out2in_slow_path. TCP and UDP are looked up in sm->out2in by
 * destination address, destination port, protocol and RX FIB index. On a
 * miss a static mapping is tried and a session created for it; without a
 * mapping the packet is dropped unless it is a DHCP-to-client UDP packet.
 * With a session the destination address and port are rewritten, the
 * checksums patched incrementally and the session accounting refreshed.
 *
 * Packets are handled two at a time while at least four remain in the frame
 * and two slots remain in the next frame, then one at a time.
 * Returns the number of buffers in the input frame.
 */
451 snat_out2in_node_fn (vlib_main_t * vm,
452 vlib_node_runtime_t * node,
453 vlib_frame_t * frame)
455 u32 n_left_from, * from, * to_next;
456 snat_out2in_next_t next_index;
457 u32 pkts_processed = 0;
458 snat_main_t * sm = &snat_main;
459 f64 now = vlib_time_now (vm);
460 u32 cpu_index = os_get_cpu_number ();
462 from = vlib_frame_vector_args (frame);
463 n_left_from = frame->n_vectors;
464 next_index = node->cached_next_index;
466 while (n_left_from > 0)
470 vlib_get_next_frame (vm, node, next_index,
471 to_next, n_left_to_next);
/* Dual loop: two packets per iteration, prefetching the following two. */
473 while (n_left_from >= 4 && n_left_to_next >= 2)
476 vlib_buffer_t * b0, * b1;
477 u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
478 u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
479 u32 sw_if_index0, sw_if_index1;
480 ip4_header_t * ip0, *ip1;
481 ip_csum_t sum0, sum1;
482 u32 new_addr0, old_addr0;
483 u16 new_port0, old_port0;
484 u32 new_addr1, old_addr1;
485 u16 new_port1, old_port1;
486 udp_header_t * udp0, * udp1;
487 tcp_header_t * tcp0, * tcp1;
488 icmp46_header_t * icmp0, * icmp1;
489 snat_session_key_t key0, key1, sm0, sm1;
490 u32 rx_fib_index0, rx_fib_index1;
492 snat_session_t * s0 = 0, * s1 = 0;
493 clib_bihash_kv_8_8_t kv0, kv1, value0, value1;
495 /* Prefetch next iteration. */
497 vlib_buffer_t * p2, * p3;
499 p2 = vlib_get_buffer (vm, from[2]);
500 p3 = vlib_get_buffer (vm, from[3]);
502 vlib_prefetch_buffer_header (p2, LOAD);
503 vlib_prefetch_buffer_header (p3, LOAD);
505 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
506 CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
509 /* speculatively enqueue b0 and b1 to the current next frame */
510 to_next[0] = bi0 = from[0];
511 to_next[1] = bi1 = from[1];
517 b0 = vlib_get_buffer (vm, bi0);
518 b1 = vlib_get_buffer (vm, bi1);
/* ---- first packet of the pair ---- */
/* The three L4 pointers alias the same header location; which one is
 * valid depends on the IP protocol. */
520 ip0 = vlib_buffer_get_current (b0);
521 udp0 = ip4_next_header (ip0);
522 tcp0 = (tcp_header_t *) udp0;
523 icmp0 = (icmp46_header_t *) udp0;
525 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
526 rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
529 proto0 = ip_proto_to_snat_proto (ip0->protocol);
531 if (PREDICT_FALSE (proto0 == ~0))
534 if (PREDICT_FALSE(ip0->ttl == 1))
536 vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
537 icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
538 ICMP4_time_exceeded_ttl_exceeded_in_transit,
540 next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
544 if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
546 next0 = icmp_out2in_slow_path
547 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
548 next0, now, cpu_index, &s0);
552 key0.addr = ip0->dst_address;
553 key0.port = udp0->dst_port;
554 key0.protocol = proto0;
555 key0.fib_index = rx_fib_index0;
557 kv0.key = key0.as_u64;
/* A non-zero return from the search means no session exists for the key. */
559 if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
561 /* Try to match static mapping by external address and port,
562 destination address and port in packet */
563 if (snat_static_mapping_match(sm, key0, &sm0, 1))
565 b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
567 * Send DHCP packets to the ipv4 stack, or we won't
568 * be able to use dhcp client on the outside interface
570 if (proto0 != SNAT_PROTOCOL_UDP
572 != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
573 next0 = SNAT_OUT2IN_NEXT_DROP;
577 /* Create session initiated by host from external network */
578 s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
582 b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
583 next0 = SNAT_OUT2IN_NEXT_DROP;
588 s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
/* Rewrite the destination address and patch the IP header checksum. */
591 old_addr0 = ip0->dst_address.as_u32;
592 ip0->dst_address = s0->in2out.addr;
593 new_addr0 = ip0->dst_address.as_u32;
594 vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
596 sum0 = ip0->checksum;
597 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
599 dst_address /* changed member */);
600 ip0->checksum = ip_csum_fold (sum0);
602 if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
/* The TCP checksum is patched for both the address and the port. */
604 old_port0 = tcp0->ports.dst;
605 tcp0->ports.dst = s0->in2out.port;
606 new_port0 = tcp0->ports.dst;
608 sum0 = tcp0->checksum;
609 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
611 dst_address /* changed member */);
613 sum0 = ip_csum_update (sum0, old_port0, new_port0,
614 ip4_header_t /* cheat */,
615 length /* changed member */);
616 tcp0->checksum = ip_csum_fold(sum0);
620 old_port0 = udp0->dst_port;
621 udp0->dst_port = s0->in2out.port;
/* Session accounting: last activity time and byte count. */
626 s0->last_heard = now;
628 s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
629 /* Per-user LRU list maintenance for dynamic translation */
630 if (!snat_is_session_static (s0))
632 clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
634 clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
635 s0->per_user_list_head_index,
640 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
641 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
643 snat_out2in_trace_t *t =
644 vlib_add_trace (vm, node, b0, sizeof (*t));
645 t->sw_if_index = sw_if_index0;
646 t->next_index = next0;
647 t->session_index = ~0;
649 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
/* Every packet not sent to error-drop counts as processed. */
652 pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
/* ---- second packet of the pair ---- */
655 ip1 = vlib_buffer_get_current (b1);
656 udp1 = ip4_next_header (ip1);
657 tcp1 = (tcp_header_t *) udp1;
658 icmp1 = (icmp46_header_t *) udp1;
660 sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
661 rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
664 proto1 = ip_proto_to_snat_proto (ip1->protocol);
666 if (PREDICT_FALSE (proto1 == ~0))
/* The second packet is judged by its own TTL and its disposition goes to
 * next1. Testing ip0 and writing next0 here meant a TTL == 1 packet in the
 * second slot was never steered to the ICMP error node, and a TTL == 1
 * first packet made the second one skip translation. */
669 if (PREDICT_FALSE(ip1->ttl == 1))
671 vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
672 icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
673 ICMP4_time_exceeded_ttl_exceeded_in_transit,
675 next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
679 if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
681 next1 = icmp_out2in_slow_path
682 (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
683 next1, now, cpu_index, &s1);
687 key1.addr = ip1->dst_address;
688 key1.port = udp1->dst_port;
689 key1.protocol = proto1;
690 key1.fib_index = rx_fib_index1;
692 kv1.key = key1.as_u64;
694 if (clib_bihash_search_8_8 (&sm->out2in, &kv1, &value1))
696 /* Try to match static mapping by external address and port,
697 destination address and port in packet */
698 if (snat_static_mapping_match(sm, key1, &sm1, 1))
700 b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
702 * Send DHCP packets to the ipv4 stack, or we won't
703 * be able to use dhcp client on the outside interface
705 if (proto1 != SNAT_PROTOCOL_UDP
707 != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
708 next1 = SNAT_OUT2IN_NEXT_DROP;
712 /* Create session initiated by host from external network */
713 s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
717 b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
718 next1 = SNAT_OUT2IN_NEXT_DROP;
723 s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
726 old_addr1 = ip1->dst_address.as_u32;
727 ip1->dst_address = s1->in2out.addr;
728 new_addr1 = ip1->dst_address.as_u32;
729 vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
731 sum1 = ip1->checksum;
732 sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
734 dst_address /* changed member */);
735 ip1->checksum = ip_csum_fold (sum1);
737 if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
739 old_port1 = tcp1->ports.dst;
740 tcp1->ports.dst = s1->in2out.port;
741 new_port1 = tcp1->ports.dst;
743 sum1 = tcp1->checksum;
744 sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
746 dst_address /* changed member */);
748 sum1 = ip_csum_update (sum1, old_port1, new_port1,
749 ip4_header_t /* cheat */,
750 length /* changed member */);
751 tcp1->checksum = ip_csum_fold(sum1);
755 old_port1 = udp1->dst_port;
756 udp1->dst_port = s1->in2out.port;
761 s1->last_heard = now;
763 s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
764 /* Per-user LRU list maintenance for dynamic translation */
765 if (!snat_is_session_static (s1))
767 clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
769 clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
770 s1->per_user_list_head_index,
775 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
776 && (b1->flags & VLIB_BUFFER_IS_TRACED)))
778 snat_out2in_trace_t *t =
779 vlib_add_trace (vm, node, b1, sizeof (*t));
780 t->sw_if_index = sw_if_index1;
781 t->next_index = next1;
782 t->session_index = ~0;
784 t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
787 pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
789 /* verify speculative enqueues, maybe switch current next frame */
790 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
791 to_next, n_left_to_next,
792 bi0, bi1, next0, next1);
/* Single loop: same per-packet handling for the remaining packets. */
795 while (n_left_from > 0 && n_left_to_next > 0)
799 u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
803 u32 new_addr0, old_addr0;
804 u16 new_port0, old_port0;
807 icmp46_header_t * icmp0;
808 snat_session_key_t key0, sm0;
811 snat_session_t * s0 = 0;
812 clib_bihash_kv_8_8_t kv0, value0;
814 /* speculatively enqueue b0 to the current next frame */
822 b0 = vlib_get_buffer (vm, bi0);
824 ip0 = vlib_buffer_get_current (b0);
825 udp0 = ip4_next_header (ip0);
826 tcp0 = (tcp_header_t *) udp0;
827 icmp0 = (icmp46_header_t *) udp0;
829 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
830 rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
833 proto0 = ip_proto_to_snat_proto (ip0->protocol);
835 if (PREDICT_FALSE (proto0 == ~0))
838 if (PREDICT_FALSE(ip0->ttl == 1))
840 vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
841 icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
842 ICMP4_time_exceeded_ttl_exceeded_in_transit,
844 next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR;
848 if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
850 next0 = icmp_out2in_slow_path
851 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
852 next0, now, cpu_index, &s0);
856 key0.addr = ip0->dst_address;
857 key0.port = udp0->dst_port;
858 key0.protocol = proto0;
859 key0.fib_index = rx_fib_index0;
861 kv0.key = key0.as_u64;
863 if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
865 /* Try to match static mapping by external address and port,
866 destination address and port in packet */
867 if (snat_static_mapping_match(sm, key0, &sm0, 1))
869 b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
871 * Send DHCP packets to the ipv4 stack, or we won't
872 * be able to use dhcp client on the outside interface
874 if (proto0 != SNAT_PROTOCOL_UDP
876 != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client)))
878 next0 = SNAT_OUT2IN_NEXT_DROP;
882 /* Create session initiated by host from external network */
883 s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
887 b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
888 next0 = SNAT_OUT2IN_NEXT_DROP;
893 s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
896 old_addr0 = ip0->dst_address.as_u32;
897 ip0->dst_address = s0->in2out.addr;
898 new_addr0 = ip0->dst_address.as_u32;
899 vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
901 sum0 = ip0->checksum;
902 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
904 dst_address /* changed member */);
905 ip0->checksum = ip_csum_fold (sum0);
907 if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
909 old_port0 = tcp0->ports.dst;
910 tcp0->ports.dst = s0->in2out.port;
911 new_port0 = tcp0->ports.dst;
913 sum0 = tcp0->checksum;
914 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
916 dst_address /* changed member */);
918 sum0 = ip_csum_update (sum0, old_port0, new_port0,
919 ip4_header_t /* cheat */,
920 length /* changed member */);
921 tcp0->checksum = ip_csum_fold(sum0);
925 old_port0 = udp0->dst_port;
926 udp0->dst_port = s0->in2out.port;
931 s0->last_heard = now;
933 s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
934 /* Per-user LRU list maintenance for dynamic translation */
935 if (!snat_is_session_static (s0))
937 clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
939 clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
940 s0->per_user_list_head_index,
945 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
946 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
948 snat_out2in_trace_t *t =
949 vlib_add_trace (vm, node, b0, sizeof (*t));
950 t->sw_if_index = sw_if_index0;
951 t->next_index = next0;
952 t->session_index = ~0;
954 t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
957 pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
959 /* verify speculative enqueue, maybe switch current next frame */
960 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
961 to_next, n_left_to_next,
965 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* Publish the count of packets that were not dropped. */
968 vlib_node_increment_counter (vm, snat_out2in_node.index,
969 SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
971 return frame->n_vectors;
/* Graph-node registration of snat-out2in: an internal node with u32
 * buffer-index vectors, the shared out2in error strings, and a per-node
 * snat_runtime_t as runtime data. */
974 VLIB_REGISTER_NODE (snat_out2in_node) = {
975 .function = snat_out2in_node_fn,
976 .name = "snat-out2in",
977 .vector_size = sizeof (u32),
978 .format_trace = format_snat_out2in_trace,
979 .type = VLIB_NODE_TYPE_INTERNAL,
981 .n_errors = ARRAY_LEN(snat_out2in_error_strings),
982 .error_strings = snat_out2in_error_strings,
/* Room for the snat_runtime_t that icmp_out2in_slow_path reads from
 * node->runtime_data. */
984 .runtime_data_bytes = sizeof (snat_runtime_t),
986 .n_next_nodes = SNAT_OUT2IN_N_NEXT,
988 /* edit / add dispositions here */
990 [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
991 [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
992 [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
995 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
/*
 * Worker hand-off node function for out2in traffic.
 *
 * For every buffer in the frame a worker key is built from the destination
 * address, the destination port (the echo identifier for ICMP) and the RX
 * FIB index, and searched in sm->worker_by_out. On a miss the search is
 * repeated and, if that misses too, a worker is chosen round-robin from
 * sm->workers; the chosen worker is then stored in sm->worker_by_out.
 * Buffers owned by another worker are appended to that worker's hand-off
 * queue element. The others are collected in a frame sent to snat-out2in.
 * Returns the number of buffers in the input frame.
 */
998 snat_out2in_worker_handoff_fn (vlib_main_t * vm,
999 vlib_node_runtime_t * node,
1000 vlib_frame_t * frame)
1002 snat_main_t *sm = &snat_main;
1003 vlib_thread_main_t *tm = vlib_get_thread_main ();
1004 u32 n_left_from, *from, *to_next = 0;
/* Thread-local vectors, allocated on first use below. */
1005 static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
1006 static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
1008 vlib_frame_queue_elt_t *hf = 0;
1009 vlib_frame_t *f = 0;
1011 u32 n_left_to_next_worker = 0, *to_next_worker = 0;
1012 u32 next_worker_index = 0;
1013 u32 current_worker_index = ~0;
1014 u32 cpu_index = os_get_cpu_number ();
1016 ASSERT (vec_len (sm->workers));
/* First call on this thread: size the two hand-off vectors. */
1018 if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
1020 vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
1022 vec_validate_init_empty (congested_handoff_queue_by_worker_index,
1023 sm->first_worker_index + sm->num_workers - 1,
1024 (vlib_frame_queue_t *) (~0));
1027 from = vlib_frame_vector_args (frame);
1028 n_left_from = frame->n_vectors;
1030 while (n_left_from > 0)
1037 udp_header_t * udp0;
1038 snat_worker_key_t key0;
1039 clib_bihash_kv_8_8_t kv0, value0;
1046 b0 = vlib_get_buffer (vm, bi0);
1048 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1049 rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
1051 ip0 = vlib_buffer_get_current (b0);
1052 udp0 = ip4_next_header (ip0);
1054 key0.addr = ip0->dst_address;
1055 key0.port = udp0->dst_port;
1056 key0.fib_index = rx_fib_index0;
/* For ICMP the echo identifier takes the place of the port.
 * NOTE(review): the identifier is read for every ICMP type, not only for
 * echo messages - confirm that this is intended. */
1058 if (PREDICT_FALSE(ip0->protocol == IP_PROTOCOL_ICMP))
1060 icmp46_header_t * icmp0 = (icmp46_header_t *) udp0;
1061 icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
1062 key0.port = echo0->identifier;
1065 kv0.key = key0.as_u64;
1067 /* Ever heard of the "user" before? */
1068 if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
/* NOTE(review): the key is rebuilt and searched a second time, presumably
 * after key0 was changed on a line not visible here - confirm. */
1071 kv0.key = key0.as_u64;
1073 if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
1075 /* No, assign next available worker (RR) */
1076 next_worker_index = sm->first_worker_index;
1077 if (vec_len (sm->workers))
1079 next_worker_index +=
1080 sm->workers[sm->next_worker++ % _vec_len (sm->workers)];
1085 /* Static mapping without port */
1086 next_worker_index = value0.value;
1089 /* Add to translated packets worker lookup */
1090 kv0.value = next_worker_index;
1091 clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
1094 next_worker_index = value0.value;
/* Hand off only when the owning worker is not the current thread. */
1096 if (PREDICT_FALSE (next_worker_index != cpu_index))
/* When the target worker changes, record the fill level of the previous
 * queue element and fetch the element of the new worker. */
1100 if (next_worker_index != current_worker_index)
1103 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1105 hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index,
1107 handoff_queue_elt_by_worker_index);
1109 n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
1110 to_next_worker = &hf->buffer_index[hf->n_vectors];
1111 current_worker_index = next_worker_index;
1114 /* enqueue to correct worker thread */
1115 to_next_worker[0] = bi0;
1117 n_left_to_next_worker--;
/* A full queue element is shipped immediately. */
1119 if (n_left_to_next_worker == 0)
1121 hf->n_vectors = VLIB_FRAME_SIZE;
1122 vlib_put_frame_queue_elt (hf);
1123 current_worker_index = ~0;
1124 handoff_queue_elt_by_worker_index[next_worker_index] = 0;
1131 /* if this is 1st frame */
1134 f = vlib_get_frame_to_node (vm, snat_out2in_node.index);
1135 to_next = vlib_frame_vector_args (f);
1143 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1144 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1146 snat_out2in_worker_handoff_trace_t *t =
1147 vlib_add_trace (vm, node, b0, sizeof (*t));
1148 t->next_worker_index = next_worker_index;
1149 t->do_handoff = do_handoff;
/* Send the locally kept buffers on to snat-out2in. */
1154 vlib_put_frame_to_node (vm, snat_out2in_node.index, f);
1157 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1159 /* Ship frames to the worker nodes */
1160 for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
1162 if (handoff_queue_elt_by_worker_index[i])
1164 hf = handoff_queue_elt_by_worker_index[i];
1166 * It works better to let the handoff node
1167 * rate-adapt, always ship the handoff queue element.
/* The leading 1 makes this condition always true. */
1169 if (1 || hf->n_vectors == hf->last_n_vectors)
1171 vlib_put_frame_queue_elt (hf);
1172 handoff_queue_elt_by_worker_index[i] = 0;
1175 hf->last_n_vectors = hf->n_vectors;
1177 congested_handoff_queue_by_worker_index[i] =
1178 (vlib_frame_queue_t *) (~0);
1181 current_worker_index = ~0;
1182 return frame->n_vectors;
/* Graph-node registration of the out2in worker hand-off node: an internal
 * node with u32 buffer-index vectors and its own trace formatter. */
1185 VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = {
1186 .function = snat_out2in_worker_handoff_fn,
1187 .name = "snat-out2in-worker-handoff",
1188 .vector_size = sizeof (u32),
1189 .format_trace = format_snat_out2in_worker_handoff_trace,
1190 .type = VLIB_NODE_TYPE_INTERNAL,
1199 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn);
/*
 * ICMP handling of the fast out2in node, which uses static mappings only.
 *
 * The key is the destination address, the echo identifier and the RX FIB
 * index. If the static-mapping match fails and the packet is addressed to
 * the first address of the RX interface, it is dropped with NO_TRANSLATION.
 * On a match the destination address is rewritten to the mapped address,
 * the IP checksum is patched, and the identifier is rewritten when it
 * differs.
 *
 * Only part of the parameter list is visible here. The caller passes
 * (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0) and uses
 * the return value as the next-node index.
 */
1201 static inline u32 icmp_out2in_fast (snat_main_t *sm,
1204 icmp46_header_t * icmp0,
1206 vlib_node_runtime_t * node,
1210 snat_session_key_t key0, sm0;
1211 icmp_echo_header_t *echo0;
1212 u32 new_addr0, old_addr0;
1213 u16 old_id0, new_id0;
1215 snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
/* The echo header directly follows the generic ICMP header. */
1217 echo0 = (icmp_echo_header_t *)(icmp0+1);
1219 key0.addr = ip0->dst_address;
1220 key0.port = echo0->identifier;
1221 key0.fib_index = rx_fib_index0;
1223 if (snat_static_mapping_match(sm, key0, &sm0, 1))
1225 ip4_address_t * first_int_addr;
/* Refresh the cached first address of the RX interface when the
 * interface differs from the cached one. */
1227 if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
1230 ip4_interface_first_address (sm->ip4_main, sw_if_index0,
1231 0 /* just want the address */);
1232 rt->cached_sw_if_index = sw_if_index0;
1234 rt->cached_ip4_address = first_int_addr->as_u32;
1236 rt->cached_ip4_address = 0;
1239 /* Don't NAT packet aimed at the intfc address */
1240 if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
1241 rt->cached_ip4_address))
1244 b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
1245 return SNAT_OUT2IN_NEXT_DROP;
/* Apply the mapping: new destination address and TX FIB index. */
1248 new_addr0 = sm0.addr.as_u32;
1250 vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1252 old_addr0 = ip0->dst_address.as_u32;
1253 ip0->dst_address.as_u32 = new_addr0;
1255 sum0 = ip0->checksum;
1256 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1258 dst_address /* changed member */);
1259 ip0->checksum = ip_csum_fold (sum0);
/* NOTE(review): new_id0 is presumably set from the mapping on a line not
 * visible here - confirm. */
1261 if (PREDICT_FALSE(new_id0 != echo0->identifier))
1263 old_id0 = echo0->identifier;
1264 echo0->identifier = new_id0;
1266 sum0 = icmp0->checksum;
1267 sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
1269 icmp0->checksum = ip_csum_fold (sum0);
/*
 * snat-out2in-fast node function: out2in translation from static mappings
 * only, without any session lookup.
 *
 * Each packet is processed singly. ICMP is handled by icmp_out2in_fast. For
 * TCP and UDP the destination address, destination port and RX FIB index
 * are matched against the static mappings. On a match the destination
 * address, and the port when it differs, are rewritten and the checksums
 * patched incrementally. Returns the number of buffers in the input frame.
 */
1276 snat_out2in_fast_node_fn (vlib_main_t * vm,
1277 vlib_node_runtime_t * node,
1278 vlib_frame_t * frame)
1280 u32 n_left_from, * from, * to_next;
1281 snat_out2in_next_t next_index;
1282 u32 pkts_processed = 0;
1283 snat_main_t * sm = &snat_main;
1285 from = vlib_frame_vector_args (frame);
1286 n_left_from = frame->n_vectors;
1287 next_index = node->cached_next_index;
1289 while (n_left_from > 0)
1293 vlib_get_next_frame (vm, node, next_index,
1294 to_next, n_left_to_next);
1296 while (n_left_from > 0 && n_left_to_next > 0)
/* Starts as drop; vnet_feature_next below overwrites it. */
1300 u32 next0 = SNAT_OUT2IN_NEXT_DROP;
1304 u32 new_addr0, old_addr0;
1305 u16 new_port0, old_port0;
1306 udp_header_t * udp0;
1307 tcp_header_t * tcp0;
1308 icmp46_header_t * icmp0;
1309 snat_session_key_t key0, sm0;
1313 /* speculatively enqueue b0 to the current next frame */
1319 n_left_to_next -= 1;
1321 b0 = vlib_get_buffer (vm, bi0);
/* The three L4 pointers alias the same header location; which one is
 * valid depends on the IP protocol. */
1323 ip0 = vlib_buffer_get_current (b0);
1324 udp0 = ip4_next_header (ip0);
1325 tcp0 = (tcp_header_t *) udp0;
1326 icmp0 = (icmp46_header_t *) udp0;
1328 sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
1329 rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
/* Take the next node from the feature arc of the RX interface. */
1331 vnet_feature_next (sw_if_index0, &next0, b0);
1333 proto0 = ip_proto_to_snat_proto (ip0->protocol);
1335 if (PREDICT_FALSE (proto0 == ~0))
1338 if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
1340 next0 = icmp_out2in_fast
1341 (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0);
1345 key0.addr = ip0->dst_address;
1346 key0.port = udp0->dst_port;
1347 key0.fib_index = rx_fib_index0;
/* A non-zero return means no static mapping matched. */
1349 if (snat_static_mapping_match(sm, key0, &sm0, 1))
1351 b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
/* Apply the mapping: new destination address, port and TX FIB index. */
1355 new_addr0 = sm0.addr.as_u32;
1356 new_port0 = sm0.port;
1357 vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
1358 old_addr0 = ip0->dst_address.as_u32;
1359 ip0->dst_address.as_u32 = new_addr0;
1361 sum0 = ip0->checksum;
1362 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1364 dst_address /* changed member */);
1365 ip0->checksum = ip_csum_fold (sum0);
/* The port is rewritten only when the mapping changes it. */
1367 if (PREDICT_FALSE(new_port0 != udp0->dst_port))
1369 if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1371 old_port0 = tcp0->ports.dst;
1372 tcp0->ports.dst = new_port0;
1374 sum0 = tcp0->checksum;
1375 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1377 dst_address /* changed member */);
1379 sum0 = ip_csum_update (sum0, old_port0, new_port0,
1380 ip4_header_t /* cheat */,
1381 length /* changed member */);
1382 tcp0->checksum = ip_csum_fold(sum0);
1386 old_port0 = udp0->dst_port;
1387 udp0->dst_port = new_port0;
/* Port unchanged: the TCP checksum still needs the address change. */
1393 if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
1395 sum0 = tcp0->checksum;
1396 sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
1398 dst_address /* changed member */);
1400 tcp0->checksum = ip_csum_fold(sum0);
1406 if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
1407 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1409 snat_out2in_trace_t *t =
1410 vlib_add_trace (vm, node, b0, sizeof (*t));
1411 t->sw_if_index = sw_if_index0;
1412 t->next_index = next0;
/* Every packet not sent to error-drop counts as processed. */
1415 pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
1417 /* verify speculative enqueue, maybe switch current next frame */
1418 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1419 to_next, n_left_to_next,
1423 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1426 vlib_node_increment_counter (vm, snat_out2in_fast_node.index,
1427 SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
1429 return frame->n_vectors;
/* Graph-node registration of snat-out2in-fast. It shares the error strings
 * and the next-node set of snat-out2in but uses the fast trace formatter. */
1432 VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
1433 .function = snat_out2in_fast_node_fn,
1434 .name = "snat-out2in-fast",
1435 .vector_size = sizeof (u32),
1436 .format_trace = format_snat_out2in_fast_trace,
1437 .type = VLIB_NODE_TYPE_INTERNAL,
1439 .n_errors = ARRAY_LEN(snat_out2in_error_strings),
1440 .error_strings = snat_out2in_error_strings,
/* Room for the snat_runtime_t that icmp_out2in_fast reads from
 * node->runtime_data. */
1442 .runtime_data_bytes = sizeof (snat_runtime_t),
1444 .n_next_nodes = SNAT_OUT2IN_N_NEXT,
1446 /* edit / add dispositions here */
1448 [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
1449 [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
1450 [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
1453 VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);