2 * Copyright (c) 2017 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
17 * @brief NAT64 IPv6 to IPv4 translation (inside to outside network)
20 #include <nat/nat64.h>
21 #include <nat/nat_inlines.h>
22 #include <vnet/ip/ip6_to_ip4.h>
23 #include <vnet/fib/fib_table.h>
/* Per-packet trace record for the NAT64 in2out nodes.
   NOTE(review): the struct's field lines are missing from this
   extraction; judging by the formatter below it carries at least
   sw_if_index, next_index and is_slow_path -- confirm in full file. */
30 } nat64_in2out_trace_t;
/* Format a nat64_in2out_trace_t for "show trace": prints which node
   (fast or slow path) handled the packet, the RX sw_if_index and the
   chosen next index.  NOTE(review): original lines are missing from
   this extraction; code lines below are preserved verbatim. */
33 format_nat64_in2out_trace (u8 * s, va_list * args)
35 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
36 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
37 nat64_in2out_trace_t *t = va_arg (*args, nat64_in2out_trace_t *);
40 tag = t->is_slow_path ? "NAT64-in2out-slowpath" : "NAT64-in2out";
43 format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
/* X-macro listing every NAT64 in2out error/counter symbol together
   with its human-readable string; expanded below to build both the
   nat64_in2out_error_t enum and the error-string table. */
49 #define foreach_nat64_in2out_error \
50 _(UNSUPPORTED_PROTOCOL, "unsupported protocol") \
51 _(IN2OUT_PACKETS, "good in2out packets processed") \
52 _(NO_TRANSLATION, "no translation") \
53 _(UNKNOWN, "unknown") \
54 _(DROP_FRAGMENT, "drop fragment") \
55 _(TCP_PACKETS, "TCP packets") \
56 _(UDP_PACKETS, "UDP packets") \
57 _(ICMP_PACKETS, "ICMP packets") \
58 _(OTHER_PACKETS, "other protocol packets") \
59 _(FRAGMENTS, "fragments") \
60 _(CACHED_FRAGMENTS, "cached fragments") \
61 _(PROCESSED_FRAGMENTS, "processed fragments")
/* Error enum generated from the X-macro above (the typedef's opening
   line is missing from this extraction). */
66 #define _(sym,str) NAT64_IN2OUT_ERROR_##sym,
67 foreach_nat64_in2out_error
70 } nat64_in2out_error_t;
/* Human-readable counter strings, indexed by nat64_in2out_error_t;
   referenced by the node registrations below (closing lines of the
   array are missing from this extraction). */
72 static char *nat64_in2out_error_strings[] = {
73 #define _(sym,string) string,
74 foreach_nat64_in2out_error
/* Next-node dispositions for packets leaving the in2out nodes
   (the enum's opening line is missing from this extraction); mapped
   to concrete node names in the registrations below. */
80 NAT64_IN2OUT_NEXT_IP4_LOOKUP,
81 NAT64_IN2OUT_NEXT_IP6_LOOKUP,
82 NAT64_IN2OUT_NEXT_DROP,
83 NAT64_IN2OUT_NEXT_SLOWPATH,
85 } nat64_in2out_next_t;
/* Per-packet context passed to the translation callbacks.  Field
   lines are missing from this extraction; usage below shows at least
   b (vlib_buffer_t *), vm (vlib_main_t *) and thread_index (u32) --
   confirm against the full file. */
87 typedef struct nat64_in2out_set_ctx_t_
92 } nat64_in2out_set_ctx_t;
/* Return whether the IPv6 destination equals one of the receiving
   interface's own addresses, i.e. the packet is addressed to the
   router itself and must not be NAT64-translated.  NOTE(review):
   return statements are among the lines missing from this extraction. */
95 nat64_not_translate (u32 sw_if_index, ip6_address_t ip6_addr)
98 ip6_main_t *im6 = &ip6_main;
99 ip_lookup_main_t *lm6 = &im6->lookup_main;
100 ip_interface_address_t *ia = 0;
/* Walk all IPv6 addresses configured on this interface. */
103 foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
105 addr = ip_interface_address_get_address (lm6, ia);
106 if (0 == ip6_address_compare (addr, &ip6_addr))
115 * @brief Check whether is a hairpinning.
117 * If the destination IP address of the packet is an IPv4 address assigned to
118 * the NAT64 itself, then the packet is a hairpin packet.
120 * @param dst_addr Destination address of the packet.
122 * @returns 1 if hairpinning, otherwise 0.
124 static_always_inline int
125 is_hairpinning (ip6_address_t * dst_addr)
127 nat64_main_t *nm = &nat64_main;
/* The IPv4 address embedded in a NAT64-synthesized IPv6 destination
   occupies the low 32 bits (as_u32[3]); compare it against every
   address in the NAT64 pool. */
130 for (i = 0; i < vec_len (nm->addr_pool); i++)
132 if (nm->addr_pool[i].addr.as_u32 == dst_addr->as_u32[3])
/* Translate an in2out (IPv6 -> IPv4) TCP/UDP packet in place:
   rewrite the IPv6 header as an IPv4 header, find or create the BIB
   and session-table (ST) entries for the flow, then incrementally
   patch the L4 checksum for the new addresses and source port.
   NOTE(review): original lines (braces, returns, error paths) are
   missing throughout this extraction; code lines are verbatim. */
140 nat64_in2out_tcp_udp (vlib_main_t * vm, vlib_buffer_t * p, u16 l4_offset,
141 			  u16 frag_hdr_offset, nat64_in2out_set_ctx_t * ctx)
149 nat64_main_t *nm = &nat64_main;
150 nat64_db_bib_entry_t *bibe;
151 nat64_db_st_entry_t *ste;
152 ip46_address_t old_saddr, old_daddr;
153 ip4_address_t new_daddr;
154 u32 sw_if_index, fib_index;
/* L4 proto and ports come from the shallow-virtual-reassembly data
   stashed in the buffer metadata, so non-first fragments work too. */
155 u8 proto = vnet_buffer (p)->ip.reass.ip_proto;
156 u16 sport = vnet_buffer (p)->ip.reass.l4_src_port;
157 u16 dport = vnet_buffer (p)->ip.reass.l4_dst_port;
158 nat64_db_t *db = &nm->db[ctx->thread_index];
160 ip6 = vlib_buffer_get_current (p);
/* Advance so the (smaller) IPv4 header ends exactly where the IPv6
   payload begins; ip4 then overlaps the tail of the old IPv6 header. */
162 vlib_buffer_advance (p, l4_offset - sizeof (*ip4));
163 ip4 = vlib_buffer_get_current (p);
/* Save IPv6 fields before the overlapping IPv4 write clobbers them. */
165 u32 ip_version_traffic_class_and_flow_label =
166 ip6->ip_version_traffic_class_and_flow_label;
167 u16 payload_length = ip6->payload_length;
168 u8 hop_limit = ip6->hop_limit;
170 old_saddr.as_u64[0] = ip6->src_address.as_u64[0];
171 old_saddr.as_u64[1] = ip6->src_address.as_u64[1];
172 old_daddr.as_u64[0] = ip6->dst_address.as_u64[0];
173 old_daddr.as_u64[1] = ip6->dst_address.as_u64[1];
/* Carry fragmentation info over from the IPv6 fragment header. */
175 if (PREDICT_FALSE (frag_hdr_offset))
177 //Only the first fragment
178 ip6_frag_hdr_t *hdr =
179 (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset);
180 fragment_id = frag_id_6to4 (hdr->identification);
181 frag_more = ip6_frag_hdr_more (hdr);
182 frag_offset = ip6_frag_hdr_offset (hdr);
/* Build the IPv4 header (RFC 7915-style field mapping). */
191 ip4->ip_version_and_header_length =
192 IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
193 ip4->tos = ip6_translate_tos (ip_version_traffic_class_and_flow_label);
195 u16_net_add (payload_length, sizeof (*ip4) + sizeof (*ip6) - l4_offset);
196 ip4->fragment_id = fragment_id;
197 ip4->flags_and_fragment_offset =
198 clib_host_to_net_u16 (frag_offset |
199 (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0));
200 ip4->ttl = hop_limit;
201 ip4->protocol = (proto == IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : proto;
203 sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
205 fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
/* Look up an existing session; on miss, find or create the BIB
   entry and then create the session. */
208 nat64_db_st_entry_find (db, &old_saddr, &old_daddr, sport, dport, proto,
213 bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
220 nat64_db_bib_entry_find (db, &old_saddr, sport, proto, fib_index, 1);
225 ip4_address_t out_addr;
/* Allocate a fresh outside address:port for this inside endpoint. */
226 if (nat64_alloc_out_addr_and_port
227 (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
228 &out_port, ctx->thread_index))
232 nat64_db_bib_entry_create (ctx->thread_index, db,
233 &old_saddr.ip6, &out_addr, sport,
234 out_port, fib_index, proto, 0);
238 vlib_set_simple_counter (&nm->total_bibs, ctx->thread_index, 0,
239 db->bib.bib_entries_num);
/* Extract the real IPv4 destination embedded in the IPv6 dst. */
242 nat64_extract_ip4 (&old_daddr.ip6, &new_daddr, fib_index);
244 nat64_db_st_entry_create (ctx->thread_index, db, bibe,
245 &old_daddr.ip6, &new_daddr, dport);
249 vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
250 db->st.st_entries_num);
253 ip4->src_address.as_u32 = bibe->out_addr.as_u32;
254 ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
256 ip4->checksum = ip4_header_checksum (ip4);
/* L4 rewrite applies only to the fragment that carries the header. */
258 if (!vnet_buffer (p)->ip.reass.is_non_first_fragment)
260 udp_header_t *udp = (udp_header_t *) (ip4 + 1);
261 udp->src_port = bibe->out_port;
263 //UDP checksum is optional over IPv4
264 if (proto == IP_PROTOCOL_UDP)
/* Incrementally adjust the TCP pseudo-header checksum: subtract the
   old IPv6 addresses and source port, add the new IPv4 ones. */
270 tcp_header_t *tcp = (tcp_header_t *) (ip4 + 1);
271 csum = ip_csum_sub_even (tcp->checksum, old_saddr.as_u64[0]);
272 csum = ip_csum_sub_even (csum, old_saddr.as_u64[1]);
273 csum = ip_csum_sub_even (csum, old_daddr.as_u64[0]);
274 csum = ip_csum_sub_even (csum, old_daddr.as_u64[1]);
275 csum = ip_csum_add_even (csum, ip4->dst_address.as_u32);
276 csum = ip_csum_add_even (csum, ip4->src_address.as_u32);
277 csum = ip_csum_sub_even (csum, sport);
278 csum = ip_csum_add_even (csum, udp->src_port);
279 mss_clamping (nm->sm, tcp, &csum);
280 tcp->checksum = ip_csum_fold (csum);
/* Track TCP state machine for the session (direction: in2out). */
282 nat64_tcp_session_set_state (ste, tcp, 1);
286 nat64_session_reset_timeout (ste, ctx->vm);
/* ip6_to_ip4 translation callback for the outer ICMP header.  For
   echo request/reply, the ICMP identifier plays the role of a port:
   it is mapped through the BIB/session tables (created on demand).
   Other ICMP types are sourced from the first pool address.
   NOTE(review): original lines are missing from this extraction;
   code lines below are preserved verbatim. */
292 nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
294 nat64_main_t *nm = &nat64_main;
295 nat64_in2out_set_ctx_t *ctx = arg;
296 nat64_db_bib_entry_t *bibe;
297 nat64_db_st_entry_t *ste;
298 ip46_address_t saddr, daddr;
299 u32 sw_if_index, fib_index;
300 icmp46_header_t *icmp = ip6_next_header (ip6);
301 nat64_db_t *db = &nm->db[ctx->thread_index];
303 sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
305 fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
307 saddr.as_u64[0] = ip6->src_address.as_u64[0];
308 saddr.as_u64[1] = ip6->src_address.as_u64[1];
309 daddr.as_u64[0] = ip6->dst_address.as_u64[0];
310 daddr.as_u64[1] = ip6->dst_address.as_u64[1];
312 if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply)
/* Echo identifier lives at byte offset 4 of the ICMP header. */
314 u16 in_id = ((u16 *) (icmp))[2];
316 nat64_db_st_entry_find (db, &saddr, &daddr, in_id, 0,
317 IP_PROTOCOL_ICMP, fib_index, 1);
322 nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP,
330 nat64_db_bib_entry_find (db, &saddr, in_id,
331 IP_PROTOCOL_ICMP, fib_index, 1);
336 ip4_address_t out_addr;
/* Allocate an outside address and identifier for this endpoint. */
337 if (nat64_alloc_out_addr_and_port
338 (fib_index, SNAT_PROTOCOL_ICMP, &out_addr, &out_id,
343 nat64_db_bib_entry_create (ctx->thread_index, db,
344 &ip6->src_address, &out_addr,
345 in_id, out_id, fib_index,
346 IP_PROTOCOL_ICMP, 0);
350 vlib_set_simple_counter (&nm->total_bibs, ctx->thread_index, 0,
351 db->bib.bib_entries_num);
354 nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
356 nat64_db_st_entry_create (ctx->thread_index, db, bibe,
357 &ip6->dst_address, &daddr.ip4, 0);
361 vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
362 db->st.st_entries_num);
365 nat64_session_reset_timeout (ste, ctx->vm);
367 ip4->src_address.as_u32 = bibe->out_addr.as_u32;
/* Rewrite the echo identifier to the allocated outside id. */
368 ((u16 *) (icmp))[2] = bibe->out_port;
370 ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
/* Non-echo ICMP: fall back to the first pool address (fails if the
   pool is empty). */
374 if (!vec_len (nm->addr_pool))
377 ip4->src_address.as_u32 = nm->addr_pool[0].addr.as_u32;
378 nat64_extract_ip4 (&ip6->dst_address, &ip4->dst_address, fib_index);
/* ip6_to_ip4 translation callback for the packet embedded inside an
   ICMP error message.  The embedded packet travels in the opposite
   direction, so lookups swap saddr/daddr and rewrite the destination
   side rather than the source.  NOTE(review): original lines are
   missing from this extraction; code lines are preserved verbatim. */
385 nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
388 nat64_main_t *nm = &nat64_main;
389 nat64_in2out_set_ctx_t *ctx = arg;
390 nat64_db_st_entry_t *ste;
391 nat64_db_bib_entry_t *bibe;
392 ip46_address_t saddr, daddr;
393 u32 sw_if_index, fib_index;
394 u8 proto = ip6->protocol;
395 nat64_db_t *db = &nm->db[ctx->thread_index];
397 sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
399 fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
401 saddr.as_u64[0] = ip6->src_address.as_u64[0];
402 saddr.as_u64[1] = ip6->src_address.as_u64[1];
403 daddr.as_u64[0] = ip6->dst_address.as_u64[0];
404 daddr.as_u64[1] = ip6->dst_address.as_u64[1];
406 if (proto == IP_PROTOCOL_ICMP6)
408 icmp46_header_t *icmp = ip6_next_header (ip6);
/* Echo identifier at byte offset 4 of the inner ICMP header. */
409 u16 in_id = ((u16 *) (icmp))[2];
410 proto = IP_PROTOCOL_ICMP;
413 (icmp->type == ICMP4_echo_request
414 || icmp->type == ICMP4_echo_reply))
/* Reverse-direction lookup: daddr/saddr intentionally swapped. */
418 nat64_db_st_entry_find (db, &daddr, &saddr, in_id, 0, proto,
423 bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
427 ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
428 ((u16 *) (icmp))[2] = bibe->out_port;
429 ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
/* TCP/UDP inner packet: both headers share the port layout, so the
   udp pointer is used for ports regardless of proto. */
433 udp_header_t *udp = ip6_next_header (ip6);
434 tcp_header_t *tcp = ip6_next_header (ip6);
438 u16 sport = udp->src_port;
439 u16 dport = udp->dst_port;
442 nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
447 bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
451 ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
452 udp->dst_port = bibe->out_port;
453 ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
455 if (proto == IP_PROTOCOL_TCP)
456 checksum = &tcp->checksum;
458 checksum = &udp->checksum;
/* Only the destination port changed; adjust checksum incrementally. */
459 csum = ip_csum_sub_even (*checksum, dport);
460 csum = ip_csum_add_even (csum, udp->dst_port);
461 *checksum = ip_csum_fold (csum);
/* Context for the unk_proto_st_walk() session-table walk: input
   src/dst IPv6 addresses, and out_addr filled in on a match.
   NOTE(review): some field lines (fib_index, proto, thread_index per
   uses below) are missing from this extraction -- confirm. */
467 typedef struct unk_proto_st_walk_ctx_t_
469 ip6_address_t src_addr;
470 ip6_address_t dst_addr;
471 ip4_address_t out_addr;
475 } unk_proto_st_walk_ctx_t;
/* Session-table walk callback: look for an existing TCP/UDP session
   from the same inside source to the same destination, and if its
   outside address is free for ctx->proto, report it via ctx->out_addr
   so the unknown-protocol flow reuses the same outside address.
   NOTE(review): original lines are missing from this extraction. */
478 unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg)
480 nat64_main_t *nm = &nat64_main;
481 unk_proto_st_walk_ctx_t *ctx = arg;
482 nat64_db_bib_entry_t *bibe;
483 ip46_address_t saddr, daddr;
484 nat64_db_t *db = &nm->db[ctx->thread_index];
486 if (ip6_address_is_equal (&ste->in_r_addr, &ctx->dst_addr))
488 bibe = nat64_db_bib_entry_by_index (db, ste->proto, ste->bibe_index);
492 if (ip6_address_is_equal (&bibe->in_addr, &ctx->src_addr)
493 && bibe->fib_index == ctx->fib_index)
495 clib_memset (&saddr, 0, sizeof (saddr));
496 saddr.ip4.as_u32 = bibe->out_addr.as_u32;
497 clib_memset (&daddr, 0, sizeof (daddr));
498 nat64_extract_ip4 (&ctx->dst_addr, &daddr.ip4, ctx->fib_index);
/* Skip candidates whose outside addr:dst pair is already taken
   for this protocol. */
500 if (nat64_db_st_entry_find
501 (db, &daddr, &saddr, 0, 0, ctx->proto, ctx->fib_index, 0))
504 ctx->out_addr.as_u32 = bibe->out_addr.as_u32;
/* Translate an in2out packet of an unknown (non-TCP/UDP/ICMP) L4
   protocol.  Since there are no ports, at most one session per
   (src, dst, proto) exists; the outside address is chosen to match an
   existing TCP/UDP session to the same destination when possible.
   NOTE(review): original lines are missing throughout this
   extraction; code lines below are preserved verbatim. */
513 nat64_in2out_unk_proto (vlib_main_t * vm, vlib_buffer_t * p, u8 l4_protocol,
514 			u16 l4_offset, u16 frag_hdr_offset,
515 			nat64_in2out_set_ctx_t * s_ctx)
523 ip6 = vlib_buffer_get_current (p);
/* The IPv4 header is written in place, ending where the payload
   starts; advance the buffer accordingly. */
525 ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4));
527 vlib_buffer_advance (p, l4_offset - sizeof (*ip4));
529 if (PREDICT_FALSE (frag_hdr_offset))
531 //Only the first fragment
532 ip6_frag_hdr_t *hdr =
533 (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset);
534 fragment_id = frag_id_6to4 (hdr->identification);
535 frag_offset = ip6_frag_hdr_offset (hdr);
536 frag_more = ip6_frag_hdr_more (hdr);
545 nat64_main_t *nm = &nat64_main;
546 nat64_db_bib_entry_t *bibe;
547 nat64_db_st_entry_t *ste;
548 ip46_address_t saddr, daddr, addr;
549 u32 sw_if_index, fib_index;
551 nat64_db_t *db = &nm->db[s_ctx->thread_index];
553 sw_if_index = vnet_buffer (s_ctx->b)->sw_if_index[VLIB_RX];
555 fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
557 saddr.as_u64[0] = ip6->src_address.as_u64[0];
558 saddr.as_u64[1] = ip6->src_address.as_u64[1];
559 daddr.as_u64[0] = ip6->dst_address.as_u64[0];
560 daddr.as_u64[1] = ip6->dst_address.as_u64[1];
/* Port-less lookup: session keyed by addresses and protocol only. */
563 nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, l4_protocol, fib_index,
568 bibe = nat64_db_bib_entry_by_index (db, l4_protocol, ste->bibe_index);
575 nat64_db_bib_entry_find (db, &saddr, 0, l4_protocol, fib_index, 1);
579 /* Choose same out address as for TCP/UDP session to same dst */
580 unk_proto_st_walk_ctx_t ctx = {
581 .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
582 .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
583 .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
584 .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
585 .out_addr.as_u32 = 0,
586 .fib_index = fib_index,
587 .proto = l4_protocol,
588 .thread_index = s_ctx->thread_index,
591 nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);
593 if (!ctx.out_addr.as_u32)
594 nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);
596 /* Verify if out address is not already in use for protocol */
597 clib_memset (&addr, 0, sizeof (addr));
598 addr.ip4.as_u32 = ctx.out_addr.as_u32;
599 if (nat64_db_bib_entry_find (db, &addr, 0, l4_protocol, 0, 0))
600 ctx.out_addr.as_u32 = 0;
/* Fall back to the first pool address not yet used by this proto. */
602 if (!ctx.out_addr.as_u32)
604 for (i = 0; i < vec_len (nm->addr_pool); i++)
606 addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
607 if (!nat64_db_bib_entry_find
608 (db, &addr, 0, l4_protocol, 0, 0))
613 if (!ctx.out_addr.as_u32)
617 nat64_db_bib_entry_create (s_ctx->thread_index, db,
618 &ip6->src_address, &ctx.out_addr,
619 0, 0, fib_index, l4_protocol, 0);
623 vlib_set_simple_counter (&nm->total_bibs, s_ctx->thread_index, 0,
624 db->bib.bib_entries_num);
627 nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
629 nat64_db_st_entry_create (s_ctx->thread_index, db, bibe,
630 &ip6->dst_address, &daddr.ip4, 0);
634 vlib_set_simple_counter (&nm->total_sessions, s_ctx->thread_index, 0,
635 db->st.st_entries_num);
638 nat64_session_reset_timeout (ste, s_ctx->vm);
640 ip4->src_address.as_u32 = bibe->out_addr.as_u32;
641 ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
/* Build the IPv4 header; the L4 payload is passed through untouched. */
643 ip4->ip_version_and_header_length =
644 IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
645 ip4->tos = ip6_translate_tos (ip6->ip_version_traffic_class_and_flow_label);
646 ip4->length = u16_net_add (ip6->payload_length,
647 sizeof (*ip4) + sizeof (*ip6) - l4_offset);
648 ip4->fragment_id = fragment_id;
649 ip4->flags_and_fragment_offset =
650 clib_host_to_net_u16 (frag_offset |
651 (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0));
652 ip4->ttl = ip6->hop_limit;
653 ip4->protocol = l4_protocol;
654 ip4->checksum = ip4_header_checksum (ip4);
/* Handle a hairpinned TCP/UDP packet (IPv6 destination embeds one of
   the NAT64 pool addresses): translate the source through the BIB as
   usual, then resolve the destination back to the inside IPv6 host
   that owns the matching BIB mapping, keeping the packet in IPv6.
   NOTE(review): original lines are missing throughout this
   extraction; code lines below are preserved verbatim. */
660 nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
661 				  ip6_header_t * ip6, u32 l4_offset,
664 nat64_main_t *nm = &nat64_main;
665 nat64_db_bib_entry_t *bibe;
666 nat64_db_st_entry_t *ste;
667 ip46_address_t saddr, daddr;
668 u32 sw_if_index, fib_index;
/* TCP and UDP share the port layout; udp is used for ports, tcp for
   the TCP checksum/state fields. */
669 udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, l4_offset);
670 tcp_header_t *tcp = (tcp_header_t *) u8_ptr_add (ip6, l4_offset);
671 u8 proto = vnet_buffer (b)->ip.reass.ip_proto;
672 u16 sport = vnet_buffer (b)->ip.reass.l4_src_port;
673 u16 dport = vnet_buffer (b)->ip.reass.l4_dst_port;
674 u16 *checksum = NULL;
676 nat64_db_t *db = &nm->db[thread_index];
678 sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
680 fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
682 saddr.as_u64[0] = ip6->src_address.as_u64[0];
683 saddr.as_u64[1] = ip6->src_address.as_u64[1];
684 daddr.as_u64[0] = ip6->dst_address.as_u64[0];
685 daddr.as_u64[1] = ip6->dst_address.as_u64[1];
/* Begin incremental checksum update: subtract old addresses now,
   matching adds happen after the rewrite below. */
687 if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
689 if (proto == IP_PROTOCOL_UDP)
690 checksum = &udp->checksum;
692 checksum = &tcp->checksum;
693 csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
694 csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
695 csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
696 csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
700 nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
705 bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
711 bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1);
716 ip4_address_t out_addr;
717 if (nat64_alloc_out_addr_and_port
718 (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
719 &out_port, thread_index))
723 nat64_db_bib_entry_create (thread_index, db, &ip6->src_address,
724 &out_addr, sport, out_port, fib_index,
729 vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
730 db->bib.bib_entries_num);
733 nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
735 nat64_db_st_entry_create (thread_index, db, bibe, &ip6->dst_address,
740 vlib_set_simple_counter (&nm->total_sessions, thread_index, 0,
741 db->st.st_entries_num);
744 if (proto == IP_PROTOCOL_TCP)
745 nat64_tcp_session_set_state (ste, tcp, 1);
747 nat64_session_reset_timeout (ste, vm);
749 if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
751 udp->src_port = bibe->out_port;
/* Re-synthesize the IPv6 source from the allocated outside IPv4. */
754 nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);
756 clib_memset (&daddr, 0, sizeof (daddr));
757 daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
/* Search every per-thread db for the BIB entry owning the hairpinned
   destination addr:port, to recover the inside IPv6 host. */
761 vec_foreach (db, nm->db)
763 bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, 0, 0);
773 ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
774 ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
/* Finish the incremental checksum: add new addresses and ports. */
776 if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
778 csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
779 csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
780 csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
781 csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
782 csum = ip_csum_sub_even (csum, sport);
783 csum = ip_csum_sub_even (csum, dport);
784 udp->dst_port = bibe->in_port;
785 csum = ip_csum_add_even (csum, udp->src_port);
786 csum = ip_csum_add_even (csum, udp->dst_port);
787 *checksum = ip_csum_fold (csum);
/* Handle a hairpinned ICMPv6 error message: translate the embedded
   (inner) packet's addresses and ports through the session tables,
   rewrite the outer IPv6 header, and recompute the ICMPv6 checksum
   from scratch over the new pseudo-header and payload.
   NOTE(review): original lines are missing throughout this
   extraction; code lines below are preserved verbatim. */
794 nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
795 			       ip6_header_t * ip6, u32 thread_index)
797 nat64_main_t *nm = &nat64_main;
798 nat64_db_bib_entry_t *bibe;
799 nat64_db_st_entry_t *ste;
800 icmp46_header_t *icmp = ip6_next_header (ip6);
801 ip6_header_t *inner_ip6;
802 ip46_address_t saddr, daddr;
803 u32 sw_if_index, fib_index;
807 u16 *checksum, sport, dport;
809 nat64_db_t *db = &nm->db[thread_index];
/* Echo request/reply is not an error message; handled elsewhere. */
811 if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
/* The embedded original packet starts 8 bytes into the ICMP header. */
814 inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
816 proto = inner_ip6->protocol;
818 if (proto == IP_PROTOCOL_ICMP6)
821 sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
823 fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
825 saddr.as_u64[0] = inner_ip6->src_address.as_u64[0];
826 saddr.as_u64[1] = inner_ip6->src_address.as_u64[1];
827 daddr.as_u64[0] = inner_ip6->dst_address.as_u64[0];
828 daddr.as_u64[1] = inner_ip6->dst_address.as_u64[1];
830 udp = ip6_next_header (inner_ip6);
831 tcp = ip6_next_header (inner_ip6);
833 sport = udp->src_port;
834 dport = udp->dst_port;
836 if (proto == IP_PROTOCOL_UDP)
837 checksum = &udp->checksum;
839 checksum = &tcp->checksum;
/* Subtract old inner addresses/ports from the inner L4 checksum. */
841 csum = ip_csum_sub_even (*checksum, inner_ip6->src_address.as_u64[0]);
842 csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]);
843 csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]);
844 csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]);
845 csum = ip_csum_sub_even (csum, sport);
846 csum = ip_csum_sub_even (csum, dport);
/* Inner packet flows out2in relative to the session: swap addrs. */
849 nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
854 bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
858 dport = udp->dst_port = bibe->out_port;
859 nat64_compose_ip6 (&inner_ip6->dst_address, &bibe->out_addr, fib_index);
861 clib_memset (&saddr, 0, sizeof (saddr));
862 clib_memset (&daddr, 0, sizeof (daddr));
863 saddr.ip4.as_u32 = ste->out_r_addr.as_u32;
864 daddr.ip4.as_u32 = bibe->out_addr.as_u32;
/* Find the reverse session across all per-thread dbs to recover the
   inside source of the hairpinned flow. */
868 vec_foreach (db, nm->db)
870 ste = nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
881 bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
885 inner_ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
886 inner_ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
887 udp->src_port = bibe->in_port;
/* Add the new inner addresses/ports back into the inner checksum. */
889 csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]);
890 csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]);
891 csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]);
892 csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]);
893 csum = ip_csum_add_even (csum, udp->src_port);
894 csum = ip_csum_add_even (csum, udp->dst_port);
895 *checksum = ip_csum_fold (csum);
897 if (!vec_len (nm->addr_pool))
/* Outer header: source from the first pool address, destination is
   the inner packet's (now rewritten) inside source. */
900 nat64_compose_ip6 (&ip6->src_address, &nm->addr_pool[0].addr, fib_index);
901 ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
902 ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];
/* Recompute the ICMPv6 checksum over the IPv6 pseudo-header plus the
   full ICMP payload. */
905 csum = ip_csum_with_carry (0, ip6->payload_length);
906 csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol));
907 csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]);
908 csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]);
909 csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]);
910 csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]);
912 ip_incremental_checksum (csum, icmp,
913 clib_net_to_host_u16 (ip6->payload_length));
914 icmp->checksum = ~ip_csum_fold (csum);
/* Handle a hairpinned packet of an unknown (non-TCP/UDP/ICMP) L4
   protocol: same outside-address selection as nat64_in2out_unk_proto,
   then resolve the destination back to the inside IPv6 host so the
   packet stays in IPv6.  NOTE(review): original lines are missing
   throughout this extraction; code lines are preserved verbatim. */
920 nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
921 				    ip6_header_t * ip6, u32 thread_index)
923 nat64_main_t *nm = &nat64_main;
924 nat64_db_bib_entry_t *bibe;
925 nat64_db_st_entry_t *ste;
926 ip46_address_t saddr, daddr, addr;
927 u32 sw_if_index, fib_index;
928 u8 proto = ip6->protocol;
930 nat64_db_t *db = &nm->db[thread_index];
932 sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
934 fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
936 saddr.as_u64[0] = ip6->src_address.as_u64[0];
937 saddr.as_u64[1] = ip6->src_address.as_u64[1];
938 daddr.as_u64[0] = ip6->dst_address.as_u64[0];
939 daddr.as_u64[1] = ip6->dst_address.as_u64[1];
/* Port-less session lookup keyed by addresses and protocol. */
942 nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1);
946 bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
952 bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1);
956 /* Choose same out address as for TCP/UDP session to same dst */
957 unk_proto_st_walk_ctx_t ctx = {
958 .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
959 .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
960 .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
961 .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
962 .out_addr.as_u32 = 0,
963 .fib_index = fib_index,
965 .thread_index = thread_index,
968 nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);
970 if (!ctx.out_addr.as_u32)
971 nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);
973 /* Verify if out address is not already in use for protocol */
974 clib_memset (&addr, 0, sizeof (addr));
975 addr.ip4.as_u32 = ctx.out_addr.as_u32;
976 if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
977 ctx.out_addr.as_u32 = 0;
/* Fall back to the first pool address not yet used by this proto. */
979 if (!ctx.out_addr.as_u32)
981 for (i = 0; i < vec_len (nm->addr_pool); i++)
983 addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
984 if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
989 if (!ctx.out_addr.as_u32)
993 nat64_db_bib_entry_create (thread_index, db, &ip6->src_address,
994 &ctx.out_addr, 0, 0, fib_index, proto,
999 vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
1000 db->bib.bib_entries_num);
1003 nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
1005 nat64_db_st_entry_create (thread_index, db, bibe, &ip6->dst_address,
1010 vlib_set_simple_counter (&nm->total_sessions, thread_index, 0,
1011 db->st.st_entries_num);
1014 nat64_session_reset_timeout (ste, vm);
/* Re-synthesize the IPv6 source from the chosen outside IPv4. */
1016 nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);
1018 clib_memset (&daddr, 0, sizeof (daddr));
1019 daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
/* Search all per-thread dbs for the BIB entry owning the hairpinned
   destination, to recover the inside IPv6 host. */
1023 vec_foreach (db, nm->db)
1025 bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, 0, 0);
1035 ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
1036 ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
/* Shared worker for the nat64-in2out fast and slow path nodes.
   Parses each IPv6 packet, decides between: pass-through to
   ip6-lookup (packet addressed to the router itself), hairpinning,
   punting unknown protocols from the fast path to the slow path, or
   in-place TCP/UDP/ICMP translation followed by ip4-lookup.
   @param is_slow_path  1 when invoked from the slowpath node (handles
                        unknown L4 protocols there).
   NOTE(review): original lines are missing throughout this
   extraction; code lines below are preserved verbatim. */
1042 nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
1043 			     vlib_frame_t * frame, u8 is_slow_path)
1045 u32 n_left_from, *from, *to_next;
1046 nat64_in2out_next_t next_index;
1047 u32 pkts_processed = 0;
1048 u32 stats_node_index;
1049 u32 thread_index = vm->thread_index;
1050 nat64_main_t *nm = &nat64_main;
1052 u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets =
/* Counters are attributed to whichever node variant is running. */
1056 is_slow_path ? nm->in2out_slowpath_node_index : nm->in2out_node_index;
1058 from = vlib_frame_vector_args (frame);
1059 n_left_from = frame->n_vectors;
1060 next_index = node->cached_next_index;
1062 while (n_left_from > 0)
1066 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1068 while (n_left_from > 0 && n_left_to_next > 0)
1074 u16 l4_offset0, frag_hdr_offset0;
1077 nat64_in2out_set_ctx_t ctx0;
1080 /* speculatively enqueue b0 to the current next frame */
1086 n_left_to_next -= 1;
1088 b0 = vlib_get_buffer (vm, bi0);
1089 ip60 = vlib_buffer_get_current (b0);
1093 ctx0.thread_index = thread_index;
1095 next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;
/* Parse the IPv6 extension-header chain; drop if malformed. */
1099 (vm, b0, ip60, b0->current_length, &l4_protocol0, &l4_offset0,
1100 &frag_hdr_offset0)))
1102 next0 = NAT64_IN2OUT_NEXT_DROP;
1103 b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
1107 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
/* Packets addressed to the router itself bypass translation. */
1109 if (nat64_not_translate (sw_if_index0, ip60->dst_address))
1111 next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1115 proto0 = ip_proto_to_snat_proto (l4_protocol0);
/* Slow path: ~0 means an L4 protocol without port mapping. */
1119 if (PREDICT_TRUE (proto0 == ~0))
1122 if (is_hairpinning (&ip60->dst_address))
1124 next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1125 if (nat64_in2out_unk_proto_hairpinning
1126 (vm, b0, ip60, thread_index))
1128 next0 = NAT64_IN2OUT_NEXT_DROP;
1130 node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1135 if (nat64_in2out_unk_proto
1136 (vm, b0, l4_protocol0, l4_offset0, frag_hdr_offset0,
1139 next0 = NAT64_IN2OUT_NEXT_DROP;
1141 node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
/* Fast path: punt unknown protocols to the slowpath node. */
1149 if (PREDICT_FALSE (proto0 == ~0))
1151 next0 = NAT64_IN2OUT_NEXT_SLOWPATH;
1156 if (proto0 == SNAT_PROTOCOL_ICMP)
1159 if (is_hairpinning (&ip60->dst_address))
1161 next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1162 if (nat64_in2out_icmp_hairpinning
1163 (vm, b0, ip60, thread_index))
1165 next0 = NAT64_IN2OUT_NEXT_DROP;
1167 node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
/* Regular ICMP translation via the two set callbacks (outer and
   inner/embedded packet). */
1173 (vm, b0, nat64_in2out_icmp_set_cb, &ctx0,
1174 nat64_in2out_inner_icmp_set_cb, &ctx0))
1176 next0 = NAT64_IN2OUT_NEXT_DROP;
1177 b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1181 else if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
1183 if (proto0 == SNAT_PROTOCOL_TCP)
1188 if (is_hairpinning (&ip60->dst_address))
1190 next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1191 if (nat64_in2out_tcp_udp_hairpinning
1192 (vm, b0, ip60, l4_offset0, thread_index))
1194 next0 = NAT64_IN2OUT_NEXT_DROP;
1196 node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1201 if (nat64_in2out_tcp_udp
1202 (vm, b0, l4_offset0, frag_hdr_offset0, &ctx0))
1204 next0 = NAT64_IN2OUT_NEXT_DROP;
1205 b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1211 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1212 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1214 nat64_in2out_trace_t *t =
1215 vlib_add_trace (vm, node, b0, sizeof (*t));
1216 t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1217 t->next_index = next0;
1218 t->is_slow_path = is_slow_path;
/* Only successfully translated packets count as processed. */
1221 pkts_processed += next0 == NAT64_IN2OUT_NEXT_IP4_LOOKUP;
1223 /* verify speculative enqueue, maybe switch current next frame */
1224 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1225 n_left_to_next, bi0, next0);
1227 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1229 vlib_node_increment_counter (vm, stats_node_index,
1230 NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
1232 vlib_node_increment_counter (vm, stats_node_index,
1233 NAT64_IN2OUT_ERROR_TCP_PACKETS, tcp_packets);
1234 vlib_node_increment_counter (vm, stats_node_index,
1235 NAT64_IN2OUT_ERROR_UDP_PACKETS, udp_packets);
1236 vlib_node_increment_counter (vm, stats_node_index,
1237 NAT64_IN2OUT_ERROR_ICMP_PACKETS, icmp_packets);
1238 vlib_node_increment_counter (vm, stats_node_index,
1239 NAT64_IN2OUT_ERROR_OTHER_PACKETS,
1241 vlib_node_increment_counter (vm, stats_node_index,
1242 NAT64_IN2OUT_ERROR_FRAGMENTS, fragments);
1244 return frame->n_vectors;
/* Fast-path node entry point: delegates to the shared worker with
   is_slow_path = 0. */
1247 VLIB_NODE_FN (nat64_in2out_node) (vlib_main_t * vm,
1248 				  vlib_node_runtime_t * node,
1249 				  vlib_frame_t * frame)
1251 return nat64_in2out_node_fn_inline (vm, node, frame, 0);
/* Graph-node registration for the fast path; wires the next-node
   enum to concrete node names (some initializer lines are missing
   from this extraction). */
1255 VLIB_REGISTER_NODE (nat64_in2out_node) = {
1256 .name = "nat64-in2out",
1257 .vector_size = sizeof (u32),
1258 .format_trace = format_nat64_in2out_trace,
1259 .type = VLIB_NODE_TYPE_INTERNAL,
1260 .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
1261 .error_strings = nat64_in2out_error_strings,
1262 .n_next_nodes = NAT64_IN2OUT_N_NEXT,
1263 /* edit / add dispositions here */
1265 [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
1266 [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
1267 [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
1268 [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
/* Slow-path node entry point: delegates to the shared worker with
   is_slow_path = 1 (handles unknown L4 protocols). */
1273 VLIB_NODE_FN (nat64_in2out_slowpath_node) (vlib_main_t * vm,
1274 					   vlib_node_runtime_t * node,
1275 					   vlib_frame_t * frame)
1277 return nat64_in2out_node_fn_inline (vm, node, frame, 1);
/* Graph-node registration for the slow path; mirrors the fast-path
   registration (some initializer lines are missing from this
   extraction). */
1281 VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
1282 .name = "nat64-in2out-slowpath",
1283 .vector_size = sizeof (u32),
1284 .format_trace = format_nat64_in2out_trace,
1285 .type = VLIB_NODE_TYPE_INTERNAL,
1286 .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
1287 .error_strings = nat64_in2out_error_strings,
1288 .n_next_nodes = NAT64_IN2OUT_N_NEXT,
1289 /* edit / add dispositions here */
1291 [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
1292 [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
1293 [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
1294 [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
/* Per-fragment translation context; all field lines are missing from
   this extraction -- consult the full file before relying on layout. */
1299 typedef struct nat64_in2out_frag_set_ctx_t_
1307 } nat64_in2out_frag_set_ctx_t;
/* X-macro listing the handoff node's counters: drops due to handoff
   queue congestion, and packets kept vs. handed to another worker. */
1310 #define foreach_nat64_in2out_handoff_error \
1311 _(CONGESTION_DROP, "congestion drop") \
1312 _(SAME_WORKER, "same worker") \
1313 _(DO_HANDOFF, "do handoff")
/* Handoff error enum generated from the X-macro above (the typedef's
   opening line is missing from this extraction). */
1317 #define _(sym,str) NAT64_IN2OUT_HANDOFF_ERROR_##sym,
1318 foreach_nat64_in2out_handoff_error
1320 NAT64_IN2OUT_HANDOFF_N_ERROR,
1321 } nat64_in2out_handoff_error_t;
/* Human-readable strings for the handoff counters (array closing
   lines are missing from this extraction). */
1323 static char *nat64_in2out_handoff_error_strings[] = {
1324 #define _(sym,string) string,
1325 foreach_nat64_in2out_handoff_error
/* Trace record for the handoff node: which worker the packet was
   sent to (struct opening line missing from this extraction). */
1331 u32 next_worker_index;
1332 } nat64_in2out_handoff_trace_t;
/* Format a handoff trace record for "show trace". */
1335 format_nat64_in2out_handoff_trace (u8 * s, va_list * args)
1337 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1338 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1339 nat64_in2out_handoff_trace_t *t =
1340 va_arg (*args, nat64_in2out_handoff_trace_t *);
1343 format (s, "NAT64-IN2OUT-HANDOFF: next-worker %d", t->next_worker_index);
/* Worker handoff node: computes the owning worker thread for each
   packet from its IPv6 source address and enqueues the frame's
   buffers to the per-worker frame queue; counts congestion drops,
   same-worker packets and actual handoffs.  NOTE(review): original
   lines are missing from this extraction; code lines are verbatim. */
1348 VLIB_NODE_FN (nat64_in2out_handoff_node) (vlib_main_t * vm,
1349 					  vlib_node_runtime_t * node,
1350 					  vlib_frame_t * frame)
1352 nat64_main_t *nm = &nat64_main;
1353 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1354 u32 n_enq, n_left_from, *from;
1355 u16 thread_indices[VLIB_FRAME_SIZE], *ti;
1357 u32 thread_index = vm->thread_index;
1358 u32 do_handoff = 0, same_worker = 0;
1360 from = vlib_frame_vector_args (frame);
1361 n_left_from = frame->n_vectors;
1362 vlib_get_buffers (vm, from, bufs, n_left_from);
1365 ti = thread_indices;
1367 fq_index = nm->fq_in2out_index;
1369 while (n_left_from > 0)
/* Worker selection is keyed on the inside (IPv6 source) address. */
1373 ip0 = vlib_buffer_get_current (b[0]);
1374 ti[0] = nat64_get_worker_in2out (&ip0->src_address);
1376 if (ti[0] != thread_index)
1382 ((node->flags & VLIB_NODE_FLAG_TRACE)
1383 && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
1385 nat64_in2out_handoff_trace_t *t =
1386 vlib_add_trace (vm, node, b[0], sizeof (*t));
1387 t->next_worker_index = ti[0];
1396 vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
1397 frame->n_vectors, 1);
/* Buffers that did not fit in the frame queue are dropped. */
1399 if (n_enq < frame->n_vectors)
1400 vlib_node_increment_counter (vm, node->node_index,
1401 NAT64_IN2OUT_HANDOFF_ERROR_CONGESTION_DROP,
1402 frame->n_vectors - n_enq);
1403 vlib_node_increment_counter (vm, node->node_index,
1404 NAT64_IN2OUT_HANDOFF_ERROR_SAME_WORKER,
1406 vlib_node_increment_counter (vm, node->node_index,
1407 NAT64_IN2OUT_HANDOFF_ERROR_DO_HANDOFF,
1410 return frame->n_vectors;
/* Graph-node registration for the handoff node (closing initializer
   lines are missing from this extraction). */
1414 VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = {
1415 .name = "nat64-in2out-handoff",
1416 .vector_size = sizeof (u32),
1417 .format_trace = format_nat64_in2out_handoff_trace,
1418 .type = VLIB_NODE_TYPE_INTERNAL,
1419 .n_errors = ARRAY_LEN(nat64_in2out_handoff_error_strings),
1420 .error_strings = nat64_in2out_handoff_error_strings,
1431 * fd.io coding-style-patch-verification: ON
1434 * eval: (c-set-style "gnu")