2 * Copyright (c) 2017 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
17 * @brief NAT64 IPv6 to IPv4 translation (inside to outside network)
20 #include <nat/nat64.h>
21 #include <nat/nat_inlines.h>
22 #include <vnet/ip/ip6_to_ip4.h>
23 #include <vnet/fib/fib_table.h>
24 #include <nat/lib/nat_inlines.h>
31 } nat64_in2out_trace_t;
/* Packet-trace formatter shared by the fast-path and slow-path nodes;
 * prints the recorded sw_if_index and next index under the matching tag.
 * NOTE(review): this extraction is missing interior lines (return type,
 * braces, return statement) — do not edit logic from this view. */
34 format_nat64_in2out_trace (u8 * s, va_list * args)
/* First two va_args (vm, node) are required by the trace-format ABI but
 * unused here. */
36 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
37 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
38 nat64_in2out_trace_t *t = va_arg (*args, nat64_in2out_trace_t *);
/* Tag records which of the two nodes captured this trace entry. */
41 tag = t->is_slow_path ? "NAT64-in2out-slowpath" : "NAT64-in2out";
44 format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
/* Error/counter catalogue for the in2out nodes. Expanded twice below:
 * once into the nat64_in2out_error_t enum and once into the parallel
 * string table used by the node registrations. Comments are kept out of
 * the macro body because the lines are backslash-continued. */
50 #define foreach_nat64_in2out_error \
51 _(UNSUPPORTED_PROTOCOL, "unsupported protocol") \
52 _(IN2OUT_PACKETS, "good in2out packets processed") \
53 _(NO_TRANSLATION, "no translation") \
54 _(UNKNOWN, "unknown") \
55 _(DROP_FRAGMENT, "drop fragment") \
56 _(TCP_PACKETS, "TCP packets") \
57 _(UDP_PACKETS, "UDP packets") \
58 _(ICMP_PACKETS, "ICMP packets") \
59 _(OTHER_PACKETS, "other protocol packets") \
60 _(FRAGMENTS, "fragments") \
61 _(CACHED_FRAGMENTS, "cached fragments") \
62 _(PROCESSED_FRAGMENTS, "processed fragments")
/* Expand the error list into NAT64_IN2OUT_ERROR_* enum symbols.
 * NOTE(review): the surrounding typedef/enum lines and the matching
 * #undef are not visible in this extraction. */
67 #define _(sym,str) NAT64_IN2OUT_ERROR_##sym,
68 foreach_nat64_in2out_error
71 } nat64_in2out_error_t;
/* Parallel string table, indexed by nat64_in2out_error_t, referenced by
 * the node registrations' .error_strings below. */
73 static char *nat64_in2out_error_strings[] = {
74 #define _(sym,string) string,
75 foreach_nat64_in2out_error
/* Next-node dispositions for the in2out nodes; indices must match the
 * .next_nodes arrays in the node registrations at the bottom of the file. */
81 NAT64_IN2OUT_NEXT_IP4_LOOKUP,
82 NAT64_IN2OUT_NEXT_IP6_LOOKUP,
83 NAT64_IN2OUT_NEXT_DROP,
84 NAT64_IN2OUT_NEXT_SLOWPATH,
86 } nat64_in2out_next_t;
/* Per-packet context handed to the translate callbacks; members (buffer,
 * vm, thread_index — per usage below) are not visible in this extraction. */
88 typedef struct nat64_in2out_set_ctx_t_
93 } nat64_in2out_set_ctx_t;
/* Decide whether the packet is addressed to one of the RX interface's own
 * IPv6 addresses ("for us") and must therefore bypass NAT64 translation.
 * NOTE(review): return statements are outside this extraction — presumably
 * returns non-zero on a local-address match; confirm against full source. */
96 nat64_not_translate (u32 sw_if_index, ip6_address_t ip6_addr)
99 ip6_main_t *im6 = &ip6_main;
100 ip_lookup_main_t *lm6 = &im6->lookup_main;
101 ip_interface_address_t *ia = 0;
/* Walk every IPv6 address assigned to the RX interface. */
104 foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
106 addr = ip_interface_address_get_address (lm6, ia);
107 if (0 == ip6_address_compare (addr, &ip6_addr))
116 * @brief Check whether is a hairpinning.
118 * If the destination IP address of the packet is an IPv4 address assigned to
119 * the NAT64 itself, then the packet is a hairpin packet.
121 * @param dst_addr Destination address of the packet.
123 * @returns 1 if hairpinning, otherwise 0.
125 static_always_inline int
126 is_hairpinning (ip6_address_t * dst_addr)
128 nat64_main_t *nm = &nat64_main;
/* The IPv4 address embedded in the NAT64-prefixed destination occupies the
 * last 32 bits (as_u32[3]); hairpinning iff it matches a pool address. */
131 for (i = 0; i < vec_len (nm->addr_pool); i++)
133 if (nm->addr_pool[i].addr.as_u32 == dst_addr->as_u32[3])
/* Translate an inside IPv6 TCP/UDP packet to IPv4 in place: rewrite the
 * IPv6 header into an IPv4 header, look up (or create) the BIB and session
 * (ST) entries, rewrite addresses/ports and incrementally fix checksums.
 * NOTE(review): interior lines (braces, early-return error paths) are
 * missing from this extraction — do not modify logic from this view. */
141 nat64_in2out_tcp_udp (vlib_main_t * vm, vlib_buffer_t * p, u16 l4_offset,
142 u16 frag_hdr_offset, nat64_in2out_set_ctx_t * ctx)
150 nat64_main_t *nm = &nat64_main;
151 nat64_db_bib_entry_t *bibe;
152 nat64_db_st_entry_t *ste;
153 ip46_address_t old_saddr, old_daddr;
154 ip4_address_t new_daddr;
155 u32 sw_if_index, fib_index;
/* Protocol and ports come from the shallow-reassembly metadata so this
 * also works for the first fragment of a fragmented flow. */
156 u8 proto = vnet_buffer (p)->ip.reass.ip_proto;
157 u16 sport = vnet_buffer (p)->ip.reass.l4_src_port;
158 u16 dport = vnet_buffer (p)->ip.reass.l4_dst_port;
159 nat64_db_t *db = &nm->db[ctx->thread_index];
161 ip6 = vlib_buffer_get_current (p);
/* Advance so the (smaller) IPv4 header ends exactly where L4 starts; the
 * IPv4 header is then built over the tail of the old IPv6 header. */
163 vlib_buffer_advance (p, l4_offset - sizeof (*ip4));
164 ip4 = vlib_buffer_get_current (p);
/* Save IPv6 fields before the overlapping IPv4 header overwrites them. */
166 u32 ip_version_traffic_class_and_flow_label =
167 ip6->ip_version_traffic_class_and_flow_label;
168 u16 payload_length = ip6->payload_length;
169 u8 hop_limit = ip6->hop_limit;
171 old_saddr.as_u64[0] = ip6->src_address.as_u64[0];
172 old_saddr.as_u64[1] = ip6->src_address.as_u64[1];
173 old_daddr.as_u64[0] = ip6->dst_address.as_u64[0];
174 old_daddr.as_u64[1] = ip6->dst_address.as_u64[1];
176 if (PREDICT_FALSE (frag_hdr_offset))
178 //Only the first fragment
179 ip6_frag_hdr_t *hdr =
180 (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset);
181 fragment_id = frag_id_6to4 (hdr->identification);
182 frag_more = ip6_frag_hdr_more (hdr);
183 frag_offset = ip6_frag_hdr_offset (hdr);
/* Build the translated IPv4 header (RFC 7915 field mapping). */
192 ip4->ip_version_and_header_length =
193 IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
194 ip4->tos = ip6_translate_tos (ip_version_traffic_class_and_flow_label);
/* IPv4 total length = IPv6 payload + IPv4 header - extension headers. */
196 u16_net_add (payload_length, sizeof (*ip4) + sizeof (*ip6) - l4_offset);
197 ip4->fragment_id = fragment_id;
198 ip4->flags_and_fragment_offset =
199 clib_host_to_net_u16 (frag_offset |
200 (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0));
201 ip4->ttl = hop_limit;
202 ip4->protocol = (proto == IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : proto;
204 sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
206 fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
/* Fast path: reuse an existing session entry if one exists. */
209 nat64_db_st_entry_find (db, &old_saddr, &old_daddr, sport, dport, proto,
214 bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
/* Slow path: find/create the BIB entry, then create the session. */
221 nat64_db_bib_entry_find (db, &old_saddr, sport, proto, fib_index, 1);
226 ip4_address_t out_addr;
227 if (nat64_alloc_out_addr_and_port
228 (fib_index, ip_proto_to_nat_proto (proto), &out_addr,
229 &out_port, ctx->thread_index))
233 nat64_db_bib_entry_create (ctx->thread_index, db,
234 &old_saddr.ip6, &out_addr, sport,
235 out_port, fib_index, proto, 0);
239 vlib_set_simple_counter (&nm->total_bibs, ctx->thread_index, 0,
240 db->bib.bib_entries_num);
/* Outside dest = IPv4 address embedded in the NAT64 prefix. */
243 nat64_extract_ip4 (&old_daddr.ip6, &new_daddr, fib_index);
245 nat64_db_st_entry_create (ctx->thread_index, db, bibe,
246 &old_daddr.ip6, &new_daddr, dport);
250 vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
251 db->st.st_entries_num);
254 ip4->src_address.as_u32 = bibe->out_addr.as_u32;
255 ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
257 ip4->checksum = ip4_header_checksum (ip4);
/* L4 rewrite only applies when the L4 header is present (first fragment). */
259 if (!vnet_buffer (p)->ip.reass.is_non_first_fragment)
261 udp_header_t *udp = (udp_header_t *) (ip4 + 1);
262 udp->src_port = bibe->out_port;
264 //UDP checksum is optional over IPv4
265 if (proto == IP_PROTOCOL_UDP)
/* TCP: incrementally update the checksum for the changed pseudo-header
 * addresses and source port, then apply MSS clamping. */
271 tcp_header_t *tcp = (tcp_header_t *) (ip4 + 1);
272 csum = ip_csum_sub_even (tcp->checksum, old_saddr.as_u64[0]);
273 csum = ip_csum_sub_even (csum, old_saddr.as_u64[1]);
274 csum = ip_csum_sub_even (csum, old_daddr.as_u64[0]);
275 csum = ip_csum_sub_even (csum, old_daddr.as_u64[1]);
276 csum = ip_csum_add_even (csum, ip4->dst_address.as_u32);
277 csum = ip_csum_add_even (csum, ip4->src_address.as_u32);
278 csum = ip_csum_sub_even (csum, sport);
279 csum = ip_csum_add_even (csum, udp->src_port);
280 mss_clamping (nm->sm->mss_clamping, tcp, &csum);
281 tcp->checksum = ip_csum_fold (csum);
/* Drive the TCP state machine for session expiry (1 = in2out direction). */
283 nat64_tcp_session_set_state (ste, tcp, 1);
287 nat64_session_reset_timeout (ste, ctx->vm);
/* icmp6_to_icmp translation callback for the outer header of an ICMP
 * packet: maps echo request/reply IDs through the ICMP BIB/session tables
 * and fills in the translated IPv4 source/destination addresses.
 * NOTE(review): interior error-return lines are missing from this view. */
293 nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
295 nat64_main_t *nm = &nat64_main;
296 nat64_in2out_set_ctx_t *ctx = arg;
297 nat64_db_bib_entry_t *bibe;
298 nat64_db_st_entry_t *ste;
299 ip46_address_t saddr, daddr;
300 u32 sw_if_index, fib_index;
301 icmp46_header_t *icmp = ip6_next_header (ip6);
302 nat64_db_t *db = &nm->db[ctx->thread_index];
304 sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
306 fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
308 saddr.as_u64[0] = ip6->src_address.as_u64[0];
309 saddr.as_u64[1] = ip6->src_address.as_u64[1];
310 daddr.as_u64[0] = ip6->dst_address.as_u64[0];
311 daddr.as_u64[1] = ip6->dst_address.as_u64[1];
/* Echo request/reply: the identifier (2nd u16 after the ICMP header)
 * plays the role of a port in the BIB. */
313 if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply)
315 u16 in_id = ((u16 *) (icmp))[2];
317 nat64_db_st_entry_find (db, &saddr, &daddr, in_id, 0,
318 IP_PROTOCOL_ICMP, fib_index, 1);
323 nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP,
/* No session yet: find/create a BIB entry and allocate an outside ID. */
331 nat64_db_bib_entry_find (db, &saddr, in_id,
332 IP_PROTOCOL_ICMP, fib_index, 1);
337 ip4_address_t out_addr;
338 if (nat64_alloc_out_addr_and_port
339 (fib_index, NAT_PROTOCOL_ICMP, &out_addr, &out_id,
344 nat64_db_bib_entry_create (ctx->thread_index, db,
345 &ip6->src_address, &out_addr,
346 in_id, out_id, fib_index,
347 IP_PROTOCOL_ICMP, 0);
351 vlib_set_simple_counter (&nm->total_bibs, ctx->thread_index, 0,
352 db->bib.bib_entries_num);
355 nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
357 nat64_db_st_entry_create (ctx->thread_index, db, bibe,
358 &ip6->dst_address, &daddr.ip4, 0);
362 vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
363 db->st.st_entries_num);
366 nat64_session_reset_timeout (ste, ctx->vm);
/* Rewrite outer addresses and the echo identifier. */
368 ip4->src_address.as_u32 = bibe->out_addr.as_u32;
369 ((u16 *) (icmp))[2] = bibe->out_port;
371 ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
/* Non-echo ICMP (errors): no session state — use the first pool address
 * as source and extract the embedded IPv4 destination. */
375 if (!vec_len (nm->addr_pool))
378 ip4->src_address.as_u32 = nm->addr_pool[0].addr.as_u32;
379 nat64_extract_ip4 (&ip6->dst_address, &ip4->dst_address, fib_index);
/* Translation callback for the INNER packet embedded in an ICMPv6 error:
 * looks up the existing session in the reverse direction (daddr/saddr
 * swapped, since the embedded packet travelled out2in) and rewrites the
 * inner addresses/ports accordingly. Never creates new state.
 * NOTE(review): interior error-return lines are missing from this view. */
386 nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
389 nat64_main_t *nm = &nat64_main;
390 nat64_in2out_set_ctx_t *ctx = arg;
391 nat64_db_st_entry_t *ste;
392 nat64_db_bib_entry_t *bibe;
393 ip46_address_t saddr, daddr;
394 u32 sw_if_index, fib_index;
395 u8 proto = ip6->protocol;
396 nat64_db_t *db = &nm->db[ctx->thread_index];
398 sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
400 fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
402 saddr.as_u64[0] = ip6->src_address.as_u64[0];
403 saddr.as_u64[1] = ip6->src_address.as_u64[1];
404 daddr.as_u64[0] = ip6->dst_address.as_u64[0];
405 daddr.as_u64[1] = ip6->dst_address.as_u64[1];
407 if (proto == IP_PROTOCOL_ICMP6)
409 icmp46_header_t *icmp = ip6_next_header (ip6);
410 u16 in_id = ((u16 *) (icmp))[2];
411 proto = IP_PROTOCOL_ICMP;
414 (icmp->type == ICMP4_echo_request
415 || icmp->type == ICMP4_echo_reply))
/* Reverse lookup: the inner packet's src/dst are swapped relative to
 * the session's in2out orientation. */
419 nat64_db_st_entry_find (db, &daddr, &saddr, in_id, 0, proto,
424 bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
428 ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
429 ((u16 *) (icmp))[2] = bibe->out_port;
430 ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
/* TCP/UDP inner packet: both header layouts start with src/dst port,
 * so the udp pointer is safe for port access in either case. */
434 udp_header_t *udp = ip6_next_header (ip6);
435 tcp_header_t *tcp = ip6_next_header (ip6);
439 u16 sport = udp->src_port;
440 u16 dport = udp->dst_port;
443 nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
448 bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
452 ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
453 udp->dst_port = bibe->out_port;
454 ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
/* Incrementally patch the inner L4 checksum for the changed dst port. */
456 if (proto == IP_PROTOCOL_TCP)
457 checksum = &tcp->checksum;
459 checksum = &udp->checksum;
460 csum = ip_csum_sub_even (*checksum, dport);
461 csum = ip_csum_add_even (csum, udp->dst_port);
462 *checksum = ip_csum_fold (csum);
/* Walk context for unk_proto_st_walk: in/out state used when choosing an
 * outside address for an unknown-protocol flow by reusing the address of
 * an existing TCP/UDP session to the same endpoints. out_addr is the
 * result (0 = not found). Remaining members are outside this extraction. */
468 typedef struct unk_proto_st_walk_ctx_t_
470 ip6_address_t src_addr;
471 ip6_address_t dst_addr;
472 ip4_address_t out_addr;
476 } unk_proto_st_walk_ctx_t;
/* Session-table walk callback: if this session matches the ctx's inside
 * src/dst pair and fib, and its outside address is not already taken for
 * ctx->proto, record that outside address in ctx->out_addr.
 * NOTE(review): the walk-control return values are outside this view. */
479 unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg)
481 nat64_main_t *nm = &nat64_main;
482 unk_proto_st_walk_ctx_t *ctx = arg;
483 nat64_db_bib_entry_t *bibe;
484 ip46_address_t saddr, daddr;
485 nat64_db_t *db = &nm->db[ctx->thread_index];
487 if (ip6_address_is_equal (&ste->in_r_addr, &ctx->dst_addr))
489 bibe = nat64_db_bib_entry_by_index (db, ste->proto, ste->bibe_index);
493 if (ip6_address_is_equal (&bibe->in_addr, &ctx->src_addr)
494 && bibe->fib_index == ctx->fib_index)
496 clib_memset (&saddr, 0, sizeof (saddr));
497 saddr.ip4.as_u32 = bibe->out_addr.as_u32;
498 clib_memset (&daddr, 0, sizeof (daddr));
499 nat64_extract_ip4 (&ctx->dst_addr, &daddr.ip4, ctx->fib_index);
/* Skip this candidate if the outside tuple already collides for the
 * unknown protocol. */
501 if (nat64_db_st_entry_find
502 (db, &daddr, &saddr, 0, 0, ctx->proto, ctx->fib_index, 0))
505 ctx->out_addr.as_u32 = bibe->out_addr.as_u32;
/* Translate an inside IPv6 packet of an unknown (non-TCP/UDP/ICMP) L4
 * protocol to IPv4. Unknown protocols have no ports, so the BIB/session
 * is keyed on addresses only (ports 0); the outside address is chosen to
 * match an existing TCP/UDP session to the same destination when possible.
 * NOTE(review): interior lines (braces, error returns) are missing here. */
514 nat64_in2out_unk_proto (vlib_main_t * vm, vlib_buffer_t * p, u8 l4_protocol,
515 u16 l4_offset, u16 frag_hdr_offset,
516 nat64_in2out_set_ctx_t * s_ctx)
524 ip6 = vlib_buffer_get_current (p);
/* IPv4 header is built overlapping the tail of the old IPv6 header. */
526 ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4));
528 vlib_buffer_advance (p, l4_offset - sizeof (*ip4));
530 if (PREDICT_FALSE (frag_hdr_offset))
532 //Only the first fragment
533 ip6_frag_hdr_t *hdr =
534 (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset);
535 fragment_id = frag_id_6to4 (hdr->identification);
536 frag_offset = ip6_frag_hdr_offset (hdr);
537 frag_more = ip6_frag_hdr_more (hdr);
546 nat64_main_t *nm = &nat64_main;
547 nat64_db_bib_entry_t *bibe;
548 nat64_db_st_entry_t *ste;
549 ip46_address_t saddr, daddr, addr;
550 u32 sw_if_index, fib_index;
552 nat64_db_t *db = &nm->db[s_ctx->thread_index];
554 sw_if_index = vnet_buffer (s_ctx->b)->sw_if_index[VLIB_RX];
556 fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
558 saddr.as_u64[0] = ip6->src_address.as_u64[0];
559 saddr.as_u64[1] = ip6->src_address.as_u64[1];
560 daddr.as_u64[0] = ip6->dst_address.as_u64[0];
561 daddr.as_u64[1] = ip6->dst_address.as_u64[1];
/* Port-less lookup (0/0) — one session per address pair and protocol. */
564 nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, l4_protocol, fib_index,
569 bibe = nat64_db_bib_entry_by_index (db, l4_protocol, ste->bibe_index);
576 nat64_db_bib_entry_find (db, &saddr, 0, l4_protocol, fib_index, 1);
580 /* Choose same out address as for TCP/UDP session to same dst */
581 unk_proto_st_walk_ctx_t ctx = {
582 .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
583 .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
584 .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
585 .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
586 .out_addr.as_u32 = 0,
587 .fib_index = fib_index,
588 .proto = l4_protocol,
589 .thread_index = s_ctx->thread_index,
592 nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);
594 if (!ctx.out_addr.as_u32)
595 nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);
597 /* Verify if out address is not already in use for protocol */
598 clib_memset (&addr, 0, sizeof (addr));
599 addr.ip4.as_u32 = ctx.out_addr.as_u32;
600 if (nat64_db_bib_entry_find (db, &addr, 0, l4_protocol, 0, 0))
601 ctx.out_addr.as_u32 = 0;
/* Fall back to the first pool address with no BIB entry for this proto. */
603 if (!ctx.out_addr.as_u32)
605 for (i = 0; i < vec_len (nm->addr_pool); i++)
607 addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
608 if (!nat64_db_bib_entry_find
609 (db, &addr, 0, l4_protocol, 0, 0))
614 if (!ctx.out_addr.as_u32)
618 nat64_db_bib_entry_create (s_ctx->thread_index, db,
619 &ip6->src_address, &ctx.out_addr,
620 0, 0, fib_index, l4_protocol, 0);
624 vlib_set_simple_counter (&nm->total_bibs, s_ctx->thread_index, 0,
625 db->bib.bib_entries_num);
628 nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
630 nat64_db_st_entry_create (s_ctx->thread_index, db, bibe,
631 &ip6->dst_address, &daddr.ip4, 0);
635 vlib_set_simple_counter (&nm->total_sessions, s_ctx->thread_index, 0,
636 db->st.st_entries_num);
639 nat64_session_reset_timeout (ste, s_ctx->vm);
641 ip4->src_address.as_u32 = bibe->out_addr.as_u32;
642 ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
/* Build the IPv4 header (RFC 7915 mapping); L4 payload is left as-is. */
644 ip4->ip_version_and_header_length =
645 IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
646 ip4->tos = ip6_translate_tos (ip6->ip_version_traffic_class_and_flow_label);
647 ip4->length = u16_net_add (ip6->payload_length,
648 sizeof (*ip4) + sizeof (*ip6) - l4_offset);
649 ip4->fragment_id = fragment_id;
650 ip4->flags_and_fragment_offset =
651 clib_host_to_net_u16 (frag_offset |
652 (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0));
653 ip4->ttl = ip6->hop_limit;
654 ip4->protocol = l4_protocol;
655 ip4->checksum = ip4_header_checksum (ip4);
/* Hairpinning for TCP/UDP: the destination is one of our own pool
 * addresses, so the packet stays IPv6 — translate src to the NAT64-prefix
 * form of the outside address, and resolve dst back to the inside IPv6
 * address of the target BIB entry (searched across all per-thread DBs).
 * NOTE(review): interior lines (error returns, braces) are missing here. */
661 nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
662 ip6_header_t * ip6, u32 l4_offset,
665 nat64_main_t *nm = &nat64_main;
666 nat64_db_bib_entry_t *bibe;
667 nat64_db_st_entry_t *ste;
668 ip46_address_t saddr, daddr;
669 u32 sw_if_index, fib_index;
/* TCP and UDP headers share the leading src/dst port layout. */
670 udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, l4_offset);
671 tcp_header_t *tcp = (tcp_header_t *) u8_ptr_add (ip6, l4_offset);
672 u8 proto = vnet_buffer (b)->ip.reass.ip_proto;
673 u16 sport = vnet_buffer (b)->ip.reass.l4_src_port;
674 u16 dport = vnet_buffer (b)->ip.reass.l4_dst_port;
675 u16 *checksum = NULL;
677 nat64_db_t *db = &nm->db[thread_index];
679 sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
681 fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
683 saddr.as_u64[0] = ip6->src_address.as_u64[0];
684 saddr.as_u64[1] = ip6->src_address.as_u64[1];
685 daddr.as_u64[0] = ip6->dst_address.as_u64[0];
686 daddr.as_u64[1] = ip6->dst_address.as_u64[1];
/* Start the incremental checksum delta by removing the old addresses;
 * only the first fragment carries the L4 header. */
688 if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
690 if (proto == IP_PROTOCOL_UDP)
691 checksum = &udp->checksum;
693 checksum = &tcp->checksum;
694 csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
695 csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
696 csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
697 csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
701 nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
706 bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
712 bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1);
717 ip4_address_t out_addr;
718 if (nat64_alloc_out_addr_and_port
719 (fib_index, ip_proto_to_nat_proto (proto), &out_addr,
720 &out_port, thread_index))
724 nat64_db_bib_entry_create (thread_index, db, &ip6->src_address,
725 &out_addr, sport, out_port, fib_index,
730 vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
731 db->bib.bib_entries_num);
734 nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
736 nat64_db_st_entry_create (thread_index, db, bibe, &ip6->dst_address,
741 vlib_set_simple_counter (&nm->total_sessions, thread_index, 0,
742 db->st.st_entries_num);
745 if (proto == IP_PROTOCOL_TCP)
746 nat64_tcp_session_set_state (ste, tcp, 1);
748 nat64_session_reset_timeout (ste, vm);
750 if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
752 udp->src_port = bibe->out_port;
/* New src = NAT64-prefix-composed form of the outside address. */
755 nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);
757 clib_memset (&daddr, 0, sizeof (daddr));
758 daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
/* Destination BIB may live in any worker's DB — search them all. */
762 vec_foreach (db, nm->db)
764 bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, 0, 0);
774 ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
775 ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
/* Finish the checksum delta with the new addresses and ports. */
777 if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
779 csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
780 csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
781 csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
782 csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
783 csum = ip_csum_sub_even (csum, sport);
784 csum = ip_csum_sub_even (csum, dport);
785 udp->dst_port = bibe->in_port;
786 csum = ip_csum_add_even (csum, udp->src_port);
787 csum = ip_csum_add_even (csum, udp->dst_port);
788 *checksum = ip_csum_fold (csum);
/* Hairpinning for ICMPv6 error messages: translates the embedded (inner)
 * packet's addresses/ports through the session tables, rewrites the outer
 * IPv6 header, and recomputes the ICMPv6 checksum from scratch over the
 * IPv6 pseudo-header.
 * NOTE(review): interior lines (error returns, braces) are missing here. */
795 nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
796 ip6_header_t * ip6, u32 thread_index)
798 nat64_main_t *nm = &nat64_main;
799 nat64_db_bib_entry_t *bibe;
800 nat64_db_st_entry_t *ste;
801 icmp46_header_t *icmp = ip6_next_header (ip6);
802 ip6_header_t *inner_ip6;
803 ip46_address_t saddr, daddr;
804 u32 sw_if_index, fib_index;
808 u16 *checksum, sport, dport;
810 nat64_db_t *db = &nm->db[thread_index];
/* Echo request/reply is handled elsewhere; this path is for errors only. */
812 if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
/* Embedded original packet starts 8 bytes after the ICMP header. */
815 inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
817 proto = inner_ip6->protocol;
819 if (proto == IP_PROTOCOL_ICMP6)
822 sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
824 fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
826 saddr.as_u64[0] = inner_ip6->src_address.as_u64[0];
827 saddr.as_u64[1] = inner_ip6->src_address.as_u64[1];
828 daddr.as_u64[0] = inner_ip6->dst_address.as_u64[0];
829 daddr.as_u64[1] = inner_ip6->dst_address.as_u64[1];
831 udp = ip6_next_header (inner_ip6);
832 tcp = ip6_next_header (inner_ip6);
834 sport = udp->src_port;
835 dport = udp->dst_port;
837 if (proto == IP_PROTOCOL_UDP)
838 checksum = &udp->checksum;
840 checksum = &tcp->checksum;
/* Incremental delta for the inner L4 checksum: drop old addrs/ports. */
842 csum = ip_csum_sub_even (*checksum, inner_ip6->src_address.as_u64[0]);
843 csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]);
844 csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]);
845 csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]);
846 csum = ip_csum_sub_even (csum, sport);
847 csum = ip_csum_sub_even (csum, dport);
/* Inner packet travelled out2in, so the lookup is address-swapped. */
850 nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
855 bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
859 dport = udp->dst_port = bibe->out_port;
860 nat64_compose_ip6 (&inner_ip6->dst_address, &bibe->out_addr, fib_index);
862 clib_memset (&saddr, 0, sizeof (saddr));
863 clib_memset (&daddr, 0, sizeof (daddr));
864 saddr.ip4.as_u32 = ste->out_r_addr.as_u32;
865 daddr.ip4.as_u32 = bibe->out_addr.as_u32;
/* The matching reverse session may live in any worker's DB. */
869 vec_foreach (db, nm->db)
871 ste = nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
882 bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
886 inner_ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
887 inner_ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
888 udp->src_port = bibe->in_port;
890 csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]);
891 csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]);
892 csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]);
893 csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]);
894 csum = ip_csum_add_even (csum, udp->src_port);
895 csum = ip_csum_add_even (csum, udp->dst_port);
896 *checksum = ip_csum_fold (csum);
898 if (!vec_len (nm->addr_pool))
/* Outer header: src = prefix-composed first pool address, dst = the
 * translated inner source. */
901 nat64_compose_ip6 (&ip6->src_address, &nm->addr_pool[0].addr, fib_index);
902 ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
903 ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];
/* Full ICMPv6 checksum recompute over the IPv6 pseudo-header. */
906 csum = ip_csum_with_carry (0, ip6->payload_length);
907 csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol));
908 csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]);
909 csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]);
910 csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]);
911 csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]);
913 ip_incremental_checksum (csum, icmp,
914 clib_net_to_host_u16 (ip6->payload_length));
915 icmp->checksum = ~ip_csum_fold (csum);
/* Hairpinning for unknown (non-TCP/UDP/ICMP) protocols: port-less BIB and
 * session handling, outside-address selection identical in spirit to
 * nat64_in2out_unk_proto, then src/dst rewritten in the IPv6 domain.
 * NOTE(review): interior lines (error returns, braces) are missing here. */
921 nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
922 ip6_header_t * ip6, u32 thread_index)
924 nat64_main_t *nm = &nat64_main;
925 nat64_db_bib_entry_t *bibe;
926 nat64_db_st_entry_t *ste;
927 ip46_address_t saddr, daddr, addr;
928 u32 sw_if_index, fib_index;
929 u8 proto = ip6->protocol;
931 nat64_db_t *db = &nm->db[thread_index];
933 sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
935 fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
937 saddr.as_u64[0] = ip6->src_address.as_u64[0];
938 saddr.as_u64[1] = ip6->src_address.as_u64[1];
939 daddr.as_u64[0] = ip6->dst_address.as_u64[0];
940 daddr.as_u64[1] = ip6->dst_address.as_u64[1];
/* Port-less lookup (0/0) — one session per address pair and protocol. */
943 nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1);
947 bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
953 bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1);
957 /* Choose same out address as for TCP/UDP session to same dst */
958 unk_proto_st_walk_ctx_t ctx = {
959 .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
960 .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
961 .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
962 .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
963 .out_addr.as_u32 = 0,
964 .fib_index = fib_index,
966 .thread_index = thread_index,
969 nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);
971 if (!ctx.out_addr.as_u32)
972 nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);
974 /* Verify if out address is not already in use for protocol */
975 clib_memset (&addr, 0, sizeof (addr));
976 addr.ip4.as_u32 = ctx.out_addr.as_u32;
977 if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
978 ctx.out_addr.as_u32 = 0;
/* Fall back to the first pool address with no BIB entry for this proto. */
980 if (!ctx.out_addr.as_u32)
982 for (i = 0; i < vec_len (nm->addr_pool); i++)
984 addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
985 if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
990 if (!ctx.out_addr.as_u32)
994 nat64_db_bib_entry_create (thread_index, db, &ip6->src_address,
995 &ctx.out_addr, 0, 0, fib_index, proto,
1000 vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
1001 db->bib.bib_entries_num);
1004 nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
1006 nat64_db_st_entry_create (thread_index, db, bibe, &ip6->dst_address,
1011 vlib_set_simple_counter (&nm->total_sessions, thread_index, 0,
1012 db->st.st_entries_num);
1015 nat64_session_reset_timeout (ste, vm);
/* New src = NAT64-prefix-composed form of the outside address. */
1017 nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);
1019 clib_memset (&daddr, 0, sizeof (daddr));
1020 daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
/* Destination BIB may live in any worker's DB — search them all. */
1024 vec_foreach (db, nm->db)
1026 bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, 0, 0);
1036 ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
1037 ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
/* Shared dispatch loop for the fast-path (is_slow_path = 0) and slow-path
 * (is_slow_path = 1) nodes. Fast path punts unknown protocols to the
 * slow-path node; hairpinned packets are re-injected via ip6-lookup;
 * translated packets continue to ip4-lookup.
 * NOTE(review): interior lines (brace structure, some counters) are
 * missing from this extraction — do not edit logic from this view. */
1043 nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
1044 vlib_frame_t * frame, u8 is_slow_path)
1046 u32 n_left_from, *from, *to_next;
1047 nat64_in2out_next_t next_index;
1048 u32 pkts_processed = 0;
1049 u32 stats_node_index;
1050 u32 thread_index = vm->thread_index;
1051 nat64_main_t *nm = &nat64_main;
1053 u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets =
/* Counters are attributed to whichever node variant is running. */
1057 is_slow_path ? nm->in2out_slowpath_node_index : nm->in2out_node_index;
1059 from = vlib_frame_vector_args (frame);
1060 n_left_from = frame->n_vectors;
1061 next_index = node->cached_next_index;
1063 while (n_left_from > 0)
1067 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1069 while (n_left_from > 0 && n_left_to_next > 0)
1075 u16 l4_offset0, frag_hdr_offset0;
1078 nat64_in2out_set_ctx_t ctx0;
1081 /* speculatively enqueue b0 to the current next frame */
1087 n_left_to_next -= 1;
1089 b0 = vlib_get_buffer (vm, bi0);
1090 ip60 = vlib_buffer_get_current (b0);
1094 ctx0.thread_index = thread_index;
1096 next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;
/* Parse the IPv6 extension-header chain; failure => drop. */
1100 (vm, b0, ip60, b0->current_length, &l4_protocol0, &l4_offset0,
1101 &frag_hdr_offset0)))
1103 next0 = NAT64_IN2OUT_NEXT_DROP;
1104 b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
1108 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
/* Locally-addressed packets bypass translation entirely. */
1110 if (nat64_not_translate (sw_if_index0, ip60->dst_address))
1112 next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1116 proto0 = ip_proto_to_nat_proto (l4_protocol0);
/* Slow path: handles the unknown-protocol case punted below. */
1120 if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_OTHER))
1123 if (is_hairpinning (&ip60->dst_address))
1125 next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1126 if (nat64_in2out_unk_proto_hairpinning
1127 (vm, b0, ip60, thread_index))
1129 next0 = NAT64_IN2OUT_NEXT_DROP;
1131 node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1136 if (nat64_in2out_unk_proto
1137 (vm, b0, l4_protocol0, l4_offset0, frag_hdr_offset0,
1140 next0 = NAT64_IN2OUT_NEXT_DROP;
1142 node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
/* Fast path: punt unknown protocols to the slow-path node. */
1150 if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
1152 next0 = NAT64_IN2OUT_NEXT_SLOWPATH;
1157 if (proto0 == NAT_PROTOCOL_ICMP)
1160 if (is_hairpinning (&ip60->dst_address))
1162 next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1163 if (nat64_in2out_icmp_hairpinning
1164 (vm, b0, ip60, thread_index))
1166 next0 = NAT64_IN2OUT_NEXT_DROP;
1168 node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
/* Standard ICMP translation via icmp6_to_icmp with our callbacks. */
1174 (vm, b0, nat64_in2out_icmp_set_cb, &ctx0,
1175 nat64_in2out_inner_icmp_set_cb, &ctx0))
1177 next0 = NAT64_IN2OUT_NEXT_DROP;
1178 b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1182 else if (proto0 == NAT_PROTOCOL_TCP || proto0 == NAT_PROTOCOL_UDP)
1184 if (proto0 == NAT_PROTOCOL_TCP)
1189 if (is_hairpinning (&ip60->dst_address))
1191 next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1192 if (nat64_in2out_tcp_udp_hairpinning
1193 (vm, b0, ip60, l4_offset0, thread_index))
1195 next0 = NAT64_IN2OUT_NEXT_DROP;
1197 node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1202 if (nat64_in2out_tcp_udp
1203 (vm, b0, l4_offset0, frag_hdr_offset0, &ctx0))
1205 next0 = NAT64_IN2OUT_NEXT_DROP;
1206 b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1212 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1213 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1215 nat64_in2out_trace_t *t =
1216 vlib_add_trace (vm, node, b0, sizeof (*t));
1217 t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1218 t->next_index = next0;
1219 t->is_slow_path = is_slow_path;
1222 pkts_processed += next0 == NAT64_IN2OUT_NEXT_IP4_LOOKUP;
1224 /* verify speculative enqueue, maybe switch current next frame */
1225 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1226 n_left_to_next, bi0, next0);
1228 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* Flush the per-frame counters into the node error counters. */
1230 vlib_node_increment_counter (vm, stats_node_index,
1231 NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
1233 vlib_node_increment_counter (vm, stats_node_index,
1234 NAT64_IN2OUT_ERROR_TCP_PACKETS, tcp_packets);
1235 vlib_node_increment_counter (vm, stats_node_index,
1236 NAT64_IN2OUT_ERROR_UDP_PACKETS, udp_packets);
1237 vlib_node_increment_counter (vm, stats_node_index,
1238 NAT64_IN2OUT_ERROR_ICMP_PACKETS, icmp_packets);
1239 vlib_node_increment_counter (vm, stats_node_index,
1240 NAT64_IN2OUT_ERROR_OTHER_PACKETS,
1242 vlib_node_increment_counter (vm, stats_node_index,
1243 NAT64_IN2OUT_ERROR_FRAGMENTS, fragments);
1245 return frame->n_vectors;
/* Fast-path entry point: shared loop with is_slow_path = 0. */
1248 VLIB_NODE_FN (nat64_in2out_node) (vlib_main_t * vm,
1249 vlib_node_runtime_t * node,
1250 vlib_frame_t * frame)
1252 return nat64_in2out_node_fn_inline (vm, node, frame, 0);
/* Graph-node registration for the fast path; next-node order must match
 * the nat64_in2out_next_t enum. */
1256 VLIB_REGISTER_NODE (nat64_in2out_node) = {
1257 .name = "nat64-in2out",
1258 .vector_size = sizeof (u32),
1259 .format_trace = format_nat64_in2out_trace,
1260 .type = VLIB_NODE_TYPE_INTERNAL,
1261 .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
1262 .error_strings = nat64_in2out_error_strings,
1263 .n_next_nodes = NAT64_IN2OUT_N_NEXT,
1264 /* edit / add dispositions here */
1266 [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
1267 [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
1268 [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
1269 [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
/* Slow-path entry point: shared loop with is_slow_path = 1 (handles
 * unknown protocols punted from the fast path). */
1274 VLIB_NODE_FN (nat64_in2out_slowpath_node) (vlib_main_t * vm,
1275 vlib_node_runtime_t * node,
1276 vlib_frame_t * frame)
1278 return nat64_in2out_node_fn_inline (vm, node, frame, 1);
/* Graph-node registration for the slow path; mirrors the fast-path node. */
1282 VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
1283 .name = "nat64-in2out-slowpath",
1284 .vector_size = sizeof (u32),
1285 .format_trace = format_nat64_in2out_trace,
1286 .type = VLIB_NODE_TYPE_INTERNAL,
1287 .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
1288 .error_strings = nat64_in2out_error_strings,
1289 .n_next_nodes = NAT64_IN2OUT_N_NEXT,
1290 /* edit / add dispositions here */
1292 [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
1293 [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
1294 [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
1295 [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
/* Fragment-handling context; members are outside this extraction.
 * NOTE(review): appears unused in the visible portion of the file. */
1300 typedef struct nat64_in2out_frag_set_ctx_t_
1308 } nat64_in2out_frag_set_ctx_t;
/* Error/counter catalogue for the worker-handoff node, expanded into the
 * enum and string table just as for the in2out nodes above. */
1311 #define foreach_nat64_in2out_handoff_error \
1312 _(CONGESTION_DROP, "congestion drop") \
1313 _(SAME_WORKER, "same worker") \
1314 _(DO_HANDOFF, "do handoff")
1318 #define _(sym,str) NAT64_IN2OUT_HANDOFF_ERROR_##sym,
1319 foreach_nat64_in2out_handoff_error
1321 NAT64_IN2OUT_HANDOFF_N_ERROR,
1322 } nat64_in2out_handoff_error_t;
1324 static char *nat64_in2out_handoff_error_strings[] = {
1325 #define _(sym,string) string,
1326 foreach_nat64_in2out_handoff_error
/* Handoff trace record (struct head is outside this extraction) and its
 * formatter: prints the worker thread the packet was handed to. */
1332 u32 next_worker_index;
1333 } nat64_in2out_handoff_trace_t;
1336 format_nat64_in2out_handoff_trace (u8 * s, va_list * args)
1338 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1339 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1340 nat64_in2out_handoff_trace_t *t =
1341 va_arg (*args, nat64_in2out_handoff_trace_t *);
1344 format (s, "NAT64-IN2OUT-HANDOFF: next-worker %d", t->next_worker_index);
/* Worker-handoff node: computes the owning worker for each packet from
 * its IPv6 source address and enqueues the frame to the per-worker
 * frame queue; counts same-worker vs handed-off vs congestion-dropped.
 * NOTE(review): interior loop-advance lines are missing from this view. */
1349 VLIB_NODE_FN (nat64_in2out_handoff_node) (vlib_main_t * vm,
1350 vlib_node_runtime_t * node,
1351 vlib_frame_t * frame)
1353 nat64_main_t *nm = &nat64_main;
1354 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1355 u32 n_enq, n_left_from, *from;
1356 u16 thread_indices[VLIB_FRAME_SIZE], *ti;
1358 u32 thread_index = vm->thread_index;
1359 u32 do_handoff = 0, same_worker = 0;
1361 from = vlib_frame_vector_args (frame);
1362 n_left_from = frame->n_vectors;
1363 vlib_get_buffers (vm, from, bufs, n_left_from);
1366 ti = thread_indices;
1368 fq_index = nm->fq_in2out_index;
1370 while (n_left_from > 0)
1374 ip0 = vlib_buffer_get_current (b[0]);
/* Owning worker is derived from the inside (IPv6 source) address. */
1375 ti[0] = nat64_get_worker_in2out (&ip0->src_address);
1377 if (ti[0] != thread_index)
1383 ((node->flags & VLIB_NODE_FLAG_TRACE)
1384 && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
1386 nat64_in2out_handoff_trace_t *t =
1387 vlib_add_trace (vm, node, b[0], sizeof (*t));
1388 t->next_worker_index = ti[0];
/* drop_on_congestion = 1: excess packets are dropped, not queued. */
1397 vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
1398 frame->n_vectors, 1);
1400 if (n_enq < frame->n_vectors)
1401 vlib_node_increment_counter (vm, node->node_index,
1402 NAT64_IN2OUT_HANDOFF_ERROR_CONGESTION_DROP,
1403 frame->n_vectors - n_enq);
1404 vlib_node_increment_counter (vm, node->node_index,
1405 NAT64_IN2OUT_HANDOFF_ERROR_SAME_WORKER,
1407 vlib_node_increment_counter (vm, node->node_index,
1408 NAT64_IN2OUT_HANDOFF_ERROR_DO_HANDOFF,
1411 return frame->n_vectors;
/* Graph-node registration for the handoff node; next nodes and the tail
 * of the initializer are outside this extraction. */
1415 VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = {
1416 .name = "nat64-in2out-handoff",
1417 .vector_size = sizeof (u32),
1418 .format_trace = format_nat64_in2out_handoff_trace,
1419 .type = VLIB_NODE_TYPE_INTERNAL,
1420 .n_errors = ARRAY_LEN(nat64_in2out_handoff_error_strings),
1421 .error_strings = nat64_in2out_handoff_error_strings,
1432 * fd.io coding-style-patch-verification: ON
1435 * eval: (c-set-style "gnu")