372931c7e4ba8f90a8b50f8fcf35a42c38e2be38
[vpp.git] / src / plugins / nat / nat64_in2out.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT64 IPv6 to IPv4 translation (inside to outside network)
18  */
19
20 #include <nat/nat64.h>
21 #include <nat/nat_reass.h>
22 #include <nat/nat_inlines.h>
23 #include <vnet/ip/ip6_to_ip4.h>
24 #include <vnet/fib/fib_table.h>
25
26 typedef struct
27 {
28   u32 sw_if_index;
29   u32 next_index;
30   u8 is_slow_path;
31 } nat64_in2out_trace_t;
32
33 static u8 *
34 format_nat64_in2out_trace (u8 * s, va_list * args)
35 {
36   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
37   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
38   nat64_in2out_trace_t *t = va_arg (*args, nat64_in2out_trace_t *);
39   char *tag;
40
41   tag = t->is_slow_path ? "NAT64-in2out-slowpath" : "NAT64-in2out";
42
43   s =
44     format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
45             t->next_index);
46
47   return s;
48 }
49
50 typedef struct
51 {
52   u32 sw_if_index;
53   u32 next_index;
54   u8 cached;
55 } nat64_in2out_reass_trace_t;
56
57 static u8 *
58 format_nat64_in2out_reass_trace (u8 * s, va_list * args)
59 {
60   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
61   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
62   nat64_in2out_reass_trace_t *t =
63     va_arg (*args, nat64_in2out_reass_trace_t *);
64
65   s =
66     format (s, "NAT64-in2out-reass: sw_if_index %d, next index %d, status %s",
67             t->sw_if_index, t->next_index,
68             t->cached ? "cached" : "translated");
69
70   return s;
71 }
72
73 vlib_node_registration_t nat64_in2out_node;
74 vlib_node_registration_t nat64_in2out_slowpath_node;
75 vlib_node_registration_t nat64_in2out_reass_node;
76 vlib_node_registration_t nat64_in2out_handoff_node;
77
/*
 * X-macro list of (symbol, description) pairs.  It is expanded twice below:
 * once into the nat64_in2out_error_t enumerators and once into the matching
 * description strings, so the order of the entries must not change.
 */
#define foreach_nat64_in2out_error                              \
  _(UNSUPPORTED_PROTOCOL, "unsupported protocol")               \
  _(IN2OUT_PACKETS, "good in2out packets processed")            \
  _(NO_TRANSLATION, "no translation")                           \
  _(UNKNOWN, "unknown")                                         \
  _(DROP_FRAGMENT, "drop fragment")                             \
  _(MAX_REASS, "maximum reassemblies exceeded")                 \
  _(MAX_FRAG, "maximum fragments per reassembly exceeded")      \
  _(TCP_PACKETS, "TCP packets")                                 \
  _(UDP_PACKETS, "UDP packets")                                 \
  _(ICMP_PACKETS, "ICMP packets")                               \
  _(OTHER_PACKETS, "other protocol packets")                    \
  _(FRAGMENTS, "fragments")                                     \
  _(CACHED_FRAGMENTS, "cached fragments")                       \
  _(PROCESSED_FRAGMENTS, "processed fragments")
93
94
95 typedef enum
96 {
97 #define _(sym,str) NAT64_IN2OUT_ERROR_##sym,
98   foreach_nat64_in2out_error
99 #undef _
100     NAT64_IN2OUT_N_ERROR,
101 } nat64_in2out_error_t;
102
103 static char *nat64_in2out_error_strings[] = {
104 #define _(sym,string) string,
105   foreach_nat64_in2out_error
106 #undef _
107 };
108
/**
 * @brief Next-node indices used by the NAT64 in2out nodes.
 *
 * The enumerator order defines the numeric next indices and must be kept.
 */
typedef enum
{
  NAT64_IN2OUT_NEXT_IP4_LOOKUP,
  NAT64_IN2OUT_NEXT_IP6_LOOKUP,
  NAT64_IN2OUT_NEXT_DROP,
  NAT64_IN2OUT_NEXT_SLOWPATH,
  NAT64_IN2OUT_NEXT_REASS,
  /* Number of next indices; keep last */
  NAT64_IN2OUT_N_NEXT,
} nat64_in2out_next_t;
118
119 typedef struct nat64_in2out_set_ctx_t_
120 {
121   vlib_buffer_t *b;
122   vlib_main_t *vm;
123   u32 thread_index;
124 } nat64_in2out_set_ctx_t;
125
126 static inline u8
127 nat64_not_translate (u32 sw_if_index, ip6_address_t ip6_addr)
128 {
129   ip6_address_t *addr;
130   ip6_main_t *im6 = &ip6_main;
131   ip_lookup_main_t *lm6 = &im6->lookup_main;
132   ip_interface_address_t *ia = 0;
133
134   /* *INDENT-OFF* */
135   foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
136   ({
137         addr = ip_interface_address_get_address (lm6, ia);
138         if (0 == ip6_address_compare (addr, &ip6_addr))
139                 return 1;
140   }));
141   /* *INDENT-ON* */
142
143   return 0;
144 }
145
146 /**
147  * @brief Check whether is a hairpinning.
148  *
149  * If the destination IP address of the packet is an IPv4 address assigned to
150  * the NAT64 itself, then the packet is a hairpin packet.
151  *
152  * param dst_addr Destination address of the packet.
153  *
154  * @returns 1 if hairpinning, otherwise 0.
155  */
156 static_always_inline int
157 is_hairpinning (ip6_address_t * dst_addr)
158 {
159   nat64_main_t *nm = &nat64_main;
160   int i;
161
162   for (i = 0; i < vec_len (nm->addr_pool); i++)
163     {
164       if (nm->addr_pool[i].addr.as_u32 == dst_addr->as_u32[3])
165         return 1;
166     }
167
168   return 0;
169 }
170
171 static int
172 nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
173                              void *arg)
174 {
175   nat64_main_t *nm = &nat64_main;
176   nat64_in2out_set_ctx_t *ctx = arg;
177   nat64_db_bib_entry_t *bibe;
178   nat64_db_st_entry_t *ste;
179   ip46_address_t saddr, daddr;
180   u32 sw_if_index, fib_index;
181   udp_header_t *udp = ip6_next_header (ip6);
182   u8 proto = ip6->protocol;
183   u16 sport = udp->src_port;
184   u16 dport = udp->dst_port;
185   nat64_db_t *db = &nm->db[ctx->thread_index];
186
187   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
188   fib_index =
189     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
190
191   saddr.as_u64[0] = ip6->src_address.as_u64[0];
192   saddr.as_u64[1] = ip6->src_address.as_u64[1];
193   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
194   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
195
196   ste =
197     nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
198                             fib_index, 1);
199
200   if (ste)
201     {
202       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
203       if (!bibe)
204         return -1;
205     }
206   else
207     {
208       bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1);
209
210       if (!bibe)
211         {
212           u16 out_port;
213           ip4_address_t out_addr;
214           if (nat64_alloc_out_addr_and_port
215               (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
216                &out_port, ctx->thread_index))
217             return -1;
218
219           bibe =
220             nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr,
221                                        sport, out_port, fib_index, proto, 0);
222           if (!bibe)
223             return -1;
224         }
225
226       nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
227       ste =
228         nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
229                                   &daddr.ip4, dport);
230       if (!ste)
231         return -1;
232     }
233
234   ip4->src_address.as_u32 = bibe->out_addr.as_u32;
235   udp->src_port = bibe->out_port;
236
237   ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
238
239   if (proto == IP_PROTOCOL_TCP)
240     {
241       u16 *checksum;
242       ip_csum_t csum;
243       tcp_header_t *tcp = ip6_next_header (ip6);
244
245       nat64_tcp_session_set_state (ste, tcp, 1);
246       checksum = &tcp->checksum;
247       csum = ip_csum_sub_even (*checksum, sport);
248       csum = ip_csum_add_even (csum, udp->src_port);
249       mss_clamping (nm->sm, tcp, &csum);
250       *checksum = ip_csum_fold (csum);
251     }
252
253   nat64_session_reset_timeout (ste, ctx->vm);
254
255   return 0;
256 }
257
258 static int
259 nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
260 {
261   nat64_main_t *nm = &nat64_main;
262   nat64_in2out_set_ctx_t *ctx = arg;
263   nat64_db_bib_entry_t *bibe;
264   nat64_db_st_entry_t *ste;
265   ip46_address_t saddr, daddr;
266   u32 sw_if_index, fib_index;
267   icmp46_header_t *icmp = ip6_next_header (ip6);
268   nat64_db_t *db = &nm->db[ctx->thread_index];
269
270   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
271   fib_index =
272     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
273
274   saddr.as_u64[0] = ip6->src_address.as_u64[0];
275   saddr.as_u64[1] = ip6->src_address.as_u64[1];
276   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
277   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
278
279   if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply)
280     {
281       u16 in_id = ((u16 *) (icmp))[2];
282       ste =
283         nat64_db_st_entry_find (db, &saddr, &daddr, in_id, 0,
284                                 IP_PROTOCOL_ICMP, fib_index, 1);
285
286       if (ste)
287         {
288           bibe =
289             nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP,
290                                          ste->bibe_index);
291           if (!bibe)
292             return -1;
293         }
294       else
295         {
296           bibe =
297             nat64_db_bib_entry_find (db, &saddr, in_id,
298                                      IP_PROTOCOL_ICMP, fib_index, 1);
299
300           if (!bibe)
301             {
302               u16 out_id;
303               ip4_address_t out_addr;
304               if (nat64_alloc_out_addr_and_port
305                   (fib_index, SNAT_PROTOCOL_ICMP, &out_addr, &out_id,
306                    ctx->thread_index))
307                 return -1;
308
309               bibe =
310                 nat64_db_bib_entry_create (db, &ip6->src_address,
311                                            &out_addr, in_id, out_id,
312                                            fib_index, IP_PROTOCOL_ICMP, 0);
313               if (!bibe)
314                 return -1;
315             }
316
317           nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
318           ste =
319             nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
320                                       &daddr.ip4, 0);
321           if (!ste)
322             return -1;
323         }
324
325       nat64_session_reset_timeout (ste, ctx->vm);
326
327       ip4->src_address.as_u32 = bibe->out_addr.as_u32;
328       ((u16 *) (icmp))[2] = bibe->out_port;
329
330       ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
331     }
332   else
333     {
334       if (!vec_len (nm->addr_pool))
335         return -1;
336
337       ip4->src_address.as_u32 = nm->addr_pool[0].addr.as_u32;
338       nat64_extract_ip4 (&ip6->dst_address, &ip4->dst_address, fib_index);
339     }
340
341   return 0;
342 }
343
344 static int
345 nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
346                                 void *arg)
347 {
348   nat64_main_t *nm = &nat64_main;
349   nat64_in2out_set_ctx_t *ctx = arg;
350   nat64_db_st_entry_t *ste;
351   nat64_db_bib_entry_t *bibe;
352   ip46_address_t saddr, daddr;
353   u32 sw_if_index, fib_index;
354   u8 proto = ip6->protocol;
355   nat64_db_t *db = &nm->db[ctx->thread_index];
356
357   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
358   fib_index =
359     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
360
361   saddr.as_u64[0] = ip6->src_address.as_u64[0];
362   saddr.as_u64[1] = ip6->src_address.as_u64[1];
363   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
364   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
365
366   if (proto == IP_PROTOCOL_ICMP6)
367     {
368       icmp46_header_t *icmp = ip6_next_header (ip6);
369       u16 in_id = ((u16 *) (icmp))[2];
370       proto = IP_PROTOCOL_ICMP;
371
372       if (!
373           (icmp->type == ICMP4_echo_request
374            || icmp->type == ICMP4_echo_reply))
375         return -1;
376
377       ste =
378         nat64_db_st_entry_find (db, &daddr, &saddr, in_id, 0, proto,
379                                 fib_index, 1);
380       if (!ste)
381         return -1;
382
383       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
384       if (!bibe)
385         return -1;
386
387       ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
388       ((u16 *) (icmp))[2] = bibe->out_port;
389       ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
390     }
391   else
392     {
393       udp_header_t *udp = ip6_next_header (ip6);
394       tcp_header_t *tcp = ip6_next_header (ip6);
395       u16 *checksum;
396       ip_csum_t csum;
397
398       u16 sport = udp->src_port;
399       u16 dport = udp->dst_port;
400
401       ste =
402         nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
403                                 fib_index, 1);
404       if (!ste)
405         return -1;
406
407       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
408       if (!bibe)
409         return -1;
410
411       ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
412       udp->dst_port = bibe->out_port;
413       ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
414
415       if (proto == IP_PROTOCOL_TCP)
416         checksum = &tcp->checksum;
417       else
418         checksum = &udp->checksum;
419       csum = ip_csum_sub_even (*checksum, dport);
420       csum = ip_csum_add_even (csum, udp->dst_port);
421       *checksum = ip_csum_fold (csum);
422     }
423
424   return 0;
425 }
426
427 typedef struct unk_proto_st_walk_ctx_t_
428 {
429   ip6_address_t src_addr;
430   ip6_address_t dst_addr;
431   ip4_address_t out_addr;
432   u32 fib_index;
433   u32 thread_index;
434   u8 proto;
435 } unk_proto_st_walk_ctx_t;
436
437 static int
438 unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg)
439 {
440   nat64_main_t *nm = &nat64_main;
441   unk_proto_st_walk_ctx_t *ctx = arg;
442   nat64_db_bib_entry_t *bibe;
443   ip46_address_t saddr, daddr;
444   nat64_db_t *db = &nm->db[ctx->thread_index];
445
446   if (ip46_address_is_equal (&ste->in_r_addr, &ctx->dst_addr))
447     {
448       bibe = nat64_db_bib_entry_by_index (db, ste->proto, ste->bibe_index);
449       if (!bibe)
450         return -1;
451
452       if (ip46_address_is_equal (&bibe->in_addr, &ctx->src_addr)
453           && bibe->fib_index == ctx->fib_index)
454         {
455           clib_memset (&saddr, 0, sizeof (saddr));
456           saddr.ip4.as_u32 = bibe->out_addr.as_u32;
457           clib_memset (&daddr, 0, sizeof (daddr));
458           nat64_extract_ip4 (&ctx->dst_addr, &daddr.ip4, ctx->fib_index);
459
460           if (nat64_db_st_entry_find
461               (db, &daddr, &saddr, 0, 0, ctx->proto, ctx->fib_index, 0))
462             return -1;
463
464           ctx->out_addr.as_u32 = bibe->out_addr.as_u32;
465           return 1;
466         }
467     }
468
469   return 0;
470 }
471
472 static int
473 nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
474                                void *arg)
475 {
476   nat64_main_t *nm = &nat64_main;
477   nat64_in2out_set_ctx_t *s_ctx = arg;
478   nat64_db_bib_entry_t *bibe;
479   nat64_db_st_entry_t *ste;
480   ip46_address_t saddr, daddr, addr;
481   u32 sw_if_index, fib_index;
482   u8 proto = ip6->protocol;
483   int i;
484   nat64_db_t *db = &nm->db[s_ctx->thread_index];
485
486   sw_if_index = vnet_buffer (s_ctx->b)->sw_if_index[VLIB_RX];
487   fib_index =
488     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
489
490   saddr.as_u64[0] = ip6->src_address.as_u64[0];
491   saddr.as_u64[1] = ip6->src_address.as_u64[1];
492   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
493   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
494
495   ste =
496     nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1);
497
498   if (ste)
499     {
500       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
501       if (!bibe)
502         return -1;
503     }
504   else
505     {
506       bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1);
507
508       if (!bibe)
509         {
510           /* Choose same out address as for TCP/UDP session to same dst */
511           unk_proto_st_walk_ctx_t ctx = {
512             .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
513             .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
514             .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
515             .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
516             .out_addr.as_u32 = 0,
517             .fib_index = fib_index,
518             .proto = proto,
519             .thread_index = s_ctx->thread_index,
520           };
521
522           nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);
523
524           if (!ctx.out_addr.as_u32)
525             nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);
526
527           /* Verify if out address is not already in use for protocol */
528           clib_memset (&addr, 0, sizeof (addr));
529           addr.ip4.as_u32 = ctx.out_addr.as_u32;
530           if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
531             ctx.out_addr.as_u32 = 0;
532
533           if (!ctx.out_addr.as_u32)
534             {
535               for (i = 0; i < vec_len (nm->addr_pool); i++)
536                 {
537                   addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
538                   if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
539                     break;
540                 }
541             }
542
543           if (!ctx.out_addr.as_u32)
544             return -1;
545
546           bibe =
547             nat64_db_bib_entry_create (db, &ip6->src_address,
548                                        &ctx.out_addr, 0, 0, fib_index, proto,
549                                        0);
550           if (!bibe)
551             return -1;
552         }
553
554       nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
555       ste =
556         nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0);
557       if (!ste)
558         return -1;
559     }
560
561   nat64_session_reset_timeout (ste, s_ctx->vm);
562
563   ip4->src_address.as_u32 = bibe->out_addr.as_u32;
564   ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
565
566   return 0;
567 }
568
569
570
571 static int
572 nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
573                                   ip6_header_t * ip6, u32 thread_index)
574 {
575   nat64_main_t *nm = &nat64_main;
576   nat64_db_bib_entry_t *bibe;
577   nat64_db_st_entry_t *ste;
578   ip46_address_t saddr, daddr;
579   u32 sw_if_index, fib_index;
580   udp_header_t *udp = ip6_next_header (ip6);
581   tcp_header_t *tcp = ip6_next_header (ip6);
582   u8 proto = ip6->protocol;
583   u16 sport = udp->src_port;
584   u16 dport = udp->dst_port;
585   u16 *checksum;
586   ip_csum_t csum;
587   nat64_db_t *db = &nm->db[thread_index];
588
589   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
590   fib_index =
591     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
592
593   saddr.as_u64[0] = ip6->src_address.as_u64[0];
594   saddr.as_u64[1] = ip6->src_address.as_u64[1];
595   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
596   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
597
598   if (proto == IP_PROTOCOL_UDP)
599     checksum = &udp->checksum;
600   else
601     checksum = &tcp->checksum;
602
603   csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
604   csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
605   csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
606   csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
607   csum = ip_csum_sub_even (csum, sport);
608   csum = ip_csum_sub_even (csum, dport);
609
610   ste =
611     nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
612                             fib_index, 1);
613
614   if (ste)
615     {
616       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
617       if (!bibe)
618         return -1;
619     }
620   else
621     {
622       bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1);
623
624       if (!bibe)
625         {
626           u16 out_port;
627           ip4_address_t out_addr;
628           if (nat64_alloc_out_addr_and_port
629               (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
630                &out_port, thread_index))
631             return -1;
632
633           bibe =
634             nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr,
635                                        sport, out_port, fib_index, proto, 0);
636           if (!bibe)
637             return -1;
638         }
639
640       nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
641       ste =
642         nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
643                                   &daddr.ip4, dport);
644       if (!ste)
645         return -1;
646     }
647
648   if (proto == IP_PROTOCOL_TCP)
649     nat64_tcp_session_set_state (ste, tcp, 1);
650
651   nat64_session_reset_timeout (ste, vm);
652
653   sport = udp->src_port = bibe->out_port;
654   nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);
655
656   clib_memset (&daddr, 0, sizeof (daddr));
657   daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
658
659   bibe = 0;
660   /* *INDENT-OFF* */
661   vec_foreach (db, nm->db)
662     {
663       bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, 0, 0);
664
665       if (bibe)
666         break;
667     }
668   /* *INDENT-ON* */
669
670   if (!bibe)
671     return -1;
672
673   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
674   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
675   udp->dst_port = bibe->in_port;
676
677   csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
678   csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
679   csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
680   csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
681   csum = ip_csum_add_even (csum, udp->src_port);
682   csum = ip_csum_add_even (csum, udp->dst_port);
683   *checksum = ip_csum_fold (csum);
684
685   return 0;
686 }
687
688 static int
689 nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
690                                ip6_header_t * ip6, u32 thread_index)
691 {
692   nat64_main_t *nm = &nat64_main;
693   nat64_db_bib_entry_t *bibe;
694   nat64_db_st_entry_t *ste;
695   icmp46_header_t *icmp = ip6_next_header (ip6);
696   ip6_header_t *inner_ip6;
697   ip46_address_t saddr, daddr;
698   u32 sw_if_index, fib_index;
699   u8 proto;
700   udp_header_t *udp;
701   tcp_header_t *tcp;
702   u16 *checksum, sport, dport;
703   ip_csum_t csum;
704   nat64_db_t *db = &nm->db[thread_index];
705
706   if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
707     return -1;
708
709   inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
710
711   proto = inner_ip6->protocol;
712
713   if (proto == IP_PROTOCOL_ICMP6)
714     return -1;
715
716   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
717   fib_index =
718     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
719
720   saddr.as_u64[0] = inner_ip6->src_address.as_u64[0];
721   saddr.as_u64[1] = inner_ip6->src_address.as_u64[1];
722   daddr.as_u64[0] = inner_ip6->dst_address.as_u64[0];
723   daddr.as_u64[1] = inner_ip6->dst_address.as_u64[1];
724
725   udp = ip6_next_header (inner_ip6);
726   tcp = ip6_next_header (inner_ip6);
727
728   sport = udp->src_port;
729   dport = udp->dst_port;
730
731   if (proto == IP_PROTOCOL_UDP)
732     checksum = &udp->checksum;
733   else
734     checksum = &tcp->checksum;
735
736   csum = ip_csum_sub_even (*checksum, inner_ip6->src_address.as_u64[0]);
737   csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]);
738   csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]);
739   csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]);
740   csum = ip_csum_sub_even (csum, sport);
741   csum = ip_csum_sub_even (csum, dport);
742
743   ste =
744     nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
745                             fib_index, 1);
746   if (!ste)
747     return -1;
748
749   bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
750   if (!bibe)
751     return -1;
752
753   dport = udp->dst_port = bibe->out_port;
754   nat64_compose_ip6 (&inner_ip6->dst_address, &bibe->out_addr, fib_index);
755
756   clib_memset (&saddr, 0, sizeof (saddr));
757   clib_memset (&daddr, 0, sizeof (daddr));
758   saddr.ip4.as_u32 = ste->out_r_addr.as_u32;
759   daddr.ip4.as_u32 = bibe->out_addr.as_u32;
760
761   ste = 0;
762   /* *INDENT-OFF* */
763   vec_foreach (db, nm->db)
764     {
765       ste = nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
766                                     0, 0);
767
768       if (ste)
769         break;
770     }
771   /* *INDENT-ON* */
772
773   if (!ste)
774     return -1;
775
776   bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
777   if (!bibe)
778     return -1;
779
780   inner_ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
781   inner_ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
782   udp->src_port = bibe->in_port;
783
784   csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]);
785   csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]);
786   csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]);
787   csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]);
788   csum = ip_csum_add_even (csum, udp->src_port);
789   csum = ip_csum_add_even (csum, udp->dst_port);
790   *checksum = ip_csum_fold (csum);
791
792   if (!vec_len (nm->addr_pool))
793     return -1;
794
795   nat64_compose_ip6 (&ip6->src_address, &nm->addr_pool[0].addr, fib_index);
796   ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
797   ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];
798
799   icmp->checksum = 0;
800   csum = ip_csum_with_carry (0, ip6->payload_length);
801   csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol));
802   csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]);
803   csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]);
804   csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]);
805   csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]);
806   csum =
807     ip_incremental_checksum (csum, icmp,
808                              clib_net_to_host_u16 (ip6->payload_length));
809   icmp->checksum = ~ip_csum_fold (csum);
810
811   return 0;
812 }
813
814 static int
815 nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
816                                     ip6_header_t * ip6, u32 thread_index)
817 {
818   nat64_main_t *nm = &nat64_main;
819   nat64_db_bib_entry_t *bibe;
820   nat64_db_st_entry_t *ste;
821   ip46_address_t saddr, daddr, addr;
822   u32 sw_if_index, fib_index;
823   u8 proto = ip6->protocol;
824   int i;
825   nat64_db_t *db = &nm->db[thread_index];
826
827   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
828   fib_index =
829     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
830
831   saddr.as_u64[0] = ip6->src_address.as_u64[0];
832   saddr.as_u64[1] = ip6->src_address.as_u64[1];
833   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
834   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
835
836   ste =
837     nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1);
838
839   if (ste)
840     {
841       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
842       if (!bibe)
843         return -1;
844     }
845   else
846     {
847       bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1);
848
849       if (!bibe)
850         {
851           /* Choose same out address as for TCP/UDP session to same dst */
852           unk_proto_st_walk_ctx_t ctx = {
853             .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
854             .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
855             .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
856             .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
857             .out_addr.as_u32 = 0,
858             .fib_index = fib_index,
859             .proto = proto,
860             .thread_index = thread_index,
861           };
862
863           nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);
864
865           if (!ctx.out_addr.as_u32)
866             nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);
867
868           /* Verify if out address is not already in use for protocol */
869           clib_memset (&addr, 0, sizeof (addr));
870           addr.ip4.as_u32 = ctx.out_addr.as_u32;
871           if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
872             ctx.out_addr.as_u32 = 0;
873
874           if (!ctx.out_addr.as_u32)
875             {
876               for (i = 0; i < vec_len (nm->addr_pool); i++)
877                 {
878                   addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
879                   if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
880                     break;
881                 }
882             }
883
884           if (!ctx.out_addr.as_u32)
885             return -1;
886
887           bibe =
888             nat64_db_bib_entry_create (db, &ip6->src_address,
889                                        &ctx.out_addr, 0, 0, fib_index, proto,
890                                        0);
891           if (!bibe)
892             return -1;
893         }
894
895       nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
896       ste =
897         nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0);
898       if (!ste)
899         return -1;
900     }
901
902   nat64_session_reset_timeout (ste, vm);
903
904   nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);
905
906   clib_memset (&daddr, 0, sizeof (daddr));
907   daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
908
909   bibe = 0;
910   /* *INDENT-OFF* */
911   vec_foreach (db, nm->db)
912     {
913       bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, 0, 0);
914
915       if (bibe)
916         break;
917     }
918   /* *INDENT-ON* */
919
920   if (!bibe)
921     return -1;
922
923   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
924   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
925
926   return 0;
927 }
928
929 static inline uword
930 nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
931                              vlib_frame_t * frame, u8 is_slow_path)
932 {
933   u32 n_left_from, *from, *to_next;
934   nat64_in2out_next_t next_index;
935   u32 pkts_processed = 0;
936   u32 stats_node_index;
937   u32 thread_index = vm->thread_index;
938   u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets =
939     0, fragments = 0;
940
941   stats_node_index =
942     is_slow_path ? nat64_in2out_slowpath_node.index : nat64_in2out_node.index;
943
944   from = vlib_frame_vector_args (frame);
945   n_left_from = frame->n_vectors;
946   next_index = node->cached_next_index;
947
948   while (n_left_from > 0)
949     {
950       u32 n_left_to_next;
951
952       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
953
954       while (n_left_from > 0 && n_left_to_next > 0)
955         {
956           u32 bi0;
957           vlib_buffer_t *b0;
958           u32 next0;
959           ip6_header_t *ip60;
960           u16 l4_offset0, frag_offset0;
961           u8 l4_protocol0;
962           u32 proto0;
963           nat64_in2out_set_ctx_t ctx0;
964           u32 sw_if_index0;
965
966           /* speculatively enqueue b0 to the current next frame */
967           bi0 = from[0];
968           to_next[0] = bi0;
969           from += 1;
970           to_next += 1;
971           n_left_from -= 1;
972           n_left_to_next -= 1;
973
974           b0 = vlib_get_buffer (vm, bi0);
975           ip60 = vlib_buffer_get_current (b0);
976
977           ctx0.b = b0;
978           ctx0.vm = vm;
979           ctx0.thread_index = thread_index;
980
981           next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;
982
983           if (PREDICT_FALSE
984               (ip6_parse
985                (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
986                 &frag_offset0)))
987             {
988               next0 = NAT64_IN2OUT_NEXT_DROP;
989               b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
990               goto trace0;
991             }
992
993           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
994
995           if (nat64_not_translate (sw_if_index0, ip60->dst_address))
996             {
997               next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
998               goto trace0;
999             }
1000
1001           proto0 = ip_proto_to_snat_proto (l4_protocol0);
1002
1003           if (is_slow_path)
1004             {
1005               if (PREDICT_TRUE (proto0 == ~0))
1006                 {
1007                   other_packets++;
1008                   if (is_hairpinning (&ip60->dst_address))
1009                     {
1010                       next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1011                       if (nat64_in2out_unk_proto_hairpinning
1012                           (vm, b0, ip60, thread_index))
1013                         {
1014                           next0 = NAT64_IN2OUT_NEXT_DROP;
1015                           b0->error =
1016                             node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1017                         }
1018                       goto trace0;
1019                     }
1020
1021                   if (ip6_to_ip4 (b0, nat64_in2out_unk_proto_set_cb, &ctx0))
1022                     {
1023                       next0 = NAT64_IN2OUT_NEXT_DROP;
1024                       b0->error =
1025                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1026                       goto trace0;
1027                     }
1028                 }
1029               goto trace0;
1030             }
1031           else
1032             {
1033               if (PREDICT_FALSE (proto0 == ~0))
1034                 {
1035                   next0 = NAT64_IN2OUT_NEXT_SLOWPATH;
1036                   goto trace0;
1037                 }
1038             }
1039
1040           if (PREDICT_FALSE
1041               (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION))
1042             {
1043               next0 = NAT64_IN2OUT_NEXT_REASS;
1044               fragments++;
1045               goto trace0;
1046             }
1047
1048           if (proto0 == SNAT_PROTOCOL_ICMP)
1049             {
1050               icmp_packets++;
1051               if (is_hairpinning (&ip60->dst_address))
1052                 {
1053                   next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1054                   if (nat64_in2out_icmp_hairpinning
1055                       (vm, b0, ip60, thread_index))
1056                     {
1057                       next0 = NAT64_IN2OUT_NEXT_DROP;
1058                       b0->error =
1059                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1060                     }
1061                   goto trace0;
1062                 }
1063
1064               if (icmp6_to_icmp
1065                   (b0, nat64_in2out_icmp_set_cb, &ctx0,
1066                    nat64_in2out_inner_icmp_set_cb, &ctx0))
1067                 {
1068                   next0 = NAT64_IN2OUT_NEXT_DROP;
1069                   b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1070                   goto trace0;
1071                 }
1072             }
1073           else if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
1074             {
1075               if (proto0 == SNAT_PROTOCOL_TCP)
1076                 tcp_packets++;
1077               else
1078                 udp_packets++;
1079
1080               if (is_hairpinning (&ip60->dst_address))
1081                 {
1082                   next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1083                   if (nat64_in2out_tcp_udp_hairpinning
1084                       (vm, b0, ip60, thread_index))
1085                     {
1086                       next0 = NAT64_IN2OUT_NEXT_DROP;
1087                       b0->error =
1088                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1089                     }
1090                   goto trace0;
1091                 }
1092
1093               if (ip6_to_ip4_tcp_udp
1094                   (b0, nat64_in2out_tcp_udp_set_cb, &ctx0, 0))
1095                 {
1096                   next0 = NAT64_IN2OUT_NEXT_DROP;
1097                   b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1098                   goto trace0;
1099                 }
1100             }
1101
1102         trace0:
1103           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1104                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1105             {
1106               nat64_in2out_trace_t *t =
1107                 vlib_add_trace (vm, node, b0, sizeof (*t));
1108               t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1109               t->next_index = next0;
1110               t->is_slow_path = is_slow_path;
1111             }
1112
1113           pkts_processed += next0 == NAT64_IN2OUT_NEXT_IP4_LOOKUP;
1114
1115           /* verify speculative enqueue, maybe switch current next frame */
1116           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1117                                            n_left_to_next, bi0, next0);
1118         }
1119       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1120     }
1121   vlib_node_increment_counter (vm, stats_node_index,
1122                                NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
1123                                pkts_processed);
1124   vlib_node_increment_counter (vm, stats_node_index,
1125                                NAT64_IN2OUT_ERROR_TCP_PACKETS, tcp_packets);
1126   vlib_node_increment_counter (vm, stats_node_index,
1127                                NAT64_IN2OUT_ERROR_UDP_PACKETS, tcp_packets);
1128   vlib_node_increment_counter (vm, stats_node_index,
1129                                NAT64_IN2OUT_ERROR_ICMP_PACKETS, icmp_packets);
1130   vlib_node_increment_counter (vm, stats_node_index,
1131                                NAT64_IN2OUT_ERROR_OTHER_PACKETS,
1132                                other_packets);
1133   vlib_node_increment_counter (vm, stats_node_index,
1134                                NAT64_IN2OUT_ERROR_FRAGMENTS, fragments);
1135
1136   return frame->n_vectors;
1137 }
1138
1139 static uword
1140 nat64_in2out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1141                       vlib_frame_t * frame)
1142 {
1143   return nat64_in2out_node_fn_inline (vm, node, frame, 0);
1144 }
1145
/*
 * Graph-node registration for "nat64-in2out".
 * The node dispatches through nat64_in2out_node_fn, formats traces with
 * format_nat64_in2out_trace and shares the nat64_in2out_error_strings table.
 * next_nodes maps every NAT64_IN2OUT_NEXT_* disposition to the name of the
 * node that receives the packet.
 */
1146 /* *INDENT-OFF* */
1147 VLIB_REGISTER_NODE (nat64_in2out_node) = {
1148   .function = nat64_in2out_node_fn,
1149   .name = "nat64-in2out",
1150   .vector_size = sizeof (u32),
1151   .format_trace = format_nat64_in2out_trace,
1152   .type = VLIB_NODE_TYPE_INTERNAL,
1153   .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
1154   .error_strings = nat64_in2out_error_strings,
1155   .n_next_nodes = NAT64_IN2OUT_N_NEXT,
1156   /* edit / add dispositions here */
1157   .next_nodes = {
1158     [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
1159     [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
1160     [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
1161     [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
1162     [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
1163   },
1164 };
1165 /* *INDENT-ON* */
1166
     /* NOTE(review): presumably emits CPU-architecture specific variants of the
      * dispatch function - confirm against the vlib node macros. */
1167 VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_node, nat64_in2out_node_fn);
1168
1169 static uword
1170 nat64_in2out_slowpath_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1171                                vlib_frame_t * frame)
1172 {
1173   return nat64_in2out_node_fn_inline (vm, node, frame, 1);
1174 }
1175
/*
 * Graph-node registration for "nat64-in2out-slowpath".
 * The node dispatches through nat64_in2out_slowpath_node_fn and reuses the
 * trace formatter, error strings and next-node table of "nat64-in2out".
 */
1176 /* *INDENT-OFF* */
1177 VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
1178   .function = nat64_in2out_slowpath_node_fn,
1179   .name = "nat64-in2out-slowpath",
1180   .vector_size = sizeof (u32),
1181   .format_trace = format_nat64_in2out_trace,
1182   .type = VLIB_NODE_TYPE_INTERNAL,
1183   .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
1184   .error_strings = nat64_in2out_error_strings,
1185   .n_next_nodes = NAT64_IN2OUT_N_NEXT,
1186   /* edit / add dispositions here */
1187   .next_nodes = {
1188     [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
1189     [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
1190     [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
1191     [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
1192     [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
1193   },
1194 };
1195 /* *INDENT-ON* */
1196
     /* NOTE(review): presumably emits CPU-architecture specific variants of the
      * dispatch function - confirm against the vlib node macros. */
1197 VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_slowpath_node,
1198                               nat64_in2out_slowpath_node_fn);
1199
/*
 * Per-fragment context passed from the reassembly node to
 * nat64_in2out_frag_set_cb() and nat64_in2out_frag_hairpinning().
 */
1200 typedef struct nat64_in2out_frag_set_ctx_t_
1201 {
       /* vlib main, forwarded to nat64_session_reset_timeout() */
1202   vlib_main_t *vm;
       /* session-table index used to look the session entry up again */
1203   u32 sess_index;
       /* selects the per-thread database nat64_main.db[thread_index] */
1204   u32 thread_index;
       /* byte offset from the IPv6 header to the L4 header */
1205   u16 l4_offset;
       /* IP protocol number (compared against IP_PROTOCOL_TCP / _UDP) */
1206   u8 proto;
       /* non-zero for the fragment with fragment offset zero */
1207   u8 first_frag;
1208 } nat64_in2out_frag_set_ctx_t;
1209
1210 static int
1211 nat64_in2out_frag_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
1212 {
1213   nat64_main_t *nm = &nat64_main;
1214   nat64_in2out_frag_set_ctx_t *ctx = arg;
1215   nat64_db_st_entry_t *ste;
1216   nat64_db_bib_entry_t *bibe;
1217   udp_header_t *udp;
1218   nat64_db_t *db = &nm->db[ctx->thread_index];
1219
1220   ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
1221   if (!ste)
1222     return -1;
1223
1224   bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
1225   if (!bibe)
1226     return -1;
1227
1228   nat64_session_reset_timeout (ste, ctx->vm);
1229
1230   if (ctx->first_frag)
1231     {
1232       udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);
1233
1234       if (ctx->proto == IP_PROTOCOL_TCP)
1235         {
1236           u16 *checksum;
1237           ip_csum_t csum;
1238           tcp_header_t *tcp = (tcp_header_t *) udp;
1239
1240           nat64_tcp_session_set_state (ste, tcp, 1);
1241           checksum = &tcp->checksum;
1242           csum = ip_csum_sub_even (*checksum, tcp->src_port);
1243           csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[0]);
1244           csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
1245           csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
1246           csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
1247           csum = ip_csum_add_even (csum, bibe->out_port);
1248           csum = ip_csum_add_even (csum, bibe->out_addr.as_u32);
1249           csum = ip_csum_add_even (csum, ste->out_r_addr.as_u32);
1250           *checksum = ip_csum_fold (csum);
1251         }
1252
1253       udp->src_port = bibe->out_port;
1254     }
1255
1256   ip4->src_address.as_u32 = bibe->out_addr.as_u32;
1257   ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
1258
1259   return 0;
1260 }
1261
1262 static int
1263 nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6,
1264                                nat64_in2out_frag_set_ctx_t * ctx)
1265 {
1266   nat64_main_t *nm = &nat64_main;
1267   nat64_db_st_entry_t *ste;
1268   nat64_db_bib_entry_t *bibe;
1269   udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);
1270   tcp_header_t *tcp = (tcp_header_t *) udp;
1271   u16 sport = udp->src_port;
1272   u16 dport = udp->dst_port;
1273   u16 *checksum;
1274   ip_csum_t csum;
1275   ip46_address_t daddr;
1276   nat64_db_t *db = &nm->db[ctx->thread_index];
1277
1278   if (ctx->first_frag)
1279     {
1280       if (ctx->proto == IP_PROTOCOL_UDP)
1281         checksum = &udp->checksum;
1282       else
1283         checksum = &tcp->checksum;
1284
1285       csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
1286       csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
1287       csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
1288       csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
1289       csum = ip_csum_sub_even (csum, sport);
1290       csum = ip_csum_sub_even (csum, dport);
1291     }
1292
1293   ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
1294   if (!ste)
1295     return -1;
1296
1297   bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
1298   if (!bibe)
1299     return -1;
1300
1301   if (ctx->proto == IP_PROTOCOL_TCP)
1302     nat64_tcp_session_set_state (ste, tcp, 1);
1303
1304   nat64_session_reset_timeout (ste, ctx->vm);
1305
1306   sport = bibe->out_port;
1307   dport = ste->r_port;
1308
1309   nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, bibe->fib_index);
1310
1311   clib_memset (&daddr, 0, sizeof (daddr));
1312   daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
1313
1314   bibe = 0;
1315   /* *INDENT-OFF* */
1316   vec_foreach (db, nm->db)
1317     {
1318       bibe = nat64_db_bib_entry_find (db, &daddr, dport, ctx->proto, 0, 0);
1319
1320       if (bibe)
1321         break;
1322     }
1323   /* *INDENT-ON* */
1324
1325   if (!bibe)
1326     return -1;
1327
1328   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
1329   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
1330
1331   if (ctx->first_frag)
1332     {
1333       udp->dst_port = bibe->in_port;
1334       udp->src_port = sport;
1335       csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
1336       csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
1337       csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
1338       csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
1339       csum = ip_csum_add_even (csum, udp->src_port);
1340       csum = ip_csum_add_even (csum, udp->dst_port);
1341       *checksum = ip_csum_fold (csum);
1342     }
1343
1344   return 0;
1345 }
1346
/**
 * @brief Dispatch function of the "nat64-in2out-reass" node: translates
 *        fragmented IPv6 TCP/UDP packets.
 *
 * Per buffer:
 *  - drop when nat_reass_is_drop_frag (1) is set, when ip6_parse() fails or
 *    when the L4 protocol is neither TCP nor UDP;
 *  - find or create the reassembly context for (src, dst, id, protocol);
 *  - a non-first fragment whose reassembly has no session yet is cached in
 *    the reassembly context and withdrawn from the speculative enqueue;
 *  - the first fragment finds or creates the session (and BIB entry), stores
 *    the session index in the reassembly context and requests the fragments
 *    cached so far through fragments_to_loopback;
 *  - the fragment is then rewritten, either for hairpinning or by
 *    ip6_to_ip4_fragmented().
 *
 * Once the input frame is exhausted, pending loopback fragments are copied
 * into the frame's vector area and the loop keeps going on them.
 *
 * @return frame->n_vectors.
 */
1347 static uword
1348 nat64_in2out_reass_node_fn (vlib_main_t * vm,
1349                             vlib_node_runtime_t * node, vlib_frame_t * frame)
1350 {
1351   u32 n_left_from, *from, *to_next;
1352   nat64_in2out_next_t next_index;
1353   u32 pkts_processed = 0, cached_fragments = 0;
       /* vectors of buffer indices, filled by the nat_ip6_reass_* helpers */
1354   u32 *fragments_to_drop = 0;
1355   u32 *fragments_to_loopback = 0;
1356   nat64_main_t *nm = &nat64_main;
1357   u32 thread_index = vm->thread_index;
1358
1359   from = vlib_frame_vector_args (frame);
1360   n_left_from = frame->n_vectors;
1361   next_index = node->cached_next_index;
1362
1363   while (n_left_from > 0)
1364     {
1365       u32 n_left_to_next;
1366
1367       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1368
1369       while (n_left_from > 0 && n_left_to_next > 0)
1370         {
1371           u32 bi0;
1372           vlib_buffer_t *b0;
1373           u32 next0;
               /* set when b0 is parked in the reassembly context */
1374           u8 cached0 = 0;
1375           ip6_header_t *ip60;
1376           u16 l4_offset0, frag_offset0;
1377           u8 l4_protocol0;
1378           nat_reass_ip6_t *reass0;
1379           ip6_frag_hdr_t *frag0;
1380           nat64_db_bib_entry_t *bibe0;
1381           nat64_db_st_entry_t *ste0;
1382           udp_header_t *udp0;
1383           snat_protocol_t proto0;
1384           u32 sw_if_index0, fib_index0;
1385           ip46_address_t saddr0, daddr0;
1386           nat64_in2out_frag_set_ctx_t ctx0;
1387           nat64_db_t *db = &nm->db[thread_index];
1388
1389           /* speculatively enqueue b0 to the current next frame */
1390           bi0 = from[0];
1391           to_next[0] = bi0;
1392           from += 1;
1393           to_next += 1;
1394           n_left_from -= 1;
1395           n_left_to_next -= 1;
1396
1397           b0 = vlib_get_buffer (vm, bi0);
1398           next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;
1399
1400           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1401           fib_index0 =
1402             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6,
1403                                                  sw_if_index0);
1404
1405           ctx0.thread_index = thread_index;
1406
               /* NOTE(review): presumably the configured "drop fragments"
                * policy for IPv6 - confirm in nat_reass.h */
1407           if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
1408             {
1409               next0 = NAT64_IN2OUT_NEXT_DROP;
1410               b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
1411               goto trace0;
1412             }
1413
1414           ip60 = (ip6_header_t *) vlib_buffer_get_current (b0);
1415
1416           if (PREDICT_FALSE
1417               (ip6_parse
1418                (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
1419                 &frag_offset0)))
1420             {
1421               next0 = NAT64_IN2OUT_NEXT_DROP;
1422               b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
1423               goto trace0;
1424             }
1425
               /* only TCP and UDP fragments are translated by this node */
1426           if (PREDICT_FALSE
1427               (!(l4_protocol0 == IP_PROTOCOL_TCP
1428                  || l4_protocol0 == IP_PROTOCOL_UDP)))
1429             {
1430               next0 = NAT64_IN2OUT_NEXT_DROP;
1431               b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
1432               goto trace0;
1433             }
1434
1435           udp0 = (udp_header_t *) u8_ptr_add (ip60, l4_offset0);
1436           frag0 = (ip6_frag_hdr_t *) u8_ptr_add (ip60, frag_offset0);
1437           proto0 = ip_proto_to_snat_proto (l4_protocol0);
1438
1439           reass0 = nat_ip6_reass_find_or_create (ip60->src_address,
1440                                                  ip60->dst_address,
1441                                                  frag0->identification,
1442                                                  l4_protocol0,
1443                                                  1, &fragments_to_drop);
1444
               /* no reassembly context available: drop with MAX_REASS */
1445           if (PREDICT_FALSE (!reass0))
1446             {
1447               next0 = NAT64_IN2OUT_NEXT_DROP;
1448               b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_REASS];
1449               goto trace0;
1450             }
1451
               /* non-zero fragment offset: this is not the first fragment */
1452           if (PREDICT_TRUE (ip6_frag_hdr_offset (frag0)))
1453             {
1454               ctx0.first_frag = 0;
                   /* session not known yet: park the fragment until the
                    * first fragment has been seen */
1455               if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
1456                 {
1457                   if (nat_ip6_reass_add_fragment
1458                       (reass0, bi0, &fragments_to_drop))
1459                     {
1460                       b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_FRAG];
1461                       next0 = NAT64_IN2OUT_NEXT_DROP;
1462                       goto trace0;
1463                     }
1464                   cached0 = 1;
1465                   goto trace0;
1466                 }
1467             }
1468           else
1469             {
                   /* first fragment: it carries the L4 header, so the
                    * session can be looked up or created from the ports */
1470               ctx0.first_frag = 1;
1471
1472               saddr0.as_u64[0] = ip60->src_address.as_u64[0];
1473               saddr0.as_u64[1] = ip60->src_address.as_u64[1];
1474               daddr0.as_u64[0] = ip60->dst_address.as_u64[0];
1475               daddr0.as_u64[1] = ip60->dst_address.as_u64[1];
1476
1477               ste0 =
1478                 nat64_db_st_entry_find (db, &saddr0, &daddr0,
1479                                         udp0->src_port, udp0->dst_port,
1480                                         l4_protocol0, fib_index0, 1);
1481               if (!ste0)
1482                 {
1483                   bibe0 =
1484                     nat64_db_bib_entry_find (db, &saddr0, udp0->src_port,
1485                                              l4_protocol0, fib_index0, 1);
1486                   if (!bibe0)
1487                     {
                           /* no binding yet: allocate an outside
                            * address/port and create the BIB entry */
1488                       u16 out_port0;
1489                       ip4_address_t out_addr0;
1490                       if (nat64_alloc_out_addr_and_port
1491                           (fib_index0, proto0, &out_addr0, &out_port0,
1492                            thread_index))
1493                         {
1494                           next0 = NAT64_IN2OUT_NEXT_DROP;
1495                           b0->error =
1496                             node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1497                           goto trace0;
1498                         }
1499
1500                       bibe0 =
1501                         nat64_db_bib_entry_create (db,
1502                                                    &ip60->src_address,
1503                                                    &out_addr0, udp0->src_port,
1504                                                    out_port0, fib_index0,
1505                                                    l4_protocol0, 0);
1506                       if (!bibe0)
1507                         {
1508                           next0 = NAT64_IN2OUT_NEXT_DROP;
1509                           b0->error =
1510                             node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1511                           goto trace0;
1512                         }
1513                     }
1514                   nat64_extract_ip4 (&ip60->dst_address, &daddr0.ip4,
1515                                      fib_index0);
1516                   ste0 =
1517                     nat64_db_st_entry_create (db, bibe0,
1518                                               &ip60->dst_address, &daddr0.ip4,
1519                                               udp0->dst_port);
1520                   if (!ste0)
1521                     {
1522                       next0 = NAT64_IN2OUT_NEXT_DROP;
1523                       b0->error =
1524                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1525                       goto trace0;
1526                     }
1527                 }
1528               reass0->sess_index = nat64_db_st_entry_get_index (db, ste0);
1529
                   /* NOTE(review): presumably appends the fragments cached
                    * for this reassembly to fragments_to_loopback - confirm
                    * in nat_reass.c */
1530               nat_ip6_reass_get_frags (reass0, &fragments_to_loopback);
1531             }
1532
1533           ctx0.sess_index = reass0->sess_index;
1534           ctx0.proto = l4_protocol0;
1535           ctx0.vm = vm;
1536           ctx0.l4_offset = l4_offset0;
1537
1538           if (PREDICT_FALSE (is_hairpinning (&ip60->dst_address)))
1539             {
                   /* hairpinned packets stay IPv6 and go to ip6-lookup */
1540               next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1541               if (nat64_in2out_frag_hairpinning (b0, ip60, &ctx0))
1542                 {
1543                   next0 = NAT64_IN2OUT_NEXT_DROP;
1544                   b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1545                 }
1546               goto trace0;
1547             }
1548           else
1549             {
1550               if (ip6_to_ip4_fragmented (b0, nat64_in2out_frag_set_cb, &ctx0))
1551                 {
1552                   next0 = NAT64_IN2OUT_NEXT_DROP;
1553                   b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
1554                   goto trace0;
1555                 }
1556             }
1557
1558         trace0:
1559           if (PREDICT_FALSE
1560               ((node->flags & VLIB_NODE_FLAG_TRACE)
1561                && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1562             {
1563               nat64_in2out_reass_trace_t *t =
1564                 vlib_add_trace (vm, node, b0, sizeof (*t));
1565               t->cached = cached0;
1566               t->sw_if_index = sw_if_index0;
1567               t->next_index = next0;
1568             }
1569
1570           if (cached0)
1571             {
                   /* undo the speculative enqueue: the buffer is kept by
                    * the reassembly context instead of being forwarded */
1572               n_left_to_next++;
1573               to_next--;
1574               cached_fragments++;
1575             }
1576           else
1577             {
1578               pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP;
1579
1580               /* verify speculative enqueue, maybe switch current next frame */
1581               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1582                                                to_next, n_left_to_next,
1583                                                bi0, next0);
1584             }
1585
               /* Input exhausted but fragments are waiting to be looped
                * back: reuse the frame's vector area as a new input batch,
                * at most VLIB_FRAME_SIZE buffers at a time (taken from the
                * tail of the vector when it holds more than that). */
1586           if (n_left_from == 0 && vec_len (fragments_to_loopback))
1587             {
1588               from = vlib_frame_vector_args (frame);
1589               u32 len = vec_len (fragments_to_loopback);
1590               if (len <= VLIB_FRAME_SIZE)
1591                 {
1592                   clib_memcpy_fast (from, fragments_to_loopback,
1593                                     sizeof (u32) * len);
1594                   n_left_from = len;
1595                   vec_reset_length (fragments_to_loopback);
1596                 }
1597               else
1598                 {
1599                   clib_memcpy_fast (from, fragments_to_loopback +
1600                                     (len - VLIB_FRAME_SIZE),
1601                                     sizeof (u32) * VLIB_FRAME_SIZE);
1602                   n_left_from = VLIB_FRAME_SIZE;
1603                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
1604                 }
1605             }
1606         }
1607
1608       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1609     }
1610
1611   vlib_node_increment_counter (vm, nat64_in2out_reass_node.index,
1612                                NAT64_IN2OUT_ERROR_PROCESSED_FRAGMENTS,
1613                                pkts_processed);
1614   vlib_node_increment_counter (vm, nat64_in2out_reass_node.index,
1615                                NAT64_IN2OUT_ERROR_CACHED_FRAGMENTS,
1616                                cached_fragments);
1617
       /* hand every buffer collected in fragments_to_drop to the drop next
        * with the DROP_FRAGMENT error */
1618   nat_send_all_to_node (vm, fragments_to_drop, node,
1619                         &node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT],
1620                         NAT64_IN2OUT_NEXT_DROP);
1621
1622   vec_free (fragments_to_drop);
1623   vec_free (fragments_to_loopback);
1624   return frame->n_vectors;
1625 }
1626
/*
 * Graph-node registration for "nat64-in2out-reass".
 * The node dispatches through nat64_in2out_reass_node_fn, formats traces
 * with format_nat64_in2out_reass_trace and shares the error strings and
 * next-node table used by the other nat64-in2out nodes.
 */
1627 /* *INDENT-OFF* */
1628 VLIB_REGISTER_NODE (nat64_in2out_reass_node) = {
1629   .function = nat64_in2out_reass_node_fn,
1630   .name = "nat64-in2out-reass",
1631   .vector_size = sizeof (u32),
1632   .format_trace = format_nat64_in2out_reass_trace,
1633   .type = VLIB_NODE_TYPE_INTERNAL,
1634   .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
1635   .error_strings = nat64_in2out_error_strings,
1636   .n_next_nodes = NAT64_IN2OUT_N_NEXT,
1637   /* edit / add dispositions here */
1638   .next_nodes = {
1639     [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
1640     [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
1641     [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
1642     [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
1643     [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
1644   },
1645 };
1646 /* *INDENT-ON* */
1647
     /* NOTE(review): presumably emits CPU-architecture specific variants of the
      * dispatch function - confirm against the vlib node macros. */
1648 VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_reass_node,
1649                               nat64_in2out_reass_node_fn);
1650
/*
 * X-macro listing the counters of the nat64-in2out-handoff node as
 * _(SYMBOL, "description") pairs.  It is expanded below with different
 * definitions of _() to build the error enum and the matching string table.
 */
1651 #define foreach_nat64_in2out_handoff_error                       \
1652 _(CONGESTION_DROP, "congestion drop")                            \
1653 _(SAME_WORKER, "same worker")                                    \
1654 _(DO_HANDOFF, "do handoff")
1655
/*
 * Counter indices of the handoff node: one NAT64_IN2OUT_HANDOFF_ERROR_<sym>
 * enumerator per entry of foreach_nat64_in2out_handoff_error, followed by
 * NAT64_IN2OUT_HANDOFF_N_ERROR, which therefore equals the entry count.
 */
1656 typedef enum
1657 {
1658 #define _(sym,str) NAT64_IN2OUT_HANDOFF_ERROR_##sym,
1659   foreach_nat64_in2out_handoff_error
1660 #undef _
1661     NAT64_IN2OUT_HANDOFF_N_ERROR,
1662 } nat64_in2out_handoff_error_t;
1663
/*
 * Description strings of the handoff counters, in the order given by
 * foreach_nat64_in2out_handoff_error (the same order as the enumerators of
 * nat64_in2out_handoff_error_t).
 */
1664 static char *nat64_in2out_handoff_error_strings[] = {
1665 #define _(sym,string) string,
1666   foreach_nat64_in2out_handoff_error
1667 #undef _
1668 };
1669
/* Trace record written by the nat64-in2out-handoff node for traced buffers. */
1670 typedef struct
1671 {
       /* thread index chosen for the packet by nat64_get_worker_in2out() */
1672   u32 next_worker_index;
1673 } nat64_in2out_handoff_trace_t;
1674
1675 static u8 *
1676 format_nat64_in2out_handoff_trace (u8 * s, va_list * args)
1677 {
1678   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1679   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1680   nat64_in2out_handoff_trace_t *t =
1681     va_arg (*args, nat64_in2out_handoff_trace_t *);
1682
1683   s =
1684     format (s, "NAT64-IN2OUT-HANDOFF: next-worker %d", t->next_worker_index);
1685
1686   return s;
1687 }
1688
1689 static inline uword
1690 nat64_in2out_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1691                               vlib_frame_t * frame)
1692 {
1693   nat64_main_t *nm = &nat64_main;
1694   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1695   u32 n_enq, n_left_from, *from;
1696   u16 thread_indices[VLIB_FRAME_SIZE], *ti;
1697   u32 fq_index;
1698   u32 thread_index = vm->thread_index;
1699   u32 do_handoff = 0, same_worker = 0;
1700
1701   from = vlib_frame_vector_args (frame);
1702   n_left_from = frame->n_vectors;
1703   vlib_get_buffers (vm, from, bufs, n_left_from);
1704
1705   b = bufs;
1706   ti = thread_indices;
1707
1708   fq_index = nm->fq_in2out_index;
1709
1710   while (n_left_from > 0)
1711     {
1712       ip6_header_t *ip0;
1713
1714       ip0 = vlib_buffer_get_current (b[0]);
1715       ti[0] = nat64_get_worker_in2out (&ip0->src_address);
1716
1717       if (ti[0] != thread_index)
1718         do_handoff++;
1719       else
1720         same_worker++;
1721
1722       if (PREDICT_FALSE
1723           ((node->flags & VLIB_NODE_FLAG_TRACE)
1724            && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
1725         {
1726           nat64_in2out_handoff_trace_t *t =
1727             vlib_add_trace (vm, node, b[0], sizeof (*t));
1728           t->next_worker_index = ti[0];
1729         }
1730
1731       n_left_from -= 1;
1732       ti += 1;
1733       b += 1;
1734     }
1735
1736   n_enq =
1737     vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
1738                                    frame->n_vectors, 1);
1739
1740   if (n_enq < frame->n_vectors)
1741     vlib_node_increment_counter (vm, node->node_index,
1742                                  NAT64_IN2OUT_HANDOFF_ERROR_CONGESTION_DROP,
1743                                  frame->n_vectors - n_enq);
1744   vlib_node_increment_counter (vm, node->node_index,
1745                                NAT64_IN2OUT_HANDOFF_ERROR_SAME_WORKER,
1746                                same_worker);
1747   vlib_node_increment_counter (vm, node->node_index,
1748                                NAT64_IN2OUT_HANDOFF_ERROR_DO_HANDOFF,
1749                                do_handoff);
1750
1751   return frame->n_vectors;
1752 }
1753
/*
 * Graph-node registration for "nat64-in2out-handoff".
 * The node dispatches through nat64_in2out_handoff_node_fn, uses its own
 * error-string table and declares a single next node, "error-drop".
 */
1754 /* *INDENT-OFF* */
1755 VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = {
1756   .function = nat64_in2out_handoff_node_fn,
1757   .name = "nat64-in2out-handoff",
1758   .vector_size = sizeof (u32),
1759   .format_trace = format_nat64_in2out_handoff_trace,
1760   .type = VLIB_NODE_TYPE_INTERNAL,
1761   .n_errors = ARRAY_LEN(nat64_in2out_handoff_error_strings),
1762   .error_strings = nat64_in2out_handoff_error_strings,
1763
1764   .n_next_nodes = 1,
1765
1766   .next_nodes = {
1767     [0] = "error-drop",
1768   },
1769 };
1770 /* *INDENT-ON* */
1771
     /* NOTE(review): presumably emits CPU-architecture specific variants of the
      * dispatch function - confirm against the vlib node macros. */
1772 VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_handoff_node,
1773                               nat64_in2out_handoff_node_fn);
1774
1775 /*
1776  * fd.io coding-style-patch-verification: ON
1777  *
1778  * Local Variables:
1779  * eval: (c-set-style "gnu")
1780  * End:
1781  */