NAT64: fix TCP session expire (VPP-1390)
[vpp.git] / src / plugins / nat / nat64_in2out.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT64 IPv6 to IPv4 translation (inside to outside network)
18  */
19
20 #include <nat/nat64.h>
21 #include <nat/nat_reass.h>
22 #include <nat/nat_inlines.h>
23 #include <vnet/ip/ip6_to_ip4.h>
24 #include <vnet/fib/fib_table.h>
25
/* Per-packet trace record for the NAT64 in2out fast- and slow-path nodes. */
typedef struct
{
  u32 sw_if_index;		/* RX interface of the traced packet */
  u32 next_index;		/* graph node the packet was dispatched to */
  u8 is_slow_path;		/* 1 when recorded by the slow-path node */
} nat64_in2out_trace_t;
32
33 static u8 *
34 format_nat64_in2out_trace (u8 * s, va_list * args)
35 {
36   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
37   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
38   nat64_in2out_trace_t *t = va_arg (*args, nat64_in2out_trace_t *);
39   char *tag;
40
41   tag = t->is_slow_path ? "NAT64-in2out-slowpath" : "NAT64-in2out";
42
43   s =
44     format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
45             t->next_index);
46
47   return s;
48 }
49
/* Per-packet trace record for the NAT64 in2out reassembly node. */
typedef struct
{
  u32 sw_if_index;		/* RX interface of the traced packet */
  u32 next_index;		/* graph node the packet was dispatched to */
  u8 cached;			/* 1 if the fragment was cached, 0 if translated */
} nat64_in2out_reass_trace_t;
56
57 static u8 *
58 format_nat64_in2out_reass_trace (u8 * s, va_list * args)
59 {
60   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
61   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
62   nat64_in2out_reass_trace_t *t =
63     va_arg (*args, nat64_in2out_reass_trace_t *);
64
65   s =
66     format (s, "NAT64-in2out-reass: sw_if_index %d, next index %d, status %s",
67             t->sw_if_index, t->next_index,
68             t->cached ? "cached" : "translated");
69
70   return s;
71 }
72
/* Forward declarations of the graph node registrations defined below. */
vlib_node_registration_t nat64_in2out_node;
vlib_node_registration_t nat64_in2out_slowpath_node;
vlib_node_registration_t nat64_in2out_reass_node;
vlib_node_registration_t nat64_in2out_handoff_node;
77
/* Error counters exposed by the in2out nodes; each _(sym, str) pair becomes
 * a NAT64_IN2OUT_ERROR_<sym> enum value and its display string. */
#define foreach_nat64_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "unsupported protocol")          \
_(IN2OUT_PACKETS, "good in2out packets processed")       \
_(NO_TRANSLATION, "no translation")                      \
_(UNKNOWN, "unknown")                                    \
_(DROP_FRAGMENT, "Drop fragment")                        \
_(MAX_REASS, "Maximum reassemblies exceeded")            \
_(MAX_FRAG, "Maximum fragments per reassembly exceeded")


typedef enum
{
#define _(sym,str) NAT64_IN2OUT_ERROR_##sym,
  foreach_nat64_in2out_error
#undef _
    NAT64_IN2OUT_N_ERROR,
} nat64_in2out_error_t;

/* Counter display strings, index-aligned with nat64_in2out_error_t. */
static char *nat64_in2out_error_strings[] = {
#define _(sym,string) string,
  foreach_nat64_in2out_error
#undef _
};
101
/* Next-node indices for packets leaving the in2out nodes. */
typedef enum
{
  NAT64_IN2OUT_NEXT_IP4_LOOKUP,	/* translated, continue as IPv4 */
  NAT64_IN2OUT_NEXT_IP6_LOOKUP,	/* not translated (local / hairpin), stay IPv6 */
  NAT64_IN2OUT_NEXT_DROP,
  NAT64_IN2OUT_NEXT_SLOWPATH,	/* unknown protocol, punt to slow path */
  NAT64_IN2OUT_NEXT_REASS,	/* fragment, punt to reassembly */
  NAT64_IN2OUT_N_NEXT,
} nat64_in2out_next_t;

/* Context handed to the ip6_to_ip4 translation callbacks. */
typedef struct nat64_in2out_set_ctx_t_
{
  vlib_buffer_t *b;		/* buffer being translated */
  vlib_main_t *vm;
  u32 thread_index;		/* selects the per-thread NAT64 db */
} nat64_in2out_set_ctx_t;
118
/**
 * @brief Check whether a packet should bypass NAT64 translation.
 *
 * Returns 1 when ip6_addr is one of the IPv6 addresses assigned to
 * sw_if_index (i.e. the packet is addressed to the router itself),
 * otherwise 0.
 */
static inline u8
nat64_not_translate (u32 sw_if_index, ip6_address_t ip6_addr)
{
  ip6_address_t *addr;
  ip6_main_t *im6 = &ip6_main;
  ip_lookup_main_t *lm6 = &im6->lookup_main;
  ip_interface_address_t *ia = 0;

  /* *INDENT-OFF* */
  foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
  ({
	addr = ip_interface_address_get_address (lm6, ia);
	if (0 == ip6_address_compare (addr, &ip6_addr))
		return 1;
  }));
  /* *INDENT-ON* */

  return 0;
}
138
139 /**
140  * @brief Check whether is a hairpinning.
141  *
142  * If the destination IP address of the packet is an IPv4 address assigned to
143  * the NAT64 itself, then the packet is a hairpin packet.
144  *
 * @param dst_addr Destination address of the packet.
146  *
147  * @returns 1 if hairpinning, otherwise 0.
148  */
149 static_always_inline int
150 is_hairpinning (ip6_address_t * dst_addr)
151 {
152   nat64_main_t *nm = &nat64_main;
153   int i;
154
155   for (i = 0; i < vec_len (nm->addr_pool); i++)
156     {
157       if (nm->addr_pool[i].addr.as_u32 == dst_addr->as_u32[3])
158         return 1;
159     }
160
161   return 0;
162 }
163
/**
 * @brief ip6_to_ip4 translation callback for in2out TCP/UDP packets.
 *
 * Finds (or creates) the BIB and session-table entries for the flow,
 * rewrites the IPv4 source address/port and destination address, advances
 * the TCP session state machine, and refreshes the session timeout.
 *
 * @param ip6 Original IPv6 header.
 * @param ip4 IPv4 header being composed.
 * @param arg nat64_in2out_set_ctx_t context (buffer, vm, thread index).
 *
 * @returns 0 on success, -1 when no translation can be made (drop).
 */
static int
nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
                             void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  /* TCP and UDP share the port field layout, so ports are read through
     the UDP header view regardless of proto. */
  udp_header_t *udp = ip6_next_header (ip6);
  u8 proto = ip6->protocol;
  u16 sport = udp->src_port;
  u16 dport = udp->dst_port;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  ste =
    nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
                            fib_index, 1);

  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
        return -1;
    }
  else
    {
      /* No session yet: reuse an existing BIB entry for this source, or
         allocate a fresh outside address/port and create one. */
      bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1);

      if (!bibe)
        {
          u16 out_port;
          ip4_address_t out_addr;
          if (nat64_alloc_out_addr_and_port
              (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
               &out_port, ctx->thread_index))
            return -1;

          bibe =
            nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr,
                                       sport, out_port, fib_index, proto, 0);
          if (!bibe)
            return -1;
        }

      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
      ste =
        nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
                                  &daddr.ip4, dport);
      if (!ste)
        return -1;
    }

  ip4->src_address.as_u32 = bibe->out_addr.as_u32;
  udp->src_port = bibe->out_port;

  ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;

  if (proto == IP_PROTOCOL_TCP)
    {
      u16 *checksum;
      ip_csum_t csum;
      tcp_header_t *tcp = ip6_next_header (ip6);

      /* Track TCP state before refreshing the timeout so established
         sessions get the long expiry (VPP-1390). */
      nat64_tcp_session_set_state (ste, tcp, 1);
      /* Incremental checksum update for the rewritten source port. */
      checksum = &tcp->checksum;
      csum = ip_csum_sub_even (*checksum, sport);
      csum = ip_csum_add_even (csum, udp->src_port);
      *checksum = ip_csum_fold (csum);
    }

  nat64_session_reset_timeout (ste, ctx->vm);

  return 0;
}
249
/**
 * @brief ip6_to_ip4 translation callback for in2out ICMP packets.
 *
 * Echo request/reply packets are translated statefully, keyed on the ICMP
 * identifier in place of a port.  Other ICMP types (errors) are given the
 * first pool address as source and the extracted IPv4 destination.
 *
 * @returns 0 on success, -1 when no translation can be made (drop).
 */
static int
nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  icmp46_header_t *icmp = ip6_next_header (ip6);
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply)
    {
      /* ICMP echo identifier sits 4 bytes into the header; it plays the
         role of the port in the BIB/session lookup. */
      u16 in_id = ((u16 *) (icmp))[2];
      ste =
        nat64_db_st_entry_find (db, &saddr, &daddr, in_id, 0,
                                IP_PROTOCOL_ICMP, fib_index, 1);

      if (ste)
        {
          bibe =
            nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP,
                                         ste->bibe_index);
          if (!bibe)
            return -1;
        }
      else
        {
          bibe =
            nat64_db_bib_entry_find (db, &saddr, in_id,
                                     IP_PROTOCOL_ICMP, fib_index, 1);

          if (!bibe)
            {
              u16 out_id;
              ip4_address_t out_addr;
              if (nat64_alloc_out_addr_and_port
                  (fib_index, SNAT_PROTOCOL_ICMP, &out_addr, &out_id,
                   ctx->thread_index))
                return -1;

              bibe =
                nat64_db_bib_entry_create (db, &ip6->src_address,
                                           &out_addr, in_id, out_id,
                                           fib_index, IP_PROTOCOL_ICMP, 0);
              if (!bibe)
                return -1;
            }

          nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
          ste =
            nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
                                      &daddr.ip4, 0);
          if (!ste)
            return -1;
        }

      nat64_session_reset_timeout (ste, ctx->vm);

      ip4->src_address.as_u32 = bibe->out_addr.as_u32;
      /* Rewrite the echo identifier to the outside value. */
      ((u16 *) (icmp))[2] = bibe->out_port;

      ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
    }
  else
    {
      /* Non-echo ICMP: stateless rewrite using the first pool address. */
      if (!vec_len (nm->addr_pool))
        return -1;

      ip4->src_address.as_u32 = nm->addr_pool[0].addr.as_u32;
      nat64_extract_ip4 (&ip6->dst_address, &ip4->dst_address, fib_index);
    }

  return 0;
}
335
/**
 * @brief ip6_to_ip4 translation callback for the inner packet embedded in
 * an in2out ICMP error.
 *
 * The embedded packet travels in the reverse direction, so lookups are
 * done with (daddr, saddr) / (dport, sport) swapped, and only existing
 * sessions are matched — no entries are created here.
 *
 * @returns 0 on success, -1 when the embedded flow has no session (drop).
 */
static int
nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
                                void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *ctx = arg;
  nat64_db_st_entry_t *ste;
  nat64_db_bib_entry_t *bibe;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  u8 proto = ip6->protocol;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  if (proto == IP_PROTOCOL_ICMP6)
    {
      icmp46_header_t *icmp = ip6_next_header (ip6);
      u16 in_id = ((u16 *) (icmp))[2];	/* echo identifier */
      proto = IP_PROTOCOL_ICMP;

      /* NOTE(review): the inner header is ICMPv6 but the types are compared
         against ICMP4_* constants — presumably the inner ICMP has already
         been type-translated by the ip6_to_ip4 machinery before this
         callback runs; confirm against icmp6_to_icmp ordering. */
      if (!
          (icmp->type == ICMP4_echo_request
           || icmp->type == ICMP4_echo_reply))
        return -1;

      ste =
        nat64_db_st_entry_find (db, &daddr, &saddr, in_id, 0, proto,
                                fib_index, 1);
      if (!ste)
        return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
        return -1;

      ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
      ((u16 *) (icmp))[2] = bibe->out_port;
      ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
    }
  else
    {
      /* TCP/UDP embedded packet; both header views alias the same bytes. */
      udp_header_t *udp = ip6_next_header (ip6);
      tcp_header_t *tcp = ip6_next_header (ip6);
      u16 *checksum;
      ip_csum_t csum;

      u16 sport = udp->src_port;
      u16 dport = udp->dst_port;

      ste =
        nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
                                fib_index, 1);
      if (!ste)
        return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
        return -1;

      ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
      udp->dst_port = bibe->out_port;
      ip4->src_address.as_u32 = ste->out_r_addr.as_u32;

      /* Incremental checksum update for the rewritten destination port. */
      if (proto == IP_PROTOCOL_TCP)
        checksum = &tcp->checksum;
      else
        checksum = &udp->checksum;
      csum = ip_csum_sub_even (*checksum, dport);
      csum = ip_csum_add_even (csum, udp->dst_port);
      *checksum = ip_csum_fold (csum);
    }

  return 0;
}
418
/* Context for unk_proto_st_walk: search keys in, chosen out address out. */
typedef struct unk_proto_st_walk_ctx_t_
{
  ip6_address_t src_addr;	/* inside source being translated */
  ip6_address_t dst_addr;	/* inside destination being translated */
  ip4_address_t out_addr;	/* result: outside address found, 0 if none */
  u32 fib_index;
  u32 thread_index;
  u8 proto;			/* IP protocol of the new (unknown-proto) flow */
} unk_proto_st_walk_ctx_t;
428
/**
 * @brief Session-table walk callback used to pick an outside address for an
 * unknown-protocol flow.
 *
 * Looks for an existing TCP/UDP session from the same inside src to the
 * same inside dst and, if its outside address is free for ctx->proto,
 * records it in ctx->out_addr.
 *
 * @returns 1 to stop the walk (address found), -1 to stop on conflict,
 *          0 to continue.
 */
static int
unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg)
{
  nat64_main_t *nm = &nat64_main;
  unk_proto_st_walk_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  ip46_address_t saddr, daddr;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  if (ip46_address_is_equal (&ste->in_r_addr, &ctx->dst_addr))
    {
      bibe = nat64_db_bib_entry_by_index (db, ste->proto, ste->bibe_index);
      if (!bibe)
        return -1;

      if (ip46_address_is_equal (&bibe->in_addr, &ctx->src_addr)
          && bibe->fib_index == ctx->fib_index)
        {
          memset (&saddr, 0, sizeof (saddr));
          saddr.ip4.as_u32 = bibe->out_addr.as_u32;
          memset (&daddr, 0, sizeof (daddr));
          nat64_extract_ip4 (&ctx->dst_addr, &daddr.ip4, ctx->fib_index);

          /* Outside address already used by a session of this protocol:
             cannot reuse it, abort the walk. */
          if (nat64_db_st_entry_find
              (db, &daddr, &saddr, 0, 0, ctx->proto, ctx->fib_index, 0))
            return -1;

          ctx->out_addr.as_u32 = bibe->out_addr.as_u32;
          return 1;
        }
    }

  return 0;
}
463
/**
 * @brief ip6_to_ip4 translation callback for in2out packets of protocols
 * other than TCP/UDP/ICMP (no port to translate).
 *
 * Reuses the outside address of an existing TCP/UDP session to the same
 * destination when possible, otherwise picks a free pool address, then
 * creates the BIB/session entries and rewrites the IPv4 addresses.
 *
 * @returns 0 on success, -1 when no translation can be made (drop).
 */
static int
nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
                               void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *s_ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr, addr;
  u32 sw_if_index, fib_index;
  u8 proto = ip6->protocol;
  int i;
  nat64_db_t *db = &nm->db[s_ctx->thread_index];

  sw_if_index = vnet_buffer (s_ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  ste =
    nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1);

  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
        return -1;
    }
  else
    {
      bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1);

      if (!bibe)
        {
          /* Choose same out address as for TCP/UDP session to same dst */
          unk_proto_st_walk_ctx_t ctx = {
            .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
            .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
            .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
            .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
            .out_addr.as_u32 = 0,
            .fib_index = fib_index,
            .proto = proto,
            .thread_index = s_ctx->thread_index,
          };

          nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);

          if (!ctx.out_addr.as_u32)
            nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);

          /* Verify if out address is not already in use for protocol */
          memset (&addr, 0, sizeof (addr));
          addr.ip4.as_u32 = ctx.out_addr.as_u32;
          if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
            ctx.out_addr.as_u32 = 0;

          if (!ctx.out_addr.as_u32)
            {
              /* Fall back to the first pool address free for this proto. */
              for (i = 0; i < vec_len (nm->addr_pool); i++)
                {
                  addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
                  if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
                    break;
                }
            }

          if (!ctx.out_addr.as_u32)
            return -1;

          bibe =
            nat64_db_bib_entry_create (db, &ip6->src_address,
                                       &ctx.out_addr, 0, 0, fib_index, proto,
                                       0);
          if (!bibe)
            return -1;
        }

      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
      ste =
        nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0);
      if (!ste)
        return -1;
    }

  nat64_session_reset_timeout (ste, s_ctx->vm);

  ip4->src_address.as_u32 = bibe->out_addr.as_u32;
  ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;

  return 0;
}
560
561
562
/**
 * @brief Translate an in2out TCP/UDP hairpinned packet (IPv6 to IPv6).
 *
 * Performs the in2out translation on the source side (creating BIB/session
 * entries as needed), then resolves the destination back to its inside
 * IPv6 address via the BIB of any worker db, keeping the packet in the
 * IPv6 path.  The L4 checksum is updated incrementally.
 *
 * @returns 0 on success, -1 when no translation can be made (drop).
 */
static int
nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
                                  ip6_header_t * ip6, u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  /* TCP and UDP header views alias the same bytes; ports are shared. */
  udp_header_t *udp = ip6_next_header (ip6);
  tcp_header_t *tcp = ip6_next_header (ip6);
  u8 proto = ip6->protocol;
  u16 sport = udp->src_port;
  u16 dport = udp->dst_port;
  u16 *checksum;
  ip_csum_t csum;
  nat64_db_t *db = &nm->db[thread_index];

  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  if (proto == IP_PROTOCOL_UDP)
    checksum = &udp->checksum;
  else
    checksum = &tcp->checksum;

  /* Remove the old addresses/ports from the checksum now; the new values
     are added back after both rewrites below. */
  csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, sport);
  csum = ip_csum_sub_even (csum, dport);

  ste =
    nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
                            fib_index, 1);

  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
        return -1;
    }
  else
    {
      bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1);

      if (!bibe)
        {
          u16 out_port;
          ip4_address_t out_addr;
          if (nat64_alloc_out_addr_and_port
              (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
               &out_port, thread_index))
            return -1;

          bibe =
            nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr,
                                       sport, out_port, fib_index, proto, 0);
          if (!bibe)
            return -1;
        }

      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
      ste =
        nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
                                  &daddr.ip4, dport);
      if (!ste)
        return -1;
    }

  /* Track TCP state before refreshing the timeout (VPP-1390). */
  if (proto == IP_PROTOCOL_TCP)
    nat64_tcp_session_set_state (ste, tcp, 1);

  nat64_session_reset_timeout (ste, vm);

  sport = udp->src_port = bibe->out_port;
  nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);

  memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ste->out_r_addr.as_u32;

  /* The destination's BIB entry may live in any worker's db. */
  bibe = 0;
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, 0, 0);

      if (bibe)
        break;
    }
  /* *INDENT-ON* */

  if (!bibe)
    return -1;

  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
  udp->dst_port = bibe->in_port;

  csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
  csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
  csum = ip_csum_add_even (csum, udp->src_port);
  csum = ip_csum_add_even (csum, udp->dst_port);
  *checksum = ip_csum_fold (csum);

  return 0;
}
679
/**
 * @brief Translate an in2out hairpinned ICMPv6 error message.
 *
 * Handles only ICMP errors carrying an embedded TCP/UDP packet (echo
 * packets and embedded ICMP are rejected).  The embedded packet's
 * addresses/ports are rewritten from the session tables, the outer
 * header is re-sourced from the first pool address, and the ICMP
 * checksum is recomputed over the IPv6 pseudo-header.
 *
 * @returns 0 on success, -1 when no translation can be made (drop).
 */
static int
nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
                               ip6_header_t * ip6, u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  icmp46_header_t *icmp = ip6_next_header (ip6);
  ip6_header_t *inner_ip6;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  u8 proto;
  udp_header_t *udp;
  tcp_header_t *tcp;
  u16 *checksum, sport, dport;
  ip_csum_t csum;
  nat64_db_t *db = &nm->db[thread_index];

  /* Echo packets are not ICMP errors; not handled here. */
  if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
    return -1;

  /* Embedded (original) packet starts after the 8-byte ICMPv6 header. */
  inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);

  proto = inner_ip6->protocol;

  if (proto == IP_PROTOCOL_ICMP6)
    return -1;

  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = inner_ip6->src_address.as_u64[0];
  saddr.as_u64[1] = inner_ip6->src_address.as_u64[1];
  daddr.as_u64[0] = inner_ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = inner_ip6->dst_address.as_u64[1];

  udp = ip6_next_header (inner_ip6);
  tcp = ip6_next_header (inner_ip6);

  sport = udp->src_port;
  dport = udp->dst_port;

  if (proto == IP_PROTOCOL_UDP)
    checksum = &udp->checksum;
  else
    checksum = &tcp->checksum;

  /* Remove old embedded addresses/ports from the L4 checksum; new values
     are added back after the rewrites below. */
  csum = ip_csum_sub_even (*checksum, inner_ip6->src_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, sport);
  csum = ip_csum_sub_even (csum, dport);

  /* Embedded packet travels in the reverse direction: swapped lookup. */
  ste =
    nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
                            fib_index, 1);
  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
  if (!bibe)
    return -1;

  dport = udp->dst_port = bibe->out_port;
  nat64_compose_ip6 (&inner_ip6->dst_address, &bibe->out_addr, fib_index);

  memset (&saddr, 0, sizeof (saddr));
  memset (&daddr, 0, sizeof (daddr));
  saddr.ip4.as_u32 = ste->out_r_addr.as_u32;
  daddr.ip4.as_u32 = bibe->out_addr.as_u32;

  /* The hairpin peer's session may live in any worker's db. */
  ste = 0;
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      ste = nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
                                    0, 0);

      if (ste)
        break;
    }
  /* *INDENT-ON* */

  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
  if (!bibe)
    return -1;

  inner_ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
  inner_ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
  udp->src_port = bibe->in_port;

  csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]);
  csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]);
  csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]);
  csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]);
  csum = ip_csum_add_even (csum, udp->src_port);
  csum = ip_csum_add_even (csum, udp->dst_port);
  *checksum = ip_csum_fold (csum);

  if (!vec_len (nm->addr_pool))
    return -1;

  /* Re-source the outer header and point it at the resolved inside host. */
  nat64_compose_ip6 (&ip6->src_address, &nm->addr_pool[0].addr, fib_index);
  ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
  ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];

  /* Full ICMP checksum recompute over the IPv6 pseudo-header + payload. */
  icmp->checksum = 0;
  csum = ip_csum_with_carry (0, ip6->payload_length);
  csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol));
  csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]);
  csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]);
  csum =
    ip_incremental_checksum (csum, icmp,
                             clib_net_to_host_u16 (ip6->payload_length));
  icmp->checksum = ~ip_csum_fold (csum);

  return 0;
}
805
/**
 * @brief Translate an in2out hairpinned packet of an unknown (non
 * TCP/UDP/ICMP) protocol, keeping it in the IPv6 path.
 *
 * Mirrors nat64_in2out_unk_proto_set_cb for the outside-address choice and
 * BIB/session creation, then resolves the destination back to its inside
 * IPv6 address via the BIB of any worker db.
 *
 * @returns 0 on success, -1 when no translation can be made (drop).
 */
static int
nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
                                    ip6_header_t * ip6, u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr, addr;
  u32 sw_if_index, fib_index;
  u8 proto = ip6->protocol;
  int i;
  nat64_db_t *db = &nm->db[thread_index];

  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  ste =
    nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1);

  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
        return -1;
    }
  else
    {
      bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1);

      if (!bibe)
        {
          /* Choose same out address as for TCP/UDP session to same dst */
          unk_proto_st_walk_ctx_t ctx = {
            .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
            .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
            .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
            .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
            .out_addr.as_u32 = 0,
            .fib_index = fib_index,
            .proto = proto,
            .thread_index = thread_index,
          };

          nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);

          if (!ctx.out_addr.as_u32)
            nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);

          /* Verify if out address is not already in use for protocol */
          memset (&addr, 0, sizeof (addr));
          addr.ip4.as_u32 = ctx.out_addr.as_u32;
          if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
            ctx.out_addr.as_u32 = 0;

          if (!ctx.out_addr.as_u32)
            {
              /* Fall back to the first pool address free for this proto. */
              for (i = 0; i < vec_len (nm->addr_pool); i++)
                {
                  addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
                  if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
                    break;
                }
            }

          if (!ctx.out_addr.as_u32)
            return -1;

          bibe =
            nat64_db_bib_entry_create (db, &ip6->src_address,
                                       &ctx.out_addr, 0, 0, fib_index, proto,
                                       0);
          if (!bibe)
            return -1;
        }

      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
      ste =
        nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0);
      if (!ste)
        return -1;
    }

  nat64_session_reset_timeout (ste, vm);

  nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);

  memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ste->out_r_addr.as_u32;

  /* The destination's BIB entry may live in any worker's db. */
  bibe = 0;
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, 0, 0);

      if (bibe)
        break;
    }
  /* *INDENT-ON* */

  if (!bibe)
    return -1;

  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];

  return 0;
}
920
/**
 * @brief NAT64 IPv6->IPv4 translation worker, shared by the fast-path and
 * slow-path graph nodes.
 *
 * @param vm           vlib main.
 * @param node         node runtime.
 * @param frame        frame of buffer indices to process.
 * @param is_slow_path 1 when running as the slow-path node (translates
 *                     unknown L4 protocols), 0 for the fast path.
 *
 * @return number of vectors in the frame (all buffers are consumed).
 */
static inline uword
nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                             vlib_frame_t * frame, u8 is_slow_path)
{
  u32 n_left_from, *from, *to_next;
  nat64_in2out_next_t next_index;
  u32 pkts_processed = 0;
  u32 stats_node_index;
  u32 thread_index = vm->thread_index;

  /* Counters are credited to whichever node variant is running. */
  stats_node_index =
    is_slow_path ? nat64_in2out_slowpath_node.index : nat64_in2out_node.index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t *b0;
          u32 next0;
          ip6_header_t *ip60;
          u16 l4_offset0, frag_offset0;
          u8 l4_protocol0;
          u32 proto0;
          nat64_in2out_set_ctx_t ctx0;
          u32 sw_if_index0;

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          ip60 = vlib_buffer_get_current (b0);

          /* Context handed to the per-protocol translation callbacks. */
          ctx0.b = b0;
          ctx0.vm = vm;
          ctx0.thread_index = thread_index;

          next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;

          /* Walk the IPv6 extension-header chain; drop on malformed input. */
          if (PREDICT_FALSE
              (ip6_parse
               (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
                &frag_offset0)))
            {
              next0 = NAT64_IN2OUT_NEXT_DROP;
              b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
              goto trace0;
            }

          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];

          /* Destination outside NAT64 prefix: plain IPv6 forwarding. */
          if (nat64_not_translate (sw_if_index0, ip60->dst_address))
            {
              next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
              goto trace0;
            }

          proto0 = ip_proto_to_snat_proto (l4_protocol0);

          if (is_slow_path)
            {
              /* The slow path only receives packets the fast path punted,
                 so an unknown L4 protocol (~0) is the expected case here. */
              if (PREDICT_TRUE (proto0 == ~0))
                {
                  if (is_hairpinning (&ip60->dst_address))
                    {
                      /* IPv6 destination maps back to an inside host. */
                      next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
                      if (nat64_in2out_unk_proto_hairpinning
                          (vm, b0, ip60, thread_index))
                        {
                          next0 = NAT64_IN2OUT_NEXT_DROP;
                          b0->error =
                            node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
                        }
                      goto trace0;
                    }

                  if (ip6_to_ip4 (b0, nat64_in2out_unk_proto_set_cb, &ctx0))
                    {
                      next0 = NAT64_IN2OUT_NEXT_DROP;
                      b0->error =
                        node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
                      goto trace0;
                    }
                }
              goto trace0;
            }
          else
            {
              /* Fast path: punt unknown L4 protocols to the slow path. */
              if (PREDICT_FALSE (proto0 == ~0))
                {
                  next0 = NAT64_IN2OUT_NEXT_SLOWPATH;
                  goto trace0;
                }
            }

          /* Fragments need reassembly state; handled by a dedicated node. */
          if (PREDICT_FALSE
              (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION))
            {
              next0 = NAT64_IN2OUT_NEXT_REASS;
              goto trace0;
            }

          if (proto0 == SNAT_PROTOCOL_ICMP)
            {
              if (is_hairpinning (&ip60->dst_address))
                {
                  next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
                  if (nat64_in2out_icmp_hairpinning
                      (vm, b0, ip60, thread_index))
                    {
                      next0 = NAT64_IN2OUT_NEXT_DROP;
                      b0->error =
                        node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
                    }
                  goto trace0;
                }

              /* Translate both the outer ICMP and any embedded packet. */
              if (icmp6_to_icmp
                  (b0, nat64_in2out_icmp_set_cb, &ctx0,
                   nat64_in2out_inner_icmp_set_cb, &ctx0))
                {
                  next0 = NAT64_IN2OUT_NEXT_DROP;
                  b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
                  goto trace0;
                }
            }
          else if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
            {
              if (is_hairpinning (&ip60->dst_address))
                {
                  next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
                  if (nat64_in2out_tcp_udp_hairpinning
                      (vm, b0, ip60, thread_index))
                    {
                      next0 = NAT64_IN2OUT_NEXT_DROP;
                      b0->error =
                        node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
                    }
                  goto trace0;
                }

              if (ip6_to_ip4_tcp_udp
                  (b0, nat64_in2out_tcp_udp_set_cb, &ctx0, 0))
                {
                  next0 = NAT64_IN2OUT_NEXT_DROP;
                  b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
                  goto trace0;
                }
            }

        trace0:
          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              nat64_in2out_trace_t *t =
                vlib_add_trace (vm, node, b0, sizeof (*t));
              t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
              t->next_index = next0;
              t->is_slow_path = is_slow_path;
            }

          /* Drops are not counted as processed packets. */
          pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP;

          /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                           n_left_to_next, bi0, next0);
        }
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  vlib_node_increment_counter (vm, stats_node_index,
                               NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
                               pkts_processed);
  return frame->n_vectors;
}
1108
1109 static uword
1110 nat64_in2out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1111                       vlib_frame_t * frame)
1112 {
1113   return nat64_in2out_node_fn_inline (vm, node, frame, 0);
1114 }
1115
/* *INDENT-OFF* */
/** Graph-node registration for the NAT64 in2out fast path. */
VLIB_REGISTER_NODE (nat64_in2out_node) = {
  .function = nat64_in2out_node_fn,
  .name = "nat64-in2out",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
  .error_strings = nat64_in2out_error_strings,
  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_node, nat64_in2out_node_fn);
1138
1139 static uword
1140 nat64_in2out_slowpath_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1141                                vlib_frame_t * frame)
1142 {
1143   return nat64_in2out_node_fn_inline (vm, node, frame, 1);
1144 }
1145
/* *INDENT-OFF* */
/** Graph-node registration for the NAT64 in2out slow path (unknown L4
 *  protocols punted from the fast path). */
VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
  .function = nat64_in2out_slowpath_node_fn,
  .name = "nat64-in2out-slowpath",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
  .error_strings = nat64_in2out_error_strings,
  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_slowpath_node,
                              nat64_in2out_slowpath_node_fn);
1169
/** Per-fragment translation context passed to the fragment callbacks. */
typedef struct nat64_in2out_frag_set_ctx_t_
{
  vlib_main_t *vm;
  u32 sess_index;		/**< session-table entry index in the thread's DB */
  u32 thread_index;		/**< worker thread owning the NAT64 DB */
  u16 l4_offset;		/**< byte offset of the L4 header within the packet */
  u8 proto;			/**< L4 protocol (IP protocol number) */
  u8 first_frag;		/**< 1 when this buffer is the first fragment */
} nat64_in2out_frag_set_ctx_t;
1179
/**
 * @brief ip6_to_ip4_fragmented callback: rewrite a fragment using an
 * existing NAT64 session.
 *
 * Looks up the session/BIB entries recorded in @c ctx, refreshes the
 * session timeout and writes the translated IPv4 addresses.  Only the
 * first fragment carries the L4 header, so the port rewrite and the TCP
 * checksum adjustment happen only when @c ctx->first_frag is set.
 *
 * @return 0 on success, -1 when the session or BIB entry is gone.
 */
static int
nat64_in2out_frag_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_frag_set_ctx_t *ctx = arg;
  nat64_db_st_entry_t *ste;
  nat64_db_bib_entry_t *bibe;
  udp_header_t *udp;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
  if (!bibe)
    return -1;

  nat64_session_reset_timeout (ste, ctx->vm);

  if (ctx->first_frag)
    {
      udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);

      if (ctx->proto == IP_PROTOCOL_TCP)
        {
          u16 *checksum;
          ip_csum_t csum;
          tcp_header_t *tcp = (tcp_header_t *) udp;

          nat64_tcp_session_set_state (ste, tcp, 1);
          /* Incrementally patch the TCP checksum: remove the old source
             port and IPv6 pseudo-header addresses, add the translated
             port and IPv4 addresses. */
          checksum = &tcp->checksum;
          csum = ip_csum_sub_even (*checksum, tcp->src_port);
          csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[0]);
          csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
          csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
          csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
          csum = ip_csum_add_even (csum, bibe->out_port);
          csum = ip_csum_add_even (csum, bibe->out_addr.as_u32);
          csum = ip_csum_add_even (csum, ste->out_r_addr.as_u32);
          *checksum = ip_csum_fold (csum);
        }

      udp->src_port = bibe->out_port;
    }

  ip4->src_address.as_u32 = bibe->out_addr.as_u32;
  ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;

  return 0;
}
1231
/**
 * @brief Hairpin a fragment: source is translated in2out and the
 * destination is mapped back to an inside IPv6 host, so the packet stays
 * IPv6.
 *
 * The L4 checksum delta is accumulated only for the first fragment (the
 * only one carrying the L4 header); non-first fragments just get the
 * address rewrite.  The destination BIB entry is searched across all
 * per-thread DBs because it may be owned by another worker.
 *
 * @return 0 on success, -1 when session or BIB lookups fail.
 */
static int
nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6,
                               nat64_in2out_frag_set_ctx_t * ctx)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_st_entry_t *ste;
  nat64_db_bib_entry_t *bibe;
  udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);
  tcp_header_t *tcp = (tcp_header_t *) udp;
  u16 sport = udp->src_port;
  u16 dport = udp->dst_port;
  u16 *checksum;
  ip_csum_t csum;
  ip46_address_t daddr;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  if (ctx->first_frag)
    {
      if (ctx->proto == IP_PROTOCOL_UDP)
        checksum = &udp->checksum;
      else
        checksum = &tcp->checksum;

      /* Remove the pre-translation addresses and ports from the checksum;
         the new values are added back in after the rewrite below. */
      csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
      csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
      csum = ip_csum_sub_even (csum, sport);
      csum = ip_csum_sub_even (csum, dport);
    }

  ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
  if (!bibe)
    return -1;

  if (ctx->proto == IP_PROTOCOL_TCP)
    nat64_tcp_session_set_state (ste, tcp, 1);

  nat64_session_reset_timeout (ste, ctx->vm);

  sport = bibe->out_port;
  dport = ste->r_port;

  /* Source becomes the NAT64-prefixed form of the outside address. */
  nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, bibe->fib_index);

  memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ste->out_r_addr.as_u32;

  /* Find the inside host behind the hairpinned destination; the BIB
     entry may live in any worker's DB. */
  bibe = 0;
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      bibe = nat64_db_bib_entry_find (db, &daddr, dport, ctx->proto, 0, 0);

      if (bibe)
        break;
    }
  /* *INDENT-ON* */

  if (!bibe)
    return -1;

  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];

  if (ctx->first_frag)
    {
      udp->dst_port = bibe->in_port;
      udp->src_port = sport;
      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
      csum = ip_csum_add_even (csum, udp->src_port);
      csum = ip_csum_add_even (csum, udp->dst_port);
      *checksum = ip_csum_fold (csum);
    }

  return 0;
}
1316
/**
 * @brief NAT64 in2out fragment handling node.
 *
 * First fragments create (or find) the BIB and session entries and record
 * the session index in the reassembly context; non-first fragments that
 * arrive before the first one are cached and looped back through this
 * node once the session is known.  Only TCP and UDP fragments are
 * translated; everything else is dropped.
 *
 * @return number of vectors in the incoming frame.
 */
static uword
nat64_in2out_reass_node_fn (vlib_main_t * vm,
                            vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  nat64_in2out_next_t next_index;
  u32 pkts_processed = 0;
  u32 *fragments_to_drop = 0;
  u32 *fragments_to_loopback = 0;
  nat64_main_t *nm = &nat64_main;
  u32 thread_index = vm->thread_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t *b0;
          u32 next0;
          u8 cached0 = 0;
          ip6_header_t *ip60;
          u16 l4_offset0, frag_offset0;
          u8 l4_protocol0;
          nat_reass_ip6_t *reass0;
          ip6_frag_hdr_t *frag0;
          nat64_db_bib_entry_t *bibe0;
          nat64_db_st_entry_t *ste0;
          udp_header_t *udp0;
          snat_protocol_t proto0;
          u32 sw_if_index0, fib_index0;
          ip46_address_t saddr0, daddr0;
          nat64_in2out_frag_set_ctx_t ctx0;
          nat64_db_t *db = &nm->db[thread_index];

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;

          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
          fib_index0 =
            fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6,
                                                 sw_if_index0);

          ctx0.thread_index = thread_index;

          /* Global "drop all fragments" knob. */
          if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
            {
              next0 = NAT64_IN2OUT_NEXT_DROP;
              b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
              goto trace0;
            }

          ip60 = (ip6_header_t *) vlib_buffer_get_current (b0);

          if (PREDICT_FALSE
              (ip6_parse
               (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
                &frag_offset0)))
            {
              next0 = NAT64_IN2OUT_NEXT_DROP;
              b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
              goto trace0;
            }

          /* Only TCP and UDP fragments are supported. */
          if (PREDICT_FALSE
              (!(l4_protocol0 == IP_PROTOCOL_TCP
                 || l4_protocol0 == IP_PROTOCOL_UDP)))
            {
              next0 = NAT64_IN2OUT_NEXT_DROP;
              b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
              goto trace0;
            }

          udp0 = (udp_header_t *) u8_ptr_add (ip60, l4_offset0);
          frag0 = (ip6_frag_hdr_t *) u8_ptr_add (ip60, frag_offset0);
          proto0 = ip_proto_to_snat_proto (l4_protocol0);

          reass0 = nat_ip6_reass_find_or_create (ip60->src_address,
                                                 ip60->dst_address,
                                                 frag0->identification,
                                                 l4_protocol0,
                                                 1, &fragments_to_drop);

          if (PREDICT_FALSE (!reass0))
            {
              next0 = NAT64_IN2OUT_NEXT_DROP;
              b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_REASS];
              goto trace0;
            }

          if (PREDICT_TRUE (ip6_frag_hdr_offset (frag0)))
            {
              /* Non-first fragment: if the session is not known yet,
                 cache the buffer until the first fragment arrives. */
              ctx0.first_frag = 0;
              if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
                {
                  if (nat_ip6_reass_add_fragment (reass0, bi0))
                    {
                      b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_FRAG];
                      next0 = NAT64_IN2OUT_NEXT_DROP;
                      goto trace0;
                    }
                  cached0 = 1;
                  goto trace0;
                }
            }
          else
            {
              /* First fragment: establish (or look up) BIB and session
                 state, then release any cached fragments. */
              ctx0.first_frag = 1;

              saddr0.as_u64[0] = ip60->src_address.as_u64[0];
              saddr0.as_u64[1] = ip60->src_address.as_u64[1];
              daddr0.as_u64[0] = ip60->dst_address.as_u64[0];
              daddr0.as_u64[1] = ip60->dst_address.as_u64[1];

              ste0 =
                nat64_db_st_entry_find (db, &saddr0, &daddr0,
                                        udp0->src_port, udp0->dst_port,
                                        l4_protocol0, fib_index0, 1);
              if (!ste0)
                {
                  bibe0 =
                    nat64_db_bib_entry_find (db, &saddr0, udp0->src_port,
                                             l4_protocol0, fib_index0, 1);
                  if (!bibe0)
                    {
                      u16 out_port0;
                      ip4_address_t out_addr0;
                      if (nat64_alloc_out_addr_and_port
                          (fib_index0, proto0, &out_addr0, &out_port0,
                           thread_index))
                        {
                          next0 = NAT64_IN2OUT_NEXT_DROP;
                          b0->error =
                            node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
                          goto trace0;
                        }

                      bibe0 =
                        nat64_db_bib_entry_create (db,
                                                   &ip60->src_address,
                                                   &out_addr0, udp0->src_port,
                                                   out_port0, fib_index0,
                                                   l4_protocol0, 0);
                      if (!bibe0)
                        {
                          next0 = NAT64_IN2OUT_NEXT_DROP;
                          b0->error =
                            node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
                          goto trace0;
                        }
                    }
                  /* Extract the embedded IPv4 destination from the NAT64
                     prefix and create the session entry. */
                  nat64_extract_ip4 (&ip60->dst_address, &daddr0.ip4,
                                     fib_index0);
                  ste0 =
                    nat64_db_st_entry_create (db, bibe0,
                                              &ip60->dst_address, &daddr0.ip4,
                                              udp0->dst_port);
                  if (!ste0)
                    {
                      next0 = NAT64_IN2OUT_NEXT_DROP;
                      b0->error =
                        node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
                      goto trace0;
                    }
                }
              reass0->sess_index = nat64_db_st_entry_get_index (db, ste0);

              /* Re-inject any fragments cached while waiting for this one. */
              nat_ip6_reass_get_frags (reass0, &fragments_to_loopback);
            }

          ctx0.sess_index = reass0->sess_index;
          ctx0.proto = l4_protocol0;
          ctx0.vm = vm;
          ctx0.l4_offset = l4_offset0;

          if (PREDICT_FALSE (is_hairpinning (&ip60->dst_address)))
            {
              next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
              if (nat64_in2out_frag_hairpinning (b0, ip60, &ctx0))
                {
                  next0 = NAT64_IN2OUT_NEXT_DROP;
                  b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
                }
              goto trace0;
            }
          else
            {
              if (ip6_to_ip4_fragmented (b0, nat64_in2out_frag_set_cb, &ctx0))
                {
                  next0 = NAT64_IN2OUT_NEXT_DROP;
                  b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
                  goto trace0;
                }
            }

        trace0:
          if (PREDICT_FALSE
              ((node->flags & VLIB_NODE_FLAG_TRACE)
               && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              nat64_in2out_reass_trace_t *t =
                vlib_add_trace (vm, node, b0, sizeof (*t));
              t->cached = cached0;
              t->sw_if_index = sw_if_index0;
              t->next_index = next0;
            }

          if (cached0)
            {
              /* Buffer is held by the reassembly cache: undo the
                 speculative enqueue. */
              n_left_to_next++;
              to_next--;
            }
          else
            {
              pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP;

              /* verify speculative enqueue, maybe switch current next frame */
              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                               to_next, n_left_to_next,
                                               bi0, next0);
            }

          /* When the incoming frame is exhausted, feed cached fragments
             back through this node (at most one frame's worth at a time). */
          if (n_left_from == 0 && vec_len (fragments_to_loopback))
            {
              from = vlib_frame_vector_args (frame);
              u32 len = vec_len (fragments_to_loopback);
              if (len <= VLIB_FRAME_SIZE)
                {
                  clib_memcpy (from, fragments_to_loopback,
                               sizeof (u32) * len);
                  n_left_from = len;
                  vec_reset_length (fragments_to_loopback);
                }
              else
                {
                  clib_memcpy (from,
                               fragments_to_loopback + (len -
                                                        VLIB_FRAME_SIZE),
                               sizeof (u32) * VLIB_FRAME_SIZE);
                  n_left_from = VLIB_FRAME_SIZE;
                  _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
                }
            }
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  vlib_node_increment_counter (vm, nat64_in2out_reass_node.index,
                               NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
                               pkts_processed);

  nat_send_all_to_node (vm, fragments_to_drop, node,
                        &node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT],
                        NAT64_IN2OUT_NEXT_DROP);

  vec_free (fragments_to_drop);
  vec_free (fragments_to_loopback);
  return frame->n_vectors;
}
1592
/* *INDENT-OFF* */
/** Graph-node registration for NAT64 in2out fragment handling. */
VLIB_REGISTER_NODE (nat64_in2out_reass_node) = {
  .function = nat64_in2out_reass_node_fn,
  .name = "nat64-in2out-reass",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_reass_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
  .error_strings = nat64_in2out_error_strings,
  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_reass_node,
                              nat64_in2out_reass_node_fn);
1616
/** Trace record for the worker-handoff node. */
typedef struct
{
  u32 next_worker_index;	/**< worker thread selected for this packet */
  u8 do_handoff;		/**< 1 when the packet was handed to another worker */
} nat64_in2out_handoff_trace_t;
1622
1623 static u8 *
1624 format_nat64_in2out_handoff_trace (u8 * s, va_list * args)
1625 {
1626   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1627   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1628   nat64_in2out_handoff_trace_t *t =
1629     va_arg (*args, nat64_in2out_handoff_trace_t *);
1630   char *m;
1631
1632   m = t->do_handoff ? "next worker" : "same worker";
1633   s = format (s, "NAT64-IN2OUT-HANDOFF: %s %d", m, t->next_worker_index);
1634
1635   return s;
1636 }
1637
/**
 * @brief Hand packets off to the worker thread that owns their NAT64
 * state, selected by hashing the IPv6 source address.
 *
 * Packets for the current thread are forwarded straight to the
 * nat64-in2out node; others are queued on per-worker frame queues.
 * Congested queues cause the packet to be sent to the error node instead.
 *
 * @return number of vectors in the incoming frame.
 */
static inline uword
nat64_in2out_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
                              vlib_frame_t * frame)
{
  nat64_main_t *nm = &nat64_main;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0;
  /* Per-thread state: one handoff queue element per destination worker. */
  static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
  static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
    = 0;
  vlib_frame_queue_elt_t *hf = 0;
  vlib_frame_queue_t *fq;
  vlib_frame_t *f = 0, *d = 0;
  int i;
  u32 n_left_to_next_worker = 0, *to_next_worker = 0;
  u32 next_worker_index = 0;
  u32 current_worker_index = ~0;
  u32 thread_index = vm->thread_index;
  u32 fq_index;
  u32 to_node_index;

  fq_index = nm->fq_in2out_index;
  to_node_index = nat64_in2out_node.index;

  /* Lazily size the per-thread queue tables on first use. */
  if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
    {
      vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);

      vec_validate_init_empty (congested_handoff_queue_by_worker_index,
                               tm->n_vlib_mains - 1,
                               (vlib_frame_queue_t *) (~0));
    }

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  while (n_left_from > 0)
    {
      u32 bi0;
      vlib_buffer_t *b0;
      ip6_header_t *ip0;
      u8 do_handoff;

      bi0 = from[0];
      from += 1;
      n_left_from -= 1;

      b0 = vlib_get_buffer (vm, bi0);

      ip0 = vlib_buffer_get_current (b0);

      /* Worker selection is based on the IPv6 source address. */
      next_worker_index = nat64_get_worker_in2out (&ip0->src_address);

      if (PREDICT_FALSE (next_worker_index != thread_index))
        {
          do_handoff = 1;

          if (next_worker_index != current_worker_index)
            {
              /* Drop instead of blocking when the target queue is
                 congested. */
              fq =
                is_vlib_frame_queue_congested (fq_index, next_worker_index,
                                               30,
                                               congested_handoff_queue_by_worker_index);

              if (fq)
                {
                  /* if this is 1st frame */
                  if (!d)
                    {
                      d = vlib_get_frame_to_node (vm, nm->error_node_index);
                      to_next_drop = vlib_frame_vector_args (d);
                    }

                  to_next_drop[0] = bi0;
                  to_next_drop += 1;
                  d->n_vectors++;
                  goto trace0;
                }

              if (hf)
                hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;

              hf =
                vlib_get_worker_handoff_queue_elt (fq_index,
                                                   next_worker_index,
                                                   handoff_queue_elt_by_worker_index);
              n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
              to_next_worker = &hf->buffer_index[hf->n_vectors];
              current_worker_index = next_worker_index;
            }

          ASSERT (to_next_worker != 0);

          /* enqueue to correct worker thread */
          to_next_worker[0] = bi0;
          to_next_worker++;
          n_left_to_next_worker--;

          /* Ship the queue element as soon as it fills up. */
          if (n_left_to_next_worker == 0)
            {
              hf->n_vectors = VLIB_FRAME_SIZE;
              vlib_put_frame_queue_elt (hf);
              current_worker_index = ~0;
              handoff_queue_elt_by_worker_index[next_worker_index] = 0;
              hf = 0;
            }
        }
      else
        {
          do_handoff = 0;
          /* if this is 1st frame */
          if (!f)
            {
              f = vlib_get_frame_to_node (vm, to_node_index);
              to_next = vlib_frame_vector_args (f);
            }

          to_next[0] = bi0;
          to_next += 1;
          f->n_vectors++;
        }

    trace0:
      if (PREDICT_FALSE
          ((node->flags & VLIB_NODE_FLAG_TRACE)
           && (b0->flags & VLIB_BUFFER_IS_TRACED)))
        {
          nat64_in2out_handoff_trace_t *t =
            vlib_add_trace (vm, node, b0, sizeof (*t));
          t->next_worker_index = next_worker_index;
          t->do_handoff = do_handoff;
        }
    }

  if (f)
    vlib_put_frame_to_node (vm, to_node_index, f);

  if (d)
    vlib_put_frame_to_node (vm, nm->error_node_index, d);

  if (hf)
    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;

  /* Ship frames to the worker nodes */
  for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
    {
      if (handoff_queue_elt_by_worker_index[i])
        {
          hf = handoff_queue_elt_by_worker_index[i];
          /*
           * It works better to let the handoff node
           * rate-adapt, always ship the handoff queue element.
           */
          if (1 || hf->n_vectors == hf->last_n_vectors)
            {
              vlib_put_frame_queue_elt (hf);
              handoff_queue_elt_by_worker_index[i] = 0;
            }
          else
            hf->last_n_vectors = hf->n_vectors;
        }
      congested_handoff_queue_by_worker_index[i] =
        (vlib_frame_queue_t *) (~0);
    }
  hf = 0;
  current_worker_index = ~0;
  return frame->n_vectors;
}
1806
/* *INDENT-OFF* */
/** Graph-node registration for the NAT64 in2out worker-handoff node. */
VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = {
  .function = nat64_in2out_handoff_node_fn,
  .name = "nat64-in2out-handoff",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_handoff_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_handoff_node,
                              nat64_in2out_handoff_node_fn);
1825
1826 /*
1827  * fd.io coding-style-patch-verification: ON
1828  *
1829  * Local Variables:
1830  * eval: (c-set-style "gnu")
1831  * End:
1832  */