SNAT: IP fragmentation (VPP-890)
[vpp.git] / src / plugins / nat / nat64_in2out.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT64 IPv6 to IPv4 translation (inside to outside network)
18  */
19
20 #include <nat/nat64.h>
21 #include <nat/nat_reass.h>
22 #include <vnet/ip/ip6_to_ip4.h>
23 #include <vnet/fib/fib_table.h>
24
/**
 * Per-packet trace record rendered by format_nat64_in2out_trace().
 */
25 typedef struct
26 {
27   u32 sw_if_index;		/* printed as "sw_if_index" in the trace line */
28   u32 next_index;		/* printed as "next index" in the trace line */
29   u8 is_slow_path;		/* non-zero selects the "NAT64-in2out-slowpath" tag */
30 } nat64_in2out_trace_t;
31
32 static u8 *
33 format_nat64_in2out_trace (u8 * s, va_list * args)
34 {
35   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
36   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
37   nat64_in2out_trace_t *t = va_arg (*args, nat64_in2out_trace_t *);
38   char *tag;
39
40   tag = t->is_slow_path ? "NAT64-in2out-slowpath" : "NAT64-in2out";
41
42   s =
43     format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
44             t->next_index);
45
46   return s;
47 }
48
/**
 * Per-packet trace record rendered by format_nat64_in2out_reass_trace().
 */
49 typedef struct
50 {
51   u32 sw_if_index;		/* printed as "sw_if_index" in the trace line */
52   u32 next_index;		/* printed as "next index" in the trace line */
53   u8 cached;			/* non-zero prints status "cached", zero prints "translated" */
54 } nat64_in2out_reass_trace_t;
55
56 static u8 *
57 format_nat64_in2out_reass_trace (u8 * s, va_list * args)
58 {
59   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
60   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
61   nat64_in2out_reass_trace_t *t =
62     va_arg (*args, nat64_in2out_reass_trace_t *);
63
64   s =
65     format (s, "NAT64-in2out-reass: sw_if_index %d, next index %d, status %s",
66             t->sw_if_index, t->next_index,
67             t->cached ? "cached" : "translated");
68
69   return s;
70 }
71
/*
 * Registrations of the NAT64 in2out graph nodes (named for the fast path,
 * the slow path and the reassembly node). Declared up front so code below
 * can refer to them, e.g. to read nat64_in2out_node.index.
 */
72 vlib_node_registration_t nat64_in2out_node;
73 vlib_node_registration_t nat64_in2out_slowpath_node;
74 vlib_node_registration_t nat64_in2out_reass_node;
75
/*
 * X-macro listing every in2out error as _(SYMBOL, "description").
 * It is expanded twice below with different definitions of `_`: once to
 * build the nat64_in2out_error_t enum and once to build the matching
 * nat64_in2out_error_strings[] table, which keeps both in the same order.
 */
76 #define foreach_nat64_in2out_error                       \
77 _(UNSUPPORTED_PROTOCOL, "unsupported protocol")          \
78 _(IN2OUT_PACKETS, "good in2out packets processed")       \
79 _(NO_TRANSLATION, "no translation")                      \
80 _(UNKNOWN, "unknown")                                    \
81 _(DROP_FRAGMENT, "Drop fragment")                        \
82 _(MAX_REASS, "Maximum reassemblies exceeded")            \
83 _(MAX_FRAG, "Maximum fragments per reassembly exceeded")
84
85
/*
 * Error codes, one NAT64_IN2OUT_ERROR_<SYMBOL> per entry of
 * foreach_nat64_in2out_error, followed by NAT64_IN2OUT_N_ERROR which
 * therefore equals the number of errors. The values index node->errors[].
 */
86 typedef enum
87 {
88 #define _(sym,str) NAT64_IN2OUT_ERROR_##sym,
89   foreach_nat64_in2out_error
90 #undef _
91     NAT64_IN2OUT_N_ERROR,
92 } nat64_in2out_error_t;
93
/*
 * Description strings generated from foreach_nat64_in2out_error, in the
 * same order as the nat64_in2out_error_t enumerators.
 */
94 static char *nat64_in2out_error_strings[] = {
95 #define _(sym,string) string,
96   foreach_nat64_in2out_error
97 #undef _
98 };
99
/*
 * Next-node indices used by the in2out node function when it picks where a
 * packet goes after this node.
 */
100 typedef enum
101 {
102   NAT64_IN2OUT_NEXT_IP4_LOOKUP,	/* default next for a packet */
103   NAT64_IN2OUT_NEXT_IP6_LOOKUP,	/* chosen for hairpinned packets */
104   NAT64_IN2OUT_NEXT_DROP,	/* chosen when parsing or translation fails */
105   NAT64_IN2OUT_NEXT_SLOWPATH,	/* chosen by the fast path for protocols with no snat proto */
106   NAT64_IN2OUT_NEXT_REASS,	/* chosen when the IPv6 next header is the fragmentation header */
107   NAT64_IN2OUT_N_NEXT,		/* number of next indices; not a real next */
108 } nat64_in2out_next_t;
109
/*
 * Opaque argument handed to the nat64_in2out_*_set_cb callbacks.
 */
110 typedef struct nat64_in2out_set_ctx_t_
111 {
112   vlib_buffer_t *b;		/* buffer being translated; callbacks read its RX sw_if_index */
113   vlib_main_t *vm;		/* passed on to nat64_session_reset_timeout() */
114 } nat64_in2out_set_ctx_t;
115
116 /**
117  * @brief Check whether is a hairpinning.
118  *
119  * If the destination IP address of the packet is an IPv4 address assigned to
120  * the NAT64 itself, then the packet is a hairpin packet.
121  *
122  * param dst_addr Destination address of the packet.
123  *
124  * @returns 1 if hairpinning, otherwise 0.
125  */
126 static_always_inline int
127 is_hairpinning (ip6_address_t * dst_addr)
128 {
129   nat64_main_t *nm = &nat64_main;
130   int i;
131
132   for (i = 0; i < vec_len (nm->addr_pool); i++)
133     {
134       if (nm->addr_pool[i].addr.as_u32 == dst_addr->as_u32[3])
135         return 1;
136     }
137
138   return 0;
139 }
140
141 static int
142 nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
143                              void *arg)
144 {
145   nat64_main_t *nm = &nat64_main;
146   nat64_in2out_set_ctx_t *ctx = arg;
147   nat64_db_bib_entry_t *bibe;
148   nat64_db_st_entry_t *ste;
149   ip46_address_t saddr, daddr;
150   u32 sw_if_index, fib_index;
151   udp_header_t *udp = ip6_next_header (ip6);
152   u8 proto = ip6->protocol;
153   u16 sport = udp->src_port;
154   u16 dport = udp->dst_port;
155
156   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
157   fib_index =
158     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
159
160   saddr.as_u64[0] = ip6->src_address.as_u64[0];
161   saddr.as_u64[1] = ip6->src_address.as_u64[1];
162   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
163   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
164
165   ste =
166     nat64_db_st_entry_find (&nm->db, &saddr, &daddr, sport, dport, proto,
167                             fib_index, 1);
168
169   if (ste)
170     {
171       bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
172       if (!bibe)
173         return -1;
174     }
175   else
176     {
177       bibe =
178         nat64_db_bib_entry_find (&nm->db, &saddr, sport, proto, fib_index, 1);
179
180       if (!bibe)
181         {
182           u16 out_port;
183           ip4_address_t out_addr;
184           if (nat64_alloc_out_addr_and_port
185               (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
186                &out_port))
187             return -1;
188
189           bibe =
190             nat64_db_bib_entry_create (&nm->db, &ip6->src_address, &out_addr,
191                                        sport, clib_host_to_net_u16 (out_port),
192                                        fib_index, proto, 0);
193           if (!bibe)
194             return -1;
195         }
196
197       nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
198       ste =
199         nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address,
200                                   &daddr.ip4, dport);
201       if (!ste)
202         return -1;
203     }
204
205   nat64_session_reset_timeout (ste, ctx->vm);
206
207   ip4->src_address.as_u32 = bibe->out_addr.as_u32;
208   udp->src_port = bibe->out_port;
209
210   ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
211
212   if (proto == IP_PROTOCOL_TCP)
213     {
214       u16 *checksum;
215       ip_csum_t csum;
216       tcp_header_t *tcp = ip6_next_header (ip6);
217
218       checksum = &tcp->checksum;
219       csum = ip_csum_sub_even (*checksum, sport);
220       csum = ip_csum_add_even (csum, udp->src_port);
221       *checksum = ip_csum_fold (csum);
222     }
223
224   return 0;
225 }
226
227 static int
228 nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
229 {
230   nat64_main_t *nm = &nat64_main;
231   nat64_in2out_set_ctx_t *ctx = arg;
232   nat64_db_bib_entry_t *bibe;
233   nat64_db_st_entry_t *ste;
234   ip46_address_t saddr, daddr;
235   u32 sw_if_index, fib_index;
236   icmp46_header_t *icmp = ip6_next_header (ip6);
237
238   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
239   fib_index =
240     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
241
242   saddr.as_u64[0] = ip6->src_address.as_u64[0];
243   saddr.as_u64[1] = ip6->src_address.as_u64[1];
244   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
245   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
246
247   if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply)
248     {
249       u16 in_id = ((u16 *) (icmp))[2];
250       ste =
251         nat64_db_st_entry_find (&nm->db, &saddr, &daddr, in_id, 0,
252                                 IP_PROTOCOL_ICMP, fib_index, 1);
253
254       if (ste)
255         {
256           bibe =
257             nat64_db_bib_entry_by_index (&nm->db, IP_PROTOCOL_ICMP,
258                                          ste->bibe_index);
259           if (!bibe)
260             return -1;
261         }
262       else
263         {
264           bibe =
265             nat64_db_bib_entry_find (&nm->db, &saddr, in_id,
266                                      IP_PROTOCOL_ICMP, fib_index, 1);
267
268           if (!bibe)
269             {
270               u16 out_id;
271               ip4_address_t out_addr;
272               if (nat64_alloc_out_addr_and_port
273                   (fib_index, SNAT_PROTOCOL_ICMP, &out_addr, &out_id))
274                 return -1;
275
276               bibe =
277                 nat64_db_bib_entry_create (&nm->db, &ip6->src_address,
278                                            &out_addr, in_id,
279                                            clib_host_to_net_u16 (out_id),
280                                            fib_index, IP_PROTOCOL_ICMP, 0);
281               if (!bibe)
282                 return -1;
283             }
284
285           nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
286           ste =
287             nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address,
288                                       &daddr.ip4, 0);
289           if (!ste)
290             return -1;
291         }
292
293       nat64_session_reset_timeout (ste, ctx->vm);
294
295       ip4->src_address.as_u32 = bibe->out_addr.as_u32;
296       ((u16 *) (icmp))[2] = bibe->out_port;
297
298       ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
299     }
300   else
301     {
302       if (!vec_len (nm->addr_pool))
303         return -1;
304
305       ip4->src_address.as_u32 = nm->addr_pool[0].addr.as_u32;
306       nat64_extract_ip4 (&ip6->dst_address, &ip4->dst_address, fib_index);
307     }
308
309   return 0;
310 }
311
312 static int
313 nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
314                                 void *arg)
315 {
316   nat64_main_t *nm = &nat64_main;
317   nat64_in2out_set_ctx_t *ctx = arg;
318   nat64_db_st_entry_t *ste;
319   nat64_db_bib_entry_t *bibe;
320   ip46_address_t saddr, daddr;
321   u32 sw_if_index, fib_index;
322   u8 proto = ip6->protocol;
323
324   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
325   fib_index =
326     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
327
328   saddr.as_u64[0] = ip6->src_address.as_u64[0];
329   saddr.as_u64[1] = ip6->src_address.as_u64[1];
330   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
331   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
332
333   if (proto == IP_PROTOCOL_ICMP6)
334     {
335       icmp46_header_t *icmp = ip6_next_header (ip6);
336       u16 in_id = ((u16 *) (icmp))[2];
337       proto = IP_PROTOCOL_ICMP;
338
339       if (!
340           (icmp->type == ICMP4_echo_request
341            || icmp->type == ICMP4_echo_reply))
342         return -1;
343
344       ste =
345         nat64_db_st_entry_find (&nm->db, &daddr, &saddr, in_id, 0, proto,
346                                 fib_index, 1);
347       if (!ste)
348         return -1;
349
350       bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
351       if (!bibe)
352         return -1;
353
354       ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
355       ((u16 *) (icmp))[2] = bibe->out_port;
356       ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
357     }
358   else
359     {
360       udp_header_t *udp = ip6_next_header (ip6);
361       tcp_header_t *tcp = ip6_next_header (ip6);
362       u16 *checksum;
363       ip_csum_t csum;
364
365       u16 sport = udp->src_port;
366       u16 dport = udp->dst_port;
367
368       ste =
369         nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, proto,
370                                 fib_index, 1);
371       if (!ste)
372         return -1;
373
374       bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
375       if (!bibe)
376         return -1;
377
378       ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
379       udp->dst_port = bibe->out_port;
380       ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
381
382       if (proto == IP_PROTOCOL_TCP)
383         checksum = &tcp->checksum;
384       else
385         checksum = &udp->checksum;
386       csum = ip_csum_sub_even (*checksum, dport);
387       csum = ip_csum_add_even (csum, udp->dst_port);
388       *checksum = ip_csum_fold (csum);
389     }
390
391   return 0;
392 }
393
/*
 * Argument for unk_proto_st_walk(): describes the packet of an "unknown"
 * protocol for which an outside address is being chosen, and receives the
 * chosen address.
 */
394 typedef struct unk_proto_st_walk_ctx_t_
395 {
396   ip6_address_t src_addr;	/* in: compared with the BIB entry's in_addr */
397   ip6_address_t dst_addr;	/* in: compared with the session's in_r_addr */
398   ip4_address_t out_addr;	/* out: set by the walk on a match; callers start it at 0 */
399   u32 fib_index;		/* in: must equal the BIB entry's fib_index */
400   u8 proto;			/* in: protocol used for the "already in use" session lookup */
401 } unk_proto_st_walk_ctx_t;
402
403 static int
404 unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg)
405 {
406   nat64_main_t *nm = &nat64_main;
407   unk_proto_st_walk_ctx_t *ctx = arg;
408   nat64_db_bib_entry_t *bibe;
409   ip46_address_t saddr, daddr;
410
411   if (ip46_address_is_equal (&ste->in_r_addr, &ctx->dst_addr))
412     {
413       bibe =
414         nat64_db_bib_entry_by_index (&nm->db, ste->proto, ste->bibe_index);
415       if (!bibe)
416         return -1;
417
418       if (ip46_address_is_equal (&bibe->in_addr, &ctx->src_addr)
419           && bibe->fib_index == ctx->fib_index)
420         {
421           memset (&saddr, 0, sizeof (saddr));
422           saddr.ip4.as_u32 = bibe->out_addr.as_u32;
423           memset (&daddr, 0, sizeof (daddr));
424           nat64_extract_ip4 (&ctx->dst_addr, &daddr.ip4, ctx->fib_index);
425
426           if (nat64_db_st_entry_find
427               (&nm->db, &daddr, &saddr, 0, 0, ctx->proto, ctx->fib_index, 0))
428             return -1;
429
430           ctx->out_addr.as_u32 = bibe->out_addr.as_u32;
431           return 1;
432         }
433     }
434
435   return 0;
436 }
437
438 static int
439 nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
440                                void *arg)
441 {
442   nat64_main_t *nm = &nat64_main;
443   nat64_in2out_set_ctx_t *ctx = arg;
444   nat64_db_bib_entry_t *bibe;
445   nat64_db_st_entry_t *ste;
446   ip46_address_t saddr, daddr, addr;
447   u32 sw_if_index, fib_index;
448   u8 proto = ip6->protocol;
449   int i;
450
451   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
452   fib_index =
453     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
454
455   saddr.as_u64[0] = ip6->src_address.as_u64[0];
456   saddr.as_u64[1] = ip6->src_address.as_u64[1];
457   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
458   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
459
460   ste =
461     nat64_db_st_entry_find (&nm->db, &saddr, &daddr, 0, 0, proto, fib_index,
462                             1);
463
464   if (ste)
465     {
466       bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
467       if (!bibe)
468         return -1;
469     }
470   else
471     {
472       bibe =
473         nat64_db_bib_entry_find (&nm->db, &saddr, 0, proto, fib_index, 1);
474
475       if (!bibe)
476         {
477           /* Choose same out address as for TCP/UDP session to same dst */
478           unk_proto_st_walk_ctx_t ctx = {
479             .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
480             .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
481             .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
482             .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
483             .out_addr.as_u32 = 0,
484             .fib_index = fib_index,
485             .proto = proto,
486           };
487
488           nat64_db_st_walk (&nm->db, IP_PROTOCOL_TCP, unk_proto_st_walk,
489                             &ctx);
490
491           if (!ctx.out_addr.as_u32)
492             nat64_db_st_walk (&nm->db, IP_PROTOCOL_UDP, unk_proto_st_walk,
493                               &ctx);
494
495           /* Verify if out address is not already in use for protocol */
496           memset (&addr, 0, sizeof (addr));
497           addr.ip4.as_u32 = ctx.out_addr.as_u32;
498           if (nat64_db_bib_entry_find (&nm->db, &addr, 0, proto, 0, 0))
499             ctx.out_addr.as_u32 = 0;
500
501           if (!ctx.out_addr.as_u32)
502             {
503               for (i = 0; i < vec_len (nm->addr_pool); i++)
504                 {
505                   addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
506                   if (!nat64_db_bib_entry_find
507                       (&nm->db, &addr, 0, proto, 0, 0))
508                     break;
509                 }
510             }
511
512           if (!ctx.out_addr.as_u32)
513             return -1;
514
515           bibe =
516             nat64_db_bib_entry_create (&nm->db, &ip6->src_address,
517                                        &ctx.out_addr, 0, 0, fib_index, proto,
518                                        0);
519           if (!bibe)
520             return -1;
521         }
522
523       nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
524       ste =
525         nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address,
526                                   &daddr.ip4, 0);
527       if (!ste)
528         return -1;
529     }
530
531   nat64_session_reset_timeout (ste, ctx->vm);
532
533   ip4->src_address.as_u32 = bibe->out_addr.as_u32;
534   ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
535
536   return 0;
537 }
538
539
540
541 static int
542 nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
543                                   ip6_header_t * ip6)
544 {
545   nat64_main_t *nm = &nat64_main;
546   nat64_db_bib_entry_t *bibe;
547   nat64_db_st_entry_t *ste;
548   ip46_address_t saddr, daddr;
549   u32 sw_if_index, fib_index;
550   udp_header_t *udp = ip6_next_header (ip6);
551   tcp_header_t *tcp = ip6_next_header (ip6);
552   u8 proto = ip6->protocol;
553   u16 sport = udp->src_port;
554   u16 dport = udp->dst_port;
555   u16 *checksum;
556   ip_csum_t csum;
557
558   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
559   fib_index =
560     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
561
562   saddr.as_u64[0] = ip6->src_address.as_u64[0];
563   saddr.as_u64[1] = ip6->src_address.as_u64[1];
564   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
565   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
566
567   if (proto == IP_PROTOCOL_UDP)
568     checksum = &udp->checksum;
569   else
570     checksum = &tcp->checksum;
571
572   csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
573   csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
574   csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
575   csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
576   csum = ip_csum_sub_even (csum, sport);
577   csum = ip_csum_sub_even (csum, dport);
578
579   ste =
580     nat64_db_st_entry_find (&nm->db, &saddr, &daddr, sport, dport, proto,
581                             fib_index, 1);
582
583   if (ste)
584     {
585       bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
586       if (!bibe)
587         return -1;
588     }
589   else
590     {
591       bibe =
592         nat64_db_bib_entry_find (&nm->db, &saddr, sport, proto, fib_index, 1);
593
594       if (!bibe)
595         {
596           u16 out_port;
597           ip4_address_t out_addr;
598           if (nat64_alloc_out_addr_and_port
599               (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
600                &out_port))
601             return -1;
602
603           bibe =
604             nat64_db_bib_entry_create (&nm->db, &ip6->src_address, &out_addr,
605                                        sport, clib_host_to_net_u16 (out_port),
606                                        fib_index, proto, 0);
607           if (!bibe)
608             return -1;
609         }
610
611       nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
612       ste =
613         nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address,
614                                   &daddr.ip4, dport);
615       if (!ste)
616         return -1;
617     }
618
619   nat64_session_reset_timeout (ste, vm);
620
621   sport = udp->src_port = bibe->out_port;
622   nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);
623
624   memset (&saddr, 0, sizeof (saddr));
625   memset (&daddr, 0, sizeof (daddr));
626   saddr.ip4.as_u32 = bibe->out_addr.as_u32;
627   daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
628
629   ste =
630     nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, proto, 0,
631                             0);
632
633   if (ste)
634     {
635       bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
636       if (!bibe)
637         return -1;
638     }
639   else
640     {
641       bibe = nat64_db_bib_entry_find (&nm->db, &daddr, dport, proto, 0, 0);
642
643       if (!bibe)
644         return -1;
645
646       ste =
647         nat64_db_st_entry_create (&nm->db, bibe, &ip6->src_address,
648                                   &saddr.ip4, sport);
649     }
650
651   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
652   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
653   udp->dst_port = bibe->in_port;
654
655   csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
656   csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
657   csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
658   csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
659   csum = ip_csum_add_even (csum, udp->src_port);
660   csum = ip_csum_add_even (csum, udp->dst_port);
661   *checksum = ip_csum_fold (csum);
662
663   return 0;
664 }
665
666 static int
667 nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
668                                ip6_header_t * ip6)
669 {
670   nat64_main_t *nm = &nat64_main;
671   nat64_db_bib_entry_t *bibe;
672   nat64_db_st_entry_t *ste;
673   icmp46_header_t *icmp = ip6_next_header (ip6);
674   ip6_header_t *inner_ip6;
675   ip46_address_t saddr, daddr;
676   u32 sw_if_index, fib_index;
677   u8 proto;
678   udp_header_t *udp;
679   tcp_header_t *tcp;
680   u16 *checksum, sport, dport;
681   ip_csum_t csum;
682
683   if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
684     return -1;
685
686   inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
687
688   proto = inner_ip6->protocol;
689
690   if (proto == IP_PROTOCOL_ICMP6)
691     return -1;
692
693   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
694   fib_index =
695     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
696
697   saddr.as_u64[0] = inner_ip6->src_address.as_u64[0];
698   saddr.as_u64[1] = inner_ip6->src_address.as_u64[1];
699   daddr.as_u64[0] = inner_ip6->dst_address.as_u64[0];
700   daddr.as_u64[1] = inner_ip6->dst_address.as_u64[1];
701
702   udp = ip6_next_header (inner_ip6);
703   tcp = ip6_next_header (inner_ip6);
704
705   sport = udp->src_port;
706   dport = udp->dst_port;
707
708   if (proto == IP_PROTOCOL_UDP)
709     checksum = &udp->checksum;
710   else
711     checksum = &tcp->checksum;
712
713   csum = ip_csum_sub_even (*checksum, inner_ip6->src_address.as_u64[0]);
714   csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]);
715   csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]);
716   csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]);
717   csum = ip_csum_sub_even (csum, sport);
718   csum = ip_csum_sub_even (csum, dport);
719
720   ste =
721     nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, proto,
722                             fib_index, 1);
723   if (!ste)
724     return -1;
725
726   bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
727   if (!bibe)
728     return -1;
729
730   dport = udp->dst_port = bibe->out_port;
731   nat64_compose_ip6 (&inner_ip6->dst_address, &bibe->out_addr, fib_index);
732
733   memset (&saddr, 0, sizeof (saddr));
734   memset (&daddr, 0, sizeof (daddr));
735   saddr.ip4.as_u32 = ste->out_r_addr.as_u32;
736   daddr.ip4.as_u32 = bibe->out_addr.as_u32;
737
738   ste =
739     nat64_db_st_entry_find (&nm->db, &saddr, &daddr, sport, dport, proto, 0,
740                             0);
741   if (!ste)
742     return -1;
743
744   bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
745   if (!bibe)
746     return -1;
747
748   inner_ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
749   inner_ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
750   udp->src_port = bibe->in_port;
751
752   csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]);
753   csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]);
754   csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]);
755   csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]);
756   csum = ip_csum_add_even (csum, udp->src_port);
757   csum = ip_csum_add_even (csum, udp->dst_port);
758   *checksum = ip_csum_fold (csum);
759
760   if (!vec_len (nm->addr_pool))
761     return -1;
762
763   nat64_compose_ip6 (&ip6->src_address, &nm->addr_pool[0].addr, fib_index);
764   ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
765   ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];
766
767   icmp->checksum = 0;
768   csum = ip_csum_with_carry (0, ip6->payload_length);
769   csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol));
770   csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]);
771   csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]);
772   csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]);
773   csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]);
774   csum =
775     ip_incremental_checksum (csum, icmp,
776                              clib_net_to_host_u16 (ip6->payload_length));
777   icmp->checksum = ~ip_csum_fold (csum);
778
779   return 0;
780 }
781
782 static int
783 nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
784                                     ip6_header_t * ip6)
785 {
786   nat64_main_t *nm = &nat64_main;
787   nat64_db_bib_entry_t *bibe;
788   nat64_db_st_entry_t *ste;
789   ip46_address_t saddr, daddr, addr;
790   u32 sw_if_index, fib_index;
791   u8 proto = ip6->protocol;
792   int i;
793
794   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
795   fib_index =
796     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
797
798   saddr.as_u64[0] = ip6->src_address.as_u64[0];
799   saddr.as_u64[1] = ip6->src_address.as_u64[1];
800   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
801   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
802
803   ste =
804     nat64_db_st_entry_find (&nm->db, &saddr, &daddr, 0, 0, proto, fib_index,
805                             1);
806
807   if (ste)
808     {
809       bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
810       if (!bibe)
811         return -1;
812     }
813   else
814     {
815       bibe =
816         nat64_db_bib_entry_find (&nm->db, &saddr, 0, proto, fib_index, 1);
817
818       if (!bibe)
819         {
820           /* Choose same out address as for TCP/UDP session to same dst */
821           unk_proto_st_walk_ctx_t ctx = {
822             .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
823             .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
824             .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
825             .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
826             .out_addr.as_u32 = 0,
827             .fib_index = fib_index,
828             .proto = proto,
829           };
830
831           nat64_db_st_walk (&nm->db, IP_PROTOCOL_TCP, unk_proto_st_walk,
832                             &ctx);
833
834           if (!ctx.out_addr.as_u32)
835             nat64_db_st_walk (&nm->db, IP_PROTOCOL_UDP, unk_proto_st_walk,
836                               &ctx);
837
838           /* Verify if out address is not already in use for protocol */
839           memset (&addr, 0, sizeof (addr));
840           addr.ip4.as_u32 = ctx.out_addr.as_u32;
841           if (nat64_db_bib_entry_find (&nm->db, &addr, 0, proto, 0, 0))
842             ctx.out_addr.as_u32 = 0;
843
844           if (!ctx.out_addr.as_u32)
845             {
846               for (i = 0; i < vec_len (nm->addr_pool); i++)
847                 {
848                   addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
849                   if (!nat64_db_bib_entry_find
850                       (&nm->db, &addr, 0, proto, 0, 0))
851                     break;
852                 }
853             }
854
855           if (!ctx.out_addr.as_u32)
856             return -1;
857
858           bibe =
859             nat64_db_bib_entry_create (&nm->db, &ip6->src_address,
860                                        &ctx.out_addr, 0, 0, fib_index, proto,
861                                        0);
862           if (!bibe)
863             return -1;
864         }
865
866       nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
867       ste =
868         nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address,
869                                   &daddr.ip4, 0);
870       if (!ste)
871         return -1;
872     }
873
874   nat64_session_reset_timeout (ste, vm);
875
876   nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);
877
878   memset (&saddr, 0, sizeof (saddr));
879   memset (&daddr, 0, sizeof (daddr));
880   saddr.ip4.as_u32 = bibe->out_addr.as_u32;
881   daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
882
883   ste = nat64_db_st_entry_find (&nm->db, &daddr, &saddr, 0, 0, proto, 0, 0);
884
885   if (ste)
886     {
887       bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index);
888       if (!bibe)
889         return -1;
890     }
891   else
892     {
893       bibe = nat64_db_bib_entry_find (&nm->db, &daddr, 0, proto, 0, 0);
894
895       if (!bibe)
896         return -1;
897
898       ste =
899         nat64_db_st_entry_create (&nm->db, bibe, &ip6->src_address,
900                                   &saddr.ip4, 0);
901     }
902
903   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
904   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
905
906   return 0;
907 }
908
909 static inline uword
910 nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
911                              vlib_frame_t * frame, u8 is_slow_path)
912 {
913   u32 n_left_from, *from, *to_next;
914   nat64_in2out_next_t next_index;
915   u32 pkts_processed = 0;
916   u32 stats_node_index;
917
918   stats_node_index =
919     is_slow_path ? nat64_in2out_slowpath_node.index : nat64_in2out_node.index;
920
921   from = vlib_frame_vector_args (frame);
922   n_left_from = frame->n_vectors;
923   next_index = node->cached_next_index;
924
925   while (n_left_from > 0)
926     {
927       u32 n_left_to_next;
928
929       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
930
931       while (n_left_from > 0 && n_left_to_next > 0)
932         {
933           u32 bi0;
934           vlib_buffer_t *b0;
935           u32 next0;
936           ip6_header_t *ip60;
937           u16 l4_offset0, frag_offset0;
938           u8 l4_protocol0;
939           u32 proto0;
940           nat64_in2out_set_ctx_t ctx0;
941
942           /* speculatively enqueue b0 to the current next frame */
943           bi0 = from[0];
944           to_next[0] = bi0;
945           from += 1;
946           to_next += 1;
947           n_left_from -= 1;
948           n_left_to_next -= 1;
949
950           b0 = vlib_get_buffer (vm, bi0);
951           ip60 = vlib_buffer_get_current (b0);
952
953           ctx0.b = b0;
954           ctx0.vm = vm;
955
956           next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;
957
958           if (PREDICT_FALSE
959               (ip6_parse
960                (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
961                 &frag_offset0)))
962             {
963               next0 = NAT64_IN2OUT_NEXT_DROP;
964               b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
965               goto trace0;
966             }
967
968           proto0 = ip_proto_to_snat_proto (l4_protocol0);
969
970           if (is_slow_path)
971             {
972               if (PREDICT_TRUE (proto0 == ~0))
973                 {
974                   if (is_hairpinning (&ip60->dst_address))
975                     {
976                       next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
977                       if (nat64_in2out_unk_proto_hairpinning (vm, b0, ip60))
978                         {
979                           next0 = NAT64_IN2OUT_NEXT_DROP;
980                           b0->error =
981                             node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
982                         }
983                       goto trace0;
984                     }
985
986                   if (ip6_to_ip4 (b0, nat64_in2out_unk_proto_set_cb, &ctx0))
987                     {
988                       next0 = NAT64_IN2OUT_NEXT_DROP;
989                       b0->error =
990                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
991                       goto trace0;
992                     }
993                 }
994               goto trace0;
995             }
996           else
997             {
998               if (PREDICT_FALSE (proto0 == ~0))
999                 {
1000                   next0 = NAT64_IN2OUT_NEXT_SLOWPATH;
1001                   goto trace0;
1002                 }
1003             }
1004
1005           if (PREDICT_FALSE
1006               (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION))
1007             {
1008               next0 = NAT64_IN2OUT_NEXT_REASS;
1009               goto trace0;
1010             }
1011
1012           if (proto0 == SNAT_PROTOCOL_ICMP)
1013             {
1014               if (is_hairpinning (&ip60->dst_address))
1015                 {
1016                   next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1017                   if (nat64_in2out_icmp_hairpinning (vm, b0, ip60))
1018                     {
1019                       next0 = NAT64_IN2OUT_NEXT_DROP;
1020                       b0->error =
1021                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1022                     }
1023                   goto trace0;
1024                 }
1025
1026               if (icmp6_to_icmp
1027                   (b0, nat64_in2out_icmp_set_cb, &ctx0,
1028                    nat64_in2out_inner_icmp_set_cb, &ctx0))
1029                 {
1030                   next0 = NAT64_IN2OUT_NEXT_DROP;
1031                   b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1032                   goto trace0;
1033                 }
1034             }
1035           else if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
1036             {
1037               if (is_hairpinning (&ip60->dst_address))
1038                 {
1039                   next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1040                   if (nat64_in2out_tcp_udp_hairpinning (vm, b0, ip60))
1041                     {
1042                       next0 = NAT64_IN2OUT_NEXT_DROP;
1043                       b0->error =
1044                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1045                     }
1046                   goto trace0;
1047                 }
1048
1049               if (ip6_to_ip4_tcp_udp
1050                   (b0, nat64_in2out_tcp_udp_set_cb, &ctx0, 0))
1051                 {
1052                   next0 = NAT64_IN2OUT_NEXT_DROP;
1053                   b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1054                   goto trace0;
1055                 }
1056             }
1057
1058         trace0:
1059           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1060                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1061             {
1062               nat64_in2out_trace_t *t =
1063                 vlib_add_trace (vm, node, b0, sizeof (*t));
1064               t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1065               t->next_index = next0;
1066               t->is_slow_path = is_slow_path;
1067             }
1068
1069           pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP;
1070
1071           /* verify speculative enqueue, maybe switch current next frame */
1072           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1073                                            n_left_to_next, bi0, next0);
1074         }
1075       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1076     }
1077   vlib_node_increment_counter (vm, stats_node_index,
1078                                NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
1079                                pkts_processed);
1080   return frame->n_vectors;
1081 }
1082
1083 static uword
1084 nat64_in2out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1085                       vlib_frame_t * frame)
1086 {
1087   return nat64_in2out_node_fn_inline (vm, node, frame, 0);
1088 }
1089
1090 /* *INDENT-OFF* */
1091 VLIB_REGISTER_NODE (nat64_in2out_node) = {
1092   .function = nat64_in2out_node_fn,
1093   .name = "nat64-in2out",
1094   .vector_size = sizeof (u32),
1095   .format_trace = format_nat64_in2out_trace,
1096   .type = VLIB_NODE_TYPE_INTERNAL,
1097   .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
1098   .error_strings = nat64_in2out_error_strings,
1099   .n_next_nodes = NAT64_IN2OUT_N_NEXT,
1100   /* edit / add dispositions here */
1101   .next_nodes = {
1102     [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
1103     [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
1104     [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
1105     [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
1106     [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
1107   },
1108 };
1109 /* *INDENT-ON* */
1110
1111 VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_node, nat64_in2out_node_fn);
1112
1113 static uword
1114 nat64_in2out_slowpath_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1115                                vlib_frame_t * frame)
1116 {
1117   return nat64_in2out_node_fn_inline (vm, node, frame, 1);
1118 }
1119
1120 /* *INDENT-OFF* */
1121 VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
1122   .function = nat64_in2out_slowpath_node_fn,
1123   .name = "nat64-in2out-slowpath",
1124   .vector_size = sizeof (u32),
1125   .format_trace = format_nat64_in2out_trace,
1126   .type = VLIB_NODE_TYPE_INTERNAL,
1127   .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
1128   .error_strings = nat64_in2out_error_strings,
1129   .n_next_nodes = NAT64_IN2OUT_N_NEXT,
1130   /* edit / add dispositions here */
1131   .next_nodes = {
1132     [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
1133     [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
1134     [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
1135     [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
1136     [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
1137   },
1138 };
1139 /* *INDENT-ON* */
1140
1141 VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_slowpath_node,
1142                               nat64_in2out_slowpath_node_fn);
1143
1144 typedef struct nat64_in2out_frag_set_ctx_t_
1145 {
1146   vlib_main_t *vm;
1147   u32 sess_index;
1148   u16 l4_offset;
1149   u8 proto;
1150   u8 first_frag;
1151 } nat64_in2out_frag_set_ctx_t;
1152
1153 static int
1154 nat64_in2out_frag_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
1155 {
1156   nat64_main_t *nm = &nat64_main;
1157   nat64_in2out_frag_set_ctx_t *ctx = arg;
1158   nat64_db_st_entry_t *ste;
1159   nat64_db_bib_entry_t *bibe;
1160   udp_header_t *udp;
1161
1162   ste = nat64_db_st_entry_by_index (&nm->db, ctx->proto, ctx->sess_index);
1163   if (!ste)
1164     return -1;
1165
1166   bibe = nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index);
1167   if (!bibe)
1168     return -1;
1169
1170   nat64_session_reset_timeout (ste, ctx->vm);
1171
1172   if (ctx->first_frag)
1173     {
1174       udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);
1175
1176       if (ctx->proto == IP_PROTOCOL_TCP)
1177         {
1178           u16 *checksum;
1179           ip_csum_t csum;
1180           tcp_header_t *tcp = (tcp_header_t *) udp;
1181
1182           checksum = &tcp->checksum;
1183           csum = ip_csum_sub_even (*checksum, tcp->src_port);
1184           csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[0]);
1185           csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
1186           csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
1187           csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
1188           csum = ip_csum_add_even (csum, bibe->out_port);
1189           csum = ip_csum_add_even (csum, bibe->out_addr.as_u32);
1190           csum = ip_csum_add_even (csum, ste->out_r_addr.as_u32);
1191           *checksum = ip_csum_fold (csum);
1192         }
1193
1194       udp->src_port = bibe->out_port;
1195     }
1196
1197   ip4->src_address.as_u32 = bibe->out_addr.as_u32;
1198   ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
1199
1200   return 0;
1201 }
1202
1203 static int
1204 nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6,
1205                                nat64_in2out_frag_set_ctx_t * ctx)
1206 {
1207   nat64_main_t *nm = &nat64_main;
1208   nat64_db_st_entry_t *ste;
1209   nat64_db_bib_entry_t *bibe;
1210   udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);
1211   tcp_header_t *tcp = (tcp_header_t *) udp;
1212   u16 sport = udp->src_port;
1213   u16 dport = udp->dst_port;
1214   u16 *checksum;
1215   ip_csum_t csum;
1216   ip46_address_t saddr, daddr;
1217
1218   if (ctx->first_frag)
1219     {
1220       if (ctx->proto == IP_PROTOCOL_UDP)
1221         checksum = &udp->checksum;
1222       else
1223         checksum = &tcp->checksum;
1224
1225       csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
1226       csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
1227       csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
1228       csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
1229       csum = ip_csum_sub_even (csum, sport);
1230       csum = ip_csum_sub_even (csum, dport);
1231     }
1232
1233   ste = nat64_db_st_entry_by_index (&nm->db, ctx->proto, ctx->sess_index);
1234   if (!ste)
1235     return -1;
1236
1237   bibe = nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index);
1238   if (!bibe)
1239     return -1;
1240
1241   nat64_session_reset_timeout (ste, ctx->vm);
1242
1243   sport = bibe->out_port;
1244   dport = ste->r_port;
1245
1246   nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, bibe->fib_index);
1247
1248   memset (&saddr, 0, sizeof (saddr));
1249   memset (&daddr, 0, sizeof (daddr));
1250   saddr.ip4.as_u32 = bibe->out_addr.as_u32;
1251   daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
1252
1253   ste =
1254     nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, ctx->proto,
1255                             0, 0);
1256
1257   if (ste)
1258     {
1259       bibe =
1260         nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index);
1261       if (!bibe)
1262         return -1;
1263     }
1264   else
1265     {
1266       bibe =
1267         nat64_db_bib_entry_find (&nm->db, &daddr, dport, ctx->proto, 0, 0);
1268
1269       if (!bibe)
1270         return -1;
1271
1272       ste =
1273         nat64_db_st_entry_create (&nm->db, bibe, &ip6->src_address,
1274                                   &saddr.ip4, sport);
1275     }
1276
1277   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
1278   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
1279
1280   if (ctx->first_frag)
1281     {
1282       udp->dst_port = bibe->in_port;
1283       udp->src_port = sport;
1284       csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
1285       csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
1286       csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
1287       csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
1288       csum = ip_csum_add_even (csum, udp->src_port);
1289       csum = ip_csum_add_even (csum, udp->dst_port);
1290       *checksum = ip_csum_fold (csum);
1291     }
1292
1293   return 0;
1294 }
1295
1296 static uword
1297 nat64_in2out_reass_node_fn (vlib_main_t * vm,
1298                             vlib_node_runtime_t * node, vlib_frame_t * frame)
1299 {
1300   u32 n_left_from, *from, *to_next;
1301   nat64_in2out_next_t next_index;
1302   u32 pkts_processed = 0;
1303   u32 *fragments_to_drop = 0;
1304   u32 *fragments_to_loopback = 0;
1305   nat64_main_t *nm = &nat64_main;
1306
1307   from = vlib_frame_vector_args (frame);
1308   n_left_from = frame->n_vectors;
1309   next_index = node->cached_next_index;
1310
1311   while (n_left_from > 0)
1312     {
1313       u32 n_left_to_next;
1314
1315       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1316
1317       while (n_left_from > 0 && n_left_to_next > 0)
1318         {
1319           u32 bi0;
1320           vlib_buffer_t *b0;
1321           u32 next0;
1322           u8 cached0 = 0;
1323           ip6_header_t *ip60;
1324           u16 l4_offset0, frag_offset0;
1325           u8 l4_protocol0;
1326           nat_reass_ip6_t *reass0;
1327           ip6_frag_hdr_t *frag0;
1328           nat64_db_bib_entry_t *bibe0;
1329           nat64_db_st_entry_t *ste0;
1330           udp_header_t *udp0;
1331           snat_protocol_t proto0;
1332           u32 sw_if_index0, fib_index0;
1333           ip46_address_t saddr0, daddr0;
1334           nat64_in2out_frag_set_ctx_t ctx0;
1335
1336           /* speculatively enqueue b0 to the current next frame */
1337           bi0 = from[0];
1338           to_next[0] = bi0;
1339           from += 1;
1340           to_next += 1;
1341           n_left_from -= 1;
1342           n_left_to_next -= 1;
1343
1344           b0 = vlib_get_buffer (vm, bi0);
1345           next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;
1346
1347           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1348           fib_index0 =
1349             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6,
1350                                                  sw_if_index0);
1351
1352           if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
1353             {
1354               next0 = NAT64_IN2OUT_NEXT_DROP;
1355               b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
1356               goto trace0;
1357             }
1358
1359           ip60 = (ip6_header_t *) vlib_buffer_get_current (b0);
1360
1361           if (PREDICT_FALSE
1362               (ip6_parse
1363                (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
1364                 &frag_offset0)))
1365             {
1366               next0 = NAT64_IN2OUT_NEXT_DROP;
1367               b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
1368               goto trace0;
1369             }
1370
1371           if (PREDICT_FALSE
1372               (!(l4_protocol0 == IP_PROTOCOL_TCP
1373                  || l4_protocol0 == IP_PROTOCOL_UDP)))
1374             {
1375               next0 = NAT64_IN2OUT_NEXT_DROP;
1376               b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
1377               goto trace0;
1378             }
1379
1380           udp0 = (udp_header_t *) u8_ptr_add (ip60, l4_offset0);
1381           frag0 = (ip6_frag_hdr_t *) u8_ptr_add (ip60, frag_offset0);
1382           proto0 = ip_proto_to_snat_proto (l4_protocol0);
1383
1384           reass0 = nat_ip6_reass_find_or_create (ip60->src_address,
1385                                                  ip60->dst_address,
1386                                                  frag0->identification,
1387                                                  l4_protocol0,
1388                                                  1, &fragments_to_drop);
1389
1390           if (PREDICT_FALSE (!reass0))
1391             {
1392               next0 = NAT64_IN2OUT_NEXT_DROP;
1393               b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_REASS];
1394               goto trace0;
1395             }
1396
1397           if (PREDICT_TRUE (ip6_frag_hdr_offset (frag0)))
1398             {
1399               ctx0.first_frag = 0;
1400               if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
1401                 {
1402                   if (nat_ip6_reass_add_fragment (reass0, bi0))
1403                     {
1404                       b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_FRAG];
1405                       next0 = NAT64_IN2OUT_NEXT_DROP;
1406                       goto trace0;
1407                     }
1408                   cached0 = 1;
1409                   goto trace0;
1410                 }
1411             }
1412           else
1413             {
1414               ctx0.first_frag = 1;
1415
1416               saddr0.as_u64[0] = ip60->src_address.as_u64[0];
1417               saddr0.as_u64[1] = ip60->src_address.as_u64[1];
1418               daddr0.as_u64[0] = ip60->dst_address.as_u64[0];
1419               daddr0.as_u64[1] = ip60->dst_address.as_u64[1];
1420
1421               ste0 =
1422                 nat64_db_st_entry_find (&nm->db, &saddr0, &daddr0,
1423                                         udp0->src_port, udp0->dst_port,
1424                                         l4_protocol0, fib_index0, 1);
1425               if (!ste0)
1426                 {
1427                   bibe0 =
1428                     nat64_db_bib_entry_find (&nm->db, &saddr0, udp0->src_port,
1429                                              l4_protocol0, fib_index0, 1);
1430                   if (!bibe0)
1431                     {
1432                       u16 out_port0;
1433                       ip4_address_t out_addr0;
1434                       if (nat64_alloc_out_addr_and_port
1435                           (fib_index0, proto0, &out_addr0, &out_port0))
1436                         {
1437                           next0 = NAT64_IN2OUT_NEXT_DROP;
1438                           b0->error =
1439                             node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1440                           goto trace0;
1441                         }
1442
1443                       bibe0 =
1444                         nat64_db_bib_entry_create (&nm->db,
1445                                                    &ip60->src_address,
1446                                                    &out_addr0, udp0->src_port,
1447                                                    clib_host_to_net_u16
1448                                                    (out_port0), fib_index0,
1449                                                    l4_protocol0, 0);
1450                       if (!bibe0)
1451                         {
1452                           next0 = NAT64_IN2OUT_NEXT_DROP;
1453                           b0->error =
1454                             node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1455                           goto trace0;
1456                         }
1457                     }
1458                   nat64_extract_ip4 (&ip60->dst_address, &daddr0.ip4,
1459                                      fib_index0);
1460                   ste0 =
1461                     nat64_db_st_entry_create (&nm->db, bibe0,
1462                                               &ip60->dst_address, &daddr0.ip4,
1463                                               udp0->dst_port);
1464                   if (!ste0)
1465                     {
1466                       next0 = NAT64_IN2OUT_NEXT_DROP;
1467                       b0->error =
1468                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1469                       goto trace0;
1470                     }
1471                 }
1472               reass0->sess_index =
1473                 nat64_db_st_entry_get_index (&nm->db, ste0);
1474
1475               nat_ip6_reass_get_frags (reass0, &fragments_to_loopback);
1476             }
1477
1478           ctx0.sess_index = reass0->sess_index;
1479           ctx0.proto = l4_protocol0;
1480           ctx0.vm = vm;
1481           ctx0.l4_offset = l4_offset0;
1482
1483           if (PREDICT_FALSE (is_hairpinning (&ip60->dst_address)))
1484             {
1485               next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1486               if (nat64_in2out_frag_hairpinning (b0, ip60, &ctx0))
1487                 {
1488                   next0 = NAT64_IN2OUT_NEXT_DROP;
1489                   b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1490                 }
1491               goto trace0;
1492             }
1493           else
1494             {
1495               if (ip6_to_ip4_fragmented (b0, nat64_in2out_frag_set_cb, &ctx0))
1496                 {
1497                   next0 = NAT64_IN2OUT_NEXT_DROP;
1498                   b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
1499                   goto trace0;
1500                 }
1501             }
1502
1503         trace0:
1504           if (PREDICT_FALSE
1505               ((node->flags & VLIB_NODE_FLAG_TRACE)
1506                && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1507             {
1508               nat64_in2out_reass_trace_t *t =
1509                 vlib_add_trace (vm, node, b0, sizeof (*t));
1510               t->cached = cached0;
1511               t->sw_if_index = sw_if_index0;
1512               t->next_index = next0;
1513             }
1514
1515           if (cached0)
1516             {
1517               n_left_to_next++;
1518               to_next--;
1519             }
1520           else
1521             {
1522               pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP;
1523
1524               /* verify speculative enqueue, maybe switch current next frame */
1525               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1526                                                to_next, n_left_to_next,
1527                                                bi0, next0);
1528             }
1529
1530           if (n_left_from == 0 && vec_len (fragments_to_loopback))
1531             {
1532               from = vlib_frame_vector_args (frame);
1533               u32 len = vec_len (fragments_to_loopback);
1534               if (len <= VLIB_FRAME_SIZE)
1535                 {
1536                   clib_memcpy (from, fragments_to_loopback,
1537                                sizeof (u32) * len);
1538                   n_left_from = len;
1539                   vec_reset_length (fragments_to_loopback);
1540                 }
1541               else
1542                 {
1543                   clib_memcpy (from,
1544                                fragments_to_loopback + (len -
1545                                                         VLIB_FRAME_SIZE),
1546                                sizeof (u32) * VLIB_FRAME_SIZE);
1547                   n_left_from = VLIB_FRAME_SIZE;
1548                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
1549                 }
1550             }
1551         }
1552
1553       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1554     }
1555
1556   vlib_node_increment_counter (vm, nat64_in2out_reass_node.index,
1557                                NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
1558                                pkts_processed);
1559
1560   nat_send_all_to_node (vm, fragments_to_drop, node,
1561                         &node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT],
1562                         NAT64_IN2OUT_NEXT_DROP);
1563
1564   vec_free (fragments_to_drop);
1565   vec_free (fragments_to_loopback);
1566   return frame->n_vectors;
1567 }
1568
1569 /* *INDENT-OFF* */
1570 VLIB_REGISTER_NODE (nat64_in2out_reass_node) = {
1571   .function = nat64_in2out_reass_node_fn,
1572   .name = "nat64-in2out-reass",
1573   .vector_size = sizeof (u32),
1574   .format_trace = format_nat64_in2out_reass_trace,
1575   .type = VLIB_NODE_TYPE_INTERNAL,
1576   .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
1577   .error_strings = nat64_in2out_error_strings,
1578   .n_next_nodes = NAT64_IN2OUT_N_NEXT,
1579   /* edit / add dispositions here */
1580   .next_nodes = {
1581     [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
1582     [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
1583     [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
1584     [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
1585     [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
1586   },
1587 };
1588 /* *INDENT-ON* */
1589
1590 VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_reass_node,
1591                               nat64_in2out_reass_node_fn);
1592
1593 /*
1594  * fd.io coding-style-patch-verification: ON
1595  *
1596  * Local Variables:
1597  * eval: (c-set-style "gnu")
1598  * End:
1599  */