NAT64: multi-thread support (VPP-891)
[vpp.git] / src / plugins / nat / nat64_in2out.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT64 IPv6 to IPv4 translation (inside to outside network)
18  */
19
20 #include <nat/nat64.h>
21 #include <nat/nat_reass.h>
22 #include <vnet/ip/ip6_to_ip4.h>
23 #include <vnet/fib/fib_table.h>
24
/* Per-packet trace record for the in2out fast-path and slow-path nodes */
typedef struct
{
  u32 sw_if_index;		/* RX interface of the traced packet */
  u32 next_index;		/* next node chosen for the packet */
  u8 is_slow_path;		/* 1 when recorded by the slow-path node */
} nat64_in2out_trace_t;
31
32 static u8 *
33 format_nat64_in2out_trace (u8 * s, va_list * args)
34 {
35   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
36   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
37   nat64_in2out_trace_t *t = va_arg (*args, nat64_in2out_trace_t *);
38   char *tag;
39
40   tag = t->is_slow_path ? "NAT64-in2out-slowpath" : "NAT64-in2out";
41
42   s =
43     format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
44             t->next_index);
45
46   return s;
47 }
48
/* Per-packet trace record for the in2out reassembly node */
typedef struct
{
  u32 sw_if_index;		/* RX interface of the traced packet */
  u32 next_index;		/* next node chosen for the packet */
  u8 cached;			/* 1 if the fragment was cached, 0 if translated */
} nat64_in2out_reass_trace_t;
55
56 static u8 *
57 format_nat64_in2out_reass_trace (u8 * s, va_list * args)
58 {
59   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
60   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
61   nat64_in2out_reass_trace_t *t =
62     va_arg (*args, nat64_in2out_reass_trace_t *);
63
64   s =
65     format (s, "NAT64-in2out-reass: sw_if_index %d, next index %d, status %s",
66             t->sw_if_index, t->next_index,
67             t->cached ? "cached" : "translated");
68
69   return s;
70 }
71
/* Graph-node registrations; the node definitions appear later in the file. */
vlib_node_registration_t nat64_in2out_node;
vlib_node_registration_t nat64_in2out_slowpath_node;
vlib_node_registration_t nat64_in2out_reass_node;
vlib_node_registration_t nat64_in2out_handoff_node;
76
/* Error counters of the in2out nodes: _(symbol, description) */
#define foreach_nat64_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "unsupported protocol")          \
_(IN2OUT_PACKETS, "good in2out packets processed")       \
_(NO_TRANSLATION, "no translation")                      \
_(UNKNOWN, "unknown")                                    \
_(DROP_FRAGMENT, "Drop fragment")                        \
_(MAX_REASS, "Maximum reassemblies exceeded")            \
_(MAX_FRAG, "Maximum fragments per reassembly exceeded")


typedef enum
{
#define _(sym,str) NAT64_IN2OUT_ERROR_##sym,
  foreach_nat64_in2out_error
#undef _
    NAT64_IN2OUT_N_ERROR,
} nat64_in2out_error_t;

/* Human-readable counter names, index-matched to nat64_in2out_error_t */
static char *nat64_in2out_error_strings[] = {
#define _(sym,string) string,
  foreach_nat64_in2out_error
#undef _
};
100
/* Next-node indices used by the in2out nodes */
typedef enum
{
  NAT64_IN2OUT_NEXT_IP4_LOOKUP,	/* translated packet continues as IPv4 */
  NAT64_IN2OUT_NEXT_IP6_LOOKUP,	/* hairpinned packet stays IPv6 */
  NAT64_IN2OUT_NEXT_DROP,
  NAT64_IN2OUT_NEXT_SLOWPATH,	/* unknown L4 protocol -> slow-path node */
  NAT64_IN2OUT_NEXT_REASS,	/* fragmented packet -> reassembly node */
  NAT64_IN2OUT_N_NEXT,
} nat64_in2out_next_t;
110
/* Context handed to the ip6_to_ip4 translation callbacks */
typedef struct nat64_in2out_set_ctx_t_
{
  vlib_buffer_t *b;		/* buffer being translated */
  vlib_main_t *vm;
  u32 thread_index;		/* selects the per-thread NAT64 DB */
} nat64_in2out_set_ctx_t;
117
118 /**
119  * @brief Check whether is a hairpinning.
120  *
121  * If the destination IP address of the packet is an IPv4 address assigned to
122  * the NAT64 itself, then the packet is a hairpin packet.
123  *
124  * param dst_addr Destination address of the packet.
125  *
126  * @returns 1 if hairpinning, otherwise 0.
127  */
128 static_always_inline int
129 is_hairpinning (ip6_address_t * dst_addr)
130 {
131   nat64_main_t *nm = &nat64_main;
132   int i;
133
134   for (i = 0; i < vec_len (nm->addr_pool); i++)
135     {
136       if (nm->addr_pool[i].addr.as_u32 == dst_addr->as_u32[3])
137         return 1;
138     }
139
140   return 0;
141 }
142
/**
 * @brief ip6_to_ip4 translation callback for TCP and UDP packets.
 *
 * Finds (or creates) the BIB and session-table entries for the flow in this
 * thread's NAT64 DB and rewrites the IPv4 addresses and the source port.
 *
 * @param ip6 IPv6 header of the packet being translated.
 * @param ip4 IPv4 header being built for the translated packet.
 * @param arg Pointer to a nat64_in2out_set_ctx_t (buffer, vm, thread index).
 *
 * @returns 0 on success, -1 when the packet cannot be translated.
 */
static int
nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
			     void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  /* TCP and UDP keep src/dst ports at the same offsets, so the UDP view of
     the L4 header works for both protocols. */
  udp_header_t *udp = ip6_next_header (ip6);
  u8 proto = ip6->protocol;
  u16 sport = udp->src_port;	/* network byte order */
  u16 dport = udp->dst_port;	/* network byte order */
  /* NAT64 DB is per worker thread (multi-thread support, VPP-891) */
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  ste =
    nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
			    fib_index, 1);

  if (ste)
    {
      /* Existing session: reuse its BIB entry. */
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;
    }
  else
    {
      bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1);

      if (!bibe)
	{
	  /* No binding yet: allocate a fresh outside address and port. */
	  u16 out_port;
	  ip4_address_t out_addr;
	  if (nat64_alloc_out_addr_and_port
	      (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
	       &out_port, ctx->thread_index))
	    return -1;

	  /* out_port comes back in host order, BIB stores network order. */
	  bibe =
	    nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr,
				       sport, clib_host_to_net_u16 (out_port),
				       fib_index, proto, 0);
	  if (!bibe)
	    return -1;
	}

      /* The IPv4 destination is embedded in the IPv6 destination address. */
      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
      ste =
	nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
				  &daddr.ip4, dport);
      if (!ste)
	return -1;
    }

  nat64_session_reset_timeout (ste, ctx->vm);

  ip4->src_address.as_u32 = bibe->out_addr.as_u32;
  udp->src_port = bibe->out_port;

  ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;

  if (proto == IP_PROTOCOL_TCP)
    {
      /* Fold only the source-port delta into the TCP checksum here;
         NOTE(review): address deltas are presumably handled by the generic
         ip6_to_ip4 translation — confirm against vnet/ip/ip6_to_ip4.h. */
      u16 *checksum;
      ip_csum_t csum;
      tcp_header_t *tcp = ip6_next_header (ip6);

      checksum = &tcp->checksum;
      csum = ip_csum_sub_even (*checksum, sport);
      csum = ip_csum_add_even (csum, udp->src_port);
      *checksum = ip_csum_fold (csum);
    }

  return 0;
}
228
/**
 * @brief ip6_to_ip4 translation callback for ICMP packets.
 *
 * Echo request/reply messages are translated like a TCP/UDP flow with the
 * echo identifier playing the role of the port.  Any other message type is
 * given the first pool address as IPv4 source and no per-flow state.
 *
 * @param ip6 IPv6 header of the packet being translated.
 * @param ip4 IPv4 header being built for the translated packet.
 * @param arg Pointer to a nat64_in2out_set_ctx_t (buffer, vm, thread index).
 *
 * @returns 0 on success, -1 when the packet cannot be translated.
 */
static int
nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  icmp46_header_t *icmp = ip6_next_header (ip6);
  /* per-thread NAT64 DB */
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  /* NOTE(review): ICMP4_* type values are compared here — presumably the
     generic translator has already rewritten the ICMPv6 type when this
     callback runs; confirm against icmp6_to_icmp. */
  if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply)
    {
      /* Echo identifier (bytes 4-5 of the ICMP header) acts as the port. */
      u16 in_id = ((u16 *) (icmp))[2];
      ste =
	nat64_db_st_entry_find (db, &saddr, &daddr, in_id, 0,
				IP_PROTOCOL_ICMP, fib_index, 1);

      if (ste)
	{
	  /* Existing session: reuse its BIB entry. */
	  bibe =
	    nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP,
					 ste->bibe_index);
	  if (!bibe)
	    return -1;
	}
      else
	{
	  bibe =
	    nat64_db_bib_entry_find (db, &saddr, in_id,
				     IP_PROTOCOL_ICMP, fib_index, 1);

	  if (!bibe)
	    {
	      /* No binding yet: allocate an outside address and id. */
	      u16 out_id;
	      ip4_address_t out_addr;
	      if (nat64_alloc_out_addr_and_port
		  (fib_index, SNAT_PROTOCOL_ICMP, &out_addr, &out_id,
		   ctx->thread_index))
		return -1;

	      bibe =
		nat64_db_bib_entry_create (db, &ip6->src_address,
					   &out_addr, in_id,
					   clib_host_to_net_u16 (out_id),
					   fib_index, IP_PROTOCOL_ICMP, 0);
	      if (!bibe)
		return -1;
	    }

	  /* IPv4 destination is embedded in the IPv6 destination address. */
	  nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
	  ste =
	    nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
				      &daddr.ip4, 0);
	  if (!ste)
	    return -1;
	}

      nat64_session_reset_timeout (ste, ctx->vm);

      ip4->src_address.as_u32 = bibe->out_addr.as_u32;
      /* Rewrite the echo identifier with the outside id. */
      ((u16 *) (icmp))[2] = bibe->out_port;

      ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
    }
  else
    {
      if (!vec_len (nm->addr_pool))
	return -1;

      /* Non-echo ICMP (error messages): stateless, use first pool address. */
      ip4->src_address.as_u32 = nm->addr_pool[0].addr.as_u32;
      nat64_extract_ip4 (&ip6->dst_address, &ip4->dst_address, fib_index);
    }

  return 0;
}
315
/**
 * @brief ip6_to_ip4 translation callback for the packet embedded in an
 * ICMPv6 error message.
 *
 * The embedded packet is a copy of the original (outbound) packet, so the
 * session lookups swap source/destination roles (daddr/saddr, dport/sport)
 * relative to the outer direction.  Only existing sessions are matched;
 * no state is created for embedded packets.
 *
 * @param ip6 embedded IPv6 header being translated.
 * @param ip4 embedded IPv4 header being built.
 * @param arg Pointer to a nat64_in2out_set_ctx_t (buffer, vm, thread index).
 *
 * @returns 0 on success, -1 when no matching session exists.
 */
static int
nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
				void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *ctx = arg;
  nat64_db_st_entry_t *ste;
  nat64_db_bib_entry_t *bibe;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  u8 proto = ip6->protocol;
  /* per-thread NAT64 DB */
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  if (proto == IP_PROTOCOL_ICMP6)
    {
      icmp46_header_t *icmp = ip6_next_header (ip6);
      /* Echo identifier (bytes 4-5 of the ICMP header) acts as the port. */
      u16 in_id = ((u16 *) (icmp))[2];
      proto = IP_PROTOCOL_ICMP;

      /* Only embedded echo messages can match a session. */
      if (!
	  (icmp->type == ICMP4_echo_request
	   || icmp->type == ICMP4_echo_reply))
	return -1;

      /* Lookup with reversed address roles (see function header). */
      ste =
	nat64_db_st_entry_find (db, &daddr, &saddr, in_id, 0, proto,
				fib_index, 1);
      if (!ste)
	return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;

      ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
      ((u16 *) (icmp))[2] = bibe->out_port;
      ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
    }
  else
    {
      /* udp and tcp alias the same L4 header; the port fields share
         offsets, so the UDP view is used for both protocols. */
      udp_header_t *udp = ip6_next_header (ip6);
      tcp_header_t *tcp = ip6_next_header (ip6);
      u16 *checksum;
      ip_csum_t csum;

      u16 sport = udp->src_port;
      u16 dport = udp->dst_port;

      /* Lookup with reversed address and port roles. */
      ste =
	nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
				fib_index, 1);
      if (!ste)
	return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;

      ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
      udp->dst_port = bibe->out_port;
      ip4->src_address.as_u32 = ste->out_r_addr.as_u32;

      /* Fold the destination-port change into the L4 checksum. */
      if (proto == IP_PROTOCOL_TCP)
	checksum = &tcp->checksum;
      else
	checksum = &udp->checksum;
      csum = ip_csum_sub_even (*checksum, dport);
      csum = ip_csum_add_even (csum, udp->dst_port);
      *checksum = ip_csum_fold (csum);
    }

  return 0;
}
398
/* Context for the session-table walk used by the unknown-protocol path */
typedef struct unk_proto_st_walk_ctx_t_
{
  ip6_address_t src_addr;	/* inside source of the packet */
  ip6_address_t dst_addr;	/* inside destination of the packet */
  ip4_address_t out_addr;	/* out: outside address found (0 = none) */
  u32 fib_index;
  u32 thread_index;		/* selects the per-thread NAT64 DB */
  u8 proto;			/* L4 protocol of the packet being set up */
} unk_proto_st_walk_ctx_t;
408
409 static int
410 unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg)
411 {
412   nat64_main_t *nm = &nat64_main;
413   unk_proto_st_walk_ctx_t *ctx = arg;
414   nat64_db_bib_entry_t *bibe;
415   ip46_address_t saddr, daddr;
416   nat64_db_t *db = &nm->db[ctx->thread_index];
417
418   if (ip46_address_is_equal (&ste->in_r_addr, &ctx->dst_addr))
419     {
420       bibe = nat64_db_bib_entry_by_index (db, ste->proto, ste->bibe_index);
421       if (!bibe)
422         return -1;
423
424       if (ip46_address_is_equal (&bibe->in_addr, &ctx->src_addr)
425           && bibe->fib_index == ctx->fib_index)
426         {
427           memset (&saddr, 0, sizeof (saddr));
428           saddr.ip4.as_u32 = bibe->out_addr.as_u32;
429           memset (&daddr, 0, sizeof (daddr));
430           nat64_extract_ip4 (&ctx->dst_addr, &daddr.ip4, ctx->fib_index);
431
432           if (nat64_db_st_entry_find
433               (db, &daddr, &saddr, 0, 0, ctx->proto, ctx->fib_index, 0))
434             return -1;
435
436           ctx->out_addr.as_u32 = bibe->out_addr.as_u32;
437           return 1;
438         }
439     }
440
441   return 0;
442 }
443
444 static int
445 nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
446                                void *arg)
447 {
448   nat64_main_t *nm = &nat64_main;
449   nat64_in2out_set_ctx_t *s_ctx = arg;
450   nat64_db_bib_entry_t *bibe;
451   nat64_db_st_entry_t *ste;
452   ip46_address_t saddr, daddr, addr;
453   u32 sw_if_index, fib_index;
454   u8 proto = ip6->protocol;
455   int i;
456   nat64_db_t *db = &nm->db[s_ctx->thread_index];
457
458   sw_if_index = vnet_buffer (s_ctx->b)->sw_if_index[VLIB_RX];
459   fib_index =
460     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
461
462   saddr.as_u64[0] = ip6->src_address.as_u64[0];
463   saddr.as_u64[1] = ip6->src_address.as_u64[1];
464   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
465   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
466
467   ste =
468     nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1);
469
470   if (ste)
471     {
472       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
473       if (!bibe)
474         return -1;
475     }
476   else
477     {
478       bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1);
479
480       if (!bibe)
481         {
482           /* Choose same out address as for TCP/UDP session to same dst */
483           unk_proto_st_walk_ctx_t ctx = {
484             .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
485             .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
486             .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
487             .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
488             .out_addr.as_u32 = 0,
489             .fib_index = fib_index,
490             .proto = proto,
491             .thread_index = s_ctx->thread_index,
492           };
493
494           nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);
495
496           if (!ctx.out_addr.as_u32)
497             nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);
498
499           /* Verify if out address is not already in use for protocol */
500           memset (&addr, 0, sizeof (addr));
501           addr.ip4.as_u32 = ctx.out_addr.as_u32;
502           if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
503             ctx.out_addr.as_u32 = 0;
504
505           if (!ctx.out_addr.as_u32)
506             {
507               for (i = 0; i < vec_len (nm->addr_pool); i++)
508                 {
509                   addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
510                   if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
511                     break;
512                 }
513             }
514
515           if (!ctx.out_addr.as_u32)
516             return -1;
517
518           bibe =
519             nat64_db_bib_entry_create (db, &ip6->src_address,
520                                        &ctx.out_addr, 0, 0, fib_index, proto,
521                                        0);
522           if (!bibe)
523             return -1;
524         }
525
526       nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
527       ste =
528         nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0);
529       if (!ste)
530         return -1;
531     }
532
533   nat64_session_reset_timeout (ste, s_ctx->vm);
534
535   ip4->src_address.as_u32 = bibe->out_addr.as_u32;
536   ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
537
538   return 0;
539 }
540
541
542
/**
 * @brief Translate a hairpinned TCP/UDP packet.
 *
 * The destination is one of the NAT64 pool addresses, so the packet loops
 * back into the IPv6 network: the source is translated through this
 * thread's BIB/session tables as usual, then the destination BIB entry is
 * searched in every per-thread DB to find the inside host behind the pool
 * address/port.  The packet stays IPv6; only addresses, ports and the L4
 * checksum are rewritten in place.
 *
 * @param vm vlib main.
 * @param b buffer holding the packet.
 * @param ip6 IPv6 header of the packet.
 * @param thread_index worker thread index (selects the NAT64 DB).
 *
 * @returns 0 on success, -1 to drop the packet.
 */
static int
nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
				  ip6_header_t * ip6, u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  /* udp and tcp alias the same L4 header; port offsets are identical. */
  udp_header_t *udp = ip6_next_header (ip6);
  tcp_header_t *tcp = ip6_next_header (ip6);
  u8 proto = ip6->protocol;
  u16 sport = udp->src_port;
  u16 dport = udp->dst_port;
  u16 *checksum;
  ip_csum_t csum;
  nat64_db_t *db = &nm->db[thread_index];

  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  if (proto == IP_PROTOCOL_UDP)
    checksum = &udp->checksum;
  else
    checksum = &tcp->checksum;

  /* Remove old addresses and ports from the checksum now; the new values
     are added back after the rewrite below (incremental update). */
  csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, sport);
  csum = ip_csum_sub_even (csum, dport);

  ste =
    nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
			    fib_index, 1);

  if (ste)
    {
      /* Existing session: reuse its BIB entry. */
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;
    }
  else
    {
      bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1);

      if (!bibe)
	{
	  /* No binding yet: allocate a fresh outside address and port. */
	  u16 out_port;
	  ip4_address_t out_addr;
	  if (nat64_alloc_out_addr_and_port
	      (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
	       &out_port, thread_index))
	    return -1;

	  bibe =
	    nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr,
				       sport, clib_host_to_net_u16 (out_port),
				       fib_index, proto, 0);
	  if (!bibe)
	    return -1;
	}

      /* IPv4 destination is embedded in the IPv6 destination address. */
      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
      ste =
	nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
				  &daddr.ip4, dport);
      if (!ste)
	return -1;
    }

  nat64_session_reset_timeout (ste, vm);

  /* Rewrite source: NAT64-composed IPv6 source and outside port. */
  sport = udp->src_port = bibe->out_port;
  nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);

  memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ste->out_r_addr.as_u32;

  /* Find the destination binding in any per-thread DB (the target host's
     session may have been created by another worker). */
  bibe = 0;
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, 0, 0);

      if (bibe)
	break;
    }
  /* *INDENT-ON* */

  if (!bibe)
    return -1;

  /* Rewrite destination with the inside address/port of the target host. */
  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
  udp->dst_port = bibe->in_port;

  csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
  csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
  csum = ip_csum_add_even (csum, udp->src_port);
  csum = ip_csum_add_even (csum, udp->dst_port);
  *checksum = ip_csum_fold (csum);

  return 0;
}
657
/**
 * @brief Translate a hairpinned ICMPv6 error message.
 *
 * Only error messages carrying an embedded packet are handled here (echo
 * messages are rejected).  The embedded TCP/UDP packet is mapped back to
 * inside addresses/ports via the session and BIB tables, then the outer
 * IPv6 header is rewritten and the ICMP checksum recomputed from scratch
 * over the IPv6 pseudo-header and payload.
 *
 * @param vm vlib main.
 * @param b buffer holding the packet.
 * @param ip6 outer IPv6 header.
 * @param thread_index worker thread index (selects the NAT64 DB).
 *
 * @returns 0 on success, -1 to drop the packet.
 */
static int
nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
			       ip6_header_t * ip6, u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  icmp46_header_t *icmp = ip6_next_header (ip6);
  ip6_header_t *inner_ip6;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  u8 proto;
  udp_header_t *udp;
  tcp_header_t *tcp;
  u16 *checksum, sport, dport;
  ip_csum_t csum;
  nat64_db_t *db = &nm->db[thread_index];

  /* Echo request/reply is not handled by this path. */
  if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
    return -1;

  /* Embedded original packet starts 8 bytes into the ICMP message. */
  inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);

  proto = inner_ip6->protocol;

  /* Embedded ICMP (error-in-error) is not translated. */
  if (proto == IP_PROTOCOL_ICMP6)
    return -1;

  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = inner_ip6->src_address.as_u64[0];
  saddr.as_u64[1] = inner_ip6->src_address.as_u64[1];
  daddr.as_u64[0] = inner_ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = inner_ip6->dst_address.as_u64[1];

  /* udp and tcp alias the same L4 header; port offsets are identical. */
  udp = ip6_next_header (inner_ip6);
  tcp = ip6_next_header (inner_ip6);

  sport = udp->src_port;
  dport = udp->dst_port;

  if (proto == IP_PROTOCOL_UDP)
    checksum = &udp->checksum;
  else
    checksum = &tcp->checksum;

  /* Remove old inner addresses/ports from the L4 checksum; new values are
     added back after the rewrite (incremental update). */
  csum = ip_csum_sub_even (*checksum, inner_ip6->src_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, sport);
  csum = ip_csum_sub_even (csum, dport);

  /* The embedded packet is the original outbound packet, so the lookup
     swaps source/destination and port roles. */
  ste =
    nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
			    fib_index, 1);
  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
  if (!bibe)
    return -1;

  dport = udp->dst_port = bibe->out_port;
  nat64_compose_ip6 (&inner_ip6->dst_address, &bibe->out_addr, fib_index);

  memset (&saddr, 0, sizeof (saddr));
  memset (&daddr, 0, sizeof (daddr));
  saddr.ip4.as_u32 = ste->out_r_addr.as_u32;
  daddr.ip4.as_u32 = bibe->out_addr.as_u32;

  /* Find the peer session in any per-thread DB (it may belong to a
     different worker). */
  ste = 0;
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      ste = nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
				    0, 0);

      if (ste)
	break;
    }
  /* *INDENT-ON* */

  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
  if (!bibe)
    return -1;

  inner_ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
  inner_ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
  udp->src_port = bibe->in_port;

  csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]);
  csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]);
  csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]);
  csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]);
  csum = ip_csum_add_even (csum, udp->src_port);
  csum = ip_csum_add_even (csum, udp->dst_port);
  *checksum = ip_csum_fold (csum);

  if (!vec_len (nm->addr_pool))
    return -1;

  /* Rewrite outer header: source is a NAT64-composed pool address,
     destination is the inside host from the inner packet. */
  nat64_compose_ip6 (&ip6->src_address, &nm->addr_pool[0].addr, fib_index);
  ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
  ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];

  /* Recompute the ICMP checksum over the IPv6 pseudo-header + payload. */
  icmp->checksum = 0;
  csum = ip_csum_with_carry (0, ip6->payload_length);
  csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol));
  csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]);
  csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]);
  csum =
    ip_incremental_checksum (csum, icmp,
			     clib_net_to_host_u16 (ip6->payload_length));
  icmp->checksum = ~ip_csum_fold (csum);

  return 0;
}
783
784 static int
785 nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
786                                     ip6_header_t * ip6, u32 thread_index)
787 {
788   nat64_main_t *nm = &nat64_main;
789   nat64_db_bib_entry_t *bibe;
790   nat64_db_st_entry_t *ste;
791   ip46_address_t saddr, daddr, addr;
792   u32 sw_if_index, fib_index;
793   u8 proto = ip6->protocol;
794   int i;
795   nat64_db_t *db = &nm->db[thread_index];
796
797   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
798   fib_index =
799     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
800
801   saddr.as_u64[0] = ip6->src_address.as_u64[0];
802   saddr.as_u64[1] = ip6->src_address.as_u64[1];
803   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
804   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
805
806   ste =
807     nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1);
808
809   if (ste)
810     {
811       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
812       if (!bibe)
813         return -1;
814     }
815   else
816     {
817       bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1);
818
819       if (!bibe)
820         {
821           /* Choose same out address as for TCP/UDP session to same dst */
822           unk_proto_st_walk_ctx_t ctx = {
823             .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
824             .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
825             .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
826             .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
827             .out_addr.as_u32 = 0,
828             .fib_index = fib_index,
829             .proto = proto,
830             .thread_index = thread_index,
831           };
832
833           nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);
834
835           if (!ctx.out_addr.as_u32)
836             nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);
837
838           /* Verify if out address is not already in use for protocol */
839           memset (&addr, 0, sizeof (addr));
840           addr.ip4.as_u32 = ctx.out_addr.as_u32;
841           if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
842             ctx.out_addr.as_u32 = 0;
843
844           if (!ctx.out_addr.as_u32)
845             {
846               for (i = 0; i < vec_len (nm->addr_pool); i++)
847                 {
848                   addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
849                   if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
850                     break;
851                 }
852             }
853
854           if (!ctx.out_addr.as_u32)
855             return -1;
856
857           bibe =
858             nat64_db_bib_entry_create (db, &ip6->src_address,
859                                        &ctx.out_addr, 0, 0, fib_index, proto,
860                                        0);
861           if (!bibe)
862             return -1;
863         }
864
865       nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
866       ste =
867         nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0);
868       if (!ste)
869         return -1;
870     }
871
872   nat64_session_reset_timeout (ste, vm);
873
874   nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);
875
876   memset (&daddr, 0, sizeof (daddr));
877   daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
878
879   bibe = 0;
880   /* *INDENT-OFF* */
881   vec_foreach (db, nm->db)
882     {
883       bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, 0, 0);
884
885       if (bibe)
886         break;
887     }
888   /* *INDENT-ON* */
889
890   if (!bibe)
891     return -1;
892
893   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
894   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
895
896   return 0;
897 }
898
/**
 * @brief NAT64 IPv6-to-IPv4 translation loop, shared by fast and slow path.
 *
 * Walks the frame one buffer at a time, parses the IPv6 header chain,
 * classifies the L4 protocol and rewrites the packet for the outside
 * (IPv4) network.  Hairpinned traffic (destination inside the NAT64
 * prefix) is rewritten back to IPv6 instead.  The fast path hands
 * unknown L4 protocols off to the slow path node; the slow path
 * translates them in place.
 *
 * @param vm           vlib main.
 * @param node         Node runtime.
 * @param frame        Frame of buffer indices to process.
 * @param is_slow_path 1 when running as "nat64-in2out-slowpath", 0 for
 *                     "nat64-in2out".  Compile-time constant at each call
 *                     site, so the unused branch is optimized away.
 *
 * @return Number of vectors (buffers) in the frame.
 */
static inline uword
nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                             vlib_frame_t * frame, u8 is_slow_path)
{
  u32 n_left_from, *from, *to_next;
  nat64_in2out_next_t next_index;
  u32 pkts_processed = 0;
  u32 stats_node_index;
  /* Worker thread index; handed to the translation callbacks so they
   * operate on per-worker NAT64 state. */
  u32 thread_index = vlib_get_thread_index ();

  /* Account the packet counter to whichever node variant is executing. */
  stats_node_index =
    is_slow_path ? nat64_in2out_slowpath_node.index : nat64_in2out_node.index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0;
	  vlib_buffer_t *b0;
	  u32 next0;
	  ip6_header_t *ip60;
	  u16 l4_offset0, frag_offset0;
	  u8 l4_protocol0;
	  u32 proto0;
	  nat64_in2out_set_ctx_t ctx0;

	  /* speculatively enqueue b0 to the current next frame */
	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  b0 = vlib_get_buffer (vm, bi0);
	  ip60 = vlib_buffer_get_current (b0);

	  /* Context passed to the ip6_to_ip4 translation callbacks. */
	  ctx0.b = b0;
	  ctx0.vm = vm;
	  ctx0.thread_index = thread_index;

	  next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;

	  /* Walk the IPv6 extension header chain; drop on malformed
	   * packets. */
	  if (PREDICT_FALSE
	      (ip6_parse
	       (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
		&frag_offset0)))
	    {
	      next0 = NAT64_IN2OUT_NEXT_DROP;
	      b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
	      goto trace0;
	    }

	  /* ~0 means an L4 protocol NAT64 has no dedicated handler for. */
	  proto0 = ip_proto_to_snat_proto (l4_protocol0);

	  if (is_slow_path)
	    {
	      /* Slow path only sees packets the fast path punted, so the
	       * unknown-protocol case is the expected one here. */
	      if (PREDICT_TRUE (proto0 == ~0))
		{
		  if (is_hairpinning (&ip60->dst_address))
		    {
		      next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
		      if (nat64_in2out_unk_proto_hairpinning
			  (vm, b0, ip60, thread_index))
			{
			  next0 = NAT64_IN2OUT_NEXT_DROP;
			  b0->error =
			    node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
			}
		      goto trace0;
		    }

		  if (ip6_to_ip4 (b0, nat64_in2out_unk_proto_set_cb, &ctx0))
		    {
		      next0 = NAT64_IN2OUT_NEXT_DROP;
		      b0->error =
			node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		      goto trace0;
		    }
		}
	      goto trace0;
	    }
	  else
	    {
	      /* Fast path: punt unknown protocols to the slow path node. */
	      if (PREDICT_FALSE (proto0 == ~0))
		{
		  next0 = NAT64_IN2OUT_NEXT_SLOWPATH;
		  goto trace0;
		}
	    }

	  /* Fragments take the reassembly path.  NOTE(review): this checks
	   * only the first next-header (ip60->protocol), not frag_offset0
	   * from ip6_parse -- a fragment header behind another extension
	   * header would bypass this check; confirm intended. */
	  if (PREDICT_FALSE
	      (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION))
	    {
	      next0 = NAT64_IN2OUT_NEXT_REASS;
	      goto trace0;
	    }

	  if (proto0 == SNAT_PROTOCOL_ICMP)
	    {
	      /* Hairpinned ICMP is rewritten in place and sent back to the
	       * IPv6 lookup instead of being translated to IPv4. */
	      if (is_hairpinning (&ip60->dst_address))
		{
		  next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
		  if (nat64_in2out_icmp_hairpinning
		      (vm, b0, ip60, thread_index))
		    {
		      next0 = NAT64_IN2OUT_NEXT_DROP;
		      b0->error =
			node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		    }
		  goto trace0;
		}

	      /* Outer and inner (ICMP error payload) headers get separate
	       * translation callbacks. */
	      if (icmp6_to_icmp
		  (b0, nat64_in2out_icmp_set_cb, &ctx0,
		   nat64_in2out_inner_icmp_set_cb, &ctx0))
		{
		  next0 = NAT64_IN2OUT_NEXT_DROP;
		  b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		  goto trace0;
		}
	    }
	  else if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
	    {
	      if (is_hairpinning (&ip60->dst_address))
		{
		  next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
		  if (nat64_in2out_tcp_udp_hairpinning
		      (vm, b0, ip60, thread_index))
		    {
		      next0 = NAT64_IN2OUT_NEXT_DROP;
		      b0->error =
			node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		    }
		  goto trace0;
		}

	      if (ip6_to_ip4_tcp_udp
		  (b0, nat64_in2out_tcp_udp_set_cb, &ctx0, 0))
		{
		  next0 = NAT64_IN2OUT_NEXT_DROP;
		  b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		  goto trace0;
		}
	    }

	trace0:
	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
			     && (b0->flags & VLIB_BUFFER_IS_TRACED)))
	    {
	      nat64_in2out_trace_t *t =
		vlib_add_trace (vm, node, b0, sizeof (*t));
	      t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
	      t->next_index = next0;
	      t->is_slow_path = is_slow_path;
	    }

	  /* Dropped packets are not counted as processed. */
	  pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP;

	  /* verify speculative enqueue, maybe switch current next frame */
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
					   n_left_to_next, bi0, next0);
	}
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  vlib_node_increment_counter (vm, stats_node_index,
			       NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
			       pkts_processed);
  return frame->n_vectors;
}
1077
1078 static uword
1079 nat64_in2out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1080                       vlib_frame_t * frame)
1081 {
1082   return nat64_in2out_node_fn_inline (vm, node, frame, 0);
1083 }
1084
/* *INDENT-OFF* */
/** Graph node registration: NAT64 in2out fast path. */
VLIB_REGISTER_NODE (nat64_in2out_node) = {
  .function = nat64_in2out_node_fn,
  .name = "nat64-in2out",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
  .error_strings = nat64_in2out_error_strings,
  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_node, nat64_in2out_node_fn);
1107
1108 static uword
1109 nat64_in2out_slowpath_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1110                                vlib_frame_t * frame)
1111 {
1112   return nat64_in2out_node_fn_inline (vm, node, frame, 1);
1113 }
1114
/* *INDENT-OFF* */
/** Graph node registration: NAT64 in2out slow path (unknown protocols). */
VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
  .function = nat64_in2out_slowpath_node_fn,
  .name = "nat64-in2out-slowpath",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
  .error_strings = nat64_in2out_error_strings,
  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_slowpath_node,
			      nat64_in2out_slowpath_node_fn);
1138
/**
 * @brief Per-packet context for fragment translation callbacks.
 */
typedef struct nat64_in2out_frag_set_ctx_t_
{
  vlib_main_t *vm;		/**< vlib main, used for session timeout reset */
  u32 sess_index;		/**< NAT64 session table entry index */
  u32 thread_index;		/**< worker thread / per-thread DB index */
  u16 l4_offset;		/**< byte offset of the L4 header within the packet */
  u8 proto;			/**< L4 protocol (IP protocol number) */
  u8 first_frag;		/**< 1 for the fragment carrying the L4 header */
} nat64_in2out_frag_set_ctx_t;
1148
/**
 * @brief Translation callback for in2out fragments (non-hairpinned).
 *
 * Looks up the session and BIB entries recorded for this fragment chain
 * and rewrites the IPv4 addresses; on the first fragment (the one that
 * carries the L4 header) it also rewrites the source port and, for TCP,
 * incrementally updates the checksum.
 *
 * @param ip6 Original IPv6 header.
 * @param ip4 IPv4 header being built.
 * @param arg Pointer to a nat64_in2out_frag_set_ctx_t.
 *
 * @return 0 on success, -1 when the session or BIB entry is gone.
 */
static int
nat64_in2out_frag_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_frag_set_ctx_t *ctx = arg;
  nat64_db_st_entry_t *ste;
  nat64_db_bib_entry_t *bibe;
  udp_header_t *udp;
  /* Per-worker DB: sessions created by this thread live here. */
  nat64_db_t *db = &nm->db[ctx->thread_index];

  ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
  if (!bibe)
    return -1;

  nat64_session_reset_timeout (ste, ctx->vm);

  if (ctx->first_frag)
    {
      udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);

      if (ctx->proto == IP_PROTOCOL_TCP)
	{
	  u16 *checksum;
	  ip_csum_t csum;
	  tcp_header_t *tcp = (tcp_header_t *) udp;

	  /* Incremental checksum update (RFC 1624 style): subtract the
	   * old pseudo-header/port contributions, add the new ones.
	   * NOTE(review): only TCP checksums are adjusted here; UDP
	   * fragments are left untouched -- confirm intended. */
	  checksum = &tcp->checksum;
	  csum = ip_csum_sub_even (*checksum, tcp->src_port);
	  csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[0]);
	  csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
	  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
	  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
	  csum = ip_csum_add_even (csum, bibe->out_port);
	  csum = ip_csum_add_even (csum, bibe->out_addr.as_u32);
	  csum = ip_csum_add_even (csum, ste->out_r_addr.as_u32);
	  *checksum = ip_csum_fold (csum);
	}

      udp->src_port = bibe->out_port;
    }

  /* All fragments get the translated outside addresses. */
  ip4->src_address.as_u32 = bibe->out_addr.as_u32;
  ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;

  return 0;
}
1199
/**
 * @brief Hairpinning rewrite for in2out fragments.
 *
 * The destination maps back to an inside host, so the packet stays IPv6:
 * the source address becomes the NAT64-composed outside mapping and the
 * destination becomes the inside address of the matching BIB entry.
 * Ports and the L4 checksum are only touched on the first fragment,
 * since later fragments carry no L4 header.
 *
 * @param b   Buffer (unused directly; kept for callback symmetry).
 * @param ip6 IPv6 header to rewrite in place.
 * @param ctx Fragment translation context.
 *
 * @return 0 on success, -1 when a session/BIB lookup fails.
 */
static int
nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6,
			       nat64_in2out_frag_set_ctx_t * ctx)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_st_entry_t *ste;
  nat64_db_bib_entry_t *bibe;
  udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);
  tcp_header_t *tcp = (tcp_header_t *) udp;
  u16 sport = udp->src_port;
  u16 dport = udp->dst_port;
  /* checksum/csum are only assigned and used under first_frag. */
  u16 *checksum;
  ip_csum_t csum;
  ip46_address_t daddr;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  if (ctx->first_frag)
    {
      if (ctx->proto == IP_PROTOCOL_UDP)
	checksum = &udp->checksum;
      else
	checksum = &tcp->checksum;

      /* Remove the old pseudo-header and port contributions; the new
       * values are added back after the rewrite below. */
      csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
      csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
      csum = ip_csum_sub_even (csum, sport);
      csum = ip_csum_sub_even (csum, dport);
    }

  ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
  if (!bibe)
    return -1;

  nat64_session_reset_timeout (ste, ctx->vm);

  sport = bibe->out_port;
  dport = ste->r_port;

  /* Source becomes the outside mapping embedded in the NAT64 prefix. */
  nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, bibe->fib_index);

  memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ste->out_r_addr.as_u32;

  /* The reverse BIB entry may live in any worker's DB: scan them all. */
  bibe = 0;
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      bibe = nat64_db_bib_entry_find (db, &daddr, dport, ctx->proto, 0, 0);

      if (bibe)
        break;
    }
  /* *INDENT-ON* */

  if (!bibe)
    return -1;

  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];

  if (ctx->first_frag)
    {
      udp->dst_port = bibe->in_port;
      udp->src_port = sport;
      /* Fold the rewritten pseudo-header and ports back in. */
      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
      csum = ip_csum_add_even (csum, udp->src_port);
      csum = ip_csum_add_even (csum, udp->dst_port);
      *checksum = ip_csum_fold (csum);
    }

  return 0;
}
1281
/**
 * @brief NAT64 in2out fragment handling node ("nat64-in2out-reass").
 *
 * Virtual reassembly: the first fragment (offset 0) creates/looks up the
 * session and records its index on the reassembly context; out-of-order
 * non-first fragments arriving before the first are cached and re-fed
 * through this node ("looped back") once the session exists.  Only TCP
 * and UDP fragments are supported.
 *
 * @return Number of vectors (buffers) in the frame.
 */
static uword
nat64_in2out_reass_node_fn (vlib_main_t * vm,
			    vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  nat64_in2out_next_t next_index;
  u32 pkts_processed = 0;
  /* Buffers evicted by the reassembly layer, dropped at the end. */
  u32 *fragments_to_drop = 0;
  /* Cached fragments released for re-processing in this same frame. */
  u32 *fragments_to_loopback = 0;
  nat64_main_t *nm = &nat64_main;
  u32 thread_index = vlib_get_thread_index ();

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0;
	  vlib_buffer_t *b0;
	  u32 next0;
	  u8 cached0 = 0;
	  ip6_header_t *ip60;
	  u16 l4_offset0, frag_offset0;
	  u8 l4_protocol0;
	  nat_reass_ip6_t *reass0;
	  ip6_frag_hdr_t *frag0;
	  nat64_db_bib_entry_t *bibe0;
	  nat64_db_st_entry_t *ste0;
	  udp_header_t *udp0;
	  snat_protocol_t proto0;
	  u32 sw_if_index0, fib_index0;
	  ip46_address_t saddr0, daddr0;
	  nat64_in2out_frag_set_ctx_t ctx0;
	  nat64_db_t *db = &nm->db[thread_index];

	  /* speculatively enqueue b0 to the current next frame */
	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  b0 = vlib_get_buffer (vm, bi0);
	  next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;

	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
	  fib_index0 =
	    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6,
						 sw_if_index0);

	  ctx0.thread_index = thread_index;

	  /* Global "drop all fragments" policy check. */
	  if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
	    {
	      next0 = NAT64_IN2OUT_NEXT_DROP;
	      b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
	      goto trace0;
	    }

	  ip60 = (ip6_header_t *) vlib_buffer_get_current (b0);

	  if (PREDICT_FALSE
	      (ip6_parse
	       (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
		&frag_offset0)))
	    {
	      next0 = NAT64_IN2OUT_NEXT_DROP;
	      b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
	      goto trace0;
	    }

	  /* Only TCP and UDP fragments are translated. */
	  if (PREDICT_FALSE
	      (!(l4_protocol0 == IP_PROTOCOL_TCP
		 || l4_protocol0 == IP_PROTOCOL_UDP)))
	    {
	      next0 = NAT64_IN2OUT_NEXT_DROP;
	      b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
	      goto trace0;
	    }

	  udp0 = (udp_header_t *) u8_ptr_add (ip60, l4_offset0);
	  frag0 = (ip6_frag_hdr_t *) u8_ptr_add (ip60, frag_offset0);
	  proto0 = ip_proto_to_snat_proto (l4_protocol0);

	  /* Find or create reassembly state keyed by the fragment chain. */
	  reass0 = nat_ip6_reass_find_or_create (ip60->src_address,
						 ip60->dst_address,
						 frag0->identification,
						 l4_protocol0,
						 1, &fragments_to_drop);

	  if (PREDICT_FALSE (!reass0))
	    {
	      next0 = NAT64_IN2OUT_NEXT_DROP;
	      b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_REASS];
	      goto trace0;
	    }

	  if (PREDICT_TRUE (ip6_frag_hdr_offset (frag0)))
	    {
	      /* Non-first fragment: if no session exists yet (first
	       * fragment not seen), cache it for later loopback. */
	      ctx0.first_frag = 0;
	      if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
		{
		  if (nat_ip6_reass_add_fragment (reass0, bi0))
		    {
		      b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_FRAG];
		      next0 = NAT64_IN2OUT_NEXT_DROP;
		      goto trace0;
		    }
		  cached0 = 1;
		  goto trace0;
		}
	    }
	  else
	    {
	      /* First fragment: carries the L4 header, so the session can
	       * be found or created here. */
	      ctx0.first_frag = 1;

	      saddr0.as_u64[0] = ip60->src_address.as_u64[0];
	      saddr0.as_u64[1] = ip60->src_address.as_u64[1];
	      daddr0.as_u64[0] = ip60->dst_address.as_u64[0];
	      daddr0.as_u64[1] = ip60->dst_address.as_u64[1];

	      ste0 =
		nat64_db_st_entry_find (db, &saddr0, &daddr0,
					udp0->src_port, udp0->dst_port,
					l4_protocol0, fib_index0, 1);
	      if (!ste0)
		{
		  bibe0 =
		    nat64_db_bib_entry_find (db, &saddr0, udp0->src_port,
					     l4_protocol0, fib_index0, 1);
		  if (!bibe0)
		    {
		      /* No BIB entry yet: allocate an outside address and
		       * port, then create the dynamic BIB entry. */
		      u16 out_port0;
		      ip4_address_t out_addr0;
		      if (nat64_alloc_out_addr_and_port
			  (fib_index0, proto0, &out_addr0, &out_port0,
			   thread_index))
			{
			  next0 = NAT64_IN2OUT_NEXT_DROP;
			  b0->error =
			    node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
			  goto trace0;
			}

		      bibe0 =
			nat64_db_bib_entry_create (db,
						   &ip60->src_address,
						   &out_addr0, udp0->src_port,
						   clib_host_to_net_u16
						   (out_port0), fib_index0,
						   l4_protocol0, 0);
		      if (!bibe0)
			{
			  next0 = NAT64_IN2OUT_NEXT_DROP;
			  b0->error =
			    node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
			  goto trace0;
			}
		    }
		  /* Destination IPv4 address is embedded in the NAT64
		   * prefix of the IPv6 destination. */
		  nat64_extract_ip4 (&ip60->dst_address, &daddr0.ip4,
				     fib_index0);
		  ste0 =
		    nat64_db_st_entry_create (db, bibe0,
					      &ip60->dst_address, &daddr0.ip4,
					      udp0->dst_port);
		  if (!ste0)
		    {
		      next0 = NAT64_IN2OUT_NEXT_DROP;
		      b0->error =
			node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		      goto trace0;
		    }
		}
	      reass0->sess_index = nat64_db_st_entry_get_index (db, ste0);

	      /* Release any fragments cached before the first arrived. */
	      nat_ip6_reass_get_frags (reass0, &fragments_to_loopback);
	    }

	  ctx0.sess_index = reass0->sess_index;
	  ctx0.proto = l4_protocol0;
	  ctx0.vm = vm;
	  ctx0.l4_offset = l4_offset0;

	  if (PREDICT_FALSE (is_hairpinning (&ip60->dst_address)))
	    {
	      /* Hairpinned fragment stays IPv6. */
	      next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
	      if (nat64_in2out_frag_hairpinning (b0, ip60, &ctx0))
		{
		  next0 = NAT64_IN2OUT_NEXT_DROP;
		  b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		}
	      goto trace0;
	    }
	  else
	    {
	      if (ip6_to_ip4_fragmented (b0, nat64_in2out_frag_set_cb, &ctx0))
		{
		  next0 = NAT64_IN2OUT_NEXT_DROP;
		  b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
		  goto trace0;
		}
	    }

	trace0:
	  if (PREDICT_FALSE
	      ((node->flags & VLIB_NODE_FLAG_TRACE)
	       && (b0->flags & VLIB_BUFFER_IS_TRACED)))
	    {
	      nat64_in2out_reass_trace_t *t =
		vlib_add_trace (vm, node, b0, sizeof (*t));
	      t->cached = cached0;
	      t->sw_if_index = sw_if_index0;
	      t->next_index = next0;
	    }

	  if (cached0)
	    {
	      /* Buffer was cached by the reassembly layer: back out the
	       * speculative enqueue. */
	      n_left_to_next++;
	      to_next--;
	    }
	  else
	    {
	      pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP;

	      /* verify speculative enqueue, maybe switch current next frame */
	      vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					       to_next, n_left_to_next,
					       bi0, next0);
	    }

	  /* Frame exhausted: refill it with released fragments so they are
	   * processed in this same dispatch. */
	  if (n_left_from == 0 && vec_len (fragments_to_loopback))
	    {
	      from = vlib_frame_vector_args (frame);
	      u32 len = vec_len (fragments_to_loopback);
	      if (len <= VLIB_FRAME_SIZE)
		{
		  clib_memcpy (from, fragments_to_loopback,
			       sizeof (u32) * len);
		  n_left_from = len;
		  vec_reset_length (fragments_to_loopback);
		}
	      else
		{
		  /* More than one frame's worth: take the tail now, keep
		   * the rest for the next refill. */
		  clib_memcpy (from,
			       fragments_to_loopback + (len -
							VLIB_FRAME_SIZE),
			       sizeof (u32) * VLIB_FRAME_SIZE);
		  n_left_from = VLIB_FRAME_SIZE;
		  _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
		}
	    }
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  vlib_node_increment_counter (vm, nat64_in2out_reass_node.index,
			       NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
			       pkts_processed);

  nat_send_all_to_node (vm, fragments_to_drop, node,
			&node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT],
			NAT64_IN2OUT_NEXT_DROP);

  vec_free (fragments_to_drop);
  vec_free (fragments_to_loopback);
  return frame->n_vectors;
}
1558
/* *INDENT-OFF* */
/** Graph node registration: NAT64 in2out fragment (virtual reassembly). */
VLIB_REGISTER_NODE (nat64_in2out_reass_node) = {
  .function = nat64_in2out_reass_node_fn,
  .name = "nat64-in2out-reass",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_reass_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
  .error_strings = nat64_in2out_error_strings,
  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_reass_node,
			      nat64_in2out_reass_node_fn);
1582
/**
 * @brief Trace record for the worker handoff node.
 */
typedef struct
{
  u32 next_worker_index;	/**< worker thread the packet maps to */
  u8 do_handoff;		/**< 1 if handed to another worker, 0 if kept */
} nat64_in2out_handoff_trace_t;
1588
1589 static u8 *
1590 format_nat64_in2out_handoff_trace (u8 * s, va_list * args)
1591 {
1592   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1593   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1594   nat64_in2out_handoff_trace_t *t =
1595     va_arg (*args, nat64_in2out_handoff_trace_t *);
1596   char *m;
1597
1598   m = t->do_handoff ? "next worker" : "same worker";
1599   s = format (s, "NAT64-IN2OUT-HANDOFF: %s %d", m, t->next_worker_index);
1600
1601   return s;
1602 }
1603
/**
 * @brief Worker handoff node for NAT64 in2out traffic.
 *
 * Hashes each packet's IPv6 source address to a worker thread.  Packets
 * that map to the current thread go straight to "nat64-in2out"; others
 * are enqueued onto per-worker frame queues.  Packets destined for a
 * congested worker queue are dropped via the error node.
 *
 * @return Number of vectors (buffers) in the frame.
 */
static inline uword
nat64_in2out_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
			      vlib_frame_t * frame)
{
  nat64_main_t *nm = &nat64_main;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0;
  /* Per-thread caches of in-flight handoff queue elements, one slot per
   * worker; lazily sized on first use. */
  static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
  static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
    = 0;
  vlib_frame_queue_elt_t *hf = 0;
  vlib_frame_queue_t *fq;
  vlib_frame_t *f = 0, *d = 0;
  int i;
  u32 n_left_to_next_worker = 0, *to_next_worker = 0;
  u32 next_worker_index = 0;
  u32 current_worker_index = ~0;
  u32 thread_index = vlib_get_thread_index ();
  u32 fq_index;
  u32 to_node_index;

  fq_index = nm->fq_in2out_index;
  to_node_index = nat64_in2out_node.index;

  /* One-time per-thread initialization of the queue caches. */
  if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
    {
      vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);

      vec_validate_init_empty (congested_handoff_queue_by_worker_index,
			       tm->n_vlib_mains - 1,
			       (vlib_frame_queue_t *) (~0));
    }

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  while (n_left_from > 0)
    {
      u32 bi0;
      vlib_buffer_t *b0;
      ip6_header_t *ip0;
      u8 do_handoff;

      bi0 = from[0];
      from += 1;
      n_left_from -= 1;

      b0 = vlib_get_buffer (vm, bi0);

      ip0 = vlib_buffer_get_current (b0);

      /* Worker selection is keyed on the IPv6 source address so a flow
       * always lands on the same thread. */
      next_worker_index = nat64_get_worker_in2out (&ip0->src_address);

      if (PREDICT_FALSE (next_worker_index != thread_index))
	{
	  do_handoff = 1;

	  if (next_worker_index != current_worker_index)
	    {
	      /* Congestion check (threshold 30); congested queues get the
	       * packet dropped rather than enqueued. */
	      fq =
		is_vlib_frame_queue_congested (fq_index, next_worker_index,
					       30,
					       congested_handoff_queue_by_worker_index);

	      if (fq)
		{
		  /* if this is 1st frame */
		  if (!d)
		    {
		      d = vlib_get_frame_to_node (vm, nm->error_node_index);
		      to_next_drop = vlib_frame_vector_args (d);
		    }

		  to_next_drop[0] = bi0;
		  to_next_drop += 1;
		  d->n_vectors++;
		  goto trace0;
		}

	      /* Switching target worker: record how full the previous
	       * element got before grabbing the next one. */
	      if (hf)
		hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;

	      hf =
		vlib_get_worker_handoff_queue_elt (fq_index,
						   next_worker_index,
						   handoff_queue_elt_by_worker_index);
	      n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
	      to_next_worker = &hf->buffer_index[hf->n_vectors];
	      current_worker_index = next_worker_index;
	    }

	  /* enqueue to correct worker thread */
	  to_next_worker[0] = bi0;
	  to_next_worker++;
	  n_left_to_next_worker--;

	  if (n_left_to_next_worker == 0)
	    {
	      /* Element full: ship it now and start a fresh one next time. */
	      hf->n_vectors = VLIB_FRAME_SIZE;
	      vlib_put_frame_queue_elt (hf);
	      current_worker_index = ~0;
	      handoff_queue_elt_by_worker_index[next_worker_index] = 0;
	      hf = 0;
	    }
	}
      else
	{
	  do_handoff = 0;
	  /* if this is 1st frame */
	  if (!f)
	    {
	      f = vlib_get_frame_to_node (vm, to_node_index);
	      to_next = vlib_frame_vector_args (f);
	    }

	  to_next[0] = bi0;
	  to_next += 1;
	  f->n_vectors++;
	}

    trace0:
      if (PREDICT_FALSE
	  ((node->flags & VLIB_NODE_FLAG_TRACE)
	   && (b0->flags & VLIB_BUFFER_IS_TRACED)))
	{
	  nat64_in2out_handoff_trace_t *t =
	    vlib_add_trace (vm, node, b0, sizeof (*t));
	  t->next_worker_index = next_worker_index;
	  t->do_handoff = do_handoff;
	}
    }

  /* Flush the locally-kept frame and the drop frame, if any. */
  if (f)
    vlib_put_frame_to_node (vm, to_node_index, f);

  if (d)
    vlib_put_frame_to_node (vm, nm->error_node_index, d);

  if (hf)
    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;

  /* Ship frames to the worker nodes */
  for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
    {
      if (handoff_queue_elt_by_worker_index[i])
	{
	  hf = handoff_queue_elt_by_worker_index[i];
	  /*
	   * It works better to let the handoff node
	   * rate-adapt, always ship the handoff queue element.
	   */
	  if (1 || hf->n_vectors == hf->last_n_vectors)
	    {
	      vlib_put_frame_queue_elt (hf);
	      handoff_queue_elt_by_worker_index[i] = 0;
	    }
	  else
	    hf->last_n_vectors = hf->n_vectors;
	}
      congested_handoff_queue_by_worker_index[i] =
	(vlib_frame_queue_t *) (~0);
    }
  hf = 0;
  current_worker_index = ~0;
  return frame->n_vectors;
}
1770
/* *INDENT-OFF* */
/** Graph node registration: NAT64 in2out worker handoff. */
VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = {
  .function = nat64_in2out_handoff_node_fn,
  .name = "nat64-in2out-handoff",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_handoff_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_handoff_node,
			      nat64_in2out_handoff_node_fn);
1789
1790 /*
1791  * fd.io coding-style-patch-verification: ON
1792  *
1793  * Local Variables:
1794  * eval: (c-set-style "gnu")
1795  * End:
1796  */