NAT44: code cleanup and refactor (VPP-1285)
[vpp.git] / src / plugins / nat / nat64_in2out.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT64 IPv6 to IPv4 translation (inside to outside network)
18  */
19
20 #include <nat/nat64.h>
21 #include <nat/nat_reass.h>
22 #include <nat/nat_inlines.h>
23 #include <vnet/ip/ip6_to_ip4.h>
24 #include <vnet/fib/fib_table.h>
25
/**
 * @brief Trace record rendered by format_nat64_in2out_trace().
 */
typedef struct
{
  /* Interface index printed as "sw_if_index" in the trace line. */
  u32 sw_if_index;
  /* Next-node index printed as "next index" in the trace line. */
  u32 next_index;
  /* Non-zero selects the "NAT64-in2out-slowpath" tag when formatted. */
  u8 is_slow_path;
} nat64_in2out_trace_t;
32
33 static u8 *
34 format_nat64_in2out_trace (u8 * s, va_list * args)
35 {
36   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
37   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
38   nat64_in2out_trace_t *t = va_arg (*args, nat64_in2out_trace_t *);
39   char *tag;
40
41   tag = t->is_slow_path ? "NAT64-in2out-slowpath" : "NAT64-in2out";
42
43   s =
44     format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
45             t->next_index);
46
47   return s;
48 }
49
/**
 * @brief Trace record rendered by format_nat64_in2out_reass_trace().
 */
typedef struct
{
  /* Interface index printed as "sw_if_index" in the trace line. */
  u32 sw_if_index;
  /* Next-node index printed as "next index" in the trace line. */
  u32 next_index;
  /* Non-zero prints status "cached", zero prints "translated". */
  u8 cached;
} nat64_in2out_reass_trace_t;
56
57 static u8 *
58 format_nat64_in2out_reass_trace (u8 * s, va_list * args)
59 {
60   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
61   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
62   nat64_in2out_reass_trace_t *t =
63     va_arg (*args, nat64_in2out_reass_trace_t *);
64
65   s =
66     format (s, "NAT64-in2out-reass: sw_if_index %d, next index %d, status %s",
67             t->sw_if_index, t->next_index,
68             t->cached ? "cached" : "translated");
69
70   return s;
71 }
72
/*
 * Node registration objects for the NAT64 in2out graph nodes.  They are
 * declared up front so that code below can refer to them (for example the
 * node function reads the .index of the fast-path and slow-path nodes).
 * NOTE(review): the actual registrations are presumably provided further
 * down in this file -- not visible from here.
 */
vlib_node_registration_t nat64_in2out_node;
vlib_node_registration_t nat64_in2out_slowpath_node;
vlib_node_registration_t nat64_in2out_reass_node;
vlib_node_registration_t nat64_in2out_handoff_node;
77
/*
 * X-macro list of the node's error counters as (symbol, description) pairs.
 * It is expanded twice below with different definitions of _(): once to
 * build nat64_in2out_error_t and once to build nat64_in2out_error_strings,
 * which keeps the enum values and the strings in the same order.
 */
#define foreach_nat64_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "unsupported protocol")          \
_(IN2OUT_PACKETS, "good in2out packets processed")       \
_(NO_TRANSLATION, "no translation")                      \
_(UNKNOWN, "unknown")                                    \
_(DROP_FRAGMENT, "Drop fragment")                        \
_(MAX_REASS, "Maximum reassemblies exceeded")            \
_(MAX_FRAG, "Maximum fragments per reassembly exceeded")
86
87
/*
 * Error counter indices: one NAT64_IN2OUT_ERROR_<sym> per entry of
 * foreach_nat64_in2out_error, followed by NAT64_IN2OUT_N_ERROR which equals
 * the number of entries.
 */
typedef enum
{
#define _(sym,str) NAT64_IN2OUT_ERROR_##sym,
  foreach_nat64_in2out_error
#undef _
    NAT64_IN2OUT_N_ERROR,
} nat64_in2out_error_t;
95
/*
 * Human-readable descriptions of the error counters, generated from
 * foreach_nat64_in2out_error so that entry N describes the N-th
 * nat64_in2out_error_t value.
 */
static char *nat64_in2out_error_strings[] = {
#define _(sym,string) string,
  foreach_nat64_in2out_error
#undef _
};
101
/*
 * Next-node indices used by the in2out node function.
 */
typedef enum
{
  /* Default disposition chosen for each packet before any checks run. */
  NAT64_IN2OUT_NEXT_IP4_LOOKUP,
  /* Chosen for hairpinned packets (destination is a NAT64 pool address). */
  NAT64_IN2OUT_NEXT_IP6_LOOKUP,
  /* Chosen when parsing or translation fails. */
  NAT64_IN2OUT_NEXT_DROP,
  /* Chosen by the fast path when the L4 protocol maps to no SNAT protocol. */
  NAT64_IN2OUT_NEXT_SLOWPATH,
  /* Chosen when the IPv6 next header is the fragmentation header. */
  NAT64_IN2OUT_NEXT_REASS,
  /* Number of next indices; not a valid disposition itself. */
  NAT64_IN2OUT_N_NEXT,
} nat64_in2out_next_t;
111
/*
 * Opaque argument handed to the *_set_cb translation callbacks below.
 */
typedef struct nat64_in2out_set_ctx_t_
{
  /* Buffer being translated; callbacks read its RX sw_if_index. */
  vlib_buffer_t *b;
  /* Passed on to nat64_session_reset_timeout(). */
  vlib_main_t *vm;
  /* Index into nat64_main.db selecting the database the callbacks use. */
  u32 thread_index;
} nat64_in2out_set_ctx_t;
118
119 /**
120  * @brief Check whether is a hairpinning.
121  *
122  * If the destination IP address of the packet is an IPv4 address assigned to
123  * the NAT64 itself, then the packet is a hairpin packet.
124  *
125  * param dst_addr Destination address of the packet.
126  *
127  * @returns 1 if hairpinning, otherwise 0.
128  */
129 static_always_inline int
130 is_hairpinning (ip6_address_t * dst_addr)
131 {
132   nat64_main_t *nm = &nat64_main;
133   int i;
134
135   for (i = 0; i < vec_len (nm->addr_pool); i++)
136     {
137       if (nm->addr_pool[i].addr.as_u32 == dst_addr->as_u32[3])
138         return 1;
139     }
140
141   return 0;
142 }
143
144 static int
145 nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
146                              void *arg)
147 {
148   nat64_main_t *nm = &nat64_main;
149   nat64_in2out_set_ctx_t *ctx = arg;
150   nat64_db_bib_entry_t *bibe;
151   nat64_db_st_entry_t *ste;
152   ip46_address_t saddr, daddr;
153   u32 sw_if_index, fib_index;
154   udp_header_t *udp = ip6_next_header (ip6);
155   u8 proto = ip6->protocol;
156   u16 sport = udp->src_port;
157   u16 dport = udp->dst_port;
158   nat64_db_t *db = &nm->db[ctx->thread_index];
159
160   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
161   fib_index =
162     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
163
164   saddr.as_u64[0] = ip6->src_address.as_u64[0];
165   saddr.as_u64[1] = ip6->src_address.as_u64[1];
166   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
167   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
168
169   ste =
170     nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
171                             fib_index, 1);
172
173   if (ste)
174     {
175       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
176       if (!bibe)
177         return -1;
178     }
179   else
180     {
181       bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1);
182
183       if (!bibe)
184         {
185           u16 out_port;
186           ip4_address_t out_addr;
187           if (nat64_alloc_out_addr_and_port
188               (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
189                &out_port, ctx->thread_index))
190             return -1;
191
192           bibe =
193             nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr,
194                                        sport, out_port, fib_index, proto, 0);
195           if (!bibe)
196             return -1;
197         }
198
199       nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
200       ste =
201         nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
202                                   &daddr.ip4, dport);
203       if (!ste)
204         return -1;
205     }
206
207   nat64_session_reset_timeout (ste, ctx->vm);
208
209   ip4->src_address.as_u32 = bibe->out_addr.as_u32;
210   udp->src_port = bibe->out_port;
211
212   ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
213
214   if (proto == IP_PROTOCOL_TCP)
215     {
216       u16 *checksum;
217       ip_csum_t csum;
218       tcp_header_t *tcp = ip6_next_header (ip6);
219
220       checksum = &tcp->checksum;
221       csum = ip_csum_sub_even (*checksum, sport);
222       csum = ip_csum_add_even (csum, udp->src_port);
223       *checksum = ip_csum_fold (csum);
224     }
225
226   return 0;
227 }
228
229 static int
230 nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
231 {
232   nat64_main_t *nm = &nat64_main;
233   nat64_in2out_set_ctx_t *ctx = arg;
234   nat64_db_bib_entry_t *bibe;
235   nat64_db_st_entry_t *ste;
236   ip46_address_t saddr, daddr;
237   u32 sw_if_index, fib_index;
238   icmp46_header_t *icmp = ip6_next_header (ip6);
239   nat64_db_t *db = &nm->db[ctx->thread_index];
240
241   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
242   fib_index =
243     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
244
245   saddr.as_u64[0] = ip6->src_address.as_u64[0];
246   saddr.as_u64[1] = ip6->src_address.as_u64[1];
247   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
248   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
249
250   if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply)
251     {
252       u16 in_id = ((u16 *) (icmp))[2];
253       ste =
254         nat64_db_st_entry_find (db, &saddr, &daddr, in_id, 0,
255                                 IP_PROTOCOL_ICMP, fib_index, 1);
256
257       if (ste)
258         {
259           bibe =
260             nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP,
261                                          ste->bibe_index);
262           if (!bibe)
263             return -1;
264         }
265       else
266         {
267           bibe =
268             nat64_db_bib_entry_find (db, &saddr, in_id,
269                                      IP_PROTOCOL_ICMP, fib_index, 1);
270
271           if (!bibe)
272             {
273               u16 out_id;
274               ip4_address_t out_addr;
275               if (nat64_alloc_out_addr_and_port
276                   (fib_index, SNAT_PROTOCOL_ICMP, &out_addr, &out_id,
277                    ctx->thread_index))
278                 return -1;
279
280               bibe =
281                 nat64_db_bib_entry_create (db, &ip6->src_address,
282                                            &out_addr, in_id, out_id,
283                                            fib_index, IP_PROTOCOL_ICMP, 0);
284               if (!bibe)
285                 return -1;
286             }
287
288           nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
289           ste =
290             nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
291                                       &daddr.ip4, 0);
292           if (!ste)
293             return -1;
294         }
295
296       nat64_session_reset_timeout (ste, ctx->vm);
297
298       ip4->src_address.as_u32 = bibe->out_addr.as_u32;
299       ((u16 *) (icmp))[2] = bibe->out_port;
300
301       ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
302     }
303   else
304     {
305       if (!vec_len (nm->addr_pool))
306         return -1;
307
308       ip4->src_address.as_u32 = nm->addr_pool[0].addr.as_u32;
309       nat64_extract_ip4 (&ip6->dst_address, &ip4->dst_address, fib_index);
310     }
311
312   return 0;
313 }
314
315 static int
316 nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
317                                 void *arg)
318 {
319   nat64_main_t *nm = &nat64_main;
320   nat64_in2out_set_ctx_t *ctx = arg;
321   nat64_db_st_entry_t *ste;
322   nat64_db_bib_entry_t *bibe;
323   ip46_address_t saddr, daddr;
324   u32 sw_if_index, fib_index;
325   u8 proto = ip6->protocol;
326   nat64_db_t *db = &nm->db[ctx->thread_index];
327
328   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
329   fib_index =
330     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
331
332   saddr.as_u64[0] = ip6->src_address.as_u64[0];
333   saddr.as_u64[1] = ip6->src_address.as_u64[1];
334   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
335   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
336
337   if (proto == IP_PROTOCOL_ICMP6)
338     {
339       icmp46_header_t *icmp = ip6_next_header (ip6);
340       u16 in_id = ((u16 *) (icmp))[2];
341       proto = IP_PROTOCOL_ICMP;
342
343       if (!
344           (icmp->type == ICMP4_echo_request
345            || icmp->type == ICMP4_echo_reply))
346         return -1;
347
348       ste =
349         nat64_db_st_entry_find (db, &daddr, &saddr, in_id, 0, proto,
350                                 fib_index, 1);
351       if (!ste)
352         return -1;
353
354       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
355       if (!bibe)
356         return -1;
357
358       ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
359       ((u16 *) (icmp))[2] = bibe->out_port;
360       ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
361     }
362   else
363     {
364       udp_header_t *udp = ip6_next_header (ip6);
365       tcp_header_t *tcp = ip6_next_header (ip6);
366       u16 *checksum;
367       ip_csum_t csum;
368
369       u16 sport = udp->src_port;
370       u16 dport = udp->dst_port;
371
372       ste =
373         nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
374                                 fib_index, 1);
375       if (!ste)
376         return -1;
377
378       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
379       if (!bibe)
380         return -1;
381
382       ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
383       udp->dst_port = bibe->out_port;
384       ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
385
386       if (proto == IP_PROTOCOL_TCP)
387         checksum = &tcp->checksum;
388       else
389         checksum = &udp->checksum;
390       csum = ip_csum_sub_even (*checksum, dport);
391       csum = ip_csum_add_even (csum, udp->dst_port);
392       *checksum = ip_csum_fold (csum);
393     }
394
395   return 0;
396 }
397
/*
 * Argument block for unk_proto_st_walk().
 */
typedef struct unk_proto_st_walk_ctx_t_
{
  /* IPv6 source of the packet; matched against the BIB inside address. */
  ip6_address_t src_addr;
  /* IPv6 destination of the packet; matched against the session's
   * inside remote address. */
  ip6_address_t dst_addr;
  /* Result: outside IPv4 address chosen by the walk.  Callers initialise
   * it to 0 and treat 0 as "nothing found". */
  ip4_address_t out_addr;
  /* FIB index the BIB entry must belong to. */
  u32 fib_index;
  /* Index into nat64_main.db selecting the database to consult. */
  u32 thread_index;
  /* Protocol of the packet being translated; used to check whether the
   * candidate address already has a session for this protocol. */
  u8 proto;
} unk_proto_st_walk_ctx_t;
407
408 static int
409 unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg)
410 {
411   nat64_main_t *nm = &nat64_main;
412   unk_proto_st_walk_ctx_t *ctx = arg;
413   nat64_db_bib_entry_t *bibe;
414   ip46_address_t saddr, daddr;
415   nat64_db_t *db = &nm->db[ctx->thread_index];
416
417   if (ip46_address_is_equal (&ste->in_r_addr, &ctx->dst_addr))
418     {
419       bibe = nat64_db_bib_entry_by_index (db, ste->proto, ste->bibe_index);
420       if (!bibe)
421         return -1;
422
423       if (ip46_address_is_equal (&bibe->in_addr, &ctx->src_addr)
424           && bibe->fib_index == ctx->fib_index)
425         {
426           memset (&saddr, 0, sizeof (saddr));
427           saddr.ip4.as_u32 = bibe->out_addr.as_u32;
428           memset (&daddr, 0, sizeof (daddr));
429           nat64_extract_ip4 (&ctx->dst_addr, &daddr.ip4, ctx->fib_index);
430
431           if (nat64_db_st_entry_find
432               (db, &daddr, &saddr, 0, 0, ctx->proto, ctx->fib_index, 0))
433             return -1;
434
435           ctx->out_addr.as_u32 = bibe->out_addr.as_u32;
436           return 1;
437         }
438     }
439
440   return 0;
441 }
442
443 static int
444 nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
445                                void *arg)
446 {
447   nat64_main_t *nm = &nat64_main;
448   nat64_in2out_set_ctx_t *s_ctx = arg;
449   nat64_db_bib_entry_t *bibe;
450   nat64_db_st_entry_t *ste;
451   ip46_address_t saddr, daddr, addr;
452   u32 sw_if_index, fib_index;
453   u8 proto = ip6->protocol;
454   int i;
455   nat64_db_t *db = &nm->db[s_ctx->thread_index];
456
457   sw_if_index = vnet_buffer (s_ctx->b)->sw_if_index[VLIB_RX];
458   fib_index =
459     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
460
461   saddr.as_u64[0] = ip6->src_address.as_u64[0];
462   saddr.as_u64[1] = ip6->src_address.as_u64[1];
463   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
464   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
465
466   ste =
467     nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1);
468
469   if (ste)
470     {
471       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
472       if (!bibe)
473         return -1;
474     }
475   else
476     {
477       bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1);
478
479       if (!bibe)
480         {
481           /* Choose same out address as for TCP/UDP session to same dst */
482           unk_proto_st_walk_ctx_t ctx = {
483             .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
484             .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
485             .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
486             .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
487             .out_addr.as_u32 = 0,
488             .fib_index = fib_index,
489             .proto = proto,
490             .thread_index = s_ctx->thread_index,
491           };
492
493           nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);
494
495           if (!ctx.out_addr.as_u32)
496             nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);
497
498           /* Verify if out address is not already in use for protocol */
499           memset (&addr, 0, sizeof (addr));
500           addr.ip4.as_u32 = ctx.out_addr.as_u32;
501           if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
502             ctx.out_addr.as_u32 = 0;
503
504           if (!ctx.out_addr.as_u32)
505             {
506               for (i = 0; i < vec_len (nm->addr_pool); i++)
507                 {
508                   addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
509                   if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
510                     break;
511                 }
512             }
513
514           if (!ctx.out_addr.as_u32)
515             return -1;
516
517           bibe =
518             nat64_db_bib_entry_create (db, &ip6->src_address,
519                                        &ctx.out_addr, 0, 0, fib_index, proto,
520                                        0);
521           if (!bibe)
522             return -1;
523         }
524
525       nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
526       ste =
527         nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0);
528       if (!ste)
529         return -1;
530     }
531
532   nat64_session_reset_timeout (ste, s_ctx->vm);
533
534   ip4->src_address.as_u32 = bibe->out_addr.as_u32;
535   ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
536
537   return 0;
538 }
539
540
541
/**
 * @brief Rewrite a hairpinned TCP/UDP packet in place (IPv6 to IPv6).
 *
 * The source side is handled like a normal in2out translation: the session
 * is found or created in this thread's database and the packet's source
 * port and source address are replaced with the BIB outside port and the
 * IPv6 form of the BIB outside address (nat64_compose_ip6()).  The
 * destination is then resolved by searching every per-thread database for
 * a BIB entry whose outside address/port equal the session's IPv4 remote
 * address and the original destination port; the packet's destination
 * address and port become that entry's inside address and port.
 *
 * The L4 checksum is updated incrementally: the old addresses and ports
 * are subtracted up front and the new ones are added at the end.
 *
 * @param vm           vlib main, passed to nat64_session_reset_timeout().
 * @param b            Buffer holding the packet (RX sw_if_index is read).
 * @param ip6          IPv6 header of the packet; modified in place.
 * @param thread_index Index of the per-thread NAT64 database to use.
 *
 * @returns 0 on success, -1 if an entry could not be found or created.
 */
static int
nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
                                  ip6_header_t * ip6, u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  /* Both views point at the same L4 header; ports sit at the same offsets. */
  udp_header_t *udp = ip6_next_header (ip6);
  tcp_header_t *tcp = ip6_next_header (ip6);
  u8 proto = ip6->protocol;
  u16 sport = udp->src_port;
  u16 dport = udp->dst_port;
  u16 *checksum;
  ip_csum_t csum;
  nat64_db_t *db = &nm->db[thread_index];

  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  if (proto == IP_PROTOCOL_UDP)
    checksum = &udp->checksum;
  else
    checksum = &tcp->checksum;

  /* Remove the original addresses and ports from the checksum now, while
   * the header still holds them; the new values are added back below. */
  csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, sport);
  csum = ip_csum_sub_even (csum, dport);

  ste =
    nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
                            fib_index, 1);

  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
        return -1;
    }
  else
    {
      bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1);

      if (!bibe)
        {
          u16 out_port;
          ip4_address_t out_addr;
          if (nat64_alloc_out_addr_and_port
              (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
               &out_port, thread_index))
            return -1;

          bibe =
            nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr,
                                       sport, out_port, fib_index, proto, 0);
          if (!bibe)
            return -1;
        }

      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
      ste =
        nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
                                  &daddr.ip4, dport);
      if (!ste)
        return -1;
    }

  nat64_session_reset_timeout (ste, vm);

  /* Source side: outside port and IPv6 form of the outside address. */
  sport = udp->src_port = bibe->out_port;
  nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);

  /* Destination side: key the lookup on the session's IPv4 remote address. */
  memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ste->out_r_addr.as_u32;

  bibe = 0;
  /* The destination host's BIB entry may live in any thread's database;
   * note that 'db' is reused as the loop cursor from here on. */
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, 0, 0);

      if (bibe)
        break;
    }
  /* *INDENT-ON* */

  if (!bibe)
    return -1;

  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
  udp->dst_port = bibe->in_port;

  /* Add the rewritten addresses and ports back into the checksum. */
  csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
  csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
  csum = ip_csum_add_even (csum, udp->src_port);
  csum = ip_csum_add_even (csum, udp->dst_port);
  *checksum = ip_csum_fold (csum);

  return 0;
}
655
/**
 * @brief Rewrite a hairpinned ICMPv6 message carrying an embedded TCP/UDP
 *        packet, in place (IPv6 to IPv6).
 *
 * Echo request/reply messages and embedded ICMPv6 packets are rejected.
 * The embedded packet is treated as travelling in the opposite direction:
 * its session is looked up in this thread's database with addresses and
 * ports swapped, its destination is replaced with the BIB outside
 * address/port, and its source is replaced with the inside address/port of
 * the BIB entry belonging to the peer session, which is searched for in
 * every per-thread database.  The embedded L4 checksum is updated
 * incrementally.  Finally the outer header gets the first pool address (in
 * IPv6 form) as source and the embedded packet's new source as
 * destination, and the ICMPv6 checksum is recomputed from scratch.
 *
 * @param vm           vlib main (not used by this function).
 * @param b            Buffer holding the packet (RX sw_if_index is read).
 * @param ip6          Outer IPv6 header; modified in place.
 * @param thread_index Index of the per-thread NAT64 database to use.
 *
 * @returns 0 on success, -1 if the message is not handled or a required
 *          entry is missing.
 */
static int
nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
                               ip6_header_t * ip6, u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  icmp46_header_t *icmp = ip6_next_header (ip6);
  ip6_header_t *inner_ip6;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  u8 proto;
  udp_header_t *udp;
  tcp_header_t *tcp;
  u16 *checksum, sport, dport;
  ip_csum_t csum;
  nat64_db_t *db = &nm->db[thread_index];

  if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
    return -1;

  /* The embedded packet starts 8 bytes after the start of the ICMP header. */
  inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);

  proto = inner_ip6->protocol;

  if (proto == IP_PROTOCOL_ICMP6)
    return -1;

  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = inner_ip6->src_address.as_u64[0];
  saddr.as_u64[1] = inner_ip6->src_address.as_u64[1];
  daddr.as_u64[0] = inner_ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = inner_ip6->dst_address.as_u64[1];

  /* Both views point at the same embedded L4 header. */
  udp = ip6_next_header (inner_ip6);
  tcp = ip6_next_header (inner_ip6);

  sport = udp->src_port;
  dport = udp->dst_port;

  if (proto == IP_PROTOCOL_UDP)
    checksum = &udp->checksum;
  else
    checksum = &tcp->checksum;

  /* Remove the embedded packet's original addresses and ports from its L4
   * checksum; the rewritten values are added back further down. */
  csum = ip_csum_sub_even (*checksum, inner_ip6->src_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, sport);
  csum = ip_csum_sub_even (csum, dport);

  /* Swapped lookup: the embedded packet's destination is the session's
   * inside host and its source is the session's remote side. */
  ste =
    nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
                            fib_index, 1);
  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
  if (!bibe)
    return -1;

  dport = udp->dst_port = bibe->out_port;
  nat64_compose_ip6 (&inner_ip6->dst_address, &bibe->out_addr, fib_index);

  /* Build the IPv4 key of the peer session. */
  memset (&saddr, 0, sizeof (saddr));
  memset (&daddr, 0, sizeof (daddr));
  saddr.ip4.as_u32 = ste->out_r_addr.as_u32;
  daddr.ip4.as_u32 = bibe->out_addr.as_u32;

  ste = 0;
  /* The peer session may live in any thread's database; 'db' is reused as
   * the loop cursor and, after a hit, still points at the database that
   * holds the session, which the BIB lookup below relies on. */
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      ste = nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
                                    0, 0);

      if (ste)
        break;
    }
  /* *INDENT-ON* */

  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
  if (!bibe)
    return -1;

  inner_ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
  inner_ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
  udp->src_port = bibe->in_port;

  /* Add the rewritten addresses and ports back into the embedded checksum. */
  csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]);
  csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]);
  csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]);
  csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]);
  csum = ip_csum_add_even (csum, udp->src_port);
  csum = ip_csum_add_even (csum, udp->dst_port);
  *checksum = ip_csum_fold (csum);

  if (!vec_len (nm->addr_pool))
    return -1;

  /* Outer header: sourced from the first pool address, sent to the host
   * that is now the embedded packet's source. */
  nat64_compose_ip6 (&ip6->src_address, &nm->addr_pool[0].addr, fib_index);
  ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
  ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];

  /* Recompute the ICMPv6 checksum in full: pseudo-header fields first,
   * then the whole ICMP payload with its checksum field zeroed. */
  icmp->checksum = 0;
  csum = ip_csum_with_carry (0, ip6->payload_length);
  csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol));
  csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]);
  csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]);
  csum =
    ip_incremental_checksum (csum, icmp,
                             clib_net_to_host_u16 (ip6->payload_length));
  icmp->checksum = ~ip_csum_fold (csum);

  return 0;
}
781
782 static int
783 nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
784                                     ip6_header_t * ip6, u32 thread_index)
785 {
786   nat64_main_t *nm = &nat64_main;
787   nat64_db_bib_entry_t *bibe;
788   nat64_db_st_entry_t *ste;
789   ip46_address_t saddr, daddr, addr;
790   u32 sw_if_index, fib_index;
791   u8 proto = ip6->protocol;
792   int i;
793   nat64_db_t *db = &nm->db[thread_index];
794
795   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
796   fib_index =
797     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
798
799   saddr.as_u64[0] = ip6->src_address.as_u64[0];
800   saddr.as_u64[1] = ip6->src_address.as_u64[1];
801   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
802   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
803
804   ste =
805     nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1);
806
807   if (ste)
808     {
809       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
810       if (!bibe)
811         return -1;
812     }
813   else
814     {
815       bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1);
816
817       if (!bibe)
818         {
819           /* Choose same out address as for TCP/UDP session to same dst */
820           unk_proto_st_walk_ctx_t ctx = {
821             .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
822             .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
823             .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
824             .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
825             .out_addr.as_u32 = 0,
826             .fib_index = fib_index,
827             .proto = proto,
828             .thread_index = thread_index,
829           };
830
831           nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);
832
833           if (!ctx.out_addr.as_u32)
834             nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);
835
836           /* Verify if out address is not already in use for protocol */
837           memset (&addr, 0, sizeof (addr));
838           addr.ip4.as_u32 = ctx.out_addr.as_u32;
839           if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
840             ctx.out_addr.as_u32 = 0;
841
842           if (!ctx.out_addr.as_u32)
843             {
844               for (i = 0; i < vec_len (nm->addr_pool); i++)
845                 {
846                   addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
847                   if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
848                     break;
849                 }
850             }
851
852           if (!ctx.out_addr.as_u32)
853             return -1;
854
855           bibe =
856             nat64_db_bib_entry_create (db, &ip6->src_address,
857                                        &ctx.out_addr, 0, 0, fib_index, proto,
858                                        0);
859           if (!bibe)
860             return -1;
861         }
862
863       nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
864       ste =
865         nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0);
866       if (!ste)
867         return -1;
868     }
869
870   nat64_session_reset_timeout (ste, vm);
871
872   nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);
873
874   memset (&daddr, 0, sizeof (daddr));
875   daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
876
877   bibe = 0;
878   /* *INDENT-OFF* */
879   vec_foreach (db, nm->db)
880     {
881       bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, 0, 0);
882
883       if (bibe)
884         break;
885     }
886   /* *INDENT-ON* */
887
888   if (!bibe)
889     return -1;
890
891   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
892   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
893
894   return 0;
895 }
896
897 static inline uword
898 nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
899                              vlib_frame_t * frame, u8 is_slow_path)
900 {
901   u32 n_left_from, *from, *to_next;
902   nat64_in2out_next_t next_index;
903   u32 pkts_processed = 0;
904   u32 stats_node_index;
905   u32 thread_index = vlib_get_thread_index ();
906
907   stats_node_index =
908     is_slow_path ? nat64_in2out_slowpath_node.index : nat64_in2out_node.index;
909
910   from = vlib_frame_vector_args (frame);
911   n_left_from = frame->n_vectors;
912   next_index = node->cached_next_index;
913
914   while (n_left_from > 0)
915     {
916       u32 n_left_to_next;
917
918       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
919
920       while (n_left_from > 0 && n_left_to_next > 0)
921         {
922           u32 bi0;
923           vlib_buffer_t *b0;
924           u32 next0;
925           ip6_header_t *ip60;
926           u16 l4_offset0, frag_offset0;
927           u8 l4_protocol0;
928           u32 proto0;
929           nat64_in2out_set_ctx_t ctx0;
930
931           /* speculatively enqueue b0 to the current next frame */
932           bi0 = from[0];
933           to_next[0] = bi0;
934           from += 1;
935           to_next += 1;
936           n_left_from -= 1;
937           n_left_to_next -= 1;
938
939           b0 = vlib_get_buffer (vm, bi0);
940           ip60 = vlib_buffer_get_current (b0);
941
942           ctx0.b = b0;
943           ctx0.vm = vm;
944           ctx0.thread_index = thread_index;
945
946           next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;
947
948           if (PREDICT_FALSE
949               (ip6_parse
950                (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
951                 &frag_offset0)))
952             {
953               next0 = NAT64_IN2OUT_NEXT_DROP;
954               b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
955               goto trace0;
956             }
957
958           proto0 = ip_proto_to_snat_proto (l4_protocol0);
959
960           if (is_slow_path)
961             {
962               if (PREDICT_TRUE (proto0 == ~0))
963                 {
964                   if (is_hairpinning (&ip60->dst_address))
965                     {
966                       next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
967                       if (nat64_in2out_unk_proto_hairpinning
968                           (vm, b0, ip60, thread_index))
969                         {
970                           next0 = NAT64_IN2OUT_NEXT_DROP;
971                           b0->error =
972                             node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
973                         }
974                       goto trace0;
975                     }
976
977                   if (ip6_to_ip4 (b0, nat64_in2out_unk_proto_set_cb, &ctx0))
978                     {
979                       next0 = NAT64_IN2OUT_NEXT_DROP;
980                       b0->error =
981                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
982                       goto trace0;
983                     }
984                 }
985               goto trace0;
986             }
987           else
988             {
989               if (PREDICT_FALSE (proto0 == ~0))
990                 {
991                   next0 = NAT64_IN2OUT_NEXT_SLOWPATH;
992                   goto trace0;
993                 }
994             }
995
996           if (PREDICT_FALSE
997               (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION))
998             {
999               next0 = NAT64_IN2OUT_NEXT_REASS;
1000               goto trace0;
1001             }
1002
1003           if (proto0 == SNAT_PROTOCOL_ICMP)
1004             {
1005               if (is_hairpinning (&ip60->dst_address))
1006                 {
1007                   next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1008                   if (nat64_in2out_icmp_hairpinning
1009                       (vm, b0, ip60, thread_index))
1010                     {
1011                       next0 = NAT64_IN2OUT_NEXT_DROP;
1012                       b0->error =
1013                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1014                     }
1015                   goto trace0;
1016                 }
1017
1018               if (icmp6_to_icmp
1019                   (b0, nat64_in2out_icmp_set_cb, &ctx0,
1020                    nat64_in2out_inner_icmp_set_cb, &ctx0))
1021                 {
1022                   next0 = NAT64_IN2OUT_NEXT_DROP;
1023                   b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1024                   goto trace0;
1025                 }
1026             }
1027           else if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
1028             {
1029               if (is_hairpinning (&ip60->dst_address))
1030                 {
1031                   next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1032                   if (nat64_in2out_tcp_udp_hairpinning
1033                       (vm, b0, ip60, thread_index))
1034                     {
1035                       next0 = NAT64_IN2OUT_NEXT_DROP;
1036                       b0->error =
1037                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1038                     }
1039                   goto trace0;
1040                 }
1041
1042               if (ip6_to_ip4_tcp_udp
1043                   (b0, nat64_in2out_tcp_udp_set_cb, &ctx0, 0))
1044                 {
1045                   next0 = NAT64_IN2OUT_NEXT_DROP;
1046                   b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1047                   goto trace0;
1048                 }
1049             }
1050
1051         trace0:
1052           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1053                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1054             {
1055               nat64_in2out_trace_t *t =
1056                 vlib_add_trace (vm, node, b0, sizeof (*t));
1057               t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1058               t->next_index = next0;
1059               t->is_slow_path = is_slow_path;
1060             }
1061
1062           pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP;
1063
1064           /* verify speculative enqueue, maybe switch current next frame */
1065           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1066                                            n_left_to_next, bi0, next0);
1067         }
1068       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1069     }
1070   vlib_node_increment_counter (vm, stats_node_index,
1071                                NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
1072                                pkts_processed);
1073   return frame->n_vectors;
1074 }
1075
1076 static uword
1077 nat64_in2out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1078                       vlib_frame_t * frame)
1079 {
1080   return nat64_in2out_node_fn_inline (vm, node, frame, 0);
1081 }
1082
1083 /* *INDENT-OFF* */
1084 VLIB_REGISTER_NODE (nat64_in2out_node) = {
1085   .function = nat64_in2out_node_fn,
1086   .name = "nat64-in2out",
1087   .vector_size = sizeof (u32),
1088   .format_trace = format_nat64_in2out_trace,
1089   .type = VLIB_NODE_TYPE_INTERNAL,
1090   .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
1091   .error_strings = nat64_in2out_error_strings,
1092   .n_next_nodes = NAT64_IN2OUT_N_NEXT,
1093   /* edit / add dispositions here */
1094   .next_nodes = {
1095     [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
1096     [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
1097     [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
1098     [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
1099     [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
1100   },
1101 };
1102 /* *INDENT-ON* */
1103
1104 VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_node, nat64_in2out_node_fn);
1105
1106 static uword
1107 nat64_in2out_slowpath_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1108                                vlib_frame_t * frame)
1109 {
1110   return nat64_in2out_node_fn_inline (vm, node, frame, 1);
1111 }
1112
1113 /* *INDENT-OFF* */
1114 VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
1115   .function = nat64_in2out_slowpath_node_fn,
1116   .name = "nat64-in2out-slowpath",
1117   .vector_size = sizeof (u32),
1118   .format_trace = format_nat64_in2out_trace,
1119   .type = VLIB_NODE_TYPE_INTERNAL,
1120   .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
1121   .error_strings = nat64_in2out_error_strings,
1122   .n_next_nodes = NAT64_IN2OUT_N_NEXT,
1123   /* edit / add dispositions here */
1124   .next_nodes = {
1125     [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
1126     [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
1127     [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
1128     [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
1129     [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
1130   },
1131 };
1132 /* *INDENT-ON* */
1133
1134 VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_slowpath_node,
1135                               nat64_in2out_slowpath_node_fn);
1136
/**
 * Per-packet context handed to the fragment translation helpers
 * (nat64_in2out_frag_set_cb() and nat64_in2out_frag_hairpinning()).
 */
1137 typedef struct nat64_in2out_frag_set_ctx_t_
1138 {
     /* vlib main, passed on to nat64_session_reset_timeout() */
1139   vlib_main_t *vm;
     /* index used with nat64_db_st_entry_by_index() to fetch the session */
1140   u32 sess_index;
     /* selects the per-thread database nat64_main.db[thread_index] */
1141   u32 thread_index;
     /* byte offset from the IPv6 header to the L4 (UDP/TCP) header */
1142   u16 l4_offset;
     /* IP protocol number; compared against IP_PROTOCOL_TCP / IP_PROTOCOL_UDP */
1143   u8 proto;
     /* non-zero for the first fragment: only then are L4 ports/checksum rewritten */
1144   u8 first_frag;
1145 } nat64_in2out_frag_set_ctx_t;
1146
1147 static int
1148 nat64_in2out_frag_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
1149 {
1150   nat64_main_t *nm = &nat64_main;
1151   nat64_in2out_frag_set_ctx_t *ctx = arg;
1152   nat64_db_st_entry_t *ste;
1153   nat64_db_bib_entry_t *bibe;
1154   udp_header_t *udp;
1155   nat64_db_t *db = &nm->db[ctx->thread_index];
1156
1157   ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
1158   if (!ste)
1159     return -1;
1160
1161   bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
1162   if (!bibe)
1163     return -1;
1164
1165   nat64_session_reset_timeout (ste, ctx->vm);
1166
1167   if (ctx->first_frag)
1168     {
1169       udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);
1170
1171       if (ctx->proto == IP_PROTOCOL_TCP)
1172         {
1173           u16 *checksum;
1174           ip_csum_t csum;
1175           tcp_header_t *tcp = (tcp_header_t *) udp;
1176
1177           checksum = &tcp->checksum;
1178           csum = ip_csum_sub_even (*checksum, tcp->src_port);
1179           csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[0]);
1180           csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
1181           csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
1182           csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
1183           csum = ip_csum_add_even (csum, bibe->out_port);
1184           csum = ip_csum_add_even (csum, bibe->out_addr.as_u32);
1185           csum = ip_csum_add_even (csum, ste->out_r_addr.as_u32);
1186           *checksum = ip_csum_fold (csum);
1187         }
1188
1189       udp->src_port = bibe->out_port;
1190     }
1191
1192   ip4->src_address.as_u32 = bibe->out_addr.as_u32;
1193   ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
1194
1195   return 0;
1196 }
1197
1198 static int
1199 nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6,
1200                                nat64_in2out_frag_set_ctx_t * ctx)
1201 {
1202   nat64_main_t *nm = &nat64_main;
1203   nat64_db_st_entry_t *ste;
1204   nat64_db_bib_entry_t *bibe;
1205   udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);
1206   tcp_header_t *tcp = (tcp_header_t *) udp;
1207   u16 sport = udp->src_port;
1208   u16 dport = udp->dst_port;
1209   u16 *checksum;
1210   ip_csum_t csum;
1211   ip46_address_t daddr;
1212   nat64_db_t *db = &nm->db[ctx->thread_index];
1213
1214   if (ctx->first_frag)
1215     {
1216       if (ctx->proto == IP_PROTOCOL_UDP)
1217         checksum = &udp->checksum;
1218       else
1219         checksum = &tcp->checksum;
1220
1221       csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
1222       csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
1223       csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
1224       csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
1225       csum = ip_csum_sub_even (csum, sport);
1226       csum = ip_csum_sub_even (csum, dport);
1227     }
1228
1229   ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
1230   if (!ste)
1231     return -1;
1232
1233   bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
1234   if (!bibe)
1235     return -1;
1236
1237   nat64_session_reset_timeout (ste, ctx->vm);
1238
1239   sport = bibe->out_port;
1240   dport = ste->r_port;
1241
1242   nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, bibe->fib_index);
1243
1244   memset (&daddr, 0, sizeof (daddr));
1245   daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
1246
1247   bibe = 0;
1248   /* *INDENT-OFF* */
1249   vec_foreach (db, nm->db)
1250     {
1251       bibe = nat64_db_bib_entry_find (db, &daddr, dport, ctx->proto, 0, 0);
1252
1253       if (bibe)
1254         break;
1255     }
1256   /* *INDENT-ON* */
1257
1258   if (!bibe)
1259     return -1;
1260
1261   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
1262   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
1263
1264   if (ctx->first_frag)
1265     {
1266       udp->dst_port = bibe->in_port;
1267       udp->src_port = sport;
1268       csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
1269       csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
1270       csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
1271       csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
1272       csum = ip_csum_add_even (csum, udp->src_port);
1273       csum = ip_csum_add_even (csum, udp->dst_port);
1274       *checksum = ip_csum_fold (csum);
1275     }
1276
1277   return 0;
1278 }
1279
/**
 * Node function of "nat64-in2out-reass": translates fragmented IPv6
 * TCP/UDP packets.
 *
 * For every buffer in the frame:
 *  - it is dropped when nat_reass_is_drop_frag (1) is true, when
 *    ip6_parse() fails, when the L4 protocol is neither TCP nor UDP, or
 *    when no reassembly context can be obtained;
 *  - a non-first fragment whose reassembly context has no session yet is
 *    cached on that context (not enqueued);
 *  - the first fragment finds or creates the BIB and session entries,
 *    stores the session index on the reassembly context and collects the
 *    fragments cached so far into fragments_to_loopback;
 *  - the packet is then rewritten by nat64_in2out_frag_hairpinning() or
 *    ip6_to_ip4_fragmented().
 *
 * When the input frame is exhausted, collected cached fragments are copied
 * back into the input frame and processed by the same loop.
 *
 * @param vm    vlib main of the calling thread.
 * @param node  runtime data of this node.
 * @param frame frame of buffer indices to process.
 * @return frame->n_vectors.
 */
1280 static uword
1281 nat64_in2out_reass_node_fn (vlib_main_t * vm,
1282                             vlib_node_runtime_t * node, vlib_frame_t * frame)
1283 {
1284   u32 n_left_from, *from, *to_next;
1285   nat64_in2out_next_t next_index;
1286   u32 pkts_processed = 0;
1287   u32 *fragments_to_drop = 0;
1288   u32 *fragments_to_loopback = 0;
1289   nat64_main_t *nm = &nat64_main;
1290   u32 thread_index = vlib_get_thread_index ();
1291
1292   from = vlib_frame_vector_args (frame);
1293   n_left_from = frame->n_vectors;
1294   next_index = node->cached_next_index;
1295
1296   while (n_left_from > 0)
1297     {
1298       u32 n_left_to_next;
1299
1300       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1301
1302       while (n_left_from > 0 && n_left_to_next > 0)
1303         {
1304           u32 bi0;
1305           vlib_buffer_t *b0;
1306           u32 next0;
1307           u8 cached0 = 0;
1308           ip6_header_t *ip60;
1309           u16 l4_offset0, frag_offset0;
1310           u8 l4_protocol0;
1311           nat_reass_ip6_t *reass0;
1312           ip6_frag_hdr_t *frag0;
1313           nat64_db_bib_entry_t *bibe0;
1314           nat64_db_st_entry_t *ste0;
1315           udp_header_t *udp0;
1316           snat_protocol_t proto0;
1317           u32 sw_if_index0, fib_index0;
1318           ip46_address_t saddr0, daddr0;
1319           nat64_in2out_frag_set_ctx_t ctx0;
1320           nat64_db_t *db = &nm->db[thread_index];
1321
1322           /* speculatively enqueue b0 to the current next frame */
1323           bi0 = from[0];
1324           to_next[0] = bi0;
1325           from += 1;
1326           to_next += 1;
1327           n_left_from -= 1;
1328           n_left_to_next -= 1;
1329
1330           b0 = vlib_get_buffer (vm, bi0);
               /* default disposition: translated packet goes to the IPv4 lookup */
1331           next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;
1332
1333           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1334           fib_index0 =
1335             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6,
1336                                                  sw_if_index0);
1337
1338           ctx0.thread_index = thread_index;
1339
               /* NOTE(review): nat_reass_is_drop_frag (1) presumably reports a
                  configured "drop fragments" switch for IPv6 - confirm in
                  nat_reass.h */
1340           if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
1341             {
1342               next0 = NAT64_IN2OUT_NEXT_DROP;
1343               b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
1344               goto trace0;
1345             }
1346
1347           ip60 = (ip6_header_t *) vlib_buffer_get_current (b0);
1348
1349           if (PREDICT_FALSE
1350               (ip6_parse
1351                (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
1352                 &frag_offset0)))
1353             {
1354               next0 = NAT64_IN2OUT_NEXT_DROP;
1355               b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
1356               goto trace0;
1357             }
1358
               /* only TCP and UDP fragments are handled by this node */
1359           if (PREDICT_FALSE
1360               (!(l4_protocol0 == IP_PROTOCOL_TCP
1361                  || l4_protocol0 == IP_PROTOCOL_UDP)))
1362             {
1363               next0 = NAT64_IN2OUT_NEXT_DROP;
1364               b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
1365               goto trace0;
1366             }
1367
1368           udp0 = (udp_header_t *) u8_ptr_add (ip60, l4_offset0);
1369           frag0 = (ip6_frag_hdr_t *) u8_ptr_add (ip60, frag_offset0);
1370           proto0 = ip_proto_to_snat_proto (l4_protocol0);
1371
               /* get the reassembly context for this source/destination/
                  fragment-id/protocol tuple.
                  NOTE(review): fragments_to_drop presumably receives buffers
                  released while making room for a new context - confirm
                  against nat_ip6_reass_find_or_create() */
1372           reass0 = nat_ip6_reass_find_or_create (ip60->src_address,
1373                                                  ip60->dst_address,
1374                                                  frag0->identification,
1375                                                  l4_protocol0,
1376                                                  1, &fragments_to_drop);
1377
1378           if (PREDICT_FALSE (!reass0))
1379             {
1380               next0 = NAT64_IN2OUT_NEXT_DROP;
1381               b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_REASS];
1382               goto trace0;
1383             }
1384
               /* non-zero fragment offset: not the first fragment */
1385           if (PREDICT_TRUE (ip6_frag_hdr_offset (frag0)))
1386             {
1387               ctx0.first_frag = 0;
                   /* no session recorded on the context yet: park this buffer
                      on the reassembly context instead of forwarding it */
1388               if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
1389                 {
1390                   if (nat_ip6_reass_add_fragment (reass0, bi0))
1391                     {
1392                       b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_FRAG];
1393                       next0 = NAT64_IN2OUT_NEXT_DROP;
1394                       goto trace0;
1395                     }
1396                   cached0 = 1;
1397                   goto trace0;
1398                 }
1399             }
1400           else
1401             {
                   /* fragment offset zero: first fragment; find or create the
                      BIB and session entries from its addresses and L4 ports */
1402               ctx0.first_frag = 1;
1403
1404               saddr0.as_u64[0] = ip60->src_address.as_u64[0];
1405               saddr0.as_u64[1] = ip60->src_address.as_u64[1];
1406               daddr0.as_u64[0] = ip60->dst_address.as_u64[0];
1407               daddr0.as_u64[1] = ip60->dst_address.as_u64[1];
1408
1409               ste0 =
1410                 nat64_db_st_entry_find (db, &saddr0, &daddr0,
1411                                         udp0->src_port, udp0->dst_port,
1412                                         l4_protocol0, fib_index0, 1);
1413               if (!ste0)
1414                 {
1415                   bibe0 =
1416                     nat64_db_bib_entry_find (db, &saddr0, udp0->src_port,
1417                                              l4_protocol0, fib_index0, 1);
1418                   if (!bibe0)
1419                     {
1420                       u16 out_port0;
1421                       ip4_address_t out_addr0;
1422                       if (nat64_alloc_out_addr_and_port
1423                           (fib_index0, proto0, &out_addr0, &out_port0,
1424                            thread_index))
1425                         {
1426                           next0 = NAT64_IN2OUT_NEXT_DROP;
1427                           b0->error =
1428                             node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1429                           goto trace0;
1430                         }
1431
1432                       bibe0 =
1433                         nat64_db_bib_entry_create (db,
1434                                                    &ip60->src_address,
1435                                                    &out_addr0, udp0->src_port,
1436                                                    out_port0, fib_index0,
1437                                                    l4_protocol0, 0);
1438                       if (!bibe0)
1439                         {
1440                           next0 = NAT64_IN2OUT_NEXT_DROP;
1441                           b0->error =
1442                             node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1443                           goto trace0;
1444                         }
1445                     }
1446                   nat64_extract_ip4 (&ip60->dst_address, &daddr0.ip4,
1447                                      fib_index0);
1448                   ste0 =
1449                     nat64_db_st_entry_create (db, bibe0,
1450                                               &ip60->dst_address, &daddr0.ip4,
1451                                               udp0->dst_port);
1452                   if (!ste0)
1453                     {
1454                       next0 = NAT64_IN2OUT_NEXT_DROP;
1455                       b0->error =
1456                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1457                       goto trace0;
1458                     }
1459                 }
                   /* remember the session on the reassembly context and collect
                      the fragments cached so far for re-processing */
1460               reass0->sess_index = nat64_db_st_entry_get_index (db, ste0);
1461
1462               nat_ip6_reass_get_frags (reass0, &fragments_to_loopback);
1463             }
1464
1465           ctx0.sess_index = reass0->sess_index;
1466           ctx0.proto = l4_protocol0;
1467           ctx0.vm = vm;
1468           ctx0.l4_offset = l4_offset0;
1469
               /* hairpinned packets stay IPv6 and go to the IPv6 lookup; all
                  others are translated to IPv4 and keep the default next */
1470           if (PREDICT_FALSE (is_hairpinning (&ip60->dst_address)))
1471             {
1472               next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1473               if (nat64_in2out_frag_hairpinning (b0, ip60, &ctx0))
1474                 {
1475                   next0 = NAT64_IN2OUT_NEXT_DROP;
1476                   b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1477                 }
1478               goto trace0;
1479             }
1480           else
1481             {
1482               if (ip6_to_ip4_fragmented (b0, nat64_in2out_frag_set_cb, &ctx0))
1483                 {
1484                   next0 = NAT64_IN2OUT_NEXT_DROP;
1485                   b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
1486                   goto trace0;
1487                 }
1488             }
1489
1490         trace0:
1491           if (PREDICT_FALSE
1492               ((node->flags & VLIB_NODE_FLAG_TRACE)
1493                && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1494             {
1495               nat64_in2out_reass_trace_t *t =
1496                 vlib_add_trace (vm, node, b0, sizeof (*t));
1497               t->cached = cached0;
1498               t->sw_if_index = sw_if_index0;
1499               t->next_index = next0;
1500             }
1501
               /* a cached buffer is not forwarded now: give back the slot that
                  was speculatively taken in the next frame */
1502           if (cached0)
1503             {
1504               n_left_to_next++;
1505               to_next--;
1506             }
1507           else
1508             {
1509               pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP;
1510
1511               /* verify speculative enqueue, maybe switch current next frame */
1512               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1513                                                to_next, n_left_to_next,
1514                                                bi0, next0);
1515             }
1516
               /* input frame exhausted but cached fragments are waiting: refill
                  the input frame in place with at most VLIB_FRAME_SIZE of them
                  (taken from the tail of the vector) so the loop continues */
1517           if (n_left_from == 0 && vec_len (fragments_to_loopback))
1518             {
1519               from = vlib_frame_vector_args (frame);
1520               u32 len = vec_len (fragments_to_loopback);
1521               if (len <= VLIB_FRAME_SIZE)
1522                 {
1523                   clib_memcpy (from, fragments_to_loopback,
1524                                sizeof (u32) * len);
1525                   n_left_from = len;
1526                   vec_reset_length (fragments_to_loopback);
1527                 }
1528               else
1529                 {
1530                   clib_memcpy (from,
1531                                fragments_to_loopback + (len -
1532                                                         VLIB_FRAME_SIZE),
1533                                sizeof (u32) * VLIB_FRAME_SIZE);
1534                   n_left_from = VLIB_FRAME_SIZE;
1535                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
1536                 }
1537             }
1538         }
1539
1540       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1541     }
1542
1543   vlib_node_increment_counter (vm, nat64_in2out_reass_node.index,
1544                                NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
1545                                pkts_processed);
1546
     /* NOTE(review): presumably enqueues every buffer in fragments_to_drop to
        the drop next with the DROP_FRAGMENT error - confirm against
        nat_send_all_to_node() */
1547   nat_send_all_to_node (vm, fragments_to_drop, node,
1548                         &node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT],
1549                         NAT64_IN2OUT_NEXT_DROP);
1550
1551   vec_free (fragments_to_drop);
1552   vec_free (fragments_to_loopback);
1553   return frame->n_vectors;
1554 }
1555
1556 /* *INDENT-OFF* */
1557 VLIB_REGISTER_NODE (nat64_in2out_reass_node) = {
1558   .function = nat64_in2out_reass_node_fn,
1559   .name = "nat64-in2out-reass",
1560   .vector_size = sizeof (u32),
1561   .format_trace = format_nat64_in2out_reass_trace,
1562   .type = VLIB_NODE_TYPE_INTERNAL,
1563   .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
1564   .error_strings = nat64_in2out_error_strings,
1565   .n_next_nodes = NAT64_IN2OUT_N_NEXT,
1566   /* edit / add dispositions here */
1567   .next_nodes = {
1568     [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
1569     [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
1570     [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
1571     [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
1572     [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
1573   },
1574 };
1575 /* *INDENT-ON* */
1576
1577 VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_reass_node,
1578                               nat64_in2out_reass_node_fn);
1579
/**
 * Trace record written by the "nat64-in2out-handoff" node and rendered by
 * format_nat64_in2out_handoff_trace().
 */
1580 typedef struct
1581 {
     /* worker index selected for the packet by nat64_get_worker_in2out() */
1582   u32 next_worker_index;
     /* 1 when the selected worker differs from the current thread, else 0 */
1583   u8 do_handoff;
1584 } nat64_in2out_handoff_trace_t;
1585
1586 static u8 *
1587 format_nat64_in2out_handoff_trace (u8 * s, va_list * args)
1588 {
1589   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1590   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1591   nat64_in2out_handoff_trace_t *t =
1592     va_arg (*args, nat64_in2out_handoff_trace_t *);
1593   char *m;
1594
1595   m = t->do_handoff ? "next worker" : "same worker";
1596   s = format (s, "NAT64-IN2OUT-HANDOFF: %s %d", m, t->next_worker_index);
1597
1598   return s;
1599 }
1600
/**
 * Node function of "nat64-in2out-handoff": distributes packets between
 * worker threads.
 *
 * For every buffer the target worker is obtained from the IPv6 source
 * address with nat64_get_worker_in2out().  Buffers whose worker is the
 * current thread are collected in a frame for nat64_in2out_node.  Buffers
 * for another worker are appended to that worker's handoff queue element
 * (frame queue nm->fq_in2out_index), unless
 * is_vlib_frame_queue_congested() returns a queue, in which case the buffer
 * is put into a frame for nm->error_node_index.  All pending queue elements
 * are shipped before returning.
 *
 * @param vm    vlib main of the calling thread.
 * @param node  runtime data of this node.
 * @param frame frame of buffer indices to process.
 * @return frame->n_vectors.
 */
1601 static inline uword
1602 nat64_in2out_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1603                               vlib_frame_t * frame)
1604 {
1605   nat64_main_t *nm = &nat64_main;
1606   vlib_thread_main_t *tm = vlib_get_thread_main ();
1607   u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0;
1608   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
1609   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
1610     = 0;
1611   vlib_frame_queue_elt_t *hf = 0;
1612   vlib_frame_queue_t *fq;
1613   vlib_frame_t *f = 0, *d = 0;
1614   int i;
1615   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
1616   u32 next_worker_index = 0;
1617   u32 current_worker_index = ~0;
1618   u32 thread_index = vlib_get_thread_index ();
1619   u32 fq_index;
1620   u32 to_node_index;
1621
1622   fq_index = nm->fq_in2out_index;
1623   to_node_index = nat64_in2out_node.index;
1624
     /* first call on this thread: size both per-thread vectors to
        tm->n_vlib_mains entries; the congestion vector starts out as ~0 */
1625   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
1626     {
1627       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
1628
1629       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
1630                                tm->n_vlib_mains - 1,
1631                                (vlib_frame_queue_t *) (~0));
1632     }
1633
1634   from = vlib_frame_vector_args (frame);
1635   n_left_from = frame->n_vectors;
1636
1637   while (n_left_from > 0)
1638     {
1639       u32 bi0;
1640       vlib_buffer_t *b0;
1641       ip6_header_t *ip0;
1642       u8 do_handoff;
1643
1644       bi0 = from[0];
1645       from += 1;
1646       n_left_from -= 1;
1647
1648       b0 = vlib_get_buffer (vm, bi0);
1649
1650       ip0 = vlib_buffer_get_current (b0);
1651
           /* the worker is chosen from the IPv6 source address */
1652       next_worker_index = nat64_get_worker_in2out (&ip0->src_address);
1653
1654       if (PREDICT_FALSE (next_worker_index != thread_index))
1655         {
1656           do_handoff = 1;
1657
               /* target worker differs from the one of the currently open
                  queue element: check congestion, then switch elements */
1658           if (next_worker_index != current_worker_index)
1659             {
1660               fq =
1661                 is_vlib_frame_queue_congested (fq_index, next_worker_index,
1662                                                30,
1663                                                congested_handoff_queue_by_worker_index);
1664
                   /* a non-null result is treated as "congested": the buffer
                      goes to the frame for nm->error_node_index instead */
1665               if (fq)
1666                 {
1667                   /* if this is 1st frame */
1668                   if (!d)
1669                     {
1670                       d = vlib_get_frame_to_node (vm, nm->error_node_index);
1671                       to_next_drop = vlib_frame_vector_args (d);
1672                     }
1673
1674                   to_next_drop[0] = bi0;
1675                   to_next_drop += 1;
1676                   d->n_vectors++;
1677                   goto trace0;
1678                 }
1679
                   /* record how many buffers the previous element holds
                      before moving on to another worker's element */
1680               if (hf)
1681                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1682
1683               hf =
1684                 vlib_get_worker_handoff_queue_elt (fq_index,
1685                                                    next_worker_index,
1686                                                    handoff_queue_elt_by_worker_index);
1687               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
1688               to_next_worker = &hf->buffer_index[hf->n_vectors];
1689               current_worker_index = next_worker_index;
1690             }
1691
1692           ASSERT (to_next_worker != 0);
1693
1694           /* enqueue to correct worker thread */
1695           to_next_worker[0] = bi0;
1696           to_next_worker++;
1697           n_left_to_next_worker--;
1698
               /* element is full: ship it now and drop the cached pointer */
1699           if (n_left_to_next_worker == 0)
1700             {
1701               hf->n_vectors = VLIB_FRAME_SIZE;
1702               vlib_put_frame_queue_elt (hf);
1703               current_worker_index = ~0;
1704               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
1705               hf = 0;
1706             }
1707         }
1708       else
1709         {
               /* packet stays on this thread: append it to the frame that is
                  handed to nat64_in2out_node */
1710           do_handoff = 0;
1711           /* if this is 1st frame */
1712           if (!f)
1713             {
1714               f = vlib_get_frame_to_node (vm, to_node_index);
1715               to_next = vlib_frame_vector_args (f);
1716             }
1717
1718           to_next[0] = bi0;
1719           to_next += 1;
1720           f->n_vectors++;
1721         }
1722
1723     trace0:
1724       if (PREDICT_FALSE
1725           ((node->flags & VLIB_NODE_FLAG_TRACE)
1726            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1727         {
1728           nat64_in2out_handoff_trace_t *t =
1729             vlib_add_trace (vm, node, b0, sizeof (*t));
1730           t->next_worker_index = next_worker_index;
1731           t->do_handoff = do_handoff;
1732         }
1733     }
1734
1735   if (f)
1736     vlib_put_frame_to_node (vm, to_node_index, f);
1737
1738   if (d)
1739     vlib_put_frame_to_node (vm, nm->error_node_index, d);
1740
     /* finalize the count of the element that was still being filled */
1741   if (hf)
1742     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1743
1744   /* Ship frames to the worker nodes */
1745   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
1746     {
1747       if (handoff_queue_elt_by_worker_index[i])
1748         {
1749           hf = handoff_queue_elt_by_worker_index[i];
1750           /*
1751            * It works better to let the handoff node
1752            * rate-adapt, always ship the handoff queue element.
1753            */
               /* the leading "1 ||" makes this condition always true, so the
                  else branch below never runs */
1754           if (1 || hf->n_vectors == hf->last_n_vectors)
1755             {
1756               vlib_put_frame_queue_elt (hf);
1757               handoff_queue_elt_by_worker_index[i] = 0;
1758             }
1759           else
1760             hf->last_n_vectors = hf->n_vectors;
1761         }
           /* restore the initial ~0 marker for the next invocation */
1762       congested_handoff_queue_by_worker_index[i] =
1763         (vlib_frame_queue_t *) (~0);
1764     }
1765   hf = 0;
1766   current_worker_index = ~0;
1767   return frame->n_vectors;
1768 }
1769
1770 /* *INDENT-OFF* */
1771 VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = {
1772   .function = nat64_in2out_handoff_node_fn,
1773   .name = "nat64-in2out-handoff",
1774   .vector_size = sizeof (u32),
1775   .format_trace = format_nat64_in2out_handoff_trace,
1776   .type = VLIB_NODE_TYPE_INTERNAL,
1777
1778   .n_next_nodes = 1,
1779
1780   .next_nodes = {
1781     [0] = "error-drop",
1782   },
1783 };
1784 /* *INDENT-ON* */
1785
1786 VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_handoff_node,
1787                               nat64_in2out_handoff_node_fn);
1788
1789 /*
1790  * fd.io coding-style-patch-verification: ON
1791  *
1792  * Local Variables:
1793  * eval: (c-set-style "gnu")
1794  * End:
1795  */