NAT64: free port when dynamic BIB deleted (VPP-1107)
[vpp.git] / src / plugins / nat / nat64_in2out.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT64 IPv6 to IPv4 translation (inside to outside network)
18  */
19
20 #include <nat/nat64.h>
21 #include <nat/nat_reass.h>
22 #include <vnet/ip/ip6_to_ip4.h>
23 #include <vnet/fib/fib_table.h>
24
/* Packet-trace record for the NAT64 in2out (fast and slow path) nodes. */
typedef struct
{
  u32 sw_if_index;		/* RX interface of the traced packet */
  u32 next_index;		/* next node chosen for the packet */
  u8 is_slow_path;		/* 1 when traced by the slow-path node */
} nat64_in2out_trace_t;
31
32 static u8 *
33 format_nat64_in2out_trace (u8 * s, va_list * args)
34 {
35   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
36   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
37   nat64_in2out_trace_t *t = va_arg (*args, nat64_in2out_trace_t *);
38   char *tag;
39
40   tag = t->is_slow_path ? "NAT64-in2out-slowpath" : "NAT64-in2out";
41
42   s =
43     format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
44             t->next_index);
45
46   return s;
47 }
48
/* Packet-trace record for the NAT64 in2out reassembly node. */
typedef struct
{
  u32 sw_if_index;		/* RX interface of the traced packet */
  u32 next_index;		/* next node chosen for the packet */
  u8 cached;			/* 1 = fragment cached, 0 = translated */
} nat64_in2out_reass_trace_t;
55
56 static u8 *
57 format_nat64_in2out_reass_trace (u8 * s, va_list * args)
58 {
59   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
60   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
61   nat64_in2out_reass_trace_t *t =
62     va_arg (*args, nat64_in2out_reass_trace_t *);
63
64   s =
65     format (s, "NAT64-in2out-reass: sw_if_index %d, next index %d, status %s",
66             t->sw_if_index, t->next_index,
67             t->cached ? "cached" : "translated");
68
69   return s;
70 }
71
/* Graph-node registrations for the in2out path (fast path, slow path,
 * reassembly and worker handoff). */
vlib_node_registration_t nat64_in2out_node;
vlib_node_registration_t nat64_in2out_slowpath_node;
vlib_node_registration_t nat64_in2out_reass_node;
vlib_node_registration_t nat64_in2out_handoff_node;
76
/* Error/drop counters reported by the in2out nodes. */
#define foreach_nat64_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "unsupported protocol")          \
_(IN2OUT_PACKETS, "good in2out packets processed")       \
_(NO_TRANSLATION, "no translation")                      \
_(UNKNOWN, "unknown")                                    \
_(DROP_FRAGMENT, "Drop fragment")                        \
_(MAX_REASS, "Maximum reassemblies exceeded")            \
_(MAX_FRAG, "Maximum fragments per reassembly exceeded")


typedef enum
{
#define _(sym,str) NAT64_IN2OUT_ERROR_##sym,
  foreach_nat64_in2out_error
#undef _
    NAT64_IN2OUT_N_ERROR,
} nat64_in2out_error_t;

/* Human-readable strings, indexed by nat64_in2out_error_t. */
static char *nat64_in2out_error_strings[] = {
#define _(sym,string) string,
  foreach_nat64_in2out_error
#undef _
};
100
/* Next-node dispositions used by the in2out nodes. */
typedef enum
{
  NAT64_IN2OUT_NEXT_IP4_LOOKUP,	/* translated to IPv4, continue lookup */
  NAT64_IN2OUT_NEXT_IP6_LOOKUP,	/* hairpinned, packet stays IPv6 */
  NAT64_IN2OUT_NEXT_DROP,
  NAT64_IN2OUT_NEXT_SLOWPATH,	/* unknown L4 protocol, punt to slow path */
  NAT64_IN2OUT_NEXT_REASS,	/* IPv6 fragment, needs reassembly */
  NAT64_IN2OUT_N_NEXT,
} nat64_in2out_next_t;
110
/* Per-packet context handed to the ip6_to_ip4 "set" callbacks below. */
typedef struct nat64_in2out_set_ctx_t_
{
  vlib_buffer_t *b;		/* buffer being translated */
  vlib_main_t *vm;
  u32 thread_index;		/* selects the per-thread NAT64 db */
} nat64_in2out_set_ctx_t;
117
118 /**
119  * @brief Check whether is a hairpinning.
120  *
121  * If the destination IP address of the packet is an IPv4 address assigned to
122  * the NAT64 itself, then the packet is a hairpin packet.
123  *
124  * param dst_addr Destination address of the packet.
125  *
126  * @returns 1 if hairpinning, otherwise 0.
127  */
128 static_always_inline int
129 is_hairpinning (ip6_address_t * dst_addr)
130 {
131   nat64_main_t *nm = &nat64_main;
132   int i;
133
134   for (i = 0; i < vec_len (nm->addr_pool); i++)
135     {
136       if (nm->addr_pool[i].addr.as_u32 == dst_addr->as_u32[3])
137         return 1;
138     }
139
140   return 0;
141 }
142
/**
 * @brief ip6_to_ip4 "set" callback for TCP/UDP packets.
 *
 * Finds (or creates) the BIB and session-table entries for the flow in the
 * per-thread db, then rewrites the IPv4 source/destination addresses and the
 * L4 source port.
 *
 * @param ip6 Original IPv6 header.
 * @param ip4 IPv4 header being built by the translator.
 * @param arg nat64_in2out_set_ctx_t (buffer, vlib_main, thread index).
 *
 * @returns 0 on success, -1 on failure (caller drops the packet).
 */
static int
nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
			     void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  /* TCP and UDP share the port-field layout, so a udp_header_t view is
     enough to read/write ports for both protocols. */
  udp_header_t *udp = ip6_next_header (ip6);
  u8 proto = ip6->protocol;
  u16 sport = udp->src_port;
  u16 dport = udp->dst_port;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  ste =
    nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
			    fib_index, 1);

  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;
    }
  else
    {
      bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1);

      if (!bibe)
	{
	  /* No BIB yet: create a dynamic entry with a freshly allocated
	     outside address/port pair. */
	  u16 out_port;
	  ip4_address_t out_addr;
	  if (nat64_alloc_out_addr_and_port
	      (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
	       &out_port, ctx->thread_index))
	    return -1;

	  bibe =
	    nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr,
				       sport, out_port, fib_index, proto, 0);
	  if (!bibe)
	    return -1;
	}

      /* Destination IPv4 address is embedded in the NAT64 prefix. */
      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
      ste =
	nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
				  &daddr.ip4, dport);
      if (!ste)
	return -1;
    }

  nat64_session_reset_timeout (ste, ctx->vm);

  ip4->src_address.as_u32 = bibe->out_addr.as_u32;
  udp->src_port = bibe->out_port;

  ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;

  if (proto == IP_PROTOCOL_TCP)
    {
      /* Incrementally patch the TCP checksum for the rewritten source
	 port.  Only TCP is patched here; the UDP checksum is presumably
	 updated by the generic translator — TODO confirm. */
      u16 *checksum;
      ip_csum_t csum;
      tcp_header_t *tcp = ip6_next_header (ip6);

      checksum = &tcp->checksum;
      csum = ip_csum_sub_even (*checksum, sport);
      csum = ip_csum_add_even (csum, udp->src_port);
      *checksum = ip_csum_fold (csum);
    }

  return 0;
}
227
/**
 * @brief ip6_to_ip4 "set" callback for ICMP packets.
 *
 * For echo request/reply the ICMP identifier plays the role of the port:
 * BIB/session entries are looked up (or created) keyed on it.  For other
 * ICMP messages no per-flow state is used; the first pool address becomes
 * the IPv4 source.
 *
 * NOTE(review): the type is compared against ICMP4_* values, so the caller
 * has presumably already rewritten the ICMPv6 type to its ICMPv4 equivalent
 * before invoking this callback — confirm against the icmp6_to_icmp path.
 *
 * @returns 0 on success, -1 on failure (caller drops the packet).
 */
static int
nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  icmp46_header_t *icmp = ip6_next_header (ip6);
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply)
    {
      /* Halfword at offset 4 of the ICMP header: the echo identifier. */
      u16 in_id = ((u16 *) (icmp))[2];
      ste =
	nat64_db_st_entry_find (db, &saddr, &daddr, in_id, 0,
				IP_PROTOCOL_ICMP, fib_index, 1);

      if (ste)
	{
	  bibe =
	    nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP,
					 ste->bibe_index);
	  if (!bibe)
	    return -1;
	}
      else
	{
	  bibe =
	    nat64_db_bib_entry_find (db, &saddr, in_id,
				     IP_PROTOCOL_ICMP, fib_index, 1);

	  if (!bibe)
	    {
	      /* Dynamic BIB entry: allocate an outside address and id. */
	      u16 out_id;
	      ip4_address_t out_addr;
	      if (nat64_alloc_out_addr_and_port
		  (fib_index, SNAT_PROTOCOL_ICMP, &out_addr, &out_id,
		   ctx->thread_index))
		return -1;

	      bibe =
		nat64_db_bib_entry_create (db, &ip6->src_address,
					   &out_addr, in_id, out_id,
					   fib_index, IP_PROTOCOL_ICMP, 0);
	      if (!bibe)
		return -1;
	    }

	  nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
	  ste =
	    nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
				      &daddr.ip4, 0);
	  if (!ste)
	    return -1;
	}

      nat64_session_reset_timeout (ste, ctx->vm);

      ip4->src_address.as_u32 = bibe->out_addr.as_u32;
      /* Rewrite the echo identifier with the translated ("out") id. */
      ((u16 *) (icmp))[2] = bibe->out_port;

      ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
    }
  else
    {
      /* Non-echo ICMP: stateless; source from the first pool address. */
      if (!vec_len (nm->addr_pool))
	return -1;

      ip4->src_address.as_u32 = nm->addr_pool[0].addr.as_u32;
      nat64_extract_ip4 (&ip6->dst_address, &ip4->dst_address, fib_index);
    }

  return 0;
}
313
/**
 * @brief ip6_to_ip4 "set" callback for the packet embedded in an ICMP error.
 *
 * The embedded packet is the original outbound packet, so sessions are
 * looked up with source and destination swapped relative to the outer
 * lookup.  Only existing state is matched; nothing is created here.
 *
 * @returns 0 on success, -1 if no matching state exists.
 */
static int
nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
				void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *ctx = arg;
  nat64_db_st_entry_t *ste;
  nat64_db_bib_entry_t *bibe;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  u8 proto = ip6->protocol;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  if (proto == IP_PROTOCOL_ICMP6)
    {
      icmp46_header_t *icmp = ip6_next_header (ip6);
      /* Echo identifier (halfword at offset 4 of the ICMP header). */
      u16 in_id = ((u16 *) (icmp))[2];
      proto = IP_PROTOCOL_ICMP;

      /* Only echo request/reply carry translatable per-flow state.
	 NOTE(review): ICMP4_* constants are used — the inner header is
	 presumably already type-translated by the caller; confirm. */
      if (!
	  (icmp->type == ICMP4_echo_request
	   || icmp->type == ICMP4_echo_reply))
	return -1;

      /* daddr/saddr swapped: the inner packet ran in2out originally. */
      ste =
	nat64_db_st_entry_find (db, &daddr, &saddr, in_id, 0, proto,
				fib_index, 1);
      if (!ste)
	return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;

      ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
      ((u16 *) (icmp))[2] = bibe->out_port;
      ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
    }
  else
    {
      /* TCP/UDP inner packet; both header views alias the same bytes and
	 the port fields share offsets. */
      udp_header_t *udp = ip6_next_header (ip6);
      tcp_header_t *tcp = ip6_next_header (ip6);
      u16 *checksum;
      ip_csum_t csum;

      u16 sport = udp->src_port;
      u16 dport = udp->dst_port;

      /* daddr/saddr swapped: the inner packet ran in2out originally. */
      ste =
	nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
				fib_index, 1);
      if (!ste)
	return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;

      ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
      udp->dst_port = bibe->out_port;
      ip4->src_address.as_u32 = ste->out_r_addr.as_u32;

      /* Incrementally patch the inner L4 checksum for the port rewrite. */
      if (proto == IP_PROTOCOL_TCP)
	checksum = &tcp->checksum;
      else
	checksum = &udp->checksum;
      csum = ip_csum_sub_even (*checksum, dport);
      csum = ip_csum_add_even (csum, udp->dst_port);
      *checksum = ip_csum_fold (csum);
    }

  return 0;
}
396
/* Context for unk_proto_st_walk(): search existing TCP/UDP sessions for an
 * outside address already used by the same inside host towards the same
 * destination. */
typedef struct unk_proto_st_walk_ctx_t_
{
  ip6_address_t src_addr;	/* inside source to match */
  ip6_address_t dst_addr;	/* inside destination to match */
  ip4_address_t out_addr;	/* result; 0.0.0.0 when nothing found */
  u32 fib_index;
  u32 thread_index;		/* selects the per-thread NAT64 db */
  u8 proto;			/* L4 protocol of the new flow */
} unk_proto_st_walk_ctx_t;
406
407 static int
408 unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg)
409 {
410   nat64_main_t *nm = &nat64_main;
411   unk_proto_st_walk_ctx_t *ctx = arg;
412   nat64_db_bib_entry_t *bibe;
413   ip46_address_t saddr, daddr;
414   nat64_db_t *db = &nm->db[ctx->thread_index];
415
416   if (ip46_address_is_equal (&ste->in_r_addr, &ctx->dst_addr))
417     {
418       bibe = nat64_db_bib_entry_by_index (db, ste->proto, ste->bibe_index);
419       if (!bibe)
420         return -1;
421
422       if (ip46_address_is_equal (&bibe->in_addr, &ctx->src_addr)
423           && bibe->fib_index == ctx->fib_index)
424         {
425           memset (&saddr, 0, sizeof (saddr));
426           saddr.ip4.as_u32 = bibe->out_addr.as_u32;
427           memset (&daddr, 0, sizeof (daddr));
428           nat64_extract_ip4 (&ctx->dst_addr, &daddr.ip4, ctx->fib_index);
429
430           if (nat64_db_st_entry_find
431               (db, &daddr, &saddr, 0, 0, ctx->proto, ctx->fib_index, 0))
432             return -1;
433
434           ctx->out_addr.as_u32 = bibe->out_addr.as_u32;
435           return 1;
436         }
437     }
438
439   return 0;
440 }
441
442 static int
443 nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
444                                void *arg)
445 {
446   nat64_main_t *nm = &nat64_main;
447   nat64_in2out_set_ctx_t *s_ctx = arg;
448   nat64_db_bib_entry_t *bibe;
449   nat64_db_st_entry_t *ste;
450   ip46_address_t saddr, daddr, addr;
451   u32 sw_if_index, fib_index;
452   u8 proto = ip6->protocol;
453   int i;
454   nat64_db_t *db = &nm->db[s_ctx->thread_index];
455
456   sw_if_index = vnet_buffer (s_ctx->b)->sw_if_index[VLIB_RX];
457   fib_index =
458     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
459
460   saddr.as_u64[0] = ip6->src_address.as_u64[0];
461   saddr.as_u64[1] = ip6->src_address.as_u64[1];
462   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
463   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
464
465   ste =
466     nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1);
467
468   if (ste)
469     {
470       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
471       if (!bibe)
472         return -1;
473     }
474   else
475     {
476       bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1);
477
478       if (!bibe)
479         {
480           /* Choose same out address as for TCP/UDP session to same dst */
481           unk_proto_st_walk_ctx_t ctx = {
482             .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
483             .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
484             .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
485             .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
486             .out_addr.as_u32 = 0,
487             .fib_index = fib_index,
488             .proto = proto,
489             .thread_index = s_ctx->thread_index,
490           };
491
492           nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);
493
494           if (!ctx.out_addr.as_u32)
495             nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);
496
497           /* Verify if out address is not already in use for protocol */
498           memset (&addr, 0, sizeof (addr));
499           addr.ip4.as_u32 = ctx.out_addr.as_u32;
500           if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
501             ctx.out_addr.as_u32 = 0;
502
503           if (!ctx.out_addr.as_u32)
504             {
505               for (i = 0; i < vec_len (nm->addr_pool); i++)
506                 {
507                   addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
508                   if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
509                     break;
510                 }
511             }
512
513           if (!ctx.out_addr.as_u32)
514             return -1;
515
516           bibe =
517             nat64_db_bib_entry_create (db, &ip6->src_address,
518                                        &ctx.out_addr, 0, 0, fib_index, proto,
519                                        0);
520           if (!bibe)
521             return -1;
522         }
523
524       nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
525       ste =
526         nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0);
527       if (!ste)
528         return -1;
529     }
530
531   nat64_session_reset_timeout (ste, s_ctx->vm);
532
533   ip4->src_address.as_u32 = bibe->out_addr.as_u32;
534   ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
535
536   return 0;
537 }
538
539
540
/**
 * @brief Hairpin a TCP/UDP packet whose destination is a NAT64 pool address.
 *
 * The packet stays IPv6: the source becomes the NAT64-prefixed outside
 * address/port of this flow's BIB, and the destination becomes the inside
 * address/port of the BIB entry that owns the targeted outside pair.  The
 * L4 checksum is patched incrementally: old pseudo-header addresses and
 * ports are subtracted up front, the new values added after the rewrite.
 *
 * @returns 0 on success, -1 on failure (caller drops the packet).
 */
static int
nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
				  ip6_header_t * ip6, u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  /* Both header views alias the same bytes; port offsets are shared. */
  udp_header_t *udp = ip6_next_header (ip6);
  tcp_header_t *tcp = ip6_next_header (ip6);
  u8 proto = ip6->protocol;
  u16 sport = udp->src_port;
  u16 dport = udp->dst_port;
  u16 *checksum;
  ip_csum_t csum;
  nat64_db_t *db = &nm->db[thread_index];

  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  if (proto == IP_PROTOCOL_UDP)
    checksum = &udp->checksum;
  else
    checksum = &tcp->checksum;

  /* Remove the old pseudo-header contributions before any rewrite. */
  csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, sport);
  csum = ip_csum_sub_even (csum, dport);

  ste =
    nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
			    fib_index, 1);

  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;
    }
  else
    {
      bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1);

      if (!bibe)
	{
	  /* Dynamic BIB entry: allocate an outside address/port pair. */
	  u16 out_port;
	  ip4_address_t out_addr;
	  if (nat64_alloc_out_addr_and_port
	      (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
	       &out_port, thread_index))
	    return -1;

	  bibe =
	    nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr,
				       sport, out_port, fib_index, proto, 0);
	  if (!bibe)
	    return -1;
	}

      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
      ste =
	nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
				  &daddr.ip4, dport);
      if (!ste)
	return -1;
    }

  nat64_session_reset_timeout (ste, vm);

  /* Source rewrite: the flow's own outside port/address (IPv6-composed). */
  sport = udp->src_port = bibe->out_port;
  nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);

  memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ste->out_r_addr.as_u32;

  /* Find the BIB (any worker's db) that owns the targeted outside pair. */
  bibe = 0;
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, 0, 0);

      if (bibe)
	break;
    }
  /* *INDENT-ON* */

  if (!bibe)
    return -1;

  /* Destination rewrite: that BIB's inside address/port. */
  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
  udp->dst_port = bibe->in_port;

  /* Add the new pseudo-header contributions and fold. */
  csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
  csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
  csum = ip_csum_add_even (csum, udp->src_port);
  csum = ip_csum_add_even (csum, udp->dst_port);
  *checksum = ip_csum_fold (csum);

  return 0;
}
654
/**
 * @brief Hairpin an ICMPv6 error message addressed to a NAT64 pool address.
 *
 * Echo request/reply are rejected here (returned -1); presumably they are
 * handled by the regular ICMP path — TODO confirm.  For errors, the inner
 * (embedded) TCP/UDP packet is rewritten from existing session state, the
 * outer packet is re-addressed, and the ICMPv6 checksum is recomputed from
 * scratch over the pseudo-header and payload.
 *
 * @returns 0 on success, -1 on failure (caller drops the packet).
 */
static int
nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
			       ip6_header_t * ip6, u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  icmp46_header_t *icmp = ip6_next_header (ip6);
  ip6_header_t *inner_ip6;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  u8 proto;
  udp_header_t *udp;
  tcp_header_t *tcp;
  u16 *checksum, sport, dport;
  ip_csum_t csum;
  nat64_db_t *db = &nm->db[thread_index];

  if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
    return -1;

  /* Embedded packet starts 8 bytes into the ICMP message. */
  inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);

  proto = inner_ip6->protocol;

  /* Errors about ICMP itself are not translated. */
  if (proto == IP_PROTOCOL_ICMP6)
    return -1;

  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = inner_ip6->src_address.as_u64[0];
  saddr.as_u64[1] = inner_ip6->src_address.as_u64[1];
  daddr.as_u64[0] = inner_ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = inner_ip6->dst_address.as_u64[1];

  /* Both header views alias the same bytes; port offsets are shared. */
  udp = ip6_next_header (inner_ip6);
  tcp = ip6_next_header (inner_ip6);

  sport = udp->src_port;
  dport = udp->dst_port;

  if (proto == IP_PROTOCOL_UDP)
    checksum = &udp->checksum;
  else
    checksum = &tcp->checksum;

  /* Remove the inner packet's old pseudo-header contributions. */
  csum = ip_csum_sub_even (*checksum, inner_ip6->src_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, sport);
  csum = ip_csum_sub_even (csum, dport);

  /* Inner packet ran in2out originally, hence daddr/saddr swapped. */
  ste =
    nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
			    fib_index, 1);
  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
  if (!bibe)
    return -1;

  dport = udp->dst_port = bibe->out_port;
  nat64_compose_ip6 (&inner_ip6->dst_address, &bibe->out_addr, fib_index);

  memset (&saddr, 0, sizeof (saddr));
  memset (&daddr, 0, sizeof (daddr));
  saddr.ip4.as_u32 = ste->out_r_addr.as_u32;
  daddr.ip4.as_u32 = bibe->out_addr.as_u32;

  /* Locate the hairpinned peer's session in any worker's db. */
  ste = 0;
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      ste = nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
				    0, 0);

      if (ste)
	break;
    }
  /* *INDENT-ON* */

  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
  if (!bibe)
    return -1;

  inner_ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
  inner_ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
  udp->src_port = bibe->in_port;

  /* Add the inner packet's new pseudo-header contributions and fold. */
  csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]);
  csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]);
  csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]);
  csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]);
  csum = ip_csum_add_even (csum, udp->src_port);
  csum = ip_csum_add_even (csum, udp->dst_port);
  *checksum = ip_csum_fold (csum);

  if (!vec_len (nm->addr_pool))
    return -1;

  /* Re-address the outer packet: NAT64-prefixed first pool address as
     source, inner source (the hairpinned peer) as destination. */
  nat64_compose_ip6 (&ip6->src_address, &nm->addr_pool[0].addr, fib_index);
  ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
  ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];

  /* Recompute the ICMPv6 checksum from scratch: pseudo-header fields
     followed by an incremental checksum over the ICMP payload. */
  icmp->checksum = 0;
  csum = ip_csum_with_carry (0, ip6->payload_length);
  csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol));
  csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]);
  csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]);
  csum =
    ip_incremental_checksum (csum, icmp,
			     clib_net_to_host_u16 (ip6->payload_length));
  icmp->checksum = ~ip_csum_fold (csum);

  return 0;
}
780
781 static int
782 nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
783                                     ip6_header_t * ip6, u32 thread_index)
784 {
785   nat64_main_t *nm = &nat64_main;
786   nat64_db_bib_entry_t *bibe;
787   nat64_db_st_entry_t *ste;
788   ip46_address_t saddr, daddr, addr;
789   u32 sw_if_index, fib_index;
790   u8 proto = ip6->protocol;
791   int i;
792   nat64_db_t *db = &nm->db[thread_index];
793
794   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
795   fib_index =
796     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
797
798   saddr.as_u64[0] = ip6->src_address.as_u64[0];
799   saddr.as_u64[1] = ip6->src_address.as_u64[1];
800   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
801   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
802
803   ste =
804     nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1);
805
806   if (ste)
807     {
808       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
809       if (!bibe)
810         return -1;
811     }
812   else
813     {
814       bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1);
815
816       if (!bibe)
817         {
818           /* Choose same out address as for TCP/UDP session to same dst */
819           unk_proto_st_walk_ctx_t ctx = {
820             .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
821             .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
822             .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
823             .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
824             .out_addr.as_u32 = 0,
825             .fib_index = fib_index,
826             .proto = proto,
827             .thread_index = thread_index,
828           };
829
830           nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);
831
832           if (!ctx.out_addr.as_u32)
833             nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);
834
835           /* Verify if out address is not already in use for protocol */
836           memset (&addr, 0, sizeof (addr));
837           addr.ip4.as_u32 = ctx.out_addr.as_u32;
838           if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
839             ctx.out_addr.as_u32 = 0;
840
841           if (!ctx.out_addr.as_u32)
842             {
843               for (i = 0; i < vec_len (nm->addr_pool); i++)
844                 {
845                   addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
846                   if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
847                     break;
848                 }
849             }
850
851           if (!ctx.out_addr.as_u32)
852             return -1;
853
854           bibe =
855             nat64_db_bib_entry_create (db, &ip6->src_address,
856                                        &ctx.out_addr, 0, 0, fib_index, proto,
857                                        0);
858           if (!bibe)
859             return -1;
860         }
861
862       nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
863       ste =
864         nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0);
865       if (!ste)
866         return -1;
867     }
868
869   nat64_session_reset_timeout (ste, vm);
870
871   nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);
872
873   memset (&daddr, 0, sizeof (daddr));
874   daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
875
876   bibe = 0;
877   /* *INDENT-OFF* */
878   vec_foreach (db, nm->db)
879     {
880       bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, 0, 0);
881
882       if (bibe)
883         break;
884     }
885   /* *INDENT-ON* */
886
887   if (!bibe)
888     return -1;
889
890   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
891   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
892
893   return 0;
894 }
895
896 static inline uword
897 nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
898                              vlib_frame_t * frame, u8 is_slow_path)
899 {
900   u32 n_left_from, *from, *to_next;
901   nat64_in2out_next_t next_index;
902   u32 pkts_processed = 0;
903   u32 stats_node_index;
904   u32 thread_index = vlib_get_thread_index ();
905
906   stats_node_index =
907     is_slow_path ? nat64_in2out_slowpath_node.index : nat64_in2out_node.index;
908
909   from = vlib_frame_vector_args (frame);
910   n_left_from = frame->n_vectors;
911   next_index = node->cached_next_index;
912
913   while (n_left_from > 0)
914     {
915       u32 n_left_to_next;
916
917       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
918
919       while (n_left_from > 0 && n_left_to_next > 0)
920         {
921           u32 bi0;
922           vlib_buffer_t *b0;
923           u32 next0;
924           ip6_header_t *ip60;
925           u16 l4_offset0, frag_offset0;
926           u8 l4_protocol0;
927           u32 proto0;
928           nat64_in2out_set_ctx_t ctx0;
929
930           /* speculatively enqueue b0 to the current next frame */
931           bi0 = from[0];
932           to_next[0] = bi0;
933           from += 1;
934           to_next += 1;
935           n_left_from -= 1;
936           n_left_to_next -= 1;
937
938           b0 = vlib_get_buffer (vm, bi0);
939           ip60 = vlib_buffer_get_current (b0);
940
941           ctx0.b = b0;
942           ctx0.vm = vm;
943           ctx0.thread_index = thread_index;
944
945           next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;
946
947           if (PREDICT_FALSE
948               (ip6_parse
949                (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
950                 &frag_offset0)))
951             {
952               next0 = NAT64_IN2OUT_NEXT_DROP;
953               b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
954               goto trace0;
955             }
956
957           proto0 = ip_proto_to_snat_proto (l4_protocol0);
958
959           if (is_slow_path)
960             {
961               if (PREDICT_TRUE (proto0 == ~0))
962                 {
963                   if (is_hairpinning (&ip60->dst_address))
964                     {
965                       next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
966                       if (nat64_in2out_unk_proto_hairpinning
967                           (vm, b0, ip60, thread_index))
968                         {
969                           next0 = NAT64_IN2OUT_NEXT_DROP;
970                           b0->error =
971                             node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
972                         }
973                       goto trace0;
974                     }
975
976                   if (ip6_to_ip4 (b0, nat64_in2out_unk_proto_set_cb, &ctx0))
977                     {
978                       next0 = NAT64_IN2OUT_NEXT_DROP;
979                       b0->error =
980                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
981                       goto trace0;
982                     }
983                 }
984               goto trace0;
985             }
986           else
987             {
988               if (PREDICT_FALSE (proto0 == ~0))
989                 {
990                   next0 = NAT64_IN2OUT_NEXT_SLOWPATH;
991                   goto trace0;
992                 }
993             }
994
995           if (PREDICT_FALSE
996               (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION))
997             {
998               next0 = NAT64_IN2OUT_NEXT_REASS;
999               goto trace0;
1000             }
1001
1002           if (proto0 == SNAT_PROTOCOL_ICMP)
1003             {
1004               if (is_hairpinning (&ip60->dst_address))
1005                 {
1006                   next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1007                   if (nat64_in2out_icmp_hairpinning
1008                       (vm, b0, ip60, thread_index))
1009                     {
1010                       next0 = NAT64_IN2OUT_NEXT_DROP;
1011                       b0->error =
1012                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1013                     }
1014                   goto trace0;
1015                 }
1016
1017               if (icmp6_to_icmp
1018                   (b0, nat64_in2out_icmp_set_cb, &ctx0,
1019                    nat64_in2out_inner_icmp_set_cb, &ctx0))
1020                 {
1021                   next0 = NAT64_IN2OUT_NEXT_DROP;
1022                   b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1023                   goto trace0;
1024                 }
1025             }
1026           else if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
1027             {
1028               if (is_hairpinning (&ip60->dst_address))
1029                 {
1030                   next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1031                   if (nat64_in2out_tcp_udp_hairpinning
1032                       (vm, b0, ip60, thread_index))
1033                     {
1034                       next0 = NAT64_IN2OUT_NEXT_DROP;
1035                       b0->error =
1036                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1037                     }
1038                   goto trace0;
1039                 }
1040
1041               if (ip6_to_ip4_tcp_udp
1042                   (b0, nat64_in2out_tcp_udp_set_cb, &ctx0, 0))
1043                 {
1044                   next0 = NAT64_IN2OUT_NEXT_DROP;
1045                   b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1046                   goto trace0;
1047                 }
1048             }
1049
1050         trace0:
1051           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1052                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1053             {
1054               nat64_in2out_trace_t *t =
1055                 vlib_add_trace (vm, node, b0, sizeof (*t));
1056               t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1057               t->next_index = next0;
1058               t->is_slow_path = is_slow_path;
1059             }
1060
1061           pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP;
1062
1063           /* verify speculative enqueue, maybe switch current next frame */
1064           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1065                                            n_left_to_next, bi0, next0);
1066         }
1067       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1068     }
1069   vlib_node_increment_counter (vm, stats_node_index,
1070                                NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
1071                                pkts_processed);
1072   return frame->n_vectors;
1073 }
1074
/**
 * @brief NAT64 IPv6-to-IPv4 fast-path node function.
 *
 * Thin wrapper: runs the shared inline worker with is_slow_path = 0,
 * so unknown-protocol packets are punted to the slow-path node.
 */
static uword
nat64_in2out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
                      vlib_frame_t * frame)
{
  return nat64_in2out_node_fn_inline (vm, node, frame, 0);
}
1081
/* Graph node registration for the NAT64 in2out fast path. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_in2out_node) = {
  .function = nat64_in2out_node_fn,
  .name = "nat64-in2out",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
  .error_strings = nat64_in2out_error_strings,
  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_node, nat64_in2out_node_fn);
1104
/**
 * @brief NAT64 IPv6-to-IPv4 slow-path node function.
 *
 * Thin wrapper: runs the shared inline worker with is_slow_path = 1,
 * which handles the unknown-protocol (non TCP/UDP/ICMP) cases.
 */
static uword
nat64_in2out_slowpath_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
                               vlib_frame_t * frame)
{
  return nat64_in2out_node_fn_inline (vm, node, frame, 1);
}
1111
/* Graph node registration for the NAT64 in2out slow path.
   Next-node dispositions mirror the fast-path node. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
  .function = nat64_in2out_slowpath_node_fn,
  .name = "nat64-in2out-slowpath",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
  .error_strings = nat64_in2out_error_strings,
  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_slowpath_node,
                              nat64_in2out_slowpath_node_fn);
1135
/**
 * @brief Context handed to the per-fragment translation callbacks
 * (nat64_in2out_frag_set_cb / nat64_in2out_frag_hairpinning).
 */
typedef struct nat64_in2out_frag_set_ctx_t_
{
  vlib_main_t *vm;	/**< vlib main, used to refresh session timeouts */
  u32 sess_index;	/**< index of the NAT64 session table entry */
  u32 thread_index;	/**< worker thread owning the per-thread NAT64 db */
  u16 l4_offset;	/**< byte offset of the L4 header inside the packet */
  u8 proto;		/**< L4 protocol (IP_PROTOCOL_TCP or _UDP) */
  u8 first_frag;	/**< non-zero when this buffer carries the L4 header */
} nat64_in2out_frag_set_ctx_t;
1145
/**
 * @brief Translate one in2out fragment using an already-established
 * NAT64 session.
 *
 * Looks up the session (by ctx->sess_index) and its BIB entry, refreshes
 * the session timeout and writes the translated IPv4 source/destination
 * addresses.  Only the first fragment carries the L4 header, so only then
 * is the source port rewritten; for TCP the checksum is incrementally
 * updated per RFC 1624 (old port and IPv6 pseudo-header addresses removed,
 * new port and IPv4 pseudo-header addresses added).
 *
 * @param ip6  original IPv6 header (still intact; used for checksum delta)
 * @param ip4  IPv4 header being produced by the translator
 * @param arg  nat64_in2out_frag_set_ctx_t describing the fragment
 * @return 0 on success, -1 when the session or BIB entry is gone.
 */
static int
nat64_in2out_frag_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_frag_set_ctx_t *ctx = arg;
  nat64_db_st_entry_t *ste;
  nat64_db_bib_entry_t *bibe;
  udp_header_t *udp;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
  if (!bibe)
    return -1;

  nat64_session_reset_timeout (ste, ctx->vm);

  if (ctx->first_frag)
    {
      udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);

      if (ctx->proto == IP_PROTOCOL_TCP)
	{
	  u16 *checksum;
	  ip_csum_t csum;
	  tcp_header_t *tcp = (tcp_header_t *) udp;

	  /* Incremental checksum fixup: subtract old source port and the
	     128-bit IPv6 pseudo-header addresses, add the new port and the
	     32-bit IPv4 pseudo-header addresses. */
	  checksum = &tcp->checksum;
	  csum = ip_csum_sub_even (*checksum, tcp->src_port);
	  csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[0]);
	  csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
	  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
	  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
	  csum = ip_csum_add_even (csum, bibe->out_port);
	  csum = ip_csum_add_even (csum, bibe->out_addr.as_u32);
	  csum = ip_csum_add_even (csum, ste->out_r_addr.as_u32);
	  *checksum = ip_csum_fold (csum);
	}

      /* NOTE(review): the UDP checksum is not adjusted here after the port
         rewrite -- presumably handled elsewhere or relying on a zero
         checksum; confirm against the full translation path. */
      udp->src_port = bibe->out_port;
    }

  ip4->src_address.as_u32 = bibe->out_addr.as_u32;
  ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;

  return 0;
}
1196
/**
 * @brief Hairpin one in2out fragment back into IPv6 (NAT64 host-to-host).
 *
 * The destination is another NAT64-translated host, so instead of emitting
 * IPv4 the packet stays IPv6: the source becomes the session's outside
 * address embedded in the NAT64 prefix and the destination becomes the
 * inside address of the BIB entry matching the outside destination.
 * Checksum deltas are accumulated only for the first fragment (the one
 * carrying the L4 header); subsequent fragments only get address rewrites.
 *
 * @param b    vlib buffer holding the fragment (currently unused here)
 * @param ip6  IPv6 header, rewritten in place
 * @param ctx  fragment context (session index, L4 offset, first_frag flag)
 * @return 0 on success, -1 when a session or BIB lookup fails.
 */
static int
nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6,
			       nat64_in2out_frag_set_ctx_t * ctx)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_st_entry_t *ste;
  nat64_db_bib_entry_t *bibe;
  udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);
  tcp_header_t *tcp = (tcp_header_t *) udp;
  u16 sport = udp->src_port;
  u16 dport = udp->dst_port;
  u16 *checksum;
  ip_csum_t csum;
  ip46_address_t daddr;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  /* checksum/csum are only initialized and folded back under first_frag;
     later fragments carry no L4 header to fix up. */
  if (ctx->first_frag)
    {
      if (ctx->proto == IP_PROTOCOL_UDP)
	checksum = &udp->checksum;
      else
	checksum = &tcp->checksum;

      /* Remove the old pseudo-header addresses and ports now, before the
         headers are rewritten below. */
      csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
      csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
      csum = ip_csum_sub_even (csum, sport);
      csum = ip_csum_sub_even (csum, dport);
    }

  ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
  if (!bibe)
    return -1;

  nat64_session_reset_timeout (ste, ctx->vm);

  sport = bibe->out_port;
  dport = ste->r_port;

  /* New source: outside address embedded back into the NAT64 prefix. */
  nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, bibe->fib_index);

  memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ste->out_r_addr.as_u32;

  /* Find the BIB entry of the hairpinned destination; it may live in any
     worker's db, so scan them all. */
  bibe = 0;
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      bibe = nat64_db_bib_entry_find (db, &daddr, dport, ctx->proto, 0, 0);

      if (bibe)
        break;
    }
  /* *INDENT-ON* */

  if (!bibe)
    return -1;

  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];

  if (ctx->first_frag)
    {
      /* Write the translated ports and add the new pseudo-header fields. */
      udp->dst_port = bibe->in_port;
      udp->src_port = sport;
      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
      csum = ip_csum_add_even (csum, udp->src_port);
      csum = ip_csum_add_even (csum, udp->dst_port);
      *checksum = ip_csum_fold (csum);
    }

  return 0;
}
1278
/**
 * @brief NAT64 in2out fragment-reassembly node.
 *
 * Virtual reassembly: fragments of a flow are matched to a nat_reass
 * context keyed by (src, dst, id, proto).  The first fragment (offset 0)
 * carries the L4 header and establishes/locates the NAT64 session; any
 * fragments that arrived earlier were cached on the reass context and are
 * looped back through this node once the session index is known.
 * Non-first fragments seen before the session exists are cached (buffer
 * kept, not enqueued).  Translated fragments go to ip4-lookup, hairpinned
 * ones back to ip6-lookup.
 */
static uword
nat64_in2out_reass_node_fn (vlib_main_t * vm,
			    vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  nat64_in2out_next_t next_index;
  u32 pkts_processed = 0;
  u32 *fragments_to_drop = 0;	/* buffers evicted by the reass module */
  u32 *fragments_to_loopback = 0;	/* cached frags to re-run here */
  nat64_main_t *nm = &nat64_main;
  u32 thread_index = vlib_get_thread_index ();

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0;
	  vlib_buffer_t *b0;
	  u32 next0;
	  u8 cached0 = 0;
	  ip6_header_t *ip60;
	  u16 l4_offset0, frag_offset0;
	  u8 l4_protocol0;
	  nat_reass_ip6_t *reass0;
	  ip6_frag_hdr_t *frag0;
	  nat64_db_bib_entry_t *bibe0;
	  nat64_db_st_entry_t *ste0;
	  udp_header_t *udp0;
	  snat_protocol_t proto0;
	  u32 sw_if_index0, fib_index0;
	  ip46_address_t saddr0, daddr0;
	  nat64_in2out_frag_set_ctx_t ctx0;
	  nat64_db_t *db = &nm->db[thread_index];

	  /* speculatively enqueue b0 to the current next frame */
	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  b0 = vlib_get_buffer (vm, bi0);
	  next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;

	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
	  fib_index0 =
	    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6,
						 sw_if_index0);

	  ctx0.thread_index = thread_index;

	  /* honor the global "drop fragmented packets" knob */
	  if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
	    {
	      next0 = NAT64_IN2OUT_NEXT_DROP;
	      b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
	      goto trace0;
	    }

	  ip60 = (ip6_header_t *) vlib_buffer_get_current (b0);

	  if (PREDICT_FALSE
	      (ip6_parse
	       (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
		&frag_offset0)))
	    {
	      next0 = NAT64_IN2OUT_NEXT_DROP;
	      b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
	      goto trace0;
	    }

	  /* only TCP and UDP fragments are translated here */
	  if (PREDICT_FALSE
	      (!(l4_protocol0 == IP_PROTOCOL_TCP
		 || l4_protocol0 == IP_PROTOCOL_UDP)))
	    {
	      next0 = NAT64_IN2OUT_NEXT_DROP;
	      b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
	      goto trace0;
	    }

	  udp0 = (udp_header_t *) u8_ptr_add (ip60, l4_offset0);
	  frag0 = (ip6_frag_hdr_t *) u8_ptr_add (ip60, frag_offset0);
	  proto0 = ip_proto_to_snat_proto (l4_protocol0);

	  reass0 = nat_ip6_reass_find_or_create (ip60->src_address,
						 ip60->dst_address,
						 frag0->identification,
						 l4_protocol0,
						 1, &fragments_to_drop);

	  if (PREDICT_FALSE (!reass0))
	    {
	      next0 = NAT64_IN2OUT_NEXT_DROP;
	      b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_REASS];
	      goto trace0;
	    }

	  if (PREDICT_TRUE (ip6_frag_hdr_offset (frag0)))
	    {
	      /* non-first fragment: if the session is not yet known,
	         cache the buffer until the first fragment arrives */
	      ctx0.first_frag = 0;
	      if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
		{
		  if (nat_ip6_reass_add_fragment (reass0, bi0))
		    {
		      b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_FRAG];
		      next0 = NAT64_IN2OUT_NEXT_DROP;
		      goto trace0;
		    }
		  cached0 = 1;
		  goto trace0;
		}
	    }
	  else
	    {
	      /* first fragment: find or create the NAT64 session */
	      ctx0.first_frag = 1;

	      saddr0.as_u64[0] = ip60->src_address.as_u64[0];
	      saddr0.as_u64[1] = ip60->src_address.as_u64[1];
	      daddr0.as_u64[0] = ip60->dst_address.as_u64[0];
	      daddr0.as_u64[1] = ip60->dst_address.as_u64[1];

	      ste0 =
		nat64_db_st_entry_find (db, &saddr0, &daddr0,
					udp0->src_port, udp0->dst_port,
					l4_protocol0, fib_index0, 1);
	      if (!ste0)
		{
		  bibe0 =
		    nat64_db_bib_entry_find (db, &saddr0, udp0->src_port,
					     l4_protocol0, fib_index0, 1);
		  if (!bibe0)
		    {
		      /* no binding yet: allocate an outside address/port
		         and create a dynamic BIB entry */
		      u16 out_port0;
		      ip4_address_t out_addr0;
		      if (nat64_alloc_out_addr_and_port
			  (fib_index0, proto0, &out_addr0, &out_port0,
			   thread_index))
			{
			  next0 = NAT64_IN2OUT_NEXT_DROP;
			  b0->error =
			    node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
			  goto trace0;
			}

		      bibe0 =
			nat64_db_bib_entry_create (db,
						   &ip60->src_address,
						   &out_addr0, udp0->src_port,
						   out_port0, fib_index0,
						   l4_protocol0, 0);
		      if (!bibe0)
			{
			  next0 = NAT64_IN2OUT_NEXT_DROP;
			  b0->error =
			    node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
			  goto trace0;
			}
		    }
		  nat64_extract_ip4 (&ip60->dst_address, &daddr0.ip4,
				     fib_index0);
		  ste0 =
		    nat64_db_st_entry_create (db, bibe0,
					      &ip60->dst_address, &daddr0.ip4,
					      udp0->dst_port);
		  if (!ste0)
		    {
		      next0 = NAT64_IN2OUT_NEXT_DROP;
		      b0->error =
			node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		      goto trace0;
		    }
		}
	      reass0->sess_index = nat64_db_st_entry_get_index (db, ste0);

	      /* release any fragments cached before the session existed */
	      nat_ip6_reass_get_frags (reass0, &fragments_to_loopback);
	    }

	  ctx0.sess_index = reass0->sess_index;
	  ctx0.proto = l4_protocol0;
	  ctx0.vm = vm;
	  ctx0.l4_offset = l4_offset0;

	  if (PREDICT_FALSE (is_hairpinning (&ip60->dst_address)))
	    {
	      next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
	      if (nat64_in2out_frag_hairpinning (b0, ip60, &ctx0))
		{
		  next0 = NAT64_IN2OUT_NEXT_DROP;
		  b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		}
	      goto trace0;
	    }
	  else
	    {
	      if (ip6_to_ip4_fragmented (b0, nat64_in2out_frag_set_cb, &ctx0))
		{
		  next0 = NAT64_IN2OUT_NEXT_DROP;
		  b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
		  goto trace0;
		}
	    }

	trace0:
	  if (PREDICT_FALSE
	      ((node->flags & VLIB_NODE_FLAG_TRACE)
	       && (b0->flags & VLIB_BUFFER_IS_TRACED)))
	    {
	      nat64_in2out_reass_trace_t *t =
		vlib_add_trace (vm, node, b0, sizeof (*t));
	      t->cached = cached0;
	      t->sw_if_index = sw_if_index0;
	      t->next_index = next0;
	    }

	  if (cached0)
	    {
	      /* buffer stays in the reass cache: undo the speculative
	         enqueue done above */
	      n_left_to_next++;
	      to_next--;
	    }
	  else
	    {
	      pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP;

	      /* verify speculative enqueue, maybe switch current next frame */
	      vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					       to_next, n_left_to_next,
					       bi0, next0);
	    }

	  /* once the incoming frame is drained, feed cached fragments back
	     through this node by reusing the frame's vector */
	  if (n_left_from == 0 && vec_len (fragments_to_loopback))
	    {
	      from = vlib_frame_vector_args (frame);
	      u32 len = vec_len (fragments_to_loopback);
	      if (len <= VLIB_FRAME_SIZE)
		{
		  clib_memcpy (from, fragments_to_loopback,
			       sizeof (u32) * len);
		  n_left_from = len;
		  vec_reset_length (fragments_to_loopback);
		}
	      else
		{
		  clib_memcpy (from,
			       fragments_to_loopback + (len -
							VLIB_FRAME_SIZE),
			       sizeof (u32) * VLIB_FRAME_SIZE);
		  n_left_from = VLIB_FRAME_SIZE;
		  _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
		}
	    }
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  vlib_node_increment_counter (vm, nat64_in2out_reass_node.index,
			       NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
			       pkts_processed);

  nat_send_all_to_node (vm, fragments_to_drop, node,
			&node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT],
			NAT64_IN2OUT_NEXT_DROP);

  vec_free (fragments_to_drop);
  vec_free (fragments_to_loopback);
  return frame->n_vectors;
}
1554
/* Graph node registration for NAT64 in2out virtual fragment reassembly. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_in2out_reass_node) = {
  .function = nat64_in2out_reass_node_fn,
  .name = "nat64-in2out-reass",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_reass_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
  .error_strings = nat64_in2out_error_strings,
  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_reass_node,
			      nat64_in2out_reass_node_fn);
1578
/**
 * @brief Trace record for the NAT64 in2out worker-handoff node.
 */
typedef struct
{
  u32 next_worker_index;	/**< worker thread the packet belongs to */
  u8 do_handoff;		/**< non-zero if handed to another worker */
} nat64_in2out_handoff_trace_t;
1584
1585 static u8 *
1586 format_nat64_in2out_handoff_trace (u8 * s, va_list * args)
1587 {
1588   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1589   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1590   nat64_in2out_handoff_trace_t *t =
1591     va_arg (*args, nat64_in2out_handoff_trace_t *);
1592   char *m;
1593
1594   m = t->do_handoff ? "next worker" : "same worker";
1595   s = format (s, "NAT64-IN2OUT-HANDOFF: %s %d", m, t->next_worker_index);
1596
1597   return s;
1598 }
1599
/**
 * @brief NAT64 in2out worker-handoff node.
 *
 * Classifies each packet by nat64_get_worker_in2out (keyed on the IPv6
 * source address) and either keeps it on this thread (enqueued straight to
 * nat64-in2out) or ships it to the owning worker via the in2out frame
 * queue.  Packets destined for a congested worker queue are dropped via
 * the error node.  Handoff queue elements and the congestion table are
 * kept in per-thread statics and reused across calls.
 */
static inline uword
nat64_in2out_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
			      vlib_frame_t * frame)
{
  nat64_main_t *nm = &nat64_main;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0;
  static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
  static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
    = 0;
  vlib_frame_queue_elt_t *hf = 0;
  vlib_frame_queue_t *fq;
  vlib_frame_t *f = 0, *d = 0;
  int i;
  u32 n_left_to_next_worker = 0, *to_next_worker = 0;
  u32 next_worker_index = 0;
  u32 current_worker_index = ~0;
  u32 thread_index = vlib_get_thread_index ();
  u32 fq_index;
  u32 to_node_index;

  fq_index = nm->fq_in2out_index;
  to_node_index = nat64_in2out_node.index;

  /* lazily size the per-thread handoff state to the number of workers */
  if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
    {
      vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);

      vec_validate_init_empty (congested_handoff_queue_by_worker_index,
			       tm->n_vlib_mains - 1,
			       (vlib_frame_queue_t *) (~0));
    }

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  while (n_left_from > 0)
    {
      u32 bi0;
      vlib_buffer_t *b0;
      ip6_header_t *ip0;
      u8 do_handoff;

      bi0 = from[0];
      from += 1;
      n_left_from -= 1;

      b0 = vlib_get_buffer (vm, bi0);

      ip0 = vlib_buffer_get_current (b0);

      next_worker_index = nat64_get_worker_in2out (&ip0->src_address);

      if (PREDICT_FALSE (next_worker_index != thread_index))
	{
	  do_handoff = 1;

	  if (next_worker_index != current_worker_index)
	    {
	      /* drop instead of queueing when the target is congested */
	      fq =
		is_vlib_frame_queue_congested (fq_index, next_worker_index,
					       30,
					       congested_handoff_queue_by_worker_index);

	      if (fq)
		{
		  /* if this is 1st frame */
		  if (!d)
		    {
		      d = vlib_get_frame_to_node (vm, nm->error_node_index);
		      to_next_drop = vlib_frame_vector_args (d);
		    }

		  to_next_drop[0] = bi0;
		  to_next_drop += 1;
		  d->n_vectors++;
		  goto trace0;
		}

	      /* switching target worker: account for what we already put
	         in the previous worker's queue element */
	      if (hf)
		hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;

	      hf =
		vlib_get_worker_handoff_queue_elt (fq_index,
						   next_worker_index,
						   handoff_queue_elt_by_worker_index);
	      n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
	      to_next_worker = &hf->buffer_index[hf->n_vectors];
	      current_worker_index = next_worker_index;
	    }

	  ASSERT (to_next_worker != 0);

	  /* enqueue to correct worker thread */
	  to_next_worker[0] = bi0;
	  to_next_worker++;
	  n_left_to_next_worker--;

	  if (n_left_to_next_worker == 0)
	    {
	      hf->n_vectors = VLIB_FRAME_SIZE;
	      vlib_put_frame_queue_elt (hf);
	      current_worker_index = ~0;
	      handoff_queue_elt_by_worker_index[next_worker_index] = 0;
	      hf = 0;
	    }
	}
      else
	{
	  do_handoff = 0;
	  /* if this is 1st frame */
	  if (!f)
	    {
	      f = vlib_get_frame_to_node (vm, to_node_index);
	      to_next = vlib_frame_vector_args (f);
	    }

	  to_next[0] = bi0;
	  to_next += 1;
	  f->n_vectors++;
	}

    trace0:
      if (PREDICT_FALSE
	  ((node->flags & VLIB_NODE_FLAG_TRACE)
	   && (b0->flags & VLIB_BUFFER_IS_TRACED)))
	{
	  nat64_in2out_handoff_trace_t *t =
	    vlib_add_trace (vm, node, b0, sizeof (*t));
	  t->next_worker_index = next_worker_index;
	  t->do_handoff = do_handoff;
	}
    }

  if (f)
    vlib_put_frame_to_node (vm, to_node_index, f);

  if (d)
    vlib_put_frame_to_node (vm, nm->error_node_index, d);

  if (hf)
    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;

  /* Ship frames to the worker nodes */
  for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
    {
      if (handoff_queue_elt_by_worker_index[i])
	{
	  hf = handoff_queue_elt_by_worker_index[i];
	  /*
	   * It works better to let the handoff node
	   * rate-adapt, always ship the handoff queue element.
	   */
	  if (1 || hf->n_vectors == hf->last_n_vectors)
	    {
	      vlib_put_frame_queue_elt (hf);
	      handoff_queue_elt_by_worker_index[i] = 0;
	    }
	  else
	    hf->last_n_vectors = hf->n_vectors;
	}
      congested_handoff_queue_by_worker_index[i] =
	(vlib_frame_queue_t *) (~0);
    }
  hf = 0;
  current_worker_index = ~0;
  return frame->n_vectors;
}
1768
/* Graph node registration for the NAT64 in2out worker-handoff node.
   Its only explicit next node is error-drop; real forwarding happens via
   frame queues and vlib_put_frame_to_node in the node function. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = {
  .function = nat64_in2out_handoff_node_fn,
  .name = "nat64-in2out-handoff",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_handoff_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_handoff_node,
			      nat64_in2out_handoff_node_fn);
1787
1788 /*
1789  * fd.io coding-style-patch-verification: ON
1790  *
1791  * Local Variables:
1792  * eval: (c-set-style "gnu")
1793  * End:
1794  */