cdfe9b8d80c9eee372e21109c7063c4a206d912b
[vpp.git] / src / plugins / nat / nat64_in2out.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT64 IPv6 to IPv4 translation (inside to outside network)
18  */
19
20 #include <nat/nat64.h>
21 #include <nat/nat_reass.h>
22 #include <nat/nat_inlines.h>
23 #include <vnet/ip/ip6_to_ip4.h>
24 #include <vnet/fib/fib_table.h>
25
/* Per-packet trace record for the in2out fast/slow path nodes. */
typedef struct
{
  u32 sw_if_index;		/* RX interface of the traced packet */
  u32 next_index;		/* next node the packet was dispatched to */
  u8 is_slow_path;		/* 1 when traced by the slow-path node */
} nat64_in2out_trace_t;
32
33 static u8 *
34 format_nat64_in2out_trace (u8 * s, va_list * args)
35 {
36   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
37   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
38   nat64_in2out_trace_t *t = va_arg (*args, nat64_in2out_trace_t *);
39   char *tag;
40
41   tag = t->is_slow_path ? "NAT64-in2out-slowpath" : "NAT64-in2out";
42
43   s =
44     format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
45             t->next_index);
46
47   return s;
48 }
49
/* Per-packet trace record for the in2out reassembly node. */
typedef struct
{
  u32 sw_if_index;		/* RX interface of the traced fragment */
  u32 next_index;		/* next node the fragment was dispatched to */
  u8 cached;			/* 1 if the fragment was cached, 0 if translated */
} nat64_in2out_reass_trace_t;
56
57 static u8 *
58 format_nat64_in2out_reass_trace (u8 * s, va_list * args)
59 {
60   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
61   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
62   nat64_in2out_reass_trace_t *t =
63     va_arg (*args, nat64_in2out_reass_trace_t *);
64
65   s =
66     format (s, "NAT64-in2out-reass: sw_if_index %d, next index %d, status %s",
67             t->sw_if_index, t->next_index,
68             t->cached ? "cached" : "translated");
69
70   return s;
71 }
72
/* Graph-node registrations defined later in this file. */
vlib_node_registration_t nat64_in2out_node;
vlib_node_registration_t nat64_in2out_slowpath_node;
vlib_node_registration_t nat64_in2out_reass_node;
vlib_node_registration_t nat64_in2out_handoff_node;
77
/* (symbol, counter description) pairs for the in2out error counters. */
#define foreach_nat64_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "unsupported protocol")          \
_(IN2OUT_PACKETS, "good in2out packets processed")       \
_(NO_TRANSLATION, "no translation")                      \
_(UNKNOWN, "unknown")                                    \
_(DROP_FRAGMENT, "Drop fragment")                        \
_(MAX_REASS, "Maximum reassemblies exceeded")            \
_(MAX_FRAG, "Maximum fragments per reassembly exceeded")
86
87
/* Error counter indices generated from foreach_nat64_in2out_error. */
typedef enum
{
#define _(sym,str) NAT64_IN2OUT_ERROR_##sym,
  foreach_nat64_in2out_error
#undef _
    NAT64_IN2OUT_N_ERROR,	/* number of error counters */
} nat64_in2out_error_t;
95
/* Human-readable strings matching nat64_in2out_error_t, index for index. */
static char *nat64_in2out_error_strings[] = {
#define _(sym,string) string,
  foreach_nat64_in2out_error
#undef _
};
101
/* Next-node indices for the in2out nodes. */
typedef enum
{
  NAT64_IN2OUT_NEXT_IP4_LOOKUP,	/* translated packet, IPv4 forwarding */
  NAT64_IN2OUT_NEXT_IP6_LOOKUP,	/* untranslated/hairpinned, IPv6 forwarding */
  NAT64_IN2OUT_NEXT_DROP,
  NAT64_IN2OUT_NEXT_SLOWPATH,	/* punt to the slow-path node */
  NAT64_IN2OUT_NEXT_REASS,	/* fragment, punt to reassembly */
  NAT64_IN2OUT_N_NEXT,
} nat64_in2out_next_t;
111
/* Context handed to the ip6_to_ip4 translation callbacks below. */
typedef struct nat64_in2out_set_ctx_t_
{
  vlib_buffer_t *b;		/* buffer being translated */
  vlib_main_t *vm;		/* vlib main, used for session timestamps */
  u32 thread_index;		/* selects the per-thread NAT64 db */
} nat64_in2out_set_ctx_t;
118
/**
 * @brief Check whether the destination is one of our own IPv6 addresses.
 *
 * Packets addressed to an address configured on the RX interface are
 * local traffic and must not be NAT64-translated.
 *
 * @param sw_if_index RX software interface index.
 * @param ip6_addr    Destination IPv6 address of the packet.
 *
 * @returns 1 when the address is local (skip translation), otherwise 0.
 */
static inline u8
nat64_not_translate (u32 sw_if_index, ip6_address_t ip6_addr)
{
  ip6_address_t *addr;
  ip6_main_t *im6 = &ip6_main;
  ip_lookup_main_t *lm6 = &im6->lookup_main;
  ip_interface_address_t *ia = 0;

  /* *INDENT-OFF* */
  foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
  ({
        addr = ip_interface_address_get_address (lm6, ia);
        if (0 == ip6_address_compare (addr, &ip6_addr))
                return 1;
  }));
  /* *INDENT-ON* */

  return 0;
}
138
/**
 * @brief Check whether is a hairpinning.
 *
 * If the destination IP address of the packet is an IPv4 address assigned to
 * the NAT64 itself, then the packet is a hairpin packet.
 *
 * @param dst_addr Destination address of the packet.
 *
 * @returns 1 if hairpinning, otherwise 0.
 */
static_always_inline int
is_hairpinning (ip6_address_t * dst_addr)
{
  nat64_main_t *nm = &nat64_main;
  int i;

  for (i = 0; i < vec_len (nm->addr_pool); i++)
    {
      /* Only the low 32 bits are compared — assumes the IPv4 address is
         embedded in the last word of the NAT64 prefix (RFC 6052 /96). */
      if (nm->addr_pool[i].addr.as_u32 == dst_addr->as_u32[3])
	return 1;
    }

  return 0;
}
163
/**
 * @brief ip6_to_ip4 header-set callback for in2out TCP/UDP packets.
 *
 * Finds (or dynamically creates) the BIB and session-table entries for
 * the flow, then rewrites the outer IPv4 source address/port and
 * destination address.  For TCP the checksum is incrementally adjusted
 * for the source-port change.
 *
 * @param ip6 Original IPv6 header.
 * @param ip4 IPv4 header being constructed.
 * @param arg nat64_in2out_set_ctx_t (buffer, vlib main, thread index).
 *
 * @returns 0 on success, -1 when no translation can be made.
 */
static int
nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
			     void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  /* TCP and UDP keep ports at the same offsets, so a UDP view is used
     to read/write ports for both protocols */
  udp_header_t *udp = ip6_next_header (ip6);
  u8 proto = ip6->protocol;
  u16 sport = udp->src_port;
  u16 dport = udp->dst_port;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  /* Existing session for this 5-tuple? */
  ste =
    nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
			    fib_index, 1);

  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;
    }
  else
    {
      /* No session yet: reuse an existing BIB entry or allocate a new
         outside address/port and create one */
      bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1);

      if (!bibe)
	{
	  u16 out_port;
	  ip4_address_t out_addr;
	  if (nat64_alloc_out_addr_and_port
	      (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
	       &out_port, ctx->thread_index))
	    return -1;

	  bibe =
	    nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr,
				       sport, out_port, fib_index, proto, 0);
	  if (!bibe)
	    return -1;
	}

      /* Remote IPv4 address is embedded in the IPv6 destination */
      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
      ste =
	nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
				  &daddr.ip4, dport);
      if (!ste)
	return -1;
    }

  nat64_session_reset_timeout (ste, ctx->vm);

  ip4->src_address.as_u32 = bibe->out_addr.as_u32;
  udp->src_port = bibe->out_port;

  ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;

  if (proto == IP_PROTOCOL_TCP)
    {
      /* Incrementally fix the TCP checksum for the translated source port */
      u16 *checksum;
      ip_csum_t csum;
      tcp_header_t *tcp = ip6_next_header (ip6);

      checksum = &tcp->checksum;
      csum = ip_csum_sub_even (*checksum, sport);
      csum = ip_csum_add_even (csum, udp->src_port);
      *checksum = ip_csum_fold (csum);
    }

  return 0;
}
248
/**
 * @brief ip6_to_ip4 header-set callback for in2out ICMP packets.
 *
 * Echo request/reply are translated statefully using the ICMP identifier
 * as the "port"; other ICMP messages (errors) are given the first address
 * from the pool as source and the embedded IPv4 destination.
 *
 * NOTE(review): the type field is compared against ICMP4_echo_* values —
 * presumably the ICMP type has already been rewritten to its ICMPv4 value
 * by the generic icmp6-to-icmp4 translator before this callback runs;
 * confirm against ip6_to_ip4.h.
 *
 * @returns 0 on success, -1 when no translation can be made.
 */
static int
nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  icmp46_header_t *icmp = ip6_next_header (ip6);
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply)
    {
      /* ICMP identifier (3rd u16 of the ICMP header) acts as the port */
      u16 in_id = ((u16 *) (icmp))[2];
      ste =
	nat64_db_st_entry_find (db, &saddr, &daddr, in_id, 0,
				IP_PROTOCOL_ICMP, fib_index, 1);

      if (ste)
	{
	  bibe =
	    nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP,
					 ste->bibe_index);
	  if (!bibe)
	    return -1;
	}
      else
	{
	  bibe =
	    nat64_db_bib_entry_find (db, &saddr, in_id,
				     IP_PROTOCOL_ICMP, fib_index, 1);

	  if (!bibe)
	    {
	      /* Allocate an outside address and identifier */
	      u16 out_id;
	      ip4_address_t out_addr;
	      if (nat64_alloc_out_addr_and_port
		  (fib_index, SNAT_PROTOCOL_ICMP, &out_addr, &out_id,
		   ctx->thread_index))
		return -1;

	      bibe =
		nat64_db_bib_entry_create (db, &ip6->src_address,
					   &out_addr, in_id, out_id,
					   fib_index, IP_PROTOCOL_ICMP, 0);
	      if (!bibe)
		return -1;
	    }

	  nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
	  ste =
	    nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
				      &daddr.ip4, 0);
	  if (!ste)
	    return -1;
	}

      nat64_session_reset_timeout (ste, ctx->vm);

      ip4->src_address.as_u32 = bibe->out_addr.as_u32;
      ((u16 *) (icmp))[2] = bibe->out_port;

      ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
    }
  else
    {
      /* ICMP error: stateless rewrite using the first pool address */
      if (!vec_len (nm->addr_pool))
	return -1;

      ip4->src_address.as_u32 = nm->addr_pool[0].addr.as_u32;
      nat64_extract_ip4 (&ip6->dst_address, &ip4->dst_address, fib_index);
    }

  return 0;
}
334
/**
 * @brief ip6_to_ip4 callback for the inner packet of an ICMP error.
 *
 * The inner (embedded) packet travels in the opposite direction to the
 * error message, so the session lookup swaps source/destination and only
 * existing sessions are matched — no state is created here.
 *
 * @returns 0 on success, -1 when there is no matching session.
 */
static int
nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
				void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *ctx = arg;
  nat64_db_st_entry_t *ste;
  nat64_db_bib_entry_t *bibe;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  u8 proto = ip6->protocol;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  if (proto == IP_PROTOCOL_ICMP6)
    {
      icmp46_header_t *icmp = ip6_next_header (ip6);
      /* ICMP identifier (3rd u16 of the ICMP header) acts as the port */
      u16 in_id = ((u16 *) (icmp))[2];
      proto = IP_PROTOCOL_ICMP;

      /* Only echo request/reply can appear inside a translatable error */
      if (!
	  (icmp->type == ICMP4_echo_request
	   || icmp->type == ICMP4_echo_reply))
	return -1;

      /* Reversed direction: daddr/saddr swapped in the lookup */
      ste =
	nat64_db_st_entry_find (db, &daddr, &saddr, in_id, 0, proto,
				fib_index, 1);
      if (!ste)
	return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;

      ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
      ((u16 *) (icmp))[2] = bibe->out_port;
      ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
    }
  else
    {
      /* TCP and UDP share port offsets; both views alias the L4 header */
      udp_header_t *udp = ip6_next_header (ip6);
      tcp_header_t *tcp = ip6_next_header (ip6);
      u16 *checksum;
      ip_csum_t csum;

      u16 sport = udp->src_port;
      u16 dport = udp->dst_port;

      /* Reversed direction: ports and addresses swapped in the lookup */
      ste =
	nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
				fib_index, 1);
      if (!ste)
	return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;

      ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
      udp->dst_port = bibe->out_port;
      ip4->src_address.as_u32 = ste->out_r_addr.as_u32;

      /* Incrementally fix the L4 checksum for the rewritten port */
      if (proto == IP_PROTOCOL_TCP)
	checksum = &tcp->checksum;
      else
	checksum = &udp->checksum;
      csum = ip_csum_sub_even (*checksum, dport);
      csum = ip_csum_add_even (csum, udp->dst_port);
      *checksum = ip_csum_fold (csum);
    }

  return 0;
}
417
/* Walk context for unk_proto_st_walk: in/out parameters of the search
   for an outside address already used by this host's TCP/UDP sessions. */
typedef struct unk_proto_st_walk_ctx_t_
{
  ip6_address_t src_addr;	/* inside source to match */
  ip6_address_t dst_addr;	/* inside destination to match */
  ip4_address_t out_addr;	/* result: usable outside address, 0 if none */
  u32 fib_index;
  u32 thread_index;
  u8 proto;			/* the unknown L4 protocol being set up */
} unk_proto_st_walk_ctx_t;
427
428 static int
429 unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg)
430 {
431   nat64_main_t *nm = &nat64_main;
432   unk_proto_st_walk_ctx_t *ctx = arg;
433   nat64_db_bib_entry_t *bibe;
434   ip46_address_t saddr, daddr;
435   nat64_db_t *db = &nm->db[ctx->thread_index];
436
437   if (ip46_address_is_equal (&ste->in_r_addr, &ctx->dst_addr))
438     {
439       bibe = nat64_db_bib_entry_by_index (db, ste->proto, ste->bibe_index);
440       if (!bibe)
441         return -1;
442
443       if (ip46_address_is_equal (&bibe->in_addr, &ctx->src_addr)
444           && bibe->fib_index == ctx->fib_index)
445         {
446           memset (&saddr, 0, sizeof (saddr));
447           saddr.ip4.as_u32 = bibe->out_addr.as_u32;
448           memset (&daddr, 0, sizeof (daddr));
449           nat64_extract_ip4 (&ctx->dst_addr, &daddr.ip4, ctx->fib_index);
450
451           if (nat64_db_st_entry_find
452               (db, &daddr, &saddr, 0, 0, ctx->proto, ctx->fib_index, 0))
453             return -1;
454
455           ctx->out_addr.as_u32 = bibe->out_addr.as_u32;
456           return 1;
457         }
458     }
459
460   return 0;
461 }
462
/**
 * @brief ip6_to_ip4 header-set callback for unknown-protocol packets.
 *
 * Protocols other than TCP/UDP/ICMP are translated address-only (no
 * ports).  When a new mapping is needed, the outside address of an
 * existing TCP/UDP session from the same host to the same destination
 * is preferred; otherwise a free pool address is chosen.
 *
 * @returns 0 on success, -1 when no translation can be made.
 */
static int
nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
			       void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *s_ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr, addr;
  u32 sw_if_index, fib_index;
  u8 proto = ip6->protocol;
  int i;
  nat64_db_t *db = &nm->db[s_ctx->thread_index];

  sw_if_index = vnet_buffer (s_ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  /* Address-only session: ports are zero */
  ste =
    nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1);

  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;
    }
  else
    {
      bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1);

      if (!bibe)
	{
	  /* Choose same out address as for TCP/UDP session to same dst */
	  unk_proto_st_walk_ctx_t ctx = {
	    .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
	    .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
	    .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
	    .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
	    .out_addr.as_u32 = 0,
	    .fib_index = fib_index,
	    .proto = proto,
	    .thread_index = s_ctx->thread_index,
	  };

	  nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);

	  if (!ctx.out_addr.as_u32)
	    nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);

	  /* Verify if out address is not already in use for protocol */
	  memset (&addr, 0, sizeof (addr));
	  addr.ip4.as_u32 = ctx.out_addr.as_u32;
	  if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
	    ctx.out_addr.as_u32 = 0;

	  /* Fall back to the first pool address free for this protocol */
	  if (!ctx.out_addr.as_u32)
	    {
	      for (i = 0; i < vec_len (nm->addr_pool); i++)
		{
		  addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
		  if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
		    break;
		}
	    }

	  if (!ctx.out_addr.as_u32)
	    return -1;

	  bibe =
	    nat64_db_bib_entry_create (db, &ip6->src_address,
				       &ctx.out_addr, 0, 0, fib_index, proto,
				       0);
	  if (!bibe)
	    return -1;
	}

      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
      ste =
	nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0);
      if (!ste)
	return -1;
    }

  nat64_session_reset_timeout (ste, s_ctx->vm);

  ip4->src_address.as_u32 = bibe->out_addr.as_u32;
  ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;

  return 0;
}
559
560
561
/**
 * @brief Translate a hairpinned in2out TCP/UDP packet (IPv6 -> IPv6).
 *
 * The packet targets another NAT64 client, so it stays IPv6: the source
 * side is translated in2out (creating state if needed), then the
 * destination BIB entry is looked up across all per-thread dbs to map
 * the outside address/port back to the inside IPv6 host.  The L4
 * checksum is adjusted incrementally for all rewritten fields.
 *
 * @returns 0 on success, -1 when no translation can be made.
 */
static int
nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
				  ip6_header_t * ip6, u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  /* TCP and UDP share port offsets; both views alias the L4 header */
  udp_header_t *udp = ip6_next_header (ip6);
  tcp_header_t *tcp = ip6_next_header (ip6);
  u8 proto = ip6->protocol;
  u16 sport = udp->src_port;
  u16 dport = udp->dst_port;
  u16 *checksum;
  ip_csum_t csum;
  nat64_db_t *db = &nm->db[thread_index];

  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  if (proto == IP_PROTOCOL_UDP)
    checksum = &udp->checksum;
  else
    checksum = &tcp->checksum;

  /* Remove the old addresses/ports from the checksum up front;
     the new values are added back after the rewrite below */
  csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, sport);
  csum = ip_csum_sub_even (csum, dport);

  ste =
    nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
			    fib_index, 1);

  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;
    }
  else
    {
      /* No session yet: reuse or create the source-side BIB entry */
      bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1);

      if (!bibe)
	{
	  u16 out_port;
	  ip4_address_t out_addr;
	  if (nat64_alloc_out_addr_and_port
	      (fib_index, ip_proto_to_snat_proto (proto), &out_addr,
	       &out_port, thread_index))
	    return -1;

	  bibe =
	    nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr,
				       sport, out_port, fib_index, proto, 0);
	  if (!bibe)
	    return -1;
	}

      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
      ste =
	nat64_db_st_entry_create (db, bibe, &ip6->dst_address,
				  &daddr.ip4, dport);
      if (!ste)
	return -1;
    }

  nat64_session_reset_timeout (ste, vm);

  /* Rewrite the source side to its outside mapping (kept as IPv6) */
  sport = udp->src_port = bibe->out_port;
  nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);

  memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ste->out_r_addr.as_u32;

  /* The destination mapping may live in any thread's db */
  bibe = 0;
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, 0, 0);

      if (bibe)
	break;
    }
  /* *INDENT-ON* */

  if (!bibe)
    return -1;

  /* Rewrite the destination to the inside host behind the mapping */
  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
  udp->dst_port = bibe->in_port;

  csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
  csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
  csum = ip_csum_add_even (csum, udp->src_port);
  csum = ip_csum_add_even (csum, udp->dst_port);
  *checksum = ip_csum_fold (csum);

  return 0;
}
675
/**
 * @brief Translate a hairpinned in2out ICMPv6 error message.
 *
 * Only ICMP errors embedding a TCP/UDP packet are handled here; echo
 * request/reply return -1 (presumably handled by the caller's echo
 * path — confirm at the call site).  The embedded inner packet is
 * rewritten using existing session state, then the outer header and
 * ICMP checksum are rebuilt.
 *
 * @returns 0 on success, -1 when the packet cannot be handled.
 */
static int
nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
			       ip6_header_t * ip6, u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  icmp46_header_t *icmp = ip6_next_header (ip6);
  ip6_header_t *inner_ip6;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  u8 proto;
  udp_header_t *udp;
  tcp_header_t *tcp;
  u16 *checksum, sport, dport;
  ip_csum_t csum;
  nat64_db_t *db = &nm->db[thread_index];

  if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
    return -1;

  /* Embedded (offending) packet starts after the 8-byte ICMP header */
  inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);

  proto = inner_ip6->protocol;

  if (proto == IP_PROTOCOL_ICMP6)
    return -1;

  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = inner_ip6->src_address.as_u64[0];
  saddr.as_u64[1] = inner_ip6->src_address.as_u64[1];
  daddr.as_u64[0] = inner_ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = inner_ip6->dst_address.as_u64[1];

  /* TCP and UDP share port offsets; both views alias the inner L4 header */
  udp = ip6_next_header (inner_ip6);
  tcp = ip6_next_header (inner_ip6);

  sport = udp->src_port;
  dport = udp->dst_port;

  if (proto == IP_PROTOCOL_UDP)
    checksum = &udp->checksum;
  else
    checksum = &tcp->checksum;

  /* Remove the old inner addresses/ports from the inner L4 checksum */
  csum = ip_csum_sub_even (*checksum, inner_ip6->src_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, sport);
  csum = ip_csum_sub_even (csum, dport);

  /* Inner packet travels in the opposite direction: swapped lookup */
  ste =
    nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
			    fib_index, 1);
  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
  if (!bibe)
    return -1;

  dport = udp->dst_port = bibe->out_port;
  nat64_compose_ip6 (&inner_ip6->dst_address, &bibe->out_addr, fib_index);

  memset (&saddr, 0, sizeof (saddr));
  memset (&daddr, 0, sizeof (daddr));
  saddr.ip4.as_u32 = ste->out_r_addr.as_u32;
  daddr.ip4.as_u32 = bibe->out_addr.as_u32;

  /* The peer's session may live in any thread's db */
  ste = 0;
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      ste = nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
				    0, 0);

      if (ste)
	break;
    }
  /* *INDENT-ON* */

  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
  if (!bibe)
    return -1;

  inner_ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
  inner_ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
  udp->src_port = bibe->in_port;

  /* Add the rewritten inner fields back into the inner L4 checksum */
  csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]);
  csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]);
  csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]);
  csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]);
  csum = ip_csum_add_even (csum, udp->src_port);
  csum = ip_csum_add_even (csum, udp->dst_port);
  *checksum = ip_csum_fold (csum);

  if (!vec_len (nm->addr_pool))
    return -1;

  /* Rewrite the outer header: error now comes from the NAT64 itself
     toward the inner packet's (rewritten) source */
  nat64_compose_ip6 (&ip6->src_address, &nm->addr_pool[0].addr, fib_index);
  ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
  ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];

  /* Recompute the ICMP checksum over pseudo-header + payload from scratch */
  icmp->checksum = 0;
  csum = ip_csum_with_carry (0, ip6->payload_length);
  csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol));
  csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]);
  csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]);
  csum =
    ip_incremental_checksum (csum, icmp,
			     clib_net_to_host_u16 (ip6->payload_length));
  icmp->checksum = ~ip_csum_fold (csum);

  return 0;
}
801
/**
 * @brief Translate a hairpinned in2out unknown-protocol packet.
 *
 * Address-only variant of the hairpinning path: translates the source
 * side (creating state if needed, preferring an outside address already
 * used by a TCP/UDP session to the same destination), then maps the
 * destination back to the inside IPv6 host via its BIB entry.
 *
 * @returns 0 on success, -1 when no translation can be made.
 */
static int
nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
				    ip6_header_t * ip6, u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr, addr;
  u32 sw_if_index, fib_index;
  u8 proto = ip6->protocol;
  int i;
  nat64_db_t *db = &nm->db[thread_index];

  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  /* Address-only session: ports are zero */
  ste =
    nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1);

  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;
    }
  else
    {
      bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1);

      if (!bibe)
	{
	  /* Choose same out address as for TCP/UDP session to same dst */
	  unk_proto_st_walk_ctx_t ctx = {
	    .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
	    .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
	    .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
	    .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
	    .out_addr.as_u32 = 0,
	    .fib_index = fib_index,
	    .proto = proto,
	    .thread_index = thread_index,
	  };

	  nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);

	  if (!ctx.out_addr.as_u32)
	    nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);

	  /* Verify if out address is not already in use for protocol */
	  memset (&addr, 0, sizeof (addr));
	  addr.ip4.as_u32 = ctx.out_addr.as_u32;
	  if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
	    ctx.out_addr.as_u32 = 0;

	  /* Fall back to the first pool address free for this protocol */
	  if (!ctx.out_addr.as_u32)
	    {
	      for (i = 0; i < vec_len (nm->addr_pool); i++)
		{
		  addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
		  if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
		    break;
		}
	    }

	  if (!ctx.out_addr.as_u32)
	    return -1;

	  bibe =
	    nat64_db_bib_entry_create (db, &ip6->src_address,
				       &ctx.out_addr, 0, 0, fib_index, proto,
				       0);
	  if (!bibe)
	    return -1;
	}

      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
      ste =
	nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0);
      if (!ste)
	return -1;
    }

  nat64_session_reset_timeout (ste, vm);

  /* Rewrite the source side to its outside mapping (kept as IPv6) */
  nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);

  memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ste->out_r_addr.as_u32;

  /* The destination mapping may live in any thread's db */
  bibe = 0;
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, 0, 0);

      if (bibe)
	break;
    }
  /* *INDENT-ON* */

  if (!bibe)
    return -1;

  /* Rewrite the destination to the inside host behind the mapping */
  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];

  return 0;
}
916
917 static inline uword
918 nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
919                              vlib_frame_t * frame, u8 is_slow_path)
920 {
921   u32 n_left_from, *from, *to_next;
922   nat64_in2out_next_t next_index;
923   u32 pkts_processed = 0;
924   u32 stats_node_index;
925   u32 thread_index = vm->thread_index;
926
927   stats_node_index =
928     is_slow_path ? nat64_in2out_slowpath_node.index : nat64_in2out_node.index;
929
930   from = vlib_frame_vector_args (frame);
931   n_left_from = frame->n_vectors;
932   next_index = node->cached_next_index;
933
934   while (n_left_from > 0)
935     {
936       u32 n_left_to_next;
937
938       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
939
940       while (n_left_from > 0 && n_left_to_next > 0)
941         {
942           u32 bi0;
943           vlib_buffer_t *b0;
944           u32 next0;
945           ip6_header_t *ip60;
946           u16 l4_offset0, frag_offset0;
947           u8 l4_protocol0;
948           u32 proto0;
949           nat64_in2out_set_ctx_t ctx0;
950           u32 sw_if_index0;
951
952           /* speculatively enqueue b0 to the current next frame */
953           bi0 = from[0];
954           to_next[0] = bi0;
955           from += 1;
956           to_next += 1;
957           n_left_from -= 1;
958           n_left_to_next -= 1;
959
960           b0 = vlib_get_buffer (vm, bi0);
961           ip60 = vlib_buffer_get_current (b0);
962
963           ctx0.b = b0;
964           ctx0.vm = vm;
965           ctx0.thread_index = thread_index;
966
967           next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;
968
969           if (PREDICT_FALSE
970               (ip6_parse
971                (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
972                 &frag_offset0)))
973             {
974               next0 = NAT64_IN2OUT_NEXT_DROP;
975               b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
976               goto trace0;
977             }
978
979           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
980
981           if (nat64_not_translate (sw_if_index0, ip60->dst_address))
982             {
983               next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
984               goto trace0;
985             }
986
987           proto0 = ip_proto_to_snat_proto (l4_protocol0);
988
989           if (is_slow_path)
990             {
991               if (PREDICT_TRUE (proto0 == ~0))
992                 {
993                   if (is_hairpinning (&ip60->dst_address))
994                     {
995                       next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
996                       if (nat64_in2out_unk_proto_hairpinning
997                           (vm, b0, ip60, thread_index))
998                         {
999                           next0 = NAT64_IN2OUT_NEXT_DROP;
1000                           b0->error =
1001                             node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1002                         }
1003                       goto trace0;
1004                     }
1005
1006                   if (ip6_to_ip4 (b0, nat64_in2out_unk_proto_set_cb, &ctx0))
1007                     {
1008                       next0 = NAT64_IN2OUT_NEXT_DROP;
1009                       b0->error =
1010                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1011                       goto trace0;
1012                     }
1013                 }
1014               goto trace0;
1015             }
1016           else
1017             {
1018               if (PREDICT_FALSE (proto0 == ~0))
1019                 {
1020                   next0 = NAT64_IN2OUT_NEXT_SLOWPATH;
1021                   goto trace0;
1022                 }
1023             }
1024
1025           if (PREDICT_FALSE
1026               (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION))
1027             {
1028               next0 = NAT64_IN2OUT_NEXT_REASS;
1029               goto trace0;
1030             }
1031
1032           if (proto0 == SNAT_PROTOCOL_ICMP)
1033             {
1034               if (is_hairpinning (&ip60->dst_address))
1035                 {
1036                   next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1037                   if (nat64_in2out_icmp_hairpinning
1038                       (vm, b0, ip60, thread_index))
1039                     {
1040                       next0 = NAT64_IN2OUT_NEXT_DROP;
1041                       b0->error =
1042                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1043                     }
1044                   goto trace0;
1045                 }
1046
1047               if (icmp6_to_icmp
1048                   (b0, nat64_in2out_icmp_set_cb, &ctx0,
1049                    nat64_in2out_inner_icmp_set_cb, &ctx0))
1050                 {
1051                   next0 = NAT64_IN2OUT_NEXT_DROP;
1052                   b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1053                   goto trace0;
1054                 }
1055             }
1056           else if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
1057             {
1058               if (is_hairpinning (&ip60->dst_address))
1059                 {
1060                   next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
1061                   if (nat64_in2out_tcp_udp_hairpinning
1062                       (vm, b0, ip60, thread_index))
1063                     {
1064                       next0 = NAT64_IN2OUT_NEXT_DROP;
1065                       b0->error =
1066                         node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1067                     }
1068                   goto trace0;
1069                 }
1070
1071               if (ip6_to_ip4_tcp_udp
1072                   (b0, nat64_in2out_tcp_udp_set_cb, &ctx0, 0))
1073                 {
1074                   next0 = NAT64_IN2OUT_NEXT_DROP;
1075                   b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
1076                   goto trace0;
1077                 }
1078             }
1079
1080         trace0:
1081           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1082                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1083             {
1084               nat64_in2out_trace_t *t =
1085                 vlib_add_trace (vm, node, b0, sizeof (*t));
1086               t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
1087               t->next_index = next0;
1088               t->is_slow_path = is_slow_path;
1089             }
1090
1091           pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP;
1092
1093           /* verify speculative enqueue, maybe switch current next frame */
1094           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1095                                            n_left_to_next, bi0, next0);
1096         }
1097       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1098     }
1099   vlib_node_increment_counter (vm, stats_node_index,
1100                                NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
1101                                pkts_processed);
1102   return frame->n_vectors;
1103 }
1104
/**
 * @brief NAT64 in2out fast-path node function (TCP/UDP/ICMP).
 */
static uword
nat64_in2out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
                      vlib_frame_t * frame)
{
  /* is_slow_path = 0 */
  return nat64_in2out_node_fn_inline (vm, node, frame, 0);
}
1111
/* *INDENT-OFF* */
/* NAT64 in2out fast-path graph node registration. */
VLIB_REGISTER_NODE (nat64_in2out_node) = {
  .function = nat64_in2out_node_fn,
  .name = "nat64-in2out",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
  .error_strings = nat64_in2out_error_strings,
  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
  },
};
/* *INDENT-ON* */

/* Generate per-CPU-architecture variants of the node function. */
VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_node, nat64_in2out_node_fn);
1134
/**
 * @brief NAT64 in2out slow-path node function (protocols other than
 * TCP/UDP/ICMP).
 */
static uword
nat64_in2out_slowpath_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
                               vlib_frame_t * frame)
{
  /* is_slow_path = 1 */
  return nat64_in2out_node_fn_inline (vm, node, frame, 1);
}
1141
/* *INDENT-OFF* */
/* NAT64 in2out slow-path graph node registration. */
VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
  .function = nat64_in2out_slowpath_node_fn,
  .name = "nat64-in2out-slowpath",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
  .error_strings = nat64_in2out_error_strings,
  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
  },
};
/* *INDENT-ON* */

/* Generate per-CPU-architecture variants of the node function. */
VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_slowpath_node,
                              nat64_in2out_slowpath_node_fn);
1165
/** Per-fragment translation context passed to the fragment set callbacks. */
typedef struct nat64_in2out_frag_set_ctx_t_
{
  vlib_main_t *vm;	/**< vlib main, used to refresh the session timeout */
  u32 sess_index;	/**< NAT64 session table index for this flow */
  u32 thread_index;	/**< worker thread owning the session database */
  u16 l4_offset;	/**< byte offset of the L4 header within the packet */
  u8 proto;		/**< L4 protocol (IP_PROTOCOL_TCP or IP_PROTOCOL_UDP) */
  u8 first_frag;	/**< non-zero iff this buffer is the first fragment */
} nat64_in2out_frag_set_ctx_t;
1175
/**
 * @brief Fragment translation callback (used with ip6_to_ip4_fragmented).
 *
 * Rewrites the IPv4 source/destination addresses from the session and BIB
 * entries referenced by @a arg.  On the first fragment — the only one that
 * carries the L4 header — also rewrites the source port and, for TCP,
 * incrementally patches the checksum.
 *
 * @param ip6 original IPv6 header
 * @param ip4 IPv4 header being constructed
 * @param arg nat64_in2out_frag_set_ctx_t (session index, offsets, proto)
 * @return 0 on success, -1 when the session or BIB entry no longer exists
 */
static int
nat64_in2out_frag_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_frag_set_ctx_t *ctx = arg;
  nat64_db_st_entry_t *ste;
  nat64_db_bib_entry_t *bibe;
  udp_header_t *udp;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  /* The session was created/located when the first fragment was seen. */
  ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
  if (!bibe)
    return -1;

  /* Keep the session alive while fragments are flowing. */
  nat64_session_reset_timeout (ste, ctx->vm);

  if (ctx->first_frag)
    {
      udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);

      if (ctx->proto == IP_PROTOCOL_TCP)
	{
	  u16 *checksum;
	  ip_csum_t csum;
	  tcp_header_t *tcp = (tcp_header_t *) udp;

	  /* Incremental checksum update: remove the old source port and
	     IPv6 pseudo-header addresses, add the translated port and
	     IPv4 addresses. */
	  checksum = &tcp->checksum;
	  csum = ip_csum_sub_even (*checksum, tcp->src_port);
	  csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[0]);
	  csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
	  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
	  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
	  csum = ip_csum_add_even (csum, bibe->out_port);
	  csum = ip_csum_add_even (csum, bibe->out_addr.as_u32);
	  csum = ip_csum_add_even (csum, ste->out_r_addr.as_u32);
	  *checksum = ip_csum_fold (csum);
	}

      /* NOTE(review): for UDP the checksum is not adjusted after this
         source-port rewrite — presumably compensated elsewhere or relying
         on IPv4's optional UDP checksum; verify against the out2in path. */
      udp->src_port = bibe->out_port;
    }

  ip4->src_address.as_u32 = bibe->out_addr.as_u32;
  ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;

  return 0;
}
1226
/**
 * @brief Hairpin a fragment back into IPv6 toward another NAT64 client.
 *
 * Translates the fragment's addresses via its session entry, then looks up
 * the BIB entry that owns the (translated) destination across all worker
 * databases and rewrites the destination to that client's inside address.
 * Ports and checksum are only touched on the first fragment, which is the
 * only one carrying the L4 header.
 *
 * @param b   buffer holding the fragment
 * @param ip6 IPv6 header of the fragment
 * @param ctx per-fragment context (session index, offsets, proto)
 * @return 0 on success, -1 when a session or BIB entry cannot be found
 */
static int
nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6,
                               nat64_in2out_frag_set_ctx_t * ctx)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_st_entry_t *ste;
  nat64_db_bib_entry_t *bibe;
  udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset);
  tcp_header_t *tcp = (tcp_header_t *) udp;
  u16 sport = udp->src_port;
  u16 dport = udp->dst_port;
  u16 *checksum;
  ip_csum_t csum;
  ip46_address_t daddr;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  /* checksum/csum are only initialized and later folded when first_frag
     is set; both uses are guarded by the same condition. */
  if (ctx->first_frag)
    {
      if (ctx->proto == IP_PROTOCOL_UDP)
	checksum = &udp->checksum;
      else
	checksum = &tcp->checksum;

      /* Remove pre-translation addresses/ports from the checksum; the
         post-translation values are added back in below. */
      csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
      csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
      csum = ip_csum_sub_even (csum, sport);
      csum = ip_csum_sub_even (csum, dport);
    }

  ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
  if (!bibe)
    return -1;

  nat64_session_reset_timeout (ste, ctx->vm);

  sport = bibe->out_port;
  dport = ste->r_port;

  /* New source: the sender's outside mapping embedded in the NAT64
     prefix. */
  nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, bibe->fib_index);

  memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ste->out_r_addr.as_u32;

  /* The destination may be owned by any worker: search all databases. */
  bibe = 0;
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      bibe = nat64_db_bib_entry_find (db, &daddr, dport, ctx->proto, 0, 0);

      if (bibe)
        break;
    }
  /* *INDENT-ON* */

  if (!bibe)
    return -1;

  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];

  if (ctx->first_frag)
    {
      /* Apply translated ports and finish the incremental checksum. */
      udp->dst_port = bibe->in_port;
      udp->src_port = sport;
      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
      csum = ip_csum_add_even (csum, udp->src_port);
      csum = ip_csum_add_even (csum, udp->dst_port);
      *checksum = ip_csum_fold (csum);
    }

  return 0;
}
1308
/**
 * @brief NAT64 in2out fragment handling node (virtual reassembly).
 *
 * TCP/UDP fragments only.  The first fragment (which carries the L4 ports)
 * finds or creates the BIB and session entries and records the session
 * index in the reassembly state; out-of-order fragments arriving before
 * the first one are cached and re-injected ("looped back") through this
 * node once the session is known.
 */
static uword
nat64_in2out_reass_node_fn (vlib_main_t * vm,
                            vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  nat64_in2out_next_t next_index;
  u32 pkts_processed = 0;
  u32 *fragments_to_drop = 0;
  u32 *fragments_to_loopback = 0;
  nat64_main_t *nm = &nat64_main;
  u32 thread_index = vm->thread_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t *b0;
          u32 next0;
          u8 cached0 = 0;
          ip6_header_t *ip60;
          u16 l4_offset0, frag_offset0;
          u8 l4_protocol0;
          nat_reass_ip6_t *reass0;
          ip6_frag_hdr_t *frag0;
          nat64_db_bib_entry_t *bibe0;
          nat64_db_st_entry_t *ste0;
          udp_header_t *udp0;
          snat_protocol_t proto0;
          u32 sw_if_index0, fib_index0;
          ip46_address_t saddr0, daddr0;
          nat64_in2out_frag_set_ctx_t ctx0;
          nat64_db_t *db = &nm->db[thread_index];

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;

          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
          fib_index0 =
            fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6,
                                                 sw_if_index0);

          ctx0.thread_index = thread_index;

          /* Honor the global "drop all fragments" configuration knob. */
          if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
            {
              next0 = NAT64_IN2OUT_NEXT_DROP;
              b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
              goto trace0;
            }

          ip60 = (ip6_header_t *) vlib_buffer_get_current (b0);

          if (PREDICT_FALSE
              (ip6_parse
               (ip60, b0->current_length, &l4_protocol0, &l4_offset0,
                &frag_offset0)))
            {
              next0 = NAT64_IN2OUT_NEXT_DROP;
              b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
              goto trace0;
            }

          /* Virtual reassembly only supports TCP and UDP fragments. */
          if (PREDICT_FALSE
              (!(l4_protocol0 == IP_PROTOCOL_TCP
                 || l4_protocol0 == IP_PROTOCOL_UDP)))
            {
              next0 = NAT64_IN2OUT_NEXT_DROP;
              b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT];
              goto trace0;
            }

          udp0 = (udp_header_t *) u8_ptr_add (ip60, l4_offset0);
          frag0 = (ip6_frag_hdr_t *) u8_ptr_add (ip60, frag_offset0);
          proto0 = ip_proto_to_snat_proto (l4_protocol0);

          /* Find or create per-(src, dst, id, proto) reassembly state. */
          reass0 = nat_ip6_reass_find_or_create (ip60->src_address,
                                                 ip60->dst_address,
                                                 frag0->identification,
                                                 l4_protocol0,
                                                 1, &fragments_to_drop);

          if (PREDICT_FALSE (!reass0))
            {
              next0 = NAT64_IN2OUT_NEXT_DROP;
              b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_REASS];
              goto trace0;
            }

          if (PREDICT_TRUE (ip6_frag_hdr_offset (frag0)))
            {
              /* Non-first fragment: if the session is not known yet, cache
                 this buffer until the first fragment arrives. */
              ctx0.first_frag = 0;
              if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
                {
                  if (nat_ip6_reass_add_fragment (reass0, bi0))
                    {
                      b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_FRAG];
                      next0 = NAT64_IN2OUT_NEXT_DROP;
                      goto trace0;
                    }
                  cached0 = 1;
                  goto trace0;
                }
            }
          else
            {
              /* First fragment: it carries the L4 ports, so the BIB and
                 session entries can be found or created now. */
              ctx0.first_frag = 1;

              saddr0.as_u64[0] = ip60->src_address.as_u64[0];
              saddr0.as_u64[1] = ip60->src_address.as_u64[1];
              daddr0.as_u64[0] = ip60->dst_address.as_u64[0];
              daddr0.as_u64[1] = ip60->dst_address.as_u64[1];

              ste0 =
                nat64_db_st_entry_find (db, &saddr0, &daddr0,
                                        udp0->src_port, udp0->dst_port,
                                        l4_protocol0, fib_index0, 1);
              if (!ste0)
                {
                  bibe0 =
                    nat64_db_bib_entry_find (db, &saddr0, udp0->src_port,
                                             l4_protocol0, fib_index0, 1);
                  if (!bibe0)
                    {
                      /* No binding yet: allocate an outside address/port
                         and create a dynamic BIB entry. */
                      u16 out_port0;
                      ip4_address_t out_addr0;
                      if (nat64_alloc_out_addr_and_port
                          (fib_index0, proto0, &out_addr0, &out_port0,
                           thread_index))
                        {
                          next0 = NAT64_IN2OUT_NEXT_DROP;
                          b0->error =
                            node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
                          goto trace0;
                        }

                      bibe0 =
                        nat64_db_bib_entry_create (db,
                                                   &ip60->src_address,
                                                   &out_addr0, udp0->src_port,
                                                   out_port0, fib_index0,
                                                   l4_protocol0, 0);
                      if (!bibe0)
                        {
                          next0 = NAT64_IN2OUT_NEXT_DROP;
                          b0->error =
                            node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
                          goto trace0;
                        }
                    }
                  nat64_extract_ip4 (&ip60->dst_address, &daddr0.ip4,
                                     fib_index0);
                  ste0 =
                    nat64_db_st_entry_create (db, bibe0,
                                              &ip60->dst_address, &daddr0.ip4,
                                              udp0->dst_port);
                  if (!ste0)
                    {
                      next0 = NAT64_IN2OUT_NEXT_DROP;
                      b0->error =
                        node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
                      goto trace0;
                    }
                }
              reass0->sess_index = nat64_db_st_entry_get_index (db, ste0);

              /* Release fragments cached while waiting for this one. */
              nat_ip6_reass_get_frags (reass0, &fragments_to_loopback);
            }

          ctx0.sess_index = reass0->sess_index;
          ctx0.proto = l4_protocol0;
          ctx0.vm = vm;
          ctx0.l4_offset = l4_offset0;

          if (PREDICT_FALSE (is_hairpinning (&ip60->dst_address)))
            {
              next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
              if (nat64_in2out_frag_hairpinning (b0, ip60, &ctx0))
                {
                  next0 = NAT64_IN2OUT_NEXT_DROP;
                  b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
                }
              goto trace0;
            }
          else
            {
              if (ip6_to_ip4_fragmented (b0, nat64_in2out_frag_set_cb, &ctx0))
                {
                  next0 = NAT64_IN2OUT_NEXT_DROP;
                  b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
                  goto trace0;
                }
            }

        trace0:
          if (PREDICT_FALSE
              ((node->flags & VLIB_NODE_FLAG_TRACE)
               && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              nat64_in2out_reass_trace_t *t =
                vlib_add_trace (vm, node, b0, sizeof (*t));
              t->cached = cached0;
              t->sw_if_index = sw_if_index0;
              t->next_index = next0;
            }

          if (cached0)
            {
              /* Buffer was cached by the reassembly module: undo the
                 speculative enqueue. */
              n_left_to_next++;
              to_next--;
            }
          else
            {
              pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP;

              /* verify speculative enqueue, maybe switch current next frame */
              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                               to_next, n_left_to_next,
                                               bi0, next0);
            }

          /* Once the input frame is drained, loop cached fragments back
             through this node (at most one frame's worth at a time). */
          if (n_left_from == 0 && vec_len (fragments_to_loopback))
            {
              from = vlib_frame_vector_args (frame);
              u32 len = vec_len (fragments_to_loopback);
              if (len <= VLIB_FRAME_SIZE)
                {
                  clib_memcpy (from, fragments_to_loopback,
                               sizeof (u32) * len);
                  n_left_from = len;
                  vec_reset_length (fragments_to_loopback);
                }
              else
                {
                  clib_memcpy (from,
                               fragments_to_loopback + (len -
                                                        VLIB_FRAME_SIZE),
                               sizeof (u32) * VLIB_FRAME_SIZE);
                  n_left_from = VLIB_FRAME_SIZE;
                  _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
                }
            }
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  vlib_node_increment_counter (vm, nat64_in2out_reass_node.index,
                               NAT64_IN2OUT_ERROR_IN2OUT_PACKETS,
                               pkts_processed);

  /* Drop fragments evicted by the reassembly module. */
  nat_send_all_to_node (vm, fragments_to_drop, node,
                        &node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT],
                        NAT64_IN2OUT_NEXT_DROP);

  vec_free (fragments_to_drop);
  vec_free (fragments_to_loopback);
  return frame->n_vectors;
}
1584
/* *INDENT-OFF* */
/* NAT64 in2out fragment (virtual reassembly) node registration. */
VLIB_REGISTER_NODE (nat64_in2out_reass_node) = {
  .function = nat64_in2out_reass_node_fn,
  .name = "nat64-in2out-reass",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_reass_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
  .error_strings = nat64_in2out_error_strings,
  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
    [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass",
  },
};
/* *INDENT-ON* */

/* Generate per-CPU-architecture variants of the node function. */
VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_reass_node,
                              nat64_in2out_reass_node_fn);
1608
/** Trace record for the NAT64 in2out worker-handoff node. */
typedef struct
{
  u32 next_worker_index;	/**< worker thread selected for the packet */
  u8 do_handoff;		/**< 1 if handed to another worker, 0 if kept */
} nat64_in2out_handoff_trace_t;
1614
1615 static u8 *
1616 format_nat64_in2out_handoff_trace (u8 * s, va_list * args)
1617 {
1618   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1619   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1620   nat64_in2out_handoff_trace_t *t =
1621     va_arg (*args, nat64_in2out_handoff_trace_t *);
1622   char *m;
1623
1624   m = t->do_handoff ? "next worker" : "same worker";
1625   s = format (s, "NAT64-IN2OUT-HANDOFF: %s %d", m, t->next_worker_index);
1626
1627   return s;
1628 }
1629
/**
 * @brief NAT64 in2out worker-handoff node function.
 *
 * Selects the worker thread that owns each packet's flow (hashed from the
 * IPv6 source address) and either processes the packet locally or enqueues
 * it on that worker's frame queue.  Packets whose target queue is congested
 * are dropped via the error node.  Per-thread frame-queue state is kept in
 * __thread statics.
 */
static inline uword
nat64_in2out_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
                              vlib_frame_t * frame)
{
  nat64_main_t *nm = &nat64_main;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0;
  static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
  static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
    = 0;
  vlib_frame_queue_elt_t *hf = 0;
  vlib_frame_queue_t *fq;
  vlib_frame_t *f = 0, *d = 0;
  int i;
  u32 n_left_to_next_worker = 0, *to_next_worker = 0;
  u32 next_worker_index = 0;
  u32 current_worker_index = ~0;
  u32 thread_index = vm->thread_index;
  u32 fq_index;
  u32 to_node_index;

  fq_index = nm->fq_in2out_index;
  to_node_index = nat64_in2out_node.index;

  /* Lazily allocate the per-thread queue-element bookkeeping. */
  if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
    {
      vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);

      vec_validate_init_empty (congested_handoff_queue_by_worker_index,
                               tm->n_vlib_mains - 1,
                               (vlib_frame_queue_t *) (~0));
    }

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  while (n_left_from > 0)
    {
      u32 bi0;
      vlib_buffer_t *b0;
      ip6_header_t *ip0;
      u8 do_handoff;

      bi0 = from[0];
      from += 1;
      n_left_from -= 1;

      b0 = vlib_get_buffer (vm, bi0);

      ip0 = vlib_buffer_get_current (b0);

      next_worker_index = nat64_get_worker_in2out (&ip0->src_address);

      if (PREDICT_FALSE (next_worker_index != thread_index))
        {
          do_handoff = 1;

          if (next_worker_index != current_worker_index)
            {
              /* Drop the packet if the target worker's queue is
                 congested. */
              fq =
                is_vlib_frame_queue_congested (fq_index, next_worker_index,
                                               30,
                                               congested_handoff_queue_by_worker_index);

              if (fq)
                {
                  /* if this is 1st frame */
                  if (!d)
                    {
                      d = vlib_get_frame_to_node (vm, nm->error_node_index);
                      to_next_drop = vlib_frame_vector_args (d);
                    }

                  to_next_drop[0] = bi0;
                  to_next_drop += 1;
                  d->n_vectors++;
                  goto trace0;
                }

              /* Switching target worker: finalize the previous queue
                 element's vector count before grabbing a new one. */
              if (hf)
                hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;

              hf =
                vlib_get_worker_handoff_queue_elt (fq_index,
                                                   next_worker_index,
                                                   handoff_queue_elt_by_worker_index);
              n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
              to_next_worker = &hf->buffer_index[hf->n_vectors];
              current_worker_index = next_worker_index;
            }

          ASSERT (to_next_worker != 0);

          /* enqueue to correct worker thread */
          to_next_worker[0] = bi0;
          to_next_worker++;
          n_left_to_next_worker--;

          if (n_left_to_next_worker == 0)
            {
              /* Queue element is full: ship it and start fresh. */
              hf->n_vectors = VLIB_FRAME_SIZE;
              vlib_put_frame_queue_elt (hf);
              current_worker_index = ~0;
              handoff_queue_elt_by_worker_index[next_worker_index] = 0;
              hf = 0;
            }
        }
      else
        {
          /* Flow belongs to this thread: process locally. */
          do_handoff = 0;
          /* if this is 1st frame */
          if (!f)
            {
              f = vlib_get_frame_to_node (vm, to_node_index);
              to_next = vlib_frame_vector_args (f);
            }

          to_next[0] = bi0;
          to_next += 1;
          f->n_vectors++;
        }

    trace0:
      if (PREDICT_FALSE
          ((node->flags & VLIB_NODE_FLAG_TRACE)
           && (b0->flags & VLIB_BUFFER_IS_TRACED)))
        {
          nat64_in2out_handoff_trace_t *t =
            vlib_add_trace (vm, node, b0, sizeof (*t));
          t->next_worker_index = next_worker_index;
          t->do_handoff = do_handoff;
        }
    }

  /* Flush the locally-processed and dropped frames. */
  if (f)
    vlib_put_frame_to_node (vm, to_node_index, f);

  if (d)
    vlib_put_frame_to_node (vm, nm->error_node_index, d);

  if (hf)
    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;

  /* Ship frames to the worker nodes */
  for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
    {
      if (handoff_queue_elt_by_worker_index[i])
        {
          hf = handoff_queue_elt_by_worker_index[i];
          /*
           * It works better to let the handoff node
           * rate-adapt, always ship the handoff queue element.
           */
          if (1 || hf->n_vectors == hf->last_n_vectors)
            {
              vlib_put_frame_queue_elt (hf);
              handoff_queue_elt_by_worker_index[i] = 0;
            }
          else
            hf->last_n_vectors = hf->n_vectors;
        }
      /* Reset congestion state for the next dispatch. */
      congested_handoff_queue_by_worker_index[i] =
        (vlib_frame_queue_t *) (~0);
    }
  hf = 0;
  current_worker_index = ~0;
  return frame->n_vectors;
}
1798
/* *INDENT-OFF* */
/* NAT64 in2out worker-handoff node registration. */
VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = {
  .function = nat64_in2out_handoff_node_fn,
  .name = "nat64-in2out-handoff",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_handoff_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};
/* *INDENT-ON* */

/* Generate per-CPU-architecture variants of the node function. */
VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_handoff_node,
                              nat64_in2out_handoff_node_fn);
1817
1818 /*
1819  * fd.io coding-style-patch-verification: ON
1820  *
1821  * Local Variables:
1822  * eval: (c-set-style "gnu")
1823  * End:
1824  */