nat: cleanup & reorganization
[vpp.git] / src / plugins / nat / nat64 / nat64_in2out.c
1 /*
2  * Copyright (c) 2020 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <nat/nat64/nat64.h>
17 #include <vnet/ip/ip6_to_ip4.h>
18 #include <vnet/fib/fib_table.h>
19 #include <nat/lib/nat_inlines.h>
20
/* Per-packet trace record captured by the NAT64 in2out nodes. */
typedef struct
{
  u32 sw_if_index;		/* RX interface of the traced packet */
  u32 next_index;		/* next node index chosen for the packet */
  u8 is_slow_path;		/* 1 when traced by the slow-path node */
} nat64_in2out_trace_t;
27
28 static u8 *
29 format_nat64_in2out_trace (u8 * s, va_list * args)
30 {
31   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
32   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
33   nat64_in2out_trace_t *t = va_arg (*args, nat64_in2out_trace_t *);
34   char *tag;
35
36   tag = t->is_slow_path ? "NAT64-in2out-slowpath" : "NAT64-in2out";
37
38   s =
39     format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
40             t->next_index);
41
42   return s;
43 }
44
/* X-macro list of in2out error counters: (symbol, description). */
#define foreach_nat64_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "unsupported protocol")          \
_(NO_TRANSLATION, "no translation")                      \
_(UNKNOWN, "unknown")
49
50
/* Error codes expanded from foreach_nat64_in2out_error. */
typedef enum
{
#define _(sym,str) NAT64_IN2OUT_ERROR_##sym,
  foreach_nat64_in2out_error
#undef _
    NAT64_IN2OUT_N_ERROR,	/* total number of error counters */
} nat64_in2out_error_t;
58
/* Human-readable strings matching nat64_in2out_error_t, in order. */
static char *nat64_in2out_error_strings[] = {
#define _(sym,string) string,
  foreach_nat64_in2out_error
#undef _
};
64
/* Next-node dispositions for the in2out graph nodes. */
typedef enum
{
  NAT64_IN2OUT_NEXT_IP4_LOOKUP,
  NAT64_IN2OUT_NEXT_IP6_LOOKUP,
  NAT64_IN2OUT_NEXT_DROP,
  NAT64_IN2OUT_NEXT_SLOWPATH,
  NAT64_IN2OUT_N_NEXT,		/* number of next nodes */
} nat64_in2out_next_t;
73
/* Context handed to the per-packet translation helpers below. */
typedef struct nat64_in2out_set_ctx_t_
{
  vlib_buffer_t *b;		/* buffer being translated */
  vlib_main_t *vm;
  u32 thread_index;		/* worker thread; selects the per-thread DB */
} nat64_in2out_set_ctx_t;
80
/**
 * @brief Check if an IPv6 address is assigned to the interface itself.
 *
 * Walks the interface's IPv6 addresses; a match means the packet is
 * locally originated/destined and must not be NAT64-translated.
 *
 * @param sw_if_index Software interface index to inspect.
 * @param ip6_addr    Address to look for.
 *
 * @returns 1 if the address belongs to the interface, otherwise 0.
 */
static inline u8
nat64_not_translate (u32 sw_if_index, ip6_address_t ip6_addr)
{
  ip6_address_t *addr;
  ip6_main_t *im6 = &ip6_main;
  ip_lookup_main_t *lm6 = &im6->lookup_main;
  ip_interface_address_t *ia = 0;

  /* *INDENT-OFF* */
  foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
  ({
        addr = ip_interface_address_get_address (lm6, ia);
        if (0 == ip6_address_compare (addr, &ip6_addr))
                return 1;
  }));
  /* *INDENT-ON* */

  return 0;
}
100
101 /**
102  * @brief Check whether is a hairpinning.
103  *
104  * If the destination IP address of the packet is an IPv4 address assigned to
105  * the NAT64 itself, then the packet is a hairpin packet.
106  *
107  * param dst_addr Destination address of the packet.
108  *
109  * @returns 1 if hairpinning, otherwise 0.
110  */
111 static_always_inline int
112 is_hairpinning (ip6_address_t * dst_addr)
113 {
114   nat64_main_t *nm = &nat64_main;
115   int i;
116
117   for (i = 0; i < vec_len (nm->addr_pool); i++)
118     {
119       if (nm->addr_pool[i].addr.as_u32 == dst_addr->as_u32[3])
120         return 1;
121     }
122
123   return 0;
124 }
125
/**
 * @brief Translate an in2out TCP/UDP packet in place (IPv6 -> IPv4).
 *
 * Rewrites the IPv6 header into an IPv4 header directly in the buffer,
 * finds or creates the BIB and session-table entries for the flow,
 * rewrites the source port, and incrementally fixes the TCP checksum.
 *
 * @param vm              vlib main (unused here; kept for symmetry).
 * @param p               Buffer positioned at the IPv6 header.
 * @param l4_offset       Offset of the L4 header from the IPv6 header.
 * @param frag_hdr_offset Offset of the fragment header, 0 if none.
 * @param ctx             Translation context (buffer, vm, thread index).
 *
 * @returns 0 on success, -1 if no translation could be made.
 */
static int
nat64_in2out_tcp_udp (vlib_main_t * vm, vlib_buffer_t * p, u16 l4_offset,
		      u16 frag_hdr_offset, nat64_in2out_set_ctx_t * ctx)
{
  ip6_header_t *ip6;
  ip_csum_t csum = 0;
  ip4_header_t *ip4;
  u16 fragment_id;
  u8 frag_more;
  u16 frag_offset;
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t old_saddr, old_daddr;
  ip4_address_t new_daddr;
  u32 sw_if_index, fib_index;
  u8 proto = vnet_buffer (p)->ip.reass.ip_proto;
  u16 sport = vnet_buffer (p)->ip.reass.l4_src_port;
  u16 dport = vnet_buffer (p)->ip.reass.l4_dst_port;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  ip6 = vlib_buffer_get_current (p);

  /* Place the IPv4 header immediately before the L4 header; it
   * overlaps the tail of the IPv6 header, so all IPv6 fields that
   * are still needed are copied into locals first (below) before
   * any ip4->... store clobbers them. */
  vlib_buffer_advance (p, l4_offset - sizeof (*ip4));
  ip4 = vlib_buffer_get_current (p);

  u32 ip_version_traffic_class_and_flow_label =
    ip6->ip_version_traffic_class_and_flow_label;
  u16 payload_length = ip6->payload_length;
  u8 hop_limit = ip6->hop_limit;

  old_saddr.as_u64[0] = ip6->src_address.as_u64[0];
  old_saddr.as_u64[1] = ip6->src_address.as_u64[1];
  old_daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  old_daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  if (PREDICT_FALSE (frag_hdr_offset))
    {
      //Only the first fragment
      ip6_frag_hdr_t *hdr =
	(ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset);
      fragment_id = frag_id_6to4 (hdr->identification);
      frag_more = ip6_frag_hdr_more (hdr);
      frag_offset = ip6_frag_hdr_offset (hdr);
    }
  else
    {
      fragment_id = 0;
      frag_offset = 0;
      frag_more = 0;
    }

  /* Build the IPv4 header from the saved IPv6 fields. */
  ip4->ip_version_and_header_length =
    IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
  ip4->tos = ip6_translate_tos (ip_version_traffic_class_and_flow_label);
  ip4->length =
    u16_net_add (payload_length, sizeof (*ip4) + sizeof (*ip6) - l4_offset);
  ip4->fragment_id = fragment_id;
  ip4->flags_and_fragment_offset =
    clib_host_to_net_u16 (frag_offset |
			  (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0));
  ip4->ttl = hop_limit;
  ip4->protocol = (proto == IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : proto;

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  /* Look for an existing session; otherwise create BIB + session. */
  ste =
    nat64_db_st_entry_find (db, &old_saddr, &old_daddr, sport, dport, proto,
			    fib_index, 1);

  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;
    }
  else
    {
      bibe =
	nat64_db_bib_entry_find (db, &old_saddr, sport, proto, fib_index, 1);

      if (!bibe)
	{
	  u16 out_port;
	  ip4_address_t out_addr;
	  if (nat64_alloc_out_addr_and_port
	      (fib_index, ip_proto_to_nat_proto (proto), &out_addr,
	       &out_port, ctx->thread_index))
	    return -1;

	  bibe =
	    nat64_db_bib_entry_create (ctx->thread_index, db,
				       &old_saddr.ip6, &out_addr, sport,
				       out_port, fib_index, proto, 0);
	  if (!bibe)
	    return -1;

	  vlib_set_simple_counter (&nm->total_bibs, ctx->thread_index, 0,
				   db->bib.bib_entries_num);
	}

      /* Derive the IPv4 destination from the NAT64 prefix-embedded
       * IPv6 destination. */
      nat64_extract_ip4 (&old_daddr.ip6, &new_daddr, fib_index);
      ste =
	nat64_db_st_entry_create (ctx->thread_index, db, bibe,
				  &old_daddr.ip6, &new_daddr, dport);
      if (!ste)
	return -1;

      vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
			       db->st.st_entries_num);
    }

  ip4->src_address.as_u32 = bibe->out_addr.as_u32;
  ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;

  ip4->checksum = ip4_header_checksum (ip4);

  /* Only the first fragment carries the L4 header. */
  if (!vnet_buffer (p)->ip.reass.is_non_first_fragment)
    {
      udp_header_t *udp = (udp_header_t *) (ip4 + 1);
      udp->src_port = bibe->out_port;

      //UDP checksum is optional over IPv4
      if (proto == IP_PROTOCOL_UDP)
	{
	  udp->checksum = 0;
	}
      else
	{
	  /* Incremental TCP checksum update: remove the old
	   * pseudo-header addresses and source port, add the new. */
	  tcp_header_t *tcp = (tcp_header_t *) (ip4 + 1);
	  csum = ip_csum_sub_even (tcp->checksum, old_saddr.as_u64[0]);
	  csum = ip_csum_sub_even (csum, old_saddr.as_u64[1]);
	  csum = ip_csum_sub_even (csum, old_daddr.as_u64[0]);
	  csum = ip_csum_sub_even (csum, old_daddr.as_u64[1]);
	  csum = ip_csum_add_even (csum, ip4->dst_address.as_u32);
	  csum = ip_csum_add_even (csum, ip4->src_address.as_u32);
	  csum = ip_csum_sub_even (csum, sport);
	  csum = ip_csum_add_even (csum, udp->src_port);
	  mss_clamping (nm->mss_clamping, tcp, &csum);
	  tcp->checksum = ip_csum_fold (csum);

	  nat64_tcp_session_set_state (ste, tcp, 1);
	}
    }

  nat64_session_reset_timeout (ste, ctx->vm);

  return 0;
}
277
/**
 * @brief Address/port rewrite callback for in2out ICMP translation.
 *
 * For echo request/reply the ICMP identifier is treated as the "port"
 * for BIB/session lookup and creation; other ICMP types are given the
 * first pool address as source with a prefix-extracted destination.
 *
 * @param ip6  Original IPv6 header.
 * @param ip4  IPv4 header being produced.
 * @param arg  nat64_in2out_set_ctx_t pointer.
 *
 * @returns 0 on success, -1 if no translation could be made.
 */
static int
nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  icmp46_header_t *icmp = ip6_next_header (ip6);
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  /* NOTE(review): ICMP4_* type constants are compared here although
   * the header came from an IPv6 packet — presumably the caller has
   * already rewritten the ICMP type to its IPv4 value before invoking
   * this callback; confirm against the ip6_to_ip4 translation code. */
  if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply)
    {
      /* Echo identifier lives at byte offset 4 of the ICMP header. */
      u16 in_id = ((u16 *) (icmp))[2];
      ste =
	nat64_db_st_entry_find (db, &saddr, &daddr, in_id, 0,
				IP_PROTOCOL_ICMP, fib_index, 1);

      if (ste)
	{
	  bibe =
	    nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP,
					 ste->bibe_index);
	  if (!bibe)
	    return -1;
	}
      else
	{
	  bibe =
	    nat64_db_bib_entry_find (db, &saddr, in_id,
				     IP_PROTOCOL_ICMP, fib_index, 1);

	  if (!bibe)
	    {
	      u16 out_id;
	      ip4_address_t out_addr;
	      if (nat64_alloc_out_addr_and_port
		  (fib_index, NAT_PROTOCOL_ICMP, &out_addr, &out_id,
		   ctx->thread_index))
		return -1;

	      bibe =
		nat64_db_bib_entry_create (ctx->thread_index, db,
					   &ip6->src_address, &out_addr,
					   in_id, out_id, fib_index,
					   IP_PROTOCOL_ICMP, 0);
	      if (!bibe)
		return -1;

	      vlib_set_simple_counter (&nm->total_bibs, ctx->thread_index, 0,
				       db->bib.bib_entries_num);
	    }

	  /* Destination IPv4 comes out of the NAT64 prefix. */
	  nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
	  ste =
	    nat64_db_st_entry_create (ctx->thread_index, db, bibe,
				      &ip6->dst_address, &daddr.ip4, 0);
	  if (!ste)
	    return -1;

	  vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
				   db->st.st_entries_num);
	}

      nat64_session_reset_timeout (ste, ctx->vm);

      ip4->src_address.as_u32 = bibe->out_addr.as_u32;
      /* Rewrite the echo identifier to the outside one. */
      ((u16 *) (icmp))[2] = bibe->out_port;

      ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
    }
  else
    {
      /* Non-echo ICMP: stateless rewrite using the first pool address. */
      if (!vec_len (nm->addr_pool))
	return -1;

      ip4->src_address.as_u32 = nm->addr_pool[0].addr.as_u32;
      nat64_extract_ip4 (&ip6->dst_address, &ip4->dst_address, fib_index);
    }

  return 0;
}
370
/**
 * @brief Rewrite callback for the inner packet embedded in an ICMP error.
 *
 * The embedded packet travels in the opposite direction to the session,
 * so all lookups are done with source/destination (and ports) swapped
 * relative to the outer in2out direction.  Only existing sessions are
 * matched; no state is created here.
 *
 * @returns 0 on success, -1 if the session/BIB entry is not found.
 */
static int
nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
				void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *ctx = arg;
  nat64_db_st_entry_t *ste;
  nat64_db_bib_entry_t *bibe;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  u8 proto = ip6->protocol;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  if (proto == IP_PROTOCOL_ICMP6)
    {
      icmp46_header_t *icmp = ip6_next_header (ip6);
      /* Echo identifier at byte offset 4 of the ICMP header. */
      u16 in_id = ((u16 *) (icmp))[2];
      proto = IP_PROTOCOL_ICMP;

      /* Only embedded echo request/reply can be matched to a session. */
      if (!
	  (icmp->type == ICMP4_echo_request
	   || icmp->type == ICMP4_echo_reply))
	return -1;

      /* daddr/saddr swapped: the inner packet is in out2in direction. */
      ste =
	nat64_db_st_entry_find (db, &daddr, &saddr, in_id, 0, proto,
				fib_index, 1);
      if (!ste)
	return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;

      ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
      ((u16 *) (icmp))[2] = bibe->out_port;
      ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
    }
  else
    {
      /* udp and tcp share the same first two port fields, so udp is
       * used for port access regardless of the actual protocol. */
      udp_header_t *udp = ip6_next_header (ip6);
      tcp_header_t *tcp = ip6_next_header (ip6);
      u16 *checksum;
      ip_csum_t csum;

      u16 sport = udp->src_port;
      u16 dport = udp->dst_port;

      /* Swapped addresses and ports: inner packet is out2in. */
      ste =
	nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
				fib_index, 1);
      if (!ste)
	return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;

      ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
      udp->dst_port = bibe->out_port;
      ip4->src_address.as_u32 = ste->out_r_addr.as_u32;

      /* Incrementally patch the L4 checksum for the port rewrite. */
      if (proto == IP_PROTOCOL_TCP)
	checksum = &tcp->checksum;
      else
	checksum = &udp->checksum;
      csum = ip_csum_sub_even (*checksum, dport);
      csum = ip_csum_add_even (csum, udp->dst_port);
      *checksum = ip_csum_fold (csum);
    }

  return 0;
}
453
/* Context for unk_proto_st_walk: the flow being matched and, on
 * success, the outside address chosen for it. */
typedef struct unk_proto_st_walk_ctx_t_
{
  ip6_address_t src_addr;	/* inside source of the new flow */
  ip6_address_t dst_addr;	/* inside destination of the new flow */
  ip4_address_t out_addr;	/* result: reusable outside address (0 = none) */
  u32 fib_index;
  u32 thread_index;
  u8 proto;			/* L4 protocol of the new flow */
} unk_proto_st_walk_ctx_t;
463
/**
 * @brief Session-table walk callback used by the unknown-protocol path.
 *
 * Looks for an existing TCP/UDP session from the same inside host to
 * the same inside destination so its outside address can be reused.
 * NOTE(review): a non-zero return presumably terminates the walk —
 * 1 = address found (stored in ctx->out_addr), -1 = abort; confirm
 * against nat64_db_st_walk.
 */
static int
unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg)
{
  nat64_main_t *nm = &nat64_main;
  unk_proto_st_walk_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  ip46_address_t saddr, daddr;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  if (ip6_address_is_equal (&ste->in_r_addr, &ctx->dst_addr))
    {
      bibe = nat64_db_bib_entry_by_index (db, ste->proto, ste->bibe_index);
      if (!bibe)
	return -1;

      if (ip6_address_is_equal (&bibe->in_addr, &ctx->src_addr)
	  && bibe->fib_index == ctx->fib_index)
	{
	  /* Candidate found; make sure no session for this protocol
	   * already uses the (outside addr, remote addr) pair. */
	  clib_memset (&saddr, 0, sizeof (saddr));
	  saddr.ip4.as_u32 = bibe->out_addr.as_u32;
	  clib_memset (&daddr, 0, sizeof (daddr));
	  nat64_extract_ip4 (&ctx->dst_addr, &daddr.ip4, ctx->fib_index);

	  if (nat64_db_st_entry_find
	      (db, &daddr, &saddr, 0, 0, ctx->proto, ctx->fib_index, 0))
	    return -1;

	  ctx->out_addr.as_u32 = bibe->out_addr.as_u32;
	  return 1;
	}
    }

  return 0;
}
498
/**
 * @brief Translate an in2out packet of an unknown (non-TCP/UDP/ICMP)
 * protocol in place (IPv6 -> IPv4).
 *
 * There are no ports, so sessions are keyed on addresses only.  When a
 * new BIB entry is needed, the outside address of an existing TCP/UDP
 * session to the same destination is preferred; otherwise the first
 * free pool address for this protocol is taken.
 *
 * @returns 0 on success, -1 if no translation could be made.
 */
static int
nat64_in2out_unk_proto (vlib_main_t * vm, vlib_buffer_t * p, u8 l4_protocol,
			u16 l4_offset, u16 frag_hdr_offset,
			nat64_in2out_set_ctx_t * s_ctx)
{
  ip6_header_t *ip6;
  ip4_header_t *ip4;
  u16 fragment_id;
  u16 frag_offset;
  u8 frag_more;

  ip6 = vlib_buffer_get_current (p);

  /* The IPv4 header is built in place, overlapping the tail of the
   * IPv6 header; ip6 fields are still readable until the ip4 stores
   * at the end of this function. */
  ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4));

  vlib_buffer_advance (p, l4_offset - sizeof (*ip4));

  if (PREDICT_FALSE (frag_hdr_offset))
    {
      //Only the first fragment
      ip6_frag_hdr_t *hdr =
	(ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset);
      fragment_id = frag_id_6to4 (hdr->identification);
      frag_offset = ip6_frag_hdr_offset (hdr);
      frag_more = ip6_frag_hdr_more (hdr);
    }
  else
    {
      fragment_id = 0;
      frag_offset = 0;
      frag_more = 0;
    }

  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr, addr;
  u32 sw_if_index, fib_index;
  int i;
  nat64_db_t *db = &nm->db[s_ctx->thread_index];

  sw_if_index = vnet_buffer (s_ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  /* Port-less session lookup (ports are 0). */
  ste =
    nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, l4_protocol, fib_index,
			    1);

  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, l4_protocol, ste->bibe_index);
      if (!bibe)
	return -1;
    }
  else
    {
      bibe =
	nat64_db_bib_entry_find (db, &saddr, 0, l4_protocol, fib_index, 1);

      if (!bibe)
	{
	  /* Choose same out address as for TCP/UDP session to same dst */
	  unk_proto_st_walk_ctx_t ctx = {
	    .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
	    .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
	    .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
	    .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
	    .out_addr.as_u32 = 0,
	    .fib_index = fib_index,
	    .proto = l4_protocol,
	    .thread_index = s_ctx->thread_index,
	  };

	  nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);

	  if (!ctx.out_addr.as_u32)
	    nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);

	  /* Verify if out address is not already in use for protocol */
	  clib_memset (&addr, 0, sizeof (addr));
	  addr.ip4.as_u32 = ctx.out_addr.as_u32;
	  if (nat64_db_bib_entry_find (db, &addr, 0, l4_protocol, 0, 0))
	    ctx.out_addr.as_u32 = 0;

	  if (!ctx.out_addr.as_u32)
	    {
	      /* Fall back to the first pool address that is free for
	       * this protocol. */
	      for (i = 0; i < vec_len (nm->addr_pool); i++)
		{
		  addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
		  if (!nat64_db_bib_entry_find
		      (db, &addr, 0, l4_protocol, 0, 0))
		    break;
		}
	    }

	  if (!ctx.out_addr.as_u32)
	    return -1;

	  bibe =
	    nat64_db_bib_entry_create (s_ctx->thread_index, db,
				       &ip6->src_address, &ctx.out_addr,
				       0, 0, fib_index, l4_protocol, 0);
	  if (!bibe)
	    return -1;

	  vlib_set_simple_counter (&nm->total_bibs, s_ctx->thread_index, 0,
				   db->bib.bib_entries_num);
	}

      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
      ste =
	nat64_db_st_entry_create (s_ctx->thread_index, db, bibe,
				  &ip6->dst_address, &daddr.ip4, 0);
      if (!ste)
	return -1;

      vlib_set_simple_counter (&nm->total_sessions, s_ctx->thread_index, 0,
			       db->st.st_entries_num);
    }

  nat64_session_reset_timeout (ste, s_ctx->vm);

  ip4->src_address.as_u32 = bibe->out_addr.as_u32;
  ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;

  /* Build the rest of the IPv4 header from the IPv6 one. */
  ip4->ip_version_and_header_length =
    IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
  ip4->tos = ip6_translate_tos (ip6->ip_version_traffic_class_and_flow_label);
  ip4->length = u16_net_add (ip6->payload_length,
			     sizeof (*ip4) + sizeof (*ip6) - l4_offset);
  ip4->fragment_id = fragment_id;
  ip4->flags_and_fragment_offset =
    clib_host_to_net_u16 (frag_offset |
			  (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0));
  ip4->ttl = ip6->hop_limit;
  ip4->protocol = l4_protocol;
  ip4->checksum = ip4_header_checksum (ip4);

  return 0;
}
645
/**
 * @brief Handle a hairpinned in2out TCP/UDP packet (stays IPv6).
 *
 * The packet is destined to a NAT64 pool address, so it is translated
 * in2out (new source = composed outside address) and then immediately
 * out2in: the destination is resolved through the BIB of whichever
 * worker owns it, and the packet remains IPv6.
 *
 * @returns 0 on success, -1 if no translation could be made.
 */
static int
nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
				  ip6_header_t * ip6, u32 l4_offset,
				  u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  /* udp/tcp alias the same header start; udp is used for the shared
   * port fields regardless of the actual protocol. */
  udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, l4_offset);
  tcp_header_t *tcp = (tcp_header_t *) u8_ptr_add (ip6, l4_offset);
  u8 proto = vnet_buffer (b)->ip.reass.ip_proto;
  u16 sport = vnet_buffer (b)->ip.reass.l4_src_port;
  u16 dport = vnet_buffer (b)->ip.reass.l4_dst_port;
  u16 *checksum = NULL;
  ip_csum_t csum = 0;
  nat64_db_t *db = &nm->db[thread_index];

  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  /* Start the incremental checksum update: remove the old addresses
   * now; the new ones are added after the rewrite below. */
  if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
    {
      if (proto == IP_PROTOCOL_UDP)
	checksum = &udp->checksum;
      else
	checksum = &tcp->checksum;
      csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
      csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
    }

  ste =
    nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
			    fib_index, 1);

  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;
    }
  else
    {
      bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1);

      if (!bibe)
	{
	  u16 out_port;
	  ip4_address_t out_addr;
	  if (nat64_alloc_out_addr_and_port
	      (fib_index, ip_proto_to_nat_proto (proto), &out_addr,
	       &out_port, thread_index))
	    return -1;

	  bibe =
	    nat64_db_bib_entry_create (thread_index, db, &ip6->src_address,
				       &out_addr, sport, out_port, fib_index,
				       proto, 0);
	  if (!bibe)
	    return -1;

	  vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
				   db->bib.bib_entries_num);
	}

      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
      ste =
	nat64_db_st_entry_create (thread_index, db, bibe, &ip6->dst_address,
				  &daddr.ip4, dport);
      if (!ste)
	return -1;

      vlib_set_simple_counter (&nm->total_sessions, thread_index, 0,
			       db->st.st_entries_num);
    }

  if (proto == IP_PROTOCOL_TCP)
    nat64_tcp_session_set_state (ste, tcp, 1);

  nat64_session_reset_timeout (ste, vm);

  if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
    {
      udp->src_port = bibe->out_port;
    }

  /* New IPv6 source: outside address embedded in the NAT64 prefix. */
  nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);

  clib_memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ste->out_r_addr.as_u32;

  /* The destination BIB entry may live in any worker's DB. */
  bibe = 0;
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, 0, 0);

      if (bibe)
	break;
    }
  /* *INDENT-ON* */

  if (!bibe)
    return -1;

  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];

  /* Finish the checksum: add new addresses, swap old/new ports. */
  if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
    {
      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
      csum = ip_csum_sub_even (csum, sport);
      csum = ip_csum_sub_even (csum, dport);
      udp->dst_port = bibe->in_port;
      csum = ip_csum_add_even (csum, udp->src_port);
      csum = ip_csum_add_even (csum, udp->dst_port);
      *checksum = ip_csum_fold (csum);
    }

  return 0;
}
779
/**
 * @brief Handle a hairpinned in2out ICMP error packet (stays IPv6).
 *
 * Rewrites the TCP/UDP packet embedded in the ICMPv6 error using the
 * existing session state (looked up with swapped addresses/ports,
 * since the inner packet travels out2in), then rebuilds the outer
 * IPv6 header and recomputes the ICMP checksum from scratch.
 * Echo request/reply are rejected here (handled elsewhere).
 *
 * @returns 0 on success, -1 if no translation could be made.
 */
static int
nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
			       ip6_header_t * ip6, u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  icmp46_header_t *icmp = ip6_next_header (ip6);
  ip6_header_t *inner_ip6;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  u8 proto;
  udp_header_t *udp;
  tcp_header_t *tcp;
  u16 *checksum, sport, dport;
  ip_csum_t csum;
  nat64_db_t *db = &nm->db[thread_index];

  if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
    return -1;

  /* Embedded (offending) packet starts after the 8-byte ICMP header. */
  inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);

  proto = inner_ip6->protocol;

  if (proto == IP_PROTOCOL_ICMP6)
    return -1;

  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = inner_ip6->src_address.as_u64[0];
  saddr.as_u64[1] = inner_ip6->src_address.as_u64[1];
  daddr.as_u64[0] = inner_ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = inner_ip6->dst_address.as_u64[1];

  /* udp/tcp alias the inner L4 header start; udp is used for the
   * shared port fields regardless of the actual protocol. */
  udp = ip6_next_header (inner_ip6);
  tcp = ip6_next_header (inner_ip6);

  sport = udp->src_port;
  dport = udp->dst_port;

  if (proto == IP_PROTOCOL_UDP)
    checksum = &udp->checksum;
  else
    checksum = &tcp->checksum;

  /* Remove old addresses/ports from the inner L4 checksum. */
  csum = ip_csum_sub_even (*checksum, inner_ip6->src_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, sport);
  csum = ip_csum_sub_even (csum, dport);

  /* Swapped lookup: the inner packet is in the out2in direction. */
  ste =
    nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
			    fib_index, 1);
  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
  if (!bibe)
    return -1;

  dport = udp->dst_port = bibe->out_port;
  nat64_compose_ip6 (&inner_ip6->dst_address, &bibe->out_addr, fib_index);

  clib_memset (&saddr, 0, sizeof (saddr));
  clib_memset (&daddr, 0, sizeof (daddr));
  saddr.ip4.as_u32 = ste->out_r_addr.as_u32;
  daddr.ip4.as_u32 = bibe->out_addr.as_u32;

  /* The peer's session may live in any worker's DB. */
  ste = 0;
  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      ste = nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
                                    0, 0);

      if (ste)
	break;
    }
  /* *INDENT-ON* */

  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
  if (!bibe)
    return -1;

  inner_ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
  inner_ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
  udp->src_port = bibe->in_port;

  /* Add the rewritten addresses/ports back into the inner checksum. */
  csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]);
  csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]);
  csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]);
  csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]);
  csum = ip_csum_add_even (csum, udp->src_port);
  csum = ip_csum_add_even (csum, udp->dst_port);
  *checksum = ip_csum_fold (csum);

  if (!vec_len (nm->addr_pool))
    return -1;

  /* Rebuild the outer IPv6 header around the rewritten inner packet. */
  nat64_compose_ip6 (&ip6->src_address, &nm->addr_pool[0].addr, fib_index);
  ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
  ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];

  /* Recompute the ICMPv6 checksum over the pseudo-header + payload. */
  icmp->checksum = 0;
  csum = ip_csum_with_carry (0, ip6->payload_length);
  csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol));
  csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]);
  csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]);
  csum =
    ip_incremental_checksum (csum, icmp,
			     clib_net_to_host_u16 (ip6->payload_length));
  icmp->checksum = ~ip_csum_fold (csum);

  return 0;
}
905
906 static int
907 nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
908                                     ip6_header_t * ip6, u32 thread_index)
909 {
910   nat64_main_t *nm = &nat64_main;
911   nat64_db_bib_entry_t *bibe;
912   nat64_db_st_entry_t *ste;
913   ip46_address_t saddr, daddr, addr;
914   u32 sw_if_index, fib_index;
915   u8 proto = ip6->protocol;
916   int i;
917   nat64_db_t *db = &nm->db[thread_index];
918
919   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
920   fib_index =
921     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
922
923   saddr.as_u64[0] = ip6->src_address.as_u64[0];
924   saddr.as_u64[1] = ip6->src_address.as_u64[1];
925   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
926   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
927
928   ste =
929     nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1);
930
931   if (ste)
932     {
933       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
934       if (!bibe)
935         return -1;
936     }
937   else
938     {
939       bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1);
940
941       if (!bibe)
942         {
943           /* Choose same out address as for TCP/UDP session to same dst */
944           unk_proto_st_walk_ctx_t ctx = {
945             .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
946             .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
947             .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
948             .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
949             .out_addr.as_u32 = 0,
950             .fib_index = fib_index,
951             .proto = proto,
952             .thread_index = thread_index,
953           };
954
955           nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);
956
957           if (!ctx.out_addr.as_u32)
958             nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);
959
960           /* Verify if out address is not already in use for protocol */
961           clib_memset (&addr, 0, sizeof (addr));
962           addr.ip4.as_u32 = ctx.out_addr.as_u32;
963           if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
964             ctx.out_addr.as_u32 = 0;
965
966           if (!ctx.out_addr.as_u32)
967             {
968               for (i = 0; i < vec_len (nm->addr_pool); i++)
969                 {
970                   addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
971                   if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
972                     break;
973                 }
974             }
975
976           if (!ctx.out_addr.as_u32)
977             return -1;
978
979           bibe =
980             nat64_db_bib_entry_create (thread_index, db, &ip6->src_address,
981                                        &ctx.out_addr, 0, 0, fib_index, proto,
982                                        0);
983           if (!bibe)
984             return -1;
985
986           vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
987                                    db->bib.bib_entries_num);
988         }
989
990       nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
991       ste =
992         nat64_db_st_entry_create (thread_index, db, bibe, &ip6->dst_address,
993                                   &daddr.ip4, 0);
994       if (!ste)
995         return -1;
996
997       vlib_set_simple_counter (&nm->total_sessions, thread_index, 0,
998                                db->st.st_entries_num);
999     }
1000
1001   nat64_session_reset_timeout (ste, vm);
1002
1003   nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);
1004
1005   clib_memset (&daddr, 0, sizeof (daddr));
1006   daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
1007
1008   bibe = 0;
1009   /* *INDENT-OFF* */
1010   vec_foreach (db, nm->db)
1011     {
1012       bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, 0, 0);
1013
1014       if (bibe)
1015         break;
1016     }
1017   /* *INDENT-ON* */
1018
1019   if (!bibe)
1020     return -1;
1021
1022   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
1023   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
1024
1025   return 0;
1026 }
1027
/**
 * @brief Shared worker function for the nat64-in2out and
 * nat64-in2out-slowpath graph nodes.
 *
 * Processes the frame one buffer at a time using the standard
 * speculative-enqueue pattern.  On the fast path (is_slow_path == 0)
 * TCP/UDP/ICMP packets are translated in place and sent to ip4-lookup;
 * any other L4 protocol is punted to the slow-path node.  On the slow
 * path only NAT_PROTOCOL_OTHER packets do real work.  Destinations that
 * map back behind this NAT64 are hairpinned via ip6-lookup instead.
 *
 * @param vm            vlib main
 * @param node          node runtime
 * @param frame         frame of buffer indices
 * @param is_slow_path  1 when running as nat64-in2out-slowpath
 *
 * @return number of vectors processed (frame->n_vectors)
 */
static inline uword
nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
			     vlib_frame_t * frame, u8 is_slow_path)
{
  u32 n_left_from, *from, *to_next;
  nat64_in2out_next_t next_index;
  u32 thread_index = vm->thread_index;
  nat64_main_t *nm = &nat64_main;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0;
	  vlib_buffer_t *b0;
	  u32 next0;
	  ip6_header_t *ip60;
	  u16 l4_offset0, frag_hdr_offset0;
	  u8 l4_protocol0;
	  u32 proto0;
	  nat64_in2out_set_ctx_t ctx0;
	  u32 sw_if_index0;

	  /* speculatively enqueue b0 to the current next frame */
	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  b0 = vlib_get_buffer (vm, bi0);
	  ip60 = vlib_buffer_get_current (b0);

	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];

	  /* Context handed to the per-protocol translation callbacks */
	  ctx0.b = b0;
	  ctx0.vm = vm;
	  ctx0.thread_index = thread_index;

	  /* Default disposition: translated packet goes to ip4-lookup */
	  next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;

	  /* Locate the L4 header / fragment header; malformed IPv6 drops */
	  if (PREDICT_FALSE
	      (ip6_parse
	       (vm, b0, ip60, b0->current_length, &l4_protocol0, &l4_offset0,
		&frag_hdr_offset0)))
	    {
	      next0 = NAT64_IN2OUT_NEXT_DROP;
	      b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
	      goto trace0;
	    }

	  /* Destinations excluded from NAT64 bypass translation entirely */
	  if (nat64_not_translate (sw_if_index0, ip60->dst_address))
	    {
	      next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
	      goto trace0;
	    }

	  proto0 = ip_proto_to_nat_proto (l4_protocol0);

	  if (is_slow_path)
	    {
	      /* Slow path handles only the protocols the fast path punted */
	      if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_OTHER))
		{
		  vlib_increment_simple_counter (&nm->counters.in2out.other,
						 thread_index, sw_if_index0,
						 1);
		  if (is_hairpinning (&ip60->dst_address))
		    {
		      next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
		      if (nat64_in2out_unk_proto_hairpinning
			  (vm, b0, ip60, thread_index))
			{
			  next0 = NAT64_IN2OUT_NEXT_DROP;
			  b0->error =
			    node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
			}
		      goto trace0;
		    }

		  if (nat64_in2out_unk_proto
		      (vm, b0, l4_protocol0, l4_offset0, frag_hdr_offset0,
		       &ctx0))
		    {
		      next0 = NAT64_IN2OUT_NEXT_DROP;
		      b0->error =
			node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		      goto trace0;
		    }
		}
	      goto trace0;
	    }
	  else
	    {
	      /* Fast path: punt anything that is not TCP/UDP/ICMP */
	      if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
		{
		  next0 = NAT64_IN2OUT_NEXT_SLOWPATH;
		  goto trace0;
		}
	    }

	  if (proto0 == NAT_PROTOCOL_ICMP)
	    {
	      vlib_increment_simple_counter (&nm->counters.in2out.icmp,
					     thread_index, sw_if_index0, 1);
	      if (is_hairpinning (&ip60->dst_address))
		{
		  next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
		  if (nat64_in2out_icmp_hairpinning
		      (vm, b0, ip60, thread_index))
		    {
		      next0 = NAT64_IN2OUT_NEXT_DROP;
		      b0->error =
			node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		    }
		  goto trace0;
		}

	      /* ICMPv6 -> ICMPv4, translating outer and embedded headers */
	      if (icmp6_to_icmp
		  (vm, b0, nat64_in2out_icmp_set_cb, &ctx0,
		   nat64_in2out_inner_icmp_set_cb, &ctx0))
		{
		  next0 = NAT64_IN2OUT_NEXT_DROP;
		  b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		  goto trace0;
		}
	    }
	  else if (proto0 == NAT_PROTOCOL_TCP || proto0 == NAT_PROTOCOL_UDP)
	    {
	      if (proto0 == NAT_PROTOCOL_TCP)
		vlib_increment_simple_counter (&nm->counters.in2out.tcp,
					       thread_index, sw_if_index0, 1);
	      else
		vlib_increment_simple_counter (&nm->counters.in2out.udp,
					       thread_index, sw_if_index0, 1);

	      if (is_hairpinning (&ip60->dst_address))
		{
		  next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
		  if (nat64_in2out_tcp_udp_hairpinning
		      (vm, b0, ip60, l4_offset0, thread_index))
		    {
		      next0 = NAT64_IN2OUT_NEXT_DROP;
		      b0->error =
			node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		    }
		  goto trace0;
		}

	      if (nat64_in2out_tcp_udp
		  (vm, b0, l4_offset0, frag_hdr_offset0, &ctx0))
		{
		  next0 = NAT64_IN2OUT_NEXT_DROP;
		  b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		  goto trace0;
		}
	    }

	trace0:
	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
			     && (b0->flags & VLIB_BUFFER_IS_TRACED)))
	    {
	      nat64_in2out_trace_t *t =
		vlib_add_trace (vm, node, b0, sizeof (*t));
	      t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
	      t->next_index = next0;
	      t->is_slow_path = is_slow_path;
	    }

	  if (next0 == NAT64_IN2OUT_NEXT_DROP)
	    {
	      vlib_increment_simple_counter (&nm->counters.in2out.drops,
					     thread_index, sw_if_index0, 1);
	    }


	  /* verify speculative enqueue, maybe switch current next frame */
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
					   n_left_to_next, bi0, next0);
	}
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}
1221
/* Fast-path entry point: translate TCP/UDP/ICMP inline, punt the rest */
VLIB_NODE_FN (nat64_in2out_node) (vlib_main_t * vm,
				  vlib_node_runtime_t * node,
				  vlib_frame_t * frame)
{
  return nat64_in2out_node_fn_inline (vm, node, frame, 0);
}
1228
/* *INDENT-OFF* */
/* Graph registration for the NAT64 in2out fast-path node */
VLIB_REGISTER_NODE (nat64_in2out_node) = {
  .name = "nat64-in2out",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
  .error_strings = nat64_in2out_error_strings,
  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
  },
};
/* *INDENT-ON* */
1247
/* Slow-path entry point: handles the non-TCP/UDP/ICMP protocols the
   fast path punted here */
VLIB_NODE_FN (nat64_in2out_slowpath_node) (vlib_main_t * vm,
					   vlib_node_runtime_t * node,
					   vlib_frame_t * frame)
{
  return nat64_in2out_node_fn_inline (vm, node, frame, 1);
}
1254
/* *INDENT-OFF* */
/* Graph registration for the NAT64 in2out slow-path node; shares error
   strings and next-node layout with the fast-path node */
VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
  .name = "nat64-in2out-slowpath",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
  .error_strings = nat64_in2out_error_strings,
  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
  },
};
/* *INDENT-ON* */
1273
/* Context for fragment translation callbacks.
   NOTE(review): not referenced anywhere in this part of the file —
   presumably used by the reassembly/fragmentation path elsewhere;
   confirm before removing. */
typedef struct nat64_in2out_frag_set_ctx_t_
{
  vlib_main_t *vm;
  u32 sess_index;		/* presumably a session index in the NAT64 DB — verify */
  u32 thread_index;		/* worker thread owning the session */
  u16 l4_offset;		/* byte offset of the L4 header in the packet */
  u8 proto;			/* L4 protocol number */
  u8 first_frag;		/* non-zero for the first fragment — confirm */
} nat64_in2out_frag_set_ctx_t;
1283
1284
/* Counters reported by the in2out handoff node: congestion drops plus
   bookkeeping of packets kept on the same worker vs. handed off */
#define foreach_nat64_in2out_handoff_error                       \
_(CONGESTION_DROP, "congestion drop")                            \
_(SAME_WORKER, "same worker")                                    \
_(DO_HANDOFF, "do handoff")

typedef enum
{
#define _(sym,str) NAT64_IN2OUT_HANDOFF_ERROR_##sym,
  foreach_nat64_in2out_handoff_error
#undef _
    NAT64_IN2OUT_HANDOFF_N_ERROR,
} nat64_in2out_handoff_error_t;

/* Human-readable strings, indexed by nat64_in2out_handoff_error_t */
static char *nat64_in2out_handoff_error_strings[] = {
#define _(sym,string) string,
  foreach_nat64_in2out_handoff_error
#undef _
};
1303
/* Per-packet trace record for the handoff node */
typedef struct
{
  u32 next_worker_index;	/* worker thread the packet was assigned to */
} nat64_in2out_handoff_trace_t;
1308
1309 static u8 *
1310 format_nat64_in2out_handoff_trace (u8 * s, va_list * args)
1311 {
1312   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1313   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1314   nat64_in2out_handoff_trace_t *t =
1315     va_arg (*args, nat64_in2out_handoff_trace_t *);
1316
1317   s =
1318     format (s, "NAT64-IN2OUT-HANDOFF: next-worker %d", t->next_worker_index);
1319
1320   return s;
1321 }
1322
/* NAT64 in2out worker-handoff node: picks the owning worker for each
   packet from the IPv6 source address and enqueues the packet to that
   worker's frame queue */
VLIB_NODE_FN (nat64_in2out_handoff_node) (vlib_main_t * vm,
					  vlib_node_runtime_t * node,
					  vlib_frame_t * frame)
{
  nat64_main_t *nm = &nat64_main;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u32 n_enq, n_left_from, *from;
  u16 thread_indices[VLIB_FRAME_SIZE], *ti;
  u32 fq_index;
  u32 thread_index = vm->thread_index;
  u32 do_handoff = 0, same_worker = 0;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  vlib_get_buffers (vm, from, bufs, n_left_from);

  b = bufs;
  ti = thread_indices;

  fq_index = nm->fq_in2out_index;

  /* First pass: compute the target worker per packet and trace */
  while (n_left_from > 0)
    {
      ip6_header_t *ip0;

      ip0 = vlib_buffer_get_current (b[0]);
      ti[0] = nat64_get_worker_in2out (&ip0->src_address);

      if (ti[0] != thread_index)
	do_handoff++;
      else
	same_worker++;

      if (PREDICT_FALSE
	  ((node->flags & VLIB_NODE_FLAG_TRACE)
	   && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
	{
	  nat64_in2out_handoff_trace_t *t =
	    vlib_add_trace (vm, node, b[0], sizeof (*t));
	  t->next_worker_index = ti[0];
	}

      n_left_from -= 1;
      ti += 1;
      b += 1;
    }

  /* Second pass: enqueue all buffers to their workers in one call;
     packets that could not be enqueued are counted as congestion drops */
  n_enq =
    vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
				   frame->n_vectors, 1);

  if (n_enq < frame->n_vectors)
    vlib_node_increment_counter (vm, node->node_index,
				 NAT64_IN2OUT_HANDOFF_ERROR_CONGESTION_DROP,
				 frame->n_vectors - n_enq);
  vlib_node_increment_counter (vm, node->node_index,
			       NAT64_IN2OUT_HANDOFF_ERROR_SAME_WORKER,
			       same_worker);
  vlib_node_increment_counter (vm, node->node_index,
			       NAT64_IN2OUT_HANDOFF_ERROR_DO_HANDOFF,
			       do_handoff);

  return frame->n_vectors;
}
1387
/* *INDENT-OFF* */
/* Graph registration for the in2out worker-handoff node; its only next
   node is error-drop since successful packets leave via frame queues */
VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = {
  .name = "nat64-in2out-handoff",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_handoff_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN(nat64_in2out_handoff_error_strings),
  .error_strings = nat64_in2out_handoff_error_strings,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};
/* *INDENT-ON* */
1404
1405 /*
1406  * fd.io coding-style-patch-verification: ON
1407  *
1408  * Local Variables:
1409  * eval: (c-set-style "gnu")
1410  * End:
1411  */