nat: Include platform specific headers on FreeBSD
[vpp.git] / src / plugins / nat / nat64 / nat64_in2out.c
1 /*
2  * Copyright (c) 2020 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #include <nat/nat64/nat64.h>
17 #include <vnet/ip/ip6_to_ip4.h>
18 #include <vnet/fib/fib_table.h>
19 #include <nat/lib/nat_inlines.h>
20
/* Per-packet trace record captured by the NAT64 in2out nodes. */
typedef struct
{
  u32 sw_if_index;		/* RX software interface index */
  u32 next_index;		/* next node chosen for the packet */
  u8 is_slow_path;		/* 1 when traced by the slow-path node */
} nat64_in2out_trace_t;
27
28 static u8 *
29 format_nat64_in2out_trace (u8 * s, va_list * args)
30 {
31   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
32   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
33   nat64_in2out_trace_t *t = va_arg (*args, nat64_in2out_trace_t *);
34   char *tag;
35
36   tag = t->is_slow_path ? "NAT64-in2out-slowpath" : "NAT64-in2out";
37
38   s =
39     format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index,
40             t->next_index);
41
42   return s;
43 }
44
/* Error/drop counters reported by the NAT64 in2out nodes. */
#define foreach_nat64_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "unsupported protocol")          \
_(NO_TRANSLATION, "no translation")                      \
_(UNKNOWN, "unknown")


typedef enum
{
#define _(sym,str) NAT64_IN2OUT_ERROR_##sym,
  foreach_nat64_in2out_error
#undef _
    NAT64_IN2OUT_N_ERROR,
} nat64_in2out_error_t;

/* Human-readable strings matching the error enum above, in order. */
static char *nat64_in2out_error_strings[] = {
#define _(sym,string) string,
  foreach_nat64_in2out_error
#undef _
};

/* Next-node dispositions for the in2out graph nodes. */
typedef enum
{
  NAT64_IN2OUT_NEXT_IP4_LOOKUP,
  NAT64_IN2OUT_NEXT_IP6_LOOKUP,
  NAT64_IN2OUT_NEXT_DROP,
  NAT64_IN2OUT_NEXT_SLOWPATH,
  NAT64_IN2OUT_N_NEXT,
} nat64_in2out_next_t;
73
/* Per-packet context handed to the translation helpers and callbacks. */
typedef struct nat64_in2out_set_ctx_t_
{
  vlib_buffer_t *b;		/* buffer being translated */
  vlib_main_t *vm;		/* vlib main of the current worker */
  u32 thread_index;		/* selects the per-thread NAT64 db */
} nat64_in2out_set_ctx_t;
80
/**
 * @brief Check whether the IPv6 source address belongs to the interface.
 *
 * Traffic sourced from one of the interface's own IPv6 addresses must not
 * be translated.
 *
 * @param sw_if_index Software interface index the packet arrived on.
 * @param ip6_addr    IPv6 source address of the packet.
 *
 * @returns 1 if the address is assigned to the interface, otherwise 0.
 */
static inline u8
nat64_not_translate (u32 sw_if_index, ip6_address_t ip6_addr)
{
  ip6_address_t *addr;
  ip6_main_t *im6 = &ip6_main;
  ip_lookup_main_t *lm6 = &im6->lookup_main;
  ip_interface_address_t *ia = 0;

  /* Walk all IPv6 addresses configured on the interface; the embedded
     return exits the whole function from inside the macro body. */
  foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
  ({
	addr = ip_interface_address_get_address (lm6, ia);
	if (0 == ip6_address_compare (addr, &ip6_addr))
		return 1;
  }));

  return 0;
}
98
99 /**
100  * @brief Check whether is a hairpinning.
101  *
102  * If the destination IP address of the packet is an IPv4 address assigned to
103  * the NAT64 itself, then the packet is a hairpin packet.
104  *
 * @param dst_addr Destination address of the packet.
106  *
107  * @returns 1 if hairpinning, otherwise 0.
108  */
109 static_always_inline int
110 is_hairpinning (ip6_address_t * dst_addr)
111 {
112   nat64_main_t *nm = &nat64_main;
113   int i;
114
115   for (i = 0; i < vec_len (nm->addr_pool); i++)
116     {
117       if (nm->addr_pool[i].addr.as_u32 == dst_addr->as_u32[3])
118         return 1;
119     }
120
121   return 0;
122 }
123
/**
 * @brief In2out translation of a TCP/UDP packet (IPv6 to IPv4) in place.
 *
 * Overwrites the end of the IPv6 header with an IPv4 header (advancing the
 * buffer so the IPv4 header immediately precedes the L4 header), finds or
 * creates the BIB and session entries, then rewrites the source port and
 * incrementally updates the L4 checksum.
 *
 * @param vm              vlib main (unused here; timeout uses ctx->vm).
 * @param p               Buffer with current data at the IPv6 header.
 * @param l4_offset       Offset of the L4 header from the IPv6 header.
 * @param frag_hdr_offset Offset of the IPv6 fragment header, 0 if absent.
 * @param ctx             Per-packet translation context.
 *
 * @returns 0 on success, -1 when no translation is possible (drop).
 */
static int
nat64_in2out_tcp_udp (vlib_main_t * vm, vlib_buffer_t * p, u16 l4_offset,
		      u16 frag_hdr_offset, nat64_in2out_set_ctx_t * ctx)
{
  ip6_header_t *ip6;
  ip_csum_t csum = 0;
  ip4_header_t *ip4;
  u16 fragment_id;
  u8 frag_more;
  u16 frag_offset;
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t old_saddr, old_daddr;
  ip4_address_t new_daddr;
  u32 sw_if_index, fib_index;
  u8 proto = vnet_buffer (p)->ip.reass.ip_proto;
  u16 sport = vnet_buffer (p)->ip.reass.l4_src_port;
  u16 dport = vnet_buffer (p)->ip.reass.l4_dst_port;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  ip6 = vlib_buffer_get_current (p);

  /* Position the IPv4 header so it ends where the IPv6 headers ended. */
  vlib_buffer_advance (p, l4_offset - sizeof (*ip4));
  ip4 = vlib_buffer_get_current (p);

  /* Save the IPv6 fields needed later; the IPv4 header may overlap them. */
  u32 ip_version_traffic_class_and_flow_label =
    ip6->ip_version_traffic_class_and_flow_label;
  u16 payload_length = ip6->payload_length;
  u8 hop_limit = ip6->hop_limit;

  old_saddr.as_u64[0] = ip6->src_address.as_u64[0];
  old_saddr.as_u64[1] = ip6->src_address.as_u64[1];
  old_daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  old_daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  if (PREDICT_FALSE (frag_hdr_offset))
    {
      //Only the first fragment
      ip6_frag_hdr_t *hdr =
	(ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset);
      fragment_id = frag_id_6to4 (hdr->identification);
      frag_more = ip6_frag_hdr_more (hdr);
      frag_offset = ip6_frag_hdr_offset (hdr);
    }
  else
    {
      fragment_id = 0;
      frag_offset = 0;
      frag_more = 0;
    }

  /* Build the IPv4 header from the saved IPv6 fields. */
  ip4->ip_version_and_header_length =
    IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
  ip4->tos = ip6_translate_tos (ip_version_traffic_class_and_flow_label);
  ip4->length =
    u16_net_add (payload_length, sizeof (*ip4) + sizeof (*ip6) - l4_offset);
  ip4->fragment_id = fragment_id;
  ip4->flags_and_fragment_offset =
    clib_host_to_net_u16 (frag_offset |
			  (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0));
  ip4->ttl = hop_limit;
  ip4->protocol = (proto == IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : proto;

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  /* Look up an existing session; otherwise find/create the BIB entry and
     create a new session. */
  ste =
    nat64_db_st_entry_find (db, &old_saddr, &old_daddr, sport, dport, proto,
			    fib_index, 1);

  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;
    }
  else
    {
      bibe =
	nat64_db_bib_entry_find (db, &old_saddr, sport, proto, fib_index, 1);

      if (!bibe)
	{
	  /* No binding yet: allocate an outside address/port pair. */
	  u16 out_port;
	  ip4_address_t out_addr;
	  if (nat64_alloc_out_addr_and_port
	      (fib_index, ip_proto_to_nat_proto (proto), &out_addr,
	       &out_port, ctx->thread_index))
	    return -1;

	  bibe =
	    nat64_db_bib_entry_create (ctx->thread_index, db,
				       &old_saddr.ip6, &out_addr, sport,
				       out_port, fib_index, proto, 0);
	  if (!bibe)
	    return -1;

	  vlib_set_simple_counter (&nm->total_bibs, ctx->thread_index, 0,
				   db->bib.bib_entries_num);
	}

      /* IPv4 destination is embedded in the IPv6 destination (NAT64
         prefix). */
      nat64_extract_ip4 (&old_daddr.ip6, &new_daddr, fib_index);
      ste =
	nat64_db_st_entry_create (ctx->thread_index, db, bibe,
				  &old_daddr.ip6, &new_daddr, dport);
      if (!ste)
	return -1;

      vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
			       db->st.st_entries_num);
    }

  ip4->src_address.as_u32 = bibe->out_addr.as_u32;
  ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;

  ip4->checksum = ip4_header_checksum (ip4);

  if (!vnet_buffer (p)->ip.reass.is_non_first_fragment)
    {
      udp_header_t *udp = (udp_header_t *) (ip4 + 1);
      udp->src_port = bibe->out_port;

      //UDP checksum is optional over IPv4
      if (proto == IP_PROTOCOL_UDP)
	{
	  udp->checksum = 0;
	}
      else
	{
	  /* TCP: incrementally patch the checksum for the rewritten
	     addresses and source port instead of recomputing it. */
	  tcp_header_t *tcp = (tcp_header_t *) (ip4 + 1);
	  csum = ip_csum_sub_even (tcp->checksum, old_saddr.as_u64[0]);
	  csum = ip_csum_sub_even (csum, old_saddr.as_u64[1]);
	  csum = ip_csum_sub_even (csum, old_daddr.as_u64[0]);
	  csum = ip_csum_sub_even (csum, old_daddr.as_u64[1]);
	  csum = ip_csum_add_even (csum, ip4->dst_address.as_u32);
	  csum = ip_csum_add_even (csum, ip4->src_address.as_u32);
	  csum = ip_csum_sub_even (csum, sport);
	  csum = ip_csum_add_even (csum, udp->src_port);
	  mss_clamping (nm->mss_clamping, tcp, &csum);
	  tcp->checksum = ip_csum_fold (csum);

	  nat64_tcp_session_set_state (ste, tcp, 1);
	}
    }

  nat64_session_reset_timeout (ste, ctx->vm);

  return 0;
}
275
/**
 * @brief Address-rewrite callback for in2out ICMP translation.
 *
 * Invoked during ICMPv6->ICMPv4 translation to fill in the IPv4 source and
 * destination addresses.  For echo request/reply the ICMP identifier is
 * used as the "port" for BIB/session lookup and is rewritten to the
 * allocated outside identifier.
 *
 * NOTE(review): the type field is compared against ICMP4_echo_* constants;
 * presumably the caller has already rewritten the ICMP type to its IPv4
 * value at this point — confirm against the icmp6_to_icmp4 call site.
 *
 * @param ip6 Original IPv6 header.
 * @param ip4 IPv4 header being constructed.
 * @param arg Per-packet context (nat64_in2out_set_ctx_t).
 *
 * @returns 0 on success, -1 when no translation is possible.
 */
static int
nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  icmp46_header_t *icmp = ip6_next_header (ip6);
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply)
    {
      /* ICMP identifier sits right after type/code/checksum; it plays the
         role of the transport port in the BIB/session tables. */
      u16 in_id = ((u16 *) (icmp))[2];
      ste =
	nat64_db_st_entry_find (db, &saddr, &daddr, in_id, 0,
				IP_PROTOCOL_ICMP, fib_index, 1);

      if (ste)
	{
	  bibe =
	    nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP,
					 ste->bibe_index);
	  if (!bibe)
	    return -1;
	}
      else
	{
	  bibe =
	    nat64_db_bib_entry_find (db, &saddr, in_id,
				     IP_PROTOCOL_ICMP, fib_index, 1);

	  if (!bibe)
	    {
	      /* No binding: allocate an outside address and identifier. */
	      u16 out_id;
	      ip4_address_t out_addr;
	      if (nat64_alloc_out_addr_and_port
		  (fib_index, NAT_PROTOCOL_ICMP, &out_addr, &out_id,
		   ctx->thread_index))
		return -1;

	      bibe =
		nat64_db_bib_entry_create (ctx->thread_index, db,
					   &ip6->src_address, &out_addr,
					   in_id, out_id, fib_index,
					   IP_PROTOCOL_ICMP, 0);
	      if (!bibe)
		return -1;

	      vlib_set_simple_counter (&nm->total_bibs, ctx->thread_index, 0,
				       db->bib.bib_entries_num);
	    }

	  /* IPv4 destination is embedded in the IPv6 destination. */
	  nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
	  ste =
	    nat64_db_st_entry_create (ctx->thread_index, db, bibe,
				      &ip6->dst_address, &daddr.ip4, 0);
	  if (!ste)
	    return -1;

	  vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
				   db->st.st_entries_num);
	}

      nat64_session_reset_timeout (ste, ctx->vm);

      ip4->src_address.as_u32 = bibe->out_addr.as_u32;
      ((u16 *) (icmp))[2] = bibe->out_port;

      ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;
    }
  else
    {
      /* Non-echo ICMP (errors): no session state; use the first pool
         address as source and the embedded IPv4 as destination. */
      if (!vec_len (nm->addr_pool))
	return -1;

      ip4->src_address.as_u32 = nm->addr_pool[0].addr.as_u32;
      nat64_extract_ip4 (&ip6->dst_address, &ip4->dst_address, fib_index);
    }

  return 0;
}
368
/**
 * @brief Address-rewrite callback for the inner packet of an ICMP error.
 *
 * The inner (embedded) packet travels in the opposite direction, so the
 * session lookup swaps source/destination and ports.  Lookup only — no
 * BIB/session entries are created here; unknown flows are rejected.
 *
 * @param ip6 Inner IPv6 header of the ICMP error payload.
 * @param ip4 Inner IPv4 header being constructed.
 * @param arg Per-packet context (nat64_in2out_set_ctx_t).
 *
 * @returns 0 on success, -1 when no matching session exists.
 */
static int
nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4,
				void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_in2out_set_ctx_t *ctx = arg;
  nat64_db_st_entry_t *ste;
  nat64_db_bib_entry_t *bibe;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  u8 proto = ip6->protocol;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  if (proto == IP_PROTOCOL_ICMP6)
    {
      icmp46_header_t *icmp = ip6_next_header (ip6);
      /* ICMP identifier doubles as the session "port". */
      u16 in_id = ((u16 *) (icmp))[2];
      proto = IP_PROTOCOL_ICMP;

      /* Only echo request/reply can be embedded in an error we translate.
         NOTE(review): compared against ICMP4_* values — presumably the
         inner ICMP type was already rewritten by the caller; confirm. */
      if (!
	  (icmp->type == ICMP4_echo_request
	   || icmp->type == ICMP4_echo_reply))
	return -1;

      /* Reverse direction: daddr/saddr swapped for the lookup. */
      ste =
	nat64_db_st_entry_find (db, &daddr, &saddr, in_id, 0, proto,
				fib_index, 1);
      if (!ste)
	return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;

      ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
      ((u16 *) (icmp))[2] = bibe->out_port;
      ip4->src_address.as_u32 = ste->out_r_addr.as_u32;
    }
  else
    {
      /* TCP and UDP share the port layout, so the UDP view is used for
         ports and only the checksum location differs. */
      udp_header_t *udp = ip6_next_header (ip6);
      tcp_header_t *tcp = ip6_next_header (ip6);
      u16 *checksum;
      ip_csum_t csum;

      u16 sport = udp->src_port;
      u16 dport = udp->dst_port;

      /* Reverse direction: daddr/saddr and dport/sport swapped. */
      ste =
	nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
				fib_index, 1);
      if (!ste)
	return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;

      ip4->dst_address.as_u32 = bibe->out_addr.as_u32;
      udp->dst_port = bibe->out_port;
      ip4->src_address.as_u32 = ste->out_r_addr.as_u32;

      /* Incrementally patch the L4 checksum for the rewritten port. */
      if (proto == IP_PROTOCOL_TCP)
	checksum = &tcp->checksum;
      else
	checksum = &udp->checksum;
      csum = ip_csum_sub_even (*checksum, dport);
      csum = ip_csum_add_even (csum, udp->dst_port);
      *checksum = ip_csum_fold (csum);
    }

  return 0;
}
451
/* Context for the session-table walk that picks an outside address for
   protocols other than TCP/UDP/ICMP (see unk_proto_st_walk). */
typedef struct unk_proto_st_walk_ctx_t_
{
  ip6_address_t src_addr;	/* inside IPv6 source to match */
  ip6_address_t dst_addr;	/* inside IPv6 destination to match */
  ip4_address_t out_addr;	/* result: chosen outside IPv4 address */
  u32 fib_index;		/* FIB of the flow being set up */
  u32 thread_index;		/* selects the per-thread NAT64 db */
  u8 proto;			/* IP protocol of the new flow */
} unk_proto_st_walk_ctx_t;
461
462 static int
463 unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg)
464 {
465   nat64_main_t *nm = &nat64_main;
466   unk_proto_st_walk_ctx_t *ctx = arg;
467   nat64_db_bib_entry_t *bibe;
468   ip46_address_t saddr, daddr;
469   nat64_db_t *db = &nm->db[ctx->thread_index];
470
471   if (ip6_address_is_equal (&ste->in_r_addr, &ctx->dst_addr))
472     {
473       bibe = nat64_db_bib_entry_by_index (db, ste->proto, ste->bibe_index);
474       if (!bibe)
475         return -1;
476
477       if (ip6_address_is_equal (&bibe->in_addr, &ctx->src_addr)
478           && bibe->fib_index == ctx->fib_index)
479         {
480           clib_memset (&saddr, 0, sizeof (saddr));
481           saddr.ip4.as_u32 = bibe->out_addr.as_u32;
482           clib_memset (&daddr, 0, sizeof (daddr));
483           nat64_extract_ip4 (&ctx->dst_addr, &daddr.ip4, ctx->fib_index);
484
485           if (nat64_db_st_entry_find
486               (db, &daddr, &saddr, 0, 0, ctx->proto, ctx->fib_index, 0))
487             return -1;
488
489           ctx->out_addr.as_u32 = bibe->out_addr.as_u32;
490           return 1;
491         }
492     }
493
494   return 0;
495 }
496
/**
 * @brief In2out translation for protocols other than TCP/UDP/ICMP.
 *
 * Since such protocols have no ports, a single session per
 * (source, destination, protocol) tuple is kept with ports 0.  When a new
 * binding is needed, the code first tries to reuse the outside address of
 * an existing TCP/UDP session between the same endpoints, then falls back
 * to the first free pool address for this protocol.
 *
 * @param vm              vlib main (unused here; timeout uses s_ctx->vm).
 * @param p               Buffer with current data at the IPv6 header.
 * @param l4_protocol     IP protocol number of the payload.
 * @param l4_offset       Offset of the L4 payload from the IPv6 header.
 * @param frag_hdr_offset Offset of the IPv6 fragment header, 0 if absent.
 * @param s_ctx           Per-packet translation context.
 *
 * @returns 0 on success, -1 when no translation is possible (drop).
 */
static int
nat64_in2out_unk_proto (vlib_main_t * vm, vlib_buffer_t * p, u8 l4_protocol,
			u16 l4_offset, u16 frag_hdr_offset,
			nat64_in2out_set_ctx_t * s_ctx)
{
  ip6_header_t *ip6;
  ip4_header_t *ip4;
  u16 fragment_id;
  u16 frag_offset;
  u8 frag_more;

  ip6 = vlib_buffer_get_current (p);

  /* Place the IPv4 header so it immediately precedes the payload. */
  ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4));

  vlib_buffer_advance (p, l4_offset - sizeof (*ip4));

  if (PREDICT_FALSE (frag_hdr_offset))
    {
      //Only the first fragment
      ip6_frag_hdr_t *hdr =
	(ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset);
      fragment_id = frag_id_6to4 (hdr->identification);
      frag_offset = ip6_frag_hdr_offset (hdr);
      frag_more = ip6_frag_hdr_more (hdr);
    }
  else
    {
      fragment_id = 0;
      frag_offset = 0;
      frag_more = 0;
    }

  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr, addr;
  u32 sw_if_index, fib_index;
  int i;
  nat64_db_t *db = &nm->db[s_ctx->thread_index];

  sw_if_index = vnet_buffer (s_ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  /* Ports are always 0 for unknown protocols. */
  ste =
    nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, l4_protocol, fib_index,
			    1);

  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, l4_protocol, ste->bibe_index);
      if (!bibe)
	return -1;
    }
  else
    {
      bibe =
	nat64_db_bib_entry_find (db, &saddr, 0, l4_protocol, fib_index, 1);

      if (!bibe)
	{
	  /* Choose same out address as for TCP/UDP session to same dst */
	  unk_proto_st_walk_ctx_t ctx = {
	    .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
	    .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
	    .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
	    .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
	    .out_addr.as_u32 = 0,
	    .fib_index = fib_index,
	    .proto = l4_protocol,
	    .thread_index = s_ctx->thread_index,
	  };

	  nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);

	  if (!ctx.out_addr.as_u32)
	    nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);

	  /* Verify if out address is not already in use for protocol */
	  clib_memset (&addr, 0, sizeof (addr));
	  addr.ip4.as_u32 = ctx.out_addr.as_u32;
	  if (nat64_db_bib_entry_find (db, &addr, 0, l4_protocol, 0, 0))
	    ctx.out_addr.as_u32 = 0;

	  /* Fallback: first pool address without a binding for this
	     protocol. */
	  if (!ctx.out_addr.as_u32)
	    {
	      for (i = 0; i < vec_len (nm->addr_pool); i++)
		{
		  addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
		  if (!nat64_db_bib_entry_find
		      (db, &addr, 0, l4_protocol, 0, 0))
		    break;
		}
	    }

	  if (!ctx.out_addr.as_u32)
	    return -1;

	  bibe =
	    nat64_db_bib_entry_create (s_ctx->thread_index, db,
				       &ip6->src_address, &ctx.out_addr,
				       0, 0, fib_index, l4_protocol, 0);
	  if (!bibe)
	    return -1;

	  vlib_set_simple_counter (&nm->total_bibs, s_ctx->thread_index, 0,
				   db->bib.bib_entries_num);
	}

      /* IPv4 destination is embedded in the IPv6 destination. */
      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
      ste =
	nat64_db_st_entry_create (s_ctx->thread_index, db, bibe,
				  &ip6->dst_address, &daddr.ip4, 0);
      if (!ste)
	return -1;

      vlib_set_simple_counter (&nm->total_sessions, s_ctx->thread_index, 0,
			       db->st.st_entries_num);
    }

  nat64_session_reset_timeout (ste, s_ctx->vm);

  ip4->src_address.as_u32 = bibe->out_addr.as_u32;
  ip4->dst_address.as_u32 = ste->out_r_addr.as_u32;

  /* Build the IPv4 header; ip6 fields are still readable here because the
     IPv4 header only overlaps the tail of the IPv6 header area. */
  ip4->ip_version_and_header_length =
    IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
  ip4->tos = ip6_translate_tos (ip6->ip_version_traffic_class_and_flow_label);
  ip4->length = u16_net_add (ip6->payload_length,
			     sizeof (*ip4) + sizeof (*ip6) - l4_offset);
  ip4->fragment_id = fragment_id;
  ip4->flags_and_fragment_offset =
    clib_host_to_net_u16 (frag_offset |
			  (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0));
  ip4->ttl = ip6->hop_limit;
  ip4->protocol = l4_protocol;
  ip4->checksum = ip4_header_checksum (ip4);

  return 0;
}
643
/**
 * @brief Hairpinning translation for TCP/UDP (IPv6 stays IPv6).
 *
 * The destination maps to a NAT64 pool address, i.e. the peer is itself
 * behind this NAT64.  The packet is rewritten IPv6-to-IPv6: the source
 * becomes the NAT64-prefixed form of the allocated outside address, and
 * the destination becomes the inside address of the peer's binding.  The
 * L4 checksum is patched incrementally across the rewrite.
 *
 * @param vm           vlib main (used for the session timeout).
 * @param b            Buffer holding the packet.
 * @param ip6          IPv6 header of the packet.
 * @param l4_offset    Offset of the L4 header from ip6.
 * @param thread_index Worker thread index (selects the NAT64 db).
 *
 * @returns 0 on success, -1 when no translation is possible (drop).
 */
static int
nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
				  ip6_header_t * ip6, u32 l4_offset,
				  u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  /* TCP and UDP share the port layout; only the checksum field differs. */
  udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, l4_offset);
  tcp_header_t *tcp = (tcp_header_t *) u8_ptr_add (ip6, l4_offset);
  u8 proto = vnet_buffer (b)->ip.reass.ip_proto;
  u16 sport = vnet_buffer (b)->ip.reass.l4_src_port;
  u16 dport = vnet_buffer (b)->ip.reass.l4_dst_port;
  u16 *checksum = NULL;
  ip_csum_t csum = 0;
  nat64_db_t *db = &nm->db[thread_index];

  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = ip6->src_address.as_u64[0];
  saddr.as_u64[1] = ip6->src_address.as_u64[1];
  daddr.as_u64[0] = ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = ip6->dst_address.as_u64[1];

  /* Start the incremental checksum update by removing the old addresses;
     the new ones are added back in after the rewrite below. */
  if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
    {
      if (proto == IP_PROTOCOL_UDP)
	checksum = &udp->checksum;
      else
	checksum = &tcp->checksum;
      csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
      csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
      csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
    }

  /* Find or create the forward-direction BIB/session entries. */
  ste =
    nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
			    fib_index, 1);

  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;
    }
  else
    {
      bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1);

      if (!bibe)
	{
	  /* No binding yet: allocate an outside address/port pair. */
	  u16 out_port;
	  ip4_address_t out_addr;
	  if (nat64_alloc_out_addr_and_port
	      (fib_index, ip_proto_to_nat_proto (proto), &out_addr,
	       &out_port, thread_index))
	    return -1;

	  bibe =
	    nat64_db_bib_entry_create (thread_index, db, &ip6->src_address,
				       &out_addr, sport, out_port, fib_index,
				       proto, 0);
	  if (!bibe)
	    return -1;

	  vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
				   db->bib.bib_entries_num);
	}

      nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
      ste =
	nat64_db_st_entry_create (thread_index, db, bibe, &ip6->dst_address,
				  &daddr.ip4, dport);
      if (!ste)
	return -1;

      vlib_set_simple_counter (&nm->total_sessions, thread_index, 0,
			       db->st.st_entries_num);
    }

  if (proto == IP_PROTOCOL_TCP)
    nat64_tcp_session_set_state (ste, tcp, 1);

  nat64_session_reset_timeout (ste, vm);

  if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
    {
      udp->src_port = bibe->out_port;
    }

  /* New source: outside address embedded into the NAT64 prefix. */
  nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);

  clib_memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ste->out_r_addr.as_u32;

  /* Find the peer's binding in any thread's db to recover its inside
     IPv6 address and port. */
  bibe = 0;
  vec_foreach (db, nm->db)
    {
      bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, 0, 0);

      if (bibe)
	break;
    }

  if (!bibe)
    return -1;

  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];

  /* Fold the rewritten addresses and ports back into the checksum. */
  if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
    {
      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
      csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
      csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
      csum = ip_csum_sub_even (csum, sport);
      csum = ip_csum_sub_even (csum, dport);
      udp->dst_port = bibe->in_port;
      csum = ip_csum_add_even (csum, udp->src_port);
      csum = ip_csum_add_even (csum, udp->dst_port);
      *checksum = ip_csum_fold (csum);
    }

  return 0;
}
775
/**
 * @brief Hairpinning translation for ICMPv6 error messages.
 *
 * Handles ICMPv6 errors whose embedded (inner) packet belongs to a
 * hairpinned TCP/UDP flow: the inner packet's addresses/ports are mapped
 * through the existing session state, the outer header is rewritten to
 * stay IPv6, and the ICMP checksum is recomputed over the IPv6
 * pseudo-header.  Echo request/reply and inner-ICMP payloads are rejected.
 *
 * @param vm           vlib main (unused).
 * @param b            Buffer holding the packet.
 * @param ip6          Outer IPv6 header.
 * @param thread_index Worker thread index (selects the NAT64 db).
 *
 * @returns 0 on success, -1 when the packet cannot be translated (drop).
 */
static int
nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
			       ip6_header_t * ip6, u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  icmp46_header_t *icmp = ip6_next_header (ip6);
  ip6_header_t *inner_ip6;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  u8 proto;
  udp_header_t *udp;
  tcp_header_t *tcp;
  u16 *checksum, sport, dport;
  ip_csum_t csum;
  nat64_db_t *db = &nm->db[thread_index];

  /* Only ICMP errors carry an embedded packet; echoes are handled
     elsewhere. */
  if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
    return -1;

  /* Embedded packet starts after the 8-byte ICMP error header. */
  inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);

  proto = inner_ip6->protocol;

  if (proto == IP_PROTOCOL_ICMP6)
    return -1;

  sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  saddr.as_u64[0] = inner_ip6->src_address.as_u64[0];
  saddr.as_u64[1] = inner_ip6->src_address.as_u64[1];
  daddr.as_u64[0] = inner_ip6->dst_address.as_u64[0];
  daddr.as_u64[1] = inner_ip6->dst_address.as_u64[1];

  /* TCP/UDP share the port layout; only the checksum field differs. */
  udp = ip6_next_header (inner_ip6);
  tcp = ip6_next_header (inner_ip6);

  sport = udp->src_port;
  dport = udp->dst_port;

  if (proto == IP_PROTOCOL_UDP)
    checksum = &udp->checksum;
  else
    checksum = &tcp->checksum;

  /* Remove old inner addresses/ports from the inner L4 checksum. */
  csum = ip_csum_sub_even (*checksum, inner_ip6->src_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]);
  csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]);
  csum = ip_csum_sub_even (csum, sport);
  csum = ip_csum_sub_even (csum, dport);

  /* Inner packet travels in the reverse direction: swap src/dst. */
  ste =
    nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
			    fib_index, 1);
  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
  if (!bibe)
    return -1;

  dport = udp->dst_port = bibe->out_port;
  nat64_compose_ip6 (&inner_ip6->dst_address, &bibe->out_addr, fib_index);

  clib_memset (&saddr, 0, sizeof (saddr));
  clib_memset (&daddr, 0, sizeof (daddr));
  saddr.ip4.as_u32 = ste->out_r_addr.as_u32;
  daddr.ip4.as_u32 = bibe->out_addr.as_u32;

  /* Find the peer's session in any thread's db to recover its inside
     IPv6 address and port. */
  ste = 0;
  vec_foreach (db, nm->db)
    {
      ste = nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
				    0, 0);

      if (ste)
	break;
    }

  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
  if (!bibe)
    return -1;

  inner_ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
  inner_ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
  udp->src_port = bibe->in_port;

  /* Fold the rewritten inner addresses/ports back into the checksum. */
  csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]);
  csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]);
  csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]);
  csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]);
  csum = ip_csum_add_even (csum, udp->src_port);
  csum = ip_csum_add_even (csum, udp->dst_port);
  *checksum = ip_csum_fold (csum);

  if (!vec_len (nm->addr_pool))
    return -1;

  /* Rewrite the outer header: source is the NAT64-prefixed first pool
     address, destination is the inner packet's (new) source. */
  nat64_compose_ip6 (&ip6->src_address, &nm->addr_pool[0].addr, fib_index);
  ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
  ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];

  /* Recompute the ICMP checksum from scratch over the IPv6
     pseudo-header plus the ICMP message. */
  icmp->checksum = 0;
  csum = ip_csum_with_carry (0, ip6->payload_length);
  csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol));
  csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]);
  csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]);
  csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]);
  csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]);
  csum =
    ip_incremental_checksum (csum, icmp,
			     clib_net_to_host_u16 (ip6->payload_length));
  icmp->checksum = ~ip_csum_fold (csum);

  return 0;
}
899
900 static int
901 nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
902                                     ip6_header_t * ip6, u32 thread_index)
903 {
904   nat64_main_t *nm = &nat64_main;
905   nat64_db_bib_entry_t *bibe;
906   nat64_db_st_entry_t *ste;
907   ip46_address_t saddr, daddr, addr;
908   u32 sw_if_index, fib_index;
909   u8 proto = ip6->protocol;
910   int i;
911   nat64_db_t *db = &nm->db[thread_index];
912
913   sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
914   fib_index =
915     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
916
917   saddr.as_u64[0] = ip6->src_address.as_u64[0];
918   saddr.as_u64[1] = ip6->src_address.as_u64[1];
919   daddr.as_u64[0] = ip6->dst_address.as_u64[0];
920   daddr.as_u64[1] = ip6->dst_address.as_u64[1];
921
922   ste =
923     nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1);
924
925   if (ste)
926     {
927       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
928       if (!bibe)
929         return -1;
930     }
931   else
932     {
933       bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1);
934
935       if (!bibe)
936         {
937           /* Choose same out address as for TCP/UDP session to same dst */
938           unk_proto_st_walk_ctx_t ctx = {
939             .src_addr.as_u64[0] = ip6->src_address.as_u64[0],
940             .src_addr.as_u64[1] = ip6->src_address.as_u64[1],
941             .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0],
942             .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1],
943             .out_addr.as_u32 = 0,
944             .fib_index = fib_index,
945             .proto = proto,
946             .thread_index = thread_index,
947           };
948
949           nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx);
950
951           if (!ctx.out_addr.as_u32)
952             nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx);
953
954           /* Verify if out address is not already in use for protocol */
955           clib_memset (&addr, 0, sizeof (addr));
956           addr.ip4.as_u32 = ctx.out_addr.as_u32;
957           if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
958             ctx.out_addr.as_u32 = 0;
959
960           if (!ctx.out_addr.as_u32)
961             {
962               for (i = 0; i < vec_len (nm->addr_pool); i++)
963                 {
964                   addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32;
965                   if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0))
966                     break;
967                 }
968             }
969
970           if (!ctx.out_addr.as_u32)
971             return -1;
972
973           bibe =
974             nat64_db_bib_entry_create (thread_index, db, &ip6->src_address,
975                                        &ctx.out_addr, 0, 0, fib_index, proto,
976                                        0);
977           if (!bibe)
978             return -1;
979
980           vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
981                                    db->bib.bib_entries_num);
982         }
983
984       nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index);
985       ste =
986         nat64_db_st_entry_create (thread_index, db, bibe, &ip6->dst_address,
987                                   &daddr.ip4, 0);
988       if (!ste)
989         return -1;
990
991       vlib_set_simple_counter (&nm->total_sessions, thread_index, 0,
992                                db->st.st_entries_num);
993     }
994
995   nat64_session_reset_timeout (ste, vm);
996
997   nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index);
998
999   clib_memset (&daddr, 0, sizeof (daddr));
1000   daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
1001
1002   bibe = 0;
1003   vec_foreach (db, nm->db)
1004     {
1005       bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, 0, 0);
1006
1007       if (bibe)
1008         break;
1009     }
1010
1011   if (!bibe)
1012     return -1;
1013
1014   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
1015   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
1016
1017   return 0;
1018 }
1019
/**
 * Shared per-frame worker for the NAT64 in2out fast and slow path nodes.
 *
 * Parses every IPv6 packet, skips packets that must not be translated,
 * and dispatches to the protocol specific translation routines.
 * Hairpinned packets (destination inside the NAT64 prefix) remain IPv6
 * and are sent to ip6-lookup; translated packets go to ip4-lookup.
 *
 * @param vm           vlib main
 * @param node         node runtime
 * @param frame        frame of buffer indices to process
 * @param is_slow_path 1 when invoked as the slow path node, which
 *                     handles protocols other than TCP/UDP/ICMP
 *
 * @returns number of vectors processed
 */
static inline uword
nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
			     vlib_frame_t * frame, u8 is_slow_path)
{
  u32 n_left_from, *from, *to_next;
  nat64_in2out_next_t next_index;
  u32 thread_index = vm->thread_index;
  nat64_main_t *nm = &nat64_main;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0;
	  vlib_buffer_t *b0;
	  u32 next0;
	  ip6_header_t *ip60;
	  u16 l4_offset0, frag_hdr_offset0;
	  u8 l4_protocol0;
	  u32 proto0;
	  nat64_in2out_set_ctx_t ctx0;
	  u32 sw_if_index0;

	  /* speculatively enqueue b0 to the current next frame */
	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  b0 = vlib_get_buffer (vm, bi0);
	  ip60 = vlib_buffer_get_current (b0);

	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];

	  ctx0.b = b0;
	  ctx0.vm = vm;
	  ctx0.thread_index = thread_index;

	  next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP;

	  /* drop packets whose IPv6 header chain cannot be parsed */
	  if (PREDICT_FALSE
	      (ip6_parse
	       (vm, b0, ip60, b0->current_length, &l4_protocol0, &l4_offset0,
		&frag_hdr_offset0)))
	    {
	      next0 = NAT64_IN2OUT_NEXT_DROP;
	      b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN];
	      goto trace0;
	    }

	  /* destination not subject to NAT64 -> regular IPv6 forwarding */
	  if (nat64_not_translate (sw_if_index0, ip60->dst_address))
	    {
	      next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
	      goto trace0;
	    }

	  proto0 = ip_proto_to_nat_proto (l4_protocol0);

	  if (is_slow_path)
	    {
	      /* slow path handles only unknown (non-TCP/UDP/ICMP) protos */
	      if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_OTHER))
		{
		  vlib_increment_simple_counter (&nm->counters.in2out.other,
						 thread_index, sw_if_index0,
						 1);
		  if (is_hairpinning (&ip60->dst_address))
		    {
		      next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
		      if (nat64_in2out_unk_proto_hairpinning
			  (vm, b0, ip60, thread_index))
			{
			  next0 = NAT64_IN2OUT_NEXT_DROP;
			  b0->error =
			    node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
			}
		      goto trace0;
		    }

		  if (nat64_in2out_unk_proto
		      (vm, b0, l4_protocol0, l4_offset0, frag_hdr_offset0,
		       &ctx0))
		    {
		      next0 = NAT64_IN2OUT_NEXT_DROP;
		      b0->error =
			node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		      goto trace0;
		    }
		}
	      goto trace0;
	    }
	  else
	    {
	      /* fast path: punt unknown protocols to the slow path node */
	      if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
		{
		  next0 = NAT64_IN2OUT_NEXT_SLOWPATH;
		  goto trace0;
		}
	    }

	  if (proto0 == NAT_PROTOCOL_ICMP)
	    {
	      vlib_increment_simple_counter (&nm->counters.in2out.icmp,
					     thread_index, sw_if_index0, 1);
	      if (is_hairpinning (&ip60->dst_address))
		{
		  next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
		  if (nat64_in2out_icmp_hairpinning
		      (vm, b0, ip60, thread_index))
		    {
		      next0 = NAT64_IN2OUT_NEXT_DROP;
		      b0->error =
			node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		    }
		  goto trace0;
		}

	      if (icmp6_to_icmp
		  (vm, b0, nat64_in2out_icmp_set_cb, &ctx0,
		   nat64_in2out_inner_icmp_set_cb, &ctx0))
		{
		  next0 = NAT64_IN2OUT_NEXT_DROP;
		  b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		  goto trace0;
		}
	    }
	  else if (proto0 == NAT_PROTOCOL_TCP || proto0 == NAT_PROTOCOL_UDP)
	    {
	      if (proto0 == NAT_PROTOCOL_TCP)
		vlib_increment_simple_counter (&nm->counters.in2out.tcp,
					       thread_index, sw_if_index0, 1);
	      else
		vlib_increment_simple_counter (&nm->counters.in2out.udp,
					       thread_index, sw_if_index0, 1);

	      if (is_hairpinning (&ip60->dst_address))
		{
		  next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP;
		  if (nat64_in2out_tcp_udp_hairpinning
		      (vm, b0, ip60, l4_offset0, thread_index))
		    {
		      next0 = NAT64_IN2OUT_NEXT_DROP;
		      b0->error =
			node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		    }
		  goto trace0;
		}

	      if (nat64_in2out_tcp_udp
		  (vm, b0, l4_offset0, frag_hdr_offset0, &ctx0))
		{
		  next0 = NAT64_IN2OUT_NEXT_DROP;
		  b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION];
		  goto trace0;
		}
	    }

	trace0:
	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
			     && (b0->flags & VLIB_BUFFER_IS_TRACED)))
	    {
	      nat64_in2out_trace_t *t =
		vlib_add_trace (vm, node, b0, sizeof (*t));
	      t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
	      t->next_index = next0;
	      t->is_slow_path = is_slow_path;
	    }

	  /* count every dropped packet per RX interface */
	  if (next0 == NAT64_IN2OUT_NEXT_DROP)
	    {
	      vlib_increment_simple_counter (&nm->counters.in2out.drops,
					     thread_index, sw_if_index0, 1);
	    }


	  /* verify speculative enqueue, maybe switch current next frame */
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
					   n_left_to_next, bi0, next0);
	}
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}
1213
/** NAT64 in2out fast path node function (TCP/UDP/ICMP). */
VLIB_NODE_FN (nat64_in2out_node) (vlib_main_t * vm,
				  vlib_node_runtime_t * node,
				  vlib_frame_t * frame)
{
  /* is_slow_path = 0: unknown protocols are punted to the slow path */
  return nat64_in2out_node_fn_inline (vm, node, frame, 0);
}
1220
/** Graph node registration for the NAT64 in2out fast path. */
VLIB_REGISTER_NODE (nat64_in2out_node) = {
  .name = "nat64-in2out",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
  .error_strings = nat64_in2out_error_strings,
  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
  },
};
1237
/** NAT64 in2out slow path node function (non-TCP/UDP/ICMP protocols). */
VLIB_NODE_FN (nat64_in2out_slowpath_node) (vlib_main_t * vm,
					   vlib_node_runtime_t * node,
					   vlib_frame_t * frame)
{
  /* is_slow_path = 1: handle NAT_PROTOCOL_OTHER packets here */
  return nat64_in2out_node_fn_inline (vm, node, frame, 1);
}
1244
/** Graph node registration for the NAT64 in2out slow path. */
VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
  .name = "nat64-in2out-slowpath",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_in2out_error_strings),
  .error_strings = nat64_in2out_error_strings,
  .n_next_nodes = NAT64_IN2OUT_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_IN2OUT_NEXT_DROP] = "error-drop",
    [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
  },
};
1261
/* Context for NAT64 in2out fragment handling.  NOTE(review): not
   referenced anywhere in this part of the file; field meanings are
   inferred from their names — verify at the use site. */
typedef struct nat64_in2out_frag_set_ctx_t_
{
  vlib_main_t *vm;
  u32 sess_index;		/* presumably session table entry index */
  u32 thread_index;		/* presumably owning worker thread */
  u16 l4_offset;		/* presumably L4 header offset in packet */
  u8 proto;			/* presumably L4 protocol number */
  u8 first_frag;		/* presumably non-zero for first fragment */
} nat64_in2out_frag_set_ctx_t;
1271
1272
/* Counters reported by the in2out worker handoff node */
#define foreach_nat64_in2out_handoff_error                       \
_(CONGESTION_DROP, "congestion drop")                            \
_(SAME_WORKER, "same worker")                                    \
_(DO_HANDOFF, "do handoff")

typedef enum
{
#define _(sym,str) NAT64_IN2OUT_HANDOFF_ERROR_##sym,
  foreach_nat64_in2out_handoff_error
#undef _
    NAT64_IN2OUT_HANDOFF_N_ERROR,
} nat64_in2out_handoff_error_t;

/* Human-readable strings, one per counter in the foreach list above */
static char *nat64_in2out_handoff_error_strings[] = {
#define _(sym,string) string,
  foreach_nat64_in2out_handoff_error
#undef _
};
1291
/** Trace record for the handoff node: worker the buffer was assigned to. */
typedef struct
{
  u32 next_worker_index;
} nat64_in2out_handoff_trace_t;
1296
1297 static u8 *
1298 format_nat64_in2out_handoff_trace (u8 * s, va_list * args)
1299 {
1300   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1301   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1302   nat64_in2out_handoff_trace_t *t =
1303     va_arg (*args, nat64_in2out_handoff_trace_t *);
1304
1305   s =
1306     format (s, "NAT64-IN2OUT-HANDOFF: next-worker %d", t->next_worker_index);
1307
1308   return s;
1309 }
1310
/**
 * NAT64 in2out worker handoff node function.
 *
 * Computes the owning worker for every packet from its IPv6 source
 * address and enqueues the buffer to that worker's frame queue,
 * counting same-worker, handed-off and congestion-dropped packets.
 *
 * @returns number of vectors processed
 */
VLIB_NODE_FN (nat64_in2out_handoff_node) (vlib_main_t * vm,
					  vlib_node_runtime_t * node,
					  vlib_frame_t * frame)
{
  nat64_main_t *nm = &nat64_main;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u32 n_enq, n_left_from, *from;
  u16 thread_indices[VLIB_FRAME_SIZE], *ti;
  u32 fq_index;
  u32 thread_index = vm->thread_index;
  u32 do_handoff = 0, same_worker = 0;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  vlib_get_buffers (vm, from, bufs, n_left_from);

  b = bufs;
  ti = thread_indices;

  fq_index = nm->fq_in2out_index;

  while (n_left_from > 0)
    {
      ip6_header_t *ip0;

      ip0 = vlib_buffer_get_current (b[0]);
      /* owning worker is derived from the IPv6 source address */
      ti[0] = nat64_get_worker_in2out (&ip0->src_address);

      if (ti[0] != thread_index)
	do_handoff++;
      else
	same_worker++;

      if (PREDICT_FALSE
	  ((node->flags & VLIB_NODE_FLAG_TRACE)
	   && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
	{
	  nat64_in2out_handoff_trace_t *t =
	    vlib_add_trace (vm, node, b[0], sizeof (*t));
	  t->next_worker_index = ti[0];
	}

      n_left_from -= 1;
      ti += 1;
      b += 1;
    }

  /* buffers that cannot be enqueued are counted as congestion drops */
  n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
					 thread_indices, frame->n_vectors, 1);

  if (n_enq < frame->n_vectors)
    vlib_node_increment_counter (vm, node->node_index,
				 NAT64_IN2OUT_HANDOFF_ERROR_CONGESTION_DROP,
				 frame->n_vectors - n_enq);
  vlib_node_increment_counter (vm, node->node_index,
			       NAT64_IN2OUT_HANDOFF_ERROR_SAME_WORKER,
			       same_worker);
  vlib_node_increment_counter (vm, node->node_index,
			       NAT64_IN2OUT_HANDOFF_ERROR_DO_HANDOFF,
			       do_handoff);

  return frame->n_vectors;
}
1374
/** Graph node registration for the NAT64 in2out worker handoff node. */
VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = {
  .name = "nat64-in2out-handoff",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_in2out_handoff_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN(nat64_in2out_handoff_error_strings),
  .error_strings = nat64_in2out_handoff_error_strings,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};
1389
1390 /*
1391  * fd.io coding-style-patch-verification: ON
1392  *
1393  * Local Variables:
1394  * eval: (c-set-style "gnu")
1395  * End:
1396  */