f7d4dd417f502d6d5be1542598dd320efd5534f9
[vpp.git] / src / plugins / nat / nat64_out2in.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT64 IPv4 to IPv6 translation (outside to inside network)
18  */
19
20 #include <nat/nat64.h>
21 #include <nat/nat_reass.h>
22 #include <nat/nat_inlines.h>
23 #include <vnet/ip/ip4_to_ip6.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp.h>
26
/* Per-packet trace record for the nat64-out2in node. */
typedef struct
{
  u32 sw_if_index;		/* RX interface of the traced packet */
  u32 next_index;		/* next node chosen for the packet */
} nat64_out2in_trace_t;
32
33 static u8 *
34 format_nat64_out2in_trace (u8 * s, va_list * args)
35 {
36   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
37   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
38   nat64_out2in_trace_t *t = va_arg (*args, nat64_out2in_trace_t *);
39
40   s =
41     format (s, "NAT64-out2in: sw_if_index %d, next index %d", t->sw_if_index,
42             t->next_index);
43
44   return s;
45 }
46
/* Per-packet trace record for the nat64-out2in-reass node. */
typedef struct
{
  u32 sw_if_index;		/* RX interface of the traced packet */
  u32 next_index;		/* next node chosen for the packet */
  u8 cached;			/* 1 if the fragment was cached, 0 if translated */
} nat64_out2in_reass_trace_t;
53
54 static u8 *
55 format_nat64_out2in_reass_trace (u8 * s, va_list * args)
56 {
57   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
58   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
59   nat64_out2in_reass_trace_t *t =
60     va_arg (*args, nat64_out2in_reass_trace_t *);
61
62   s =
63     format (s, "NAT64-out2in-reass: sw_if_index %d, next index %d, status %s",
64             t->sw_if_index, t->next_index,
65             t->cached ? "cached" : "translated");
66
67   return s;
68 }
69
/* Node registrations; the node definitions appear further below. */
vlib_node_registration_t nat64_out2in_node;
vlib_node_registration_t nat64_out2in_reass_node;
vlib_node_registration_t nat64_out2in_handoff_node;

/* Error counters shared by all out2in nodes: _(symbol, counter string). */
#define foreach_nat64_out2in_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")          \
_(OUT2IN_PACKETS, "Good out2in packets processed")       \
_(NO_TRANSLATION, "No translation")                      \
_(UNKNOWN, "unknown")                                    \
_(DROP_FRAGMENT, "Drop fragment")                        \
_(MAX_REASS, "Maximum reassemblies exceeded")            \
_(MAX_FRAG, "Maximum fragments per reassembly exceeded")
82
83
/* Error codes generated from the error list above. */
typedef enum
{
#define _(sym,str) NAT64_OUT2IN_ERROR_##sym,
  foreach_nat64_out2in_error
#undef _
    NAT64_OUT2IN_N_ERROR,
} nat64_out2in_error_t;

/* Human-readable strings for the error counters, same order as the enum. */
static char *nat64_out2in_error_strings[] = {
#define _(sym,string) string,
  foreach_nat64_out2in_error
#undef _
};
97
/* Next-node dispositions for the out2in nodes. */
typedef enum
{
  NAT64_OUT2IN_NEXT_IP6_LOOKUP,	/* translated packet -> IPv6 lookup */
  NAT64_OUT2IN_NEXT_IP4_LOOKUP,	/* left as IPv4 (e.g. DHCP) -> IPv4 lookup */
  NAT64_OUT2IN_NEXT_DROP,
  NAT64_OUT2IN_NEXT_REASS,	/* IPv4 fragment -> reassembly node */
  NAT64_OUT2IN_N_NEXT,
} nat64_out2in_next_t;
106
/* Context handed to the ip4_to_ip6 translation callbacks. */
typedef struct nat64_out2in_set_ctx_t_
{
  vlib_buffer_t *b;		/* buffer being translated */
  vlib_main_t *vm;
  u32 thread_index;		/* selects the per-thread NAT64 db */
} nat64_out2in_set_ctx_t;
113
114 static int
115 nat64_out2in_tcp_udp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
116                              void *arg)
117 {
118   nat64_main_t *nm = &nat64_main;
119   nat64_out2in_set_ctx_t *ctx = arg;
120   nat64_db_bib_entry_t *bibe;
121   nat64_db_st_entry_t *ste;
122   ip46_address_t saddr, daddr;
123   ip6_address_t ip6_saddr;
124   udp_header_t *udp = ip4_next_header (ip4);
125   tcp_header_t *tcp = ip4_next_header (ip4);
126   u8 proto = ip4->protocol;
127   u16 dport = udp->dst_port;
128   u16 sport = udp->src_port;
129   u32 sw_if_index, fib_index;
130   u16 *checksum;
131   ip_csum_t csum;
132   nat64_db_t *db = &nm->db[ctx->thread_index];
133
134   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
135   fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
136
137   memset (&saddr, 0, sizeof (saddr));
138   saddr.ip4.as_u32 = ip4->src_address.as_u32;
139   memset (&daddr, 0, sizeof (daddr));
140   daddr.ip4.as_u32 = ip4->dst_address.as_u32;
141
142   ste =
143     nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
144                             fib_index, 0);
145   if (ste)
146     {
147       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
148       if (!bibe)
149         return -1;
150     }
151   else
152     {
153       bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, fib_index, 0);
154
155       if (!bibe)
156         return -1;
157
158       nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
159       ste =
160         nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, sport);
161     }
162
163   nat64_session_reset_timeout (ste, ctx->vm);
164
165   ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
166   ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
167
168   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
169   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
170   udp->dst_port = bibe->in_port;
171
172   if (proto == IP_PROTOCOL_UDP)
173     checksum = &udp->checksum;
174   else
175     checksum = &tcp->checksum;
176   csum = ip_csum_sub_even (*checksum, dport);
177   csum = ip_csum_add_even (csum, udp->dst_port);
178   *checksum = ip_csum_fold (csum);
179
180   vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
181
182   return 0;
183 }
184
185 static int
186 nat64_out2in_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
187 {
188   nat64_main_t *nm = &nat64_main;
189   nat64_out2in_set_ctx_t *ctx = arg;
190   nat64_db_bib_entry_t *bibe;
191   nat64_db_st_entry_t *ste;
192   ip46_address_t saddr, daddr;
193   ip6_address_t ip6_saddr;
194   u32 sw_if_index, fib_index;
195   icmp46_header_t *icmp = ip4_next_header (ip4);
196   nat64_db_t *db = &nm->db[ctx->thread_index];
197
198   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
199   fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
200
201   memset (&saddr, 0, sizeof (saddr));
202   saddr.ip4.as_u32 = ip4->src_address.as_u32;
203   memset (&daddr, 0, sizeof (daddr));
204   daddr.ip4.as_u32 = ip4->dst_address.as_u32;
205
206   if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
207     {
208       u16 out_id = ((u16 *) (icmp))[2];
209       ste =
210         nat64_db_st_entry_find (db, &daddr, &saddr, out_id, 0,
211                                 IP_PROTOCOL_ICMP, fib_index, 0);
212
213       if (ste)
214         {
215           bibe =
216             nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP,
217                                          ste->bibe_index);
218           if (!bibe)
219             return -1;
220         }
221       else
222         {
223           bibe =
224             nat64_db_bib_entry_find (db, &daddr, out_id,
225                                      IP_PROTOCOL_ICMP, fib_index, 0);
226           if (!bibe)
227             return -1;
228
229           nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
230           ste =
231             nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, 0);
232         }
233
234       nat64_session_reset_timeout (ste, ctx->vm);
235
236       ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
237       ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
238
239       ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
240       ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
241       ((u16 *) (icmp))[2] = bibe->in_port;
242
243       vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
244     }
245   else
246     {
247       ip6_header_t *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
248
249       nat64_compose_ip6 (&ip6->src_address, &ip4->src_address,
250                          vnet_buffer (ctx->b)->sw_if_index[VLIB_TX]);
251       ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
252       ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];
253     }
254
255   return 0;
256 }
257
/**
 * @brief icmp_to_icmp6 inner-packet callback: translate the embedded
 * packet of an out2in ICMP error message.
 *
 * The inner packet travels in the opposite direction to the outer one,
 * so the session lookup uses (saddr, daddr) rather than (daddr, saddr)
 * and the source port/identifier is rewritten instead of the destination.
 *
 * @return 0 on success, -1 when no matching session exists.
 */
static int
nat64_out2in_inner_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
				void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_out2in_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  u8 proto = ip4->protocol;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  /* NOTE(review): the IPv6 FIB is consulted here while the sibling
     callbacks use the IPv4 FIB -- looks intentional for the inner
     (reverse-direction) lookup, but confirm. */
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  memset (&saddr, 0, sizeof (saddr));
  saddr.ip4.as_u32 = ip4->src_address.as_u32;
  memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ip4->dst_address.as_u32;

  if (proto == IP_PROTOCOL_ICMP6)
    {
      icmp46_header_t *icmp = ip4_next_header (ip4);
      /* Echo identifier sits right after type/code/checksum. */
      u16 out_id = ((u16 *) (icmp))[2];
      proto = IP_PROTOCOL_ICMP;

      /* Only echo request/reply can appear inside an ICMP error. */
      if (!
	  (icmp->type == ICMP6_echo_request
	   || icmp->type == ICMP6_echo_reply))
	return -1;

      ste =
	nat64_db_st_entry_find (db, &saddr, &daddr, out_id, 0, proto,
				fib_index, 0);
      if (!ste)
	return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;

      ip6->dst_address.as_u64[0] = ste->in_r_addr.as_u64[0];
      ip6->dst_address.as_u64[1] = ste->in_r_addr.as_u64[1];
      ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
      ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
      /* Rewrite the echo identifier to the inside port. */
      ((u16 *) (icmp))[2] = bibe->in_port;

      vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
    }
  else
    {
      /* UDP and TCP headers start at the same offset; ports alias. */
      udp_header_t *udp = ip4_next_header (ip4);
      tcp_header_t *tcp = ip4_next_header (ip4);
      u16 dport = udp->dst_port;
      u16 sport = udp->src_port;
      u16 *checksum;
      ip_csum_t csum;

      ste =
	nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
				fib_index, 0);
      if (!ste)
	return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;

      nat64_compose_ip6 (&ip6->dst_address, &daddr.ip4, bibe->fib_index);
      ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
      ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
      udp->src_port = bibe->in_port;

      if (proto == IP_PROTOCOL_UDP)
	checksum = &udp->checksum;
      else
	checksum = &tcp->checksum;
      /* A zero UDP checksum means "not computed"; only patch non-zero. */
      if (*checksum)
	{
	  csum = ip_csum_sub_even (*checksum, sport);
	  csum = ip_csum_add_even (csum, udp->src_port);
	  *checksum = ip_csum_fold (csum);
	}

      vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
    }

  return 0;
}
349
350 static int
351 nat64_out2in_unk_proto_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
352                                void *arg)
353 {
354   nat64_main_t *nm = &nat64_main;
355   nat64_out2in_set_ctx_t *ctx = arg;
356   nat64_db_bib_entry_t *bibe;
357   nat64_db_st_entry_t *ste;
358   ip46_address_t saddr, daddr;
359   ip6_address_t ip6_saddr;
360   u32 sw_if_index, fib_index;
361   u8 proto = ip4->protocol;
362   nat64_db_t *db = &nm->db[ctx->thread_index];
363
364   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
365   fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
366
367   memset (&saddr, 0, sizeof (saddr));
368   saddr.ip4.as_u32 = ip4->src_address.as_u32;
369   memset (&daddr, 0, sizeof (daddr));
370   daddr.ip4.as_u32 = ip4->dst_address.as_u32;
371
372   ste =
373     nat64_db_st_entry_find (db, &daddr, &saddr, 0, 0, proto, fib_index, 0);
374   if (ste)
375     {
376       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
377       if (!bibe)
378         return -1;
379     }
380   else
381     {
382       bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, fib_index, 0);
383
384       if (!bibe)
385         return -1;
386
387       nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
388       ste = nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, 0);
389     }
390
391   nat64_session_reset_timeout (ste, ctx->vm);
392
393   ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
394   ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
395
396   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
397   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
398
399   vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
400
401   return 0;
402 }
403
/**
 * @brief NAT64 out2in fast-path node: translate IPv4 packets arriving
 * from the outside network into IPv6 packets for the inside network.
 *
 * Fragments are redirected to the reassembly node; unknown L4 protocols,
 * ICMP and TCP/UDP each use their dedicated translation callback.
 */
static uword
nat64_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
		      vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  nat64_out2in_next_t next_index;
  u32 pkts_processed = 0;
  u32 thread_index = vm->thread_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0;
	  vlib_buffer_t *b0;
	  u32 next0;
	  ip4_header_t *ip40;
	  u32 proto0;
	  nat64_out2in_set_ctx_t ctx0;
	  udp_header_t *udp0;

	  /* speculatively enqueue b0 to the current next frame */
	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  b0 = vlib_get_buffer (vm, bi0);
	  ip40 = vlib_buffer_get_current (b0);

	  ctx0.b = b0;
	  ctx0.vm = vm;
	  ctx0.thread_index = thread_index;

	  next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP;

	  proto0 = ip_proto_to_snat_proto (ip40->protocol);

	  /* Not TCP/UDP/ICMP: address-only translation. */
	  if (PREDICT_FALSE (proto0 == ~0))
	    {
	      if (ip4_to_ip6 (b0, nat64_out2in_unk_proto_set_cb, &ctx0))
		{
		  next0 = NAT64_OUT2IN_NEXT_DROP;
		  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
		}
	      goto trace0;
	    }

	  /* Fragments need the L4 header from the first fragment; hand
	     them to the reassembly node. */
	  if (PREDICT_FALSE (ip4_is_fragment (ip40)))
	    {
	      next0 = NAT64_OUT2IN_NEXT_REASS;
	      goto trace0;
	    }

	  if (proto0 == SNAT_PROTOCOL_ICMP)
	    {
	      if (icmp_to_icmp6
		  (b0, nat64_out2in_icmp_set_cb, &ctx0,
		   nat64_out2in_inner_icmp_set_cb, &ctx0))
		{
		  next0 = NAT64_OUT2IN_NEXT_DROP;
		  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
		  goto trace0;
		}
	    }
	  else
	    {
	      if (ip4_to_ip6_tcp_udp (b0, nat64_out2in_tcp_udp_set_cb, &ctx0))
		{
		  udp0 = ip4_next_header (ip40);
		  /*
		   * Send DHCP packets to the ipv4 stack, or we won't
		   * be able to use dhcp client on the outside interface
		   */
		  if ((proto0 == SNAT_PROTOCOL_UDP)
		      && (udp0->dst_port ==
			  clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client)))
		    {
		      next0 = NAT64_OUT2IN_NEXT_IP4_LOOKUP;
		      goto trace0;
		    }
		  next0 = NAT64_OUT2IN_NEXT_DROP;
		  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
		  goto trace0;
		}
	    }

	trace0:
	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
			     && (b0->flags & VLIB_BUFFER_IS_TRACED)))
	    {
	      nat64_out2in_trace_t *t =
		vlib_add_trace (vm, node, b0, sizeof (*t));
	      t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
	      t->next_index = next0;
	    }

	  pkts_processed += next0 != NAT64_OUT2IN_NEXT_DROP;

	  /* verify speculative enqueue, maybe switch current next frame */
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
					   n_left_to_next, bi0, next0);
	}
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  vlib_node_increment_counter (vm, nat64_out2in_node.index,
			       NAT64_OUT2IN_ERROR_OUT2IN_PACKETS,
			       pkts_processed);
  return frame->n_vectors;
}
523
/* Graph node registration for the NAT64 out2in fast path. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_out2in_node) = {
  .function = nat64_out2in_node_fn,
  .name = "nat64-out2in",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_out2in_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
  .error_strings = nat64_out2in_error_strings,
  .n_next_nodes = NAT64_OUT2IN_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
    [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_node, nat64_out2in_node_fn);
545
/* Context handed to the fragment translation callback. */
typedef struct nat64_out2in_frag_set_ctx_t_
{
  vlib_main_t *vm;
  vlib_buffer_t *b;		/* buffer being translated */
  u32 sess_index;		/* session resolved from the first fragment */
  u32 thread_index;		/* selects the per-thread NAT64 db */
  u8 proto;			/* L4 protocol of the fragmented packet */
  u8 first_frag;		/* 1 if this buffer carries the L4 header */
} nat64_out2in_frag_set_ctx_t;
555
556 static int
557 nat64_out2in_frag_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
558 {
559   nat64_main_t *nm = &nat64_main;
560   nat64_out2in_frag_set_ctx_t *ctx = arg;
561   nat64_db_st_entry_t *ste;
562   nat64_db_bib_entry_t *bibe;
563   udp_header_t *udp = ip4_next_header (ip4);
564   ip_csum_t csum;
565   u16 *checksum;
566   nat64_db_t *db = &nm->db[ctx->thread_index];
567
568   ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
569   if (!ste)
570     return -1;
571
572   bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
573   if (!bibe)
574     return -1;
575
576   nat64_session_reset_timeout (ste, ctx->vm);
577
578   if (ctx->first_frag)
579     {
580       udp->dst_port = bibe->in_port;
581
582       if (ip4->protocol == IP_PROTOCOL_UDP)
583         {
584           checksum = &udp->checksum;
585
586           if (!checksum)
587             {
588               u16 udp_len =
589                 clib_host_to_net_u16 (ip4->length) - sizeof (*ip4);
590               csum = ip_incremental_checksum (0, udp, udp_len);
591               csum =
592                 ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
593               csum =
594                 ip_csum_with_carry (csum,
595                                     clib_host_to_net_u16 (IP_PROTOCOL_UDP));
596               csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[0]);
597               csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[1]);
598               csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[0]);
599               csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[1]);
600               *checksum = ~ip_csum_fold (csum);
601             }
602           else
603             {
604               csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
605               csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
606               csum = ip_csum_sub_even (csum, bibe->out_port);
607               csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
608               csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
609               csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
610               csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
611               csum = ip_csum_add_even (csum, bibe->in_port);
612               *checksum = ip_csum_fold (csum);
613             }
614         }
615       else
616         {
617           tcp_header_t *tcp = ip4_next_header (ip4);
618           checksum = &tcp->checksum;
619           csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
620           csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
621           csum = ip_csum_sub_even (csum, bibe->out_port);
622           csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
623           csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
624           csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
625           csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
626           csum = ip_csum_add_even (csum, bibe->in_port);
627           *checksum = ip_csum_fold (csum);
628         }
629
630     }
631
632   ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
633   ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
634
635   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
636   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
637
638   vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
639
640   return 0;
641 }
642
/**
 * @brief NAT64 out2in reassembly node: virtual reassembly of IPv4
 * fragments so the session (resolved from the first fragment's L4
 * header) can be applied to every fragment.
 *
 * Non-first fragments arriving before the first one are cached on the
 * reassembly and looped back into this node once the session is known.
 */
static uword
nat64_out2in_reass_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
			    vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  nat64_out2in_next_t next_index;
  u32 pkts_processed = 0;
  u32 *fragments_to_drop = 0;
  u32 *fragments_to_loopback = 0;
  nat64_main_t *nm = &nat64_main;
  u32 thread_index = vm->thread_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0;
	  vlib_buffer_t *b0;
	  u32 next0;
	  ip4_header_t *ip40;
	  u8 cached0 = 0;
	  u32 sw_if_index0, fib_index0;
	  udp_header_t *udp0;
	  nat_reass_ip4_t *reass0;
	  ip46_address_t saddr0, daddr0;
	  nat64_db_st_entry_t *ste0;
	  nat64_db_bib_entry_t *bibe0;
	  ip6_address_t ip6_saddr0;
	  nat64_out2in_frag_set_ctx_t ctx0;
	  nat64_db_t *db = &nm->db[thread_index];

	  /* speculatively enqueue b0 to the current next frame */
	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  b0 = vlib_get_buffer (vm, bi0);
	  next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP;

	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
	  fib_index0 =
	    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
						 sw_if_index0);

	  ctx0.thread_index = thread_index;

	  /* Global "drop all fragments" configuration knob. */
	  if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
	    {
	      next0 = NAT64_OUT2IN_NEXT_DROP;
	      b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
	      goto trace0;
	    }

	  ip40 = vlib_buffer_get_current (b0);

	  /* Only TCP and UDP fragments are translated here. */
	  if (PREDICT_FALSE (!(ip40->protocol == IP_PROTOCOL_TCP
			       || ip40->protocol == IP_PROTOCOL_UDP)))
	    {
	      next0 = NAT64_OUT2IN_NEXT_DROP;
	      b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
	      goto trace0;
	    }

	  udp0 = ip4_next_header (ip40);

	  reass0 = nat_ip4_reass_find_or_create (ip40->src_address,
						 ip40->dst_address,
						 ip40->fragment_id,
						 ip40->protocol,
						 1, &fragments_to_drop);

	  if (PREDICT_FALSE (!reass0))
	    {
	      next0 = NAT64_OUT2IN_NEXT_DROP;
	      b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_REASS];
	      goto trace0;
	    }

	  if (PREDICT_FALSE (ip4_is_first_fragment (ip40)))
	    {
	      /* First fragment carries the L4 header: resolve (or
	         create) the session and remember it on the reassembly
	         so the remaining fragments can reuse it. */
	      ctx0.first_frag = 1;

	      memset (&saddr0, 0, sizeof (saddr0));
	      saddr0.ip4.as_u32 = ip40->src_address.as_u32;
	      memset (&daddr0, 0, sizeof (daddr0));
	      daddr0.ip4.as_u32 = ip40->dst_address.as_u32;

	      ste0 =
		nat64_db_st_entry_find (db, &daddr0, &saddr0,
					udp0->dst_port, udp0->src_port,
					ip40->protocol, fib_index0, 0);
	      if (!ste0)
		{
		  bibe0 =
		    nat64_db_bib_entry_find (db, &daddr0, udp0->dst_port,
					     ip40->protocol, fib_index0, 0);
		  if (!bibe0)
		    {
		      next0 = NAT64_OUT2IN_NEXT_DROP;
		      b0->error =
			node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
		      goto trace0;
		    }

		  nat64_compose_ip6 (&ip6_saddr0, &ip40->src_address,
				     bibe0->fib_index);
		  ste0 =
		    nat64_db_st_entry_create (db, bibe0, &ip6_saddr0,
					      &saddr0.ip4, udp0->src_port);

		  if (!ste0)
		    {
		      next0 = NAT64_OUT2IN_NEXT_DROP;
		      b0->error =
			node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
		      goto trace0;
		    }
		}
	      reass0->sess_index = nat64_db_st_entry_get_index (db, ste0);
	      reass0->thread_index = thread_index;

	      /* Re-inject any fragments that arrived before this one. */
	      nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
	    }
	  else
	    {
	      ctx0.first_frag = 0;

	      /* Session unknown yet: cache this fragment until the
	         first fragment shows up. */
	      if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
		{
		  if (nat_ip4_reass_add_fragment (reass0, bi0))
		    {
		      b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_FRAG];
		      next0 = NAT64_OUT2IN_NEXT_DROP;
		      goto trace0;
		    }
		  cached0 = 1;
		  goto trace0;
		}
	    }

	  ctx0.sess_index = reass0->sess_index;
	  ctx0.proto = ip40->protocol;
	  ctx0.vm = vm;
	  ctx0.b = b0;

	  if (ip4_to_ip6_fragmented (b0, nat64_out2in_frag_set_cb, &ctx0))
	    {
	      next0 = NAT64_OUT2IN_NEXT_DROP;
	      b0->error = node->errors[NAT64_OUT2IN_ERROR_UNKNOWN];
	      goto trace0;
	    }

	trace0:
	  if (PREDICT_FALSE
	      ((node->flags & VLIB_NODE_FLAG_TRACE)
	       && (b0->flags & VLIB_BUFFER_IS_TRACED)))
	    {
	      nat64_out2in_reass_trace_t *t =
		vlib_add_trace (vm, node, b0, sizeof (*t));
	      t->cached = cached0;
	      t->sw_if_index = sw_if_index0;
	      t->next_index = next0;
	    }

	  if (cached0)
	    {
	      /* Buffer was cached on the reassembly: undo the
	         speculative enqueue. */
	      n_left_to_next++;
	      to_next--;
	    }
	  else
	    {
	      pkts_processed += next0 != NAT64_OUT2IN_NEXT_DROP;

	      /* verify speculative enqueue, maybe switch current next frame */
	      vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					       to_next, n_left_to_next,
					       bi0, next0);
	    }

	  /* Input drained: feed looped-back (previously cached)
	     fragments into this frame, at most VLIB_FRAME_SIZE at a
	     time. */
	  if (n_left_from == 0 && vec_len (fragments_to_loopback))
	    {
	      from = vlib_frame_vector_args (frame);
	      u32 len = vec_len (fragments_to_loopback);
	      if (len <= VLIB_FRAME_SIZE)
		{
		  clib_memcpy (from, fragments_to_loopback,
			       sizeof (u32) * len);
		  n_left_from = len;
		  vec_reset_length (fragments_to_loopback);
		}
	      else
		{
		  clib_memcpy (from,
			       fragments_to_loopback + (len -
							VLIB_FRAME_SIZE),
			       sizeof (u32) * VLIB_FRAME_SIZE);
		  n_left_from = VLIB_FRAME_SIZE;
		  _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
		}
	    }
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  vlib_node_increment_counter (vm, nat64_out2in_reass_node.index,
			       NAT64_OUT2IN_ERROR_OUT2IN_PACKETS,
			       pkts_processed);

  nat_send_all_to_node (vm, fragments_to_drop, node,
			&node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT],
			NAT64_OUT2IN_NEXT_DROP);

  vec_free (fragments_to_drop);
  vec_free (fragments_to_loopback);
  return frame->n_vectors;
}
871
/* Graph node registration for the NAT64 out2in fragment handler. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_out2in_reass_node) = {
  .function = nat64_out2in_reass_node_fn,
  .name = "nat64-out2in-reass",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_out2in_reass_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
  .error_strings = nat64_out2in_error_strings,
  .n_next_nodes = NAT64_OUT2IN_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
    [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_reass_node,
			      nat64_out2in_reass_node_fn);
894
/**
 * @brief Per-packet trace record for the NAT64 out2in worker hand-off node.
 */
typedef struct
{
  /* worker thread index selected for this packet */
  u32 next_worker_index;
  /* 1 if the packet was handed off to another worker, 0 if kept locally */
  u8 do_handoff;
} nat64_out2in_handoff_trace_t;
900
901 static u8 *
902 format_nat64_out2in_handoff_trace (u8 * s, va_list * args)
903 {
904   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
905   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
906   nat64_out2in_handoff_trace_t *t =
907     va_arg (*args, nat64_out2in_handoff_trace_t *);
908   char *m;
909
910   m = t->do_handoff ? "next worker" : "same worker";
911   s = format (s, "NAT64-OUT2IN-HANDOFF: %s %d", m, t->next_worker_index);
912
913   return s;
914 }
915
/**
 * @brief NAT64 out2in worker hand-off node function.
 *
 * For each IPv4 buffer in the frame, selects the owning worker thread via
 * nat64_get_worker_out2in(). Packets owned by the current thread are
 * enqueued straight to the nat64-out2in node; packets owned by another
 * worker are pushed onto that worker's out2in frame queue. Packets whose
 * target frame queue is congested are sent to the error node (dropped).
 *
 * @param vm    vlib main for this thread
 * @param node  this node's runtime
 * @param frame incoming frame of buffer indices
 * @return number of vectors processed (frame->n_vectors)
 */
static inline uword
nat64_out2in_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
                              vlib_frame_t * frame)
{
  nat64_main_t *nm = &nat64_main;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0;
  /* Per-thread caches, indexed by worker: the hand-off queue element
     currently being filled, and a congestion marker per queue. */
  static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
  static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
    = 0;
  vlib_frame_queue_elt_t *hf = 0;
  vlib_frame_queue_t *fq;
  vlib_frame_t *f = 0, *d = 0;	/* f: local-processing frame, d: drop frame */
  int i;
  u32 n_left_to_next_worker = 0, *to_next_worker = 0;
  u32 next_worker_index = 0;
  u32 current_worker_index = ~0;
  u32 thread_index = vm->thread_index;
  u32 fq_index;
  u32 to_node_index;

  fq_index = nm->fq_out2in_index;
  to_node_index = nat64_out2in_node.index;

  /* Lazily size the per-thread caches on first use. */
  if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
    {
      vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);

      vec_validate_init_empty (congested_handoff_queue_by_worker_index,
                               tm->n_vlib_mains - 1,
                               (vlib_frame_queue_t *) (~0));
    }

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  while (n_left_from > 0)
    {
      u32 bi0;
      vlib_buffer_t *b0;
      ip4_header_t *ip0;
      u8 do_handoff;

      bi0 = from[0];
      from += 1;
      n_left_from -= 1;

      b0 = vlib_get_buffer (vm, bi0);

      ip0 = vlib_buffer_get_current (b0);

      /* Pick the worker thread that owns this flow. */
      next_worker_index = nat64_get_worker_out2in (ip0);

      if (PREDICT_FALSE (next_worker_index != thread_index))
        {
          /* Packet belongs to another worker: hand it off. */
          do_handoff = 1;

          if (next_worker_index != current_worker_index)
            {
              /* Target worker changed: check its queue for congestion. */
              fq =
                is_vlib_frame_queue_congested (fq_index, next_worker_index,
                                               30,
                                               congested_handoff_queue_by_worker_index);

              if (fq)
                {
                  /* Queue congested: drop via the error node. */
                  /* if this is 1st frame */
                  if (!d)
                    {
                      d = vlib_get_frame_to_node (vm, nm->error_node_index);
                      to_next_drop = vlib_frame_vector_args (d);
                    }

                  to_next_drop[0] = bi0;
                  to_next_drop += 1;
                  d->n_vectors++;
                  goto trace0;
                }

              /* Record how full the previous worker's element got. */
              if (hf)
                hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;

              hf =
                vlib_get_worker_handoff_queue_elt (fq_index,
                                                   next_worker_index,
                                                   handoff_queue_elt_by_worker_index);
              n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
              to_next_worker = &hf->buffer_index[hf->n_vectors];
              current_worker_index = next_worker_index;
            }

          ASSERT (to_next_worker != 0);

          /* enqueue to correct worker thread */
          to_next_worker[0] = bi0;
          to_next_worker++;
          n_left_to_next_worker--;

          if (n_left_to_next_worker == 0)
            {
              /* Element full: ship it and start fresh next time. */
              hf->n_vectors = VLIB_FRAME_SIZE;
              vlib_put_frame_queue_elt (hf);
              current_worker_index = ~0;
              handoff_queue_elt_by_worker_index[next_worker_index] = 0;
              hf = 0;
            }
        }
      else
        {
          /* Packet belongs to this thread: process locally. */
          do_handoff = 0;
          /* if this is 1st frame */
          if (!f)
            {
              f = vlib_get_frame_to_node (vm, to_node_index);
              to_next = vlib_frame_vector_args (f);
            }

          to_next[0] = bi0;
          to_next += 1;
          f->n_vectors++;
        }

    trace0:
      if (PREDICT_FALSE
          ((node->flags & VLIB_NODE_FLAG_TRACE)
           && (b0->flags & VLIB_BUFFER_IS_TRACED)))
        {
          nat64_out2in_handoff_trace_t *t =
            vlib_add_trace (vm, node, b0, sizeof (*t));
          t->next_worker_index = next_worker_index;
          t->do_handoff = do_handoff;
        }
    }

  /* Flush the locally-processed and dropped frames, if any. */
  if (f)
    vlib_put_frame_to_node (vm, to_node_index, f);

  if (d)
    vlib_put_frame_to_node (vm, nm->error_node_index, d);

  if (hf)
    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;

  /* Ship frames to the worker nodes */
  for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
    {
      if (handoff_queue_elt_by_worker_index[i])
        {
          hf = handoff_queue_elt_by_worker_index[i];
          /*
           * It works better to let the handoff node
           * rate-adapt, always ship the handoff queue element.
           */
          if (1 || hf->n_vectors == hf->last_n_vectors)
            {
              vlib_put_frame_queue_elt (hf);
              handoff_queue_elt_by_worker_index[i] = 0;
            }
          else
            hf->last_n_vectors = hf->n_vectors;
        }
      /* Reset congestion markers for the next dispatch. */
      congested_handoff_queue_by_worker_index[i] =
        (vlib_frame_queue_t *) (~0);
    }
  hf = 0;
  current_worker_index = ~0;
  return frame->n_vectors;
}
1084
/* *INDENT-OFF* */
/*
 * Graph node registration for the NAT64 out2in worker hand-off node.
 * Its only explicit next is error-drop; handed-off packets travel via the
 * out2in frame queue, and locally-owned packets are enqueued directly to
 * the nat64-out2in node inside the node function.
 */
VLIB_REGISTER_NODE (nat64_out2in_handoff_node) = {
  .function = nat64_out2in_handoff_node_fn,
  .name = "nat64-out2in-handoff",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_out2in_handoff_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_handoff_node,
			      nat64_out2in_handoff_node_fn);
1103 /*
1104  * fd.io coding-style-patch-verification: ON
1105  *
1106  * Local Variables:
1107  * eval: (c-set-style "gnu")
1108  * End:
1109  */