17513c5efb8c8da3338c2ec9ccf7c1b170bd23f6
[vpp.git] / src / plugins / nat / nat64_out2in.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
/**
 * @file
 * @brief NAT64 IPv4 to IPv6 translation (outside to inside network)
 */
19
20 #include <nat/nat64.h>
21 #include <nat/nat_reass.h>
22 #include <vnet/ip/ip4_to_ip6.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <vnet/udp/udp.h>
25
/* Per-packet trace record written by the nat64-out2in node. */
typedef struct
{
  u32 sw_if_index;              /* RX sw_if_index of the traced buffer */
  u32 next_index;               /* next-node disposition chosen for the packet */
} nat64_out2in_trace_t;
31
32 static u8 *
33 format_nat64_out2in_trace (u8 * s, va_list * args)
34 {
35   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
36   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
37   nat64_out2in_trace_t *t = va_arg (*args, nat64_out2in_trace_t *);
38
39   s =
40     format (s, "NAT64-out2in: sw_if_index %d, next index %d", t->sw_if_index,
41             t->next_index);
42
43   return s;
44 }
45
/* Per-packet trace record written by the nat64-out2in-reass node. */
typedef struct
{
  u32 sw_if_index;              /* RX sw_if_index of the traced buffer */
  u32 next_index;               /* next-node disposition chosen for the packet */
  u8 cached;                    /* non-zero when the fragment was cached, not forwarded */
} nat64_out2in_reass_trace_t;
52
53 static u8 *
54 format_nat64_out2in_reass_trace (u8 * s, va_list * args)
55 {
56   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
57   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
58   nat64_out2in_reass_trace_t *t =
59     va_arg (*args, nat64_out2in_reass_trace_t *);
60
61   s =
62     format (s, "NAT64-out2in-reass: sw_if_index %d, next index %d, status %s",
63             t->sw_if_index, t->next_index,
64             t->cached ? "cached" : "translated");
65
66   return s;
67 }
68
/*
 * Declarations of the out2in node registrations so that code appearing
 * before the VLIB_REGISTER_NODE definitions can reference them (e.g. for
 * their .index).
 */
vlib_node_registration_t nat64_out2in_node;
vlib_node_registration_t nat64_out2in_reass_node;
vlib_node_registration_t nat64_out2in_handoff_node;
72
/*
 * X-macro listing every error/counter symbol used by the NAT64 out2in
 * nodes together with its display string. It is expanded with different
 * definitions of _() to generate the error enum and the string table.
 */
#define foreach_nat64_out2in_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")          \
_(OUT2IN_PACKETS, "Good out2in packets processed")       \
_(NO_TRANSLATION, "No translation")                      \
_(UNKNOWN, "unknown")                                    \
_(DROP_FRAGMENT, "Drop fragment")                        \
_(MAX_REASS, "Maximum reassemblies exceeded")            \
_(MAX_FRAG, "Maximum fragments per reassembly exceeded")
81
82
/*
 * Error codes generated from foreach_nat64_out2in_error, one
 * NAT64_OUT2IN_ERROR_<sym> per entry; NAT64_OUT2IN_N_ERROR is the count.
 */
typedef enum
{
#define _(sym,str) NAT64_OUT2IN_ERROR_##sym,
  foreach_nat64_out2in_error
#undef _
    NAT64_OUT2IN_N_ERROR,
} nat64_out2in_error_t;
90
/*
 * Display strings generated from foreach_nat64_out2in_error, in the same
 * order as nat64_out2in_error_t so the enum value indexes this table.
 */
static char *nat64_out2in_error_strings[] = {
#define _(sym,string) string,
  foreach_nat64_out2in_error
#undef _
};
96
/* Next-node dispositions shared by the nat64-out2in and -reass nodes. */
typedef enum
{
  NAT64_OUT2IN_NEXT_IP6_LOOKUP, /* translated packet, continue in "ip6-lookup" */
  NAT64_OUT2IN_NEXT_IP4_LOOKUP, /* untranslated packet handed to "ip4-lookup" */
  NAT64_OUT2IN_NEXT_DROP,       /* "error-drop" */
  NAT64_OUT2IN_NEXT_REASS,      /* IPv4 fragment, go to "nat64-out2in-reass" */
  NAT64_OUT2IN_N_NEXT,          /* number of dispositions */
} nat64_out2in_next_t;
105
/* Context handed to the header-translation callbacks of nat64-out2in. */
typedef struct nat64_out2in_set_ctx_t_
{
  vlib_buffer_t *b;             /* buffer being translated */
  vlib_main_t *vm;              /* passed to nat64_session_reset_timeout() */
  u32 thread_index;             /* selects the per-thread NAT64 db (nm->db[]) */
} nat64_out2in_set_ctx_t;
112
113 static int
114 nat64_out2in_tcp_udp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
115                              void *arg)
116 {
117   nat64_main_t *nm = &nat64_main;
118   nat64_out2in_set_ctx_t *ctx = arg;
119   nat64_db_bib_entry_t *bibe;
120   nat64_db_st_entry_t *ste;
121   ip46_address_t saddr, daddr;
122   ip6_address_t ip6_saddr;
123   udp_header_t *udp = ip4_next_header (ip4);
124   tcp_header_t *tcp = ip4_next_header (ip4);
125   u8 proto = ip4->protocol;
126   u16 dport = udp->dst_port;
127   u16 sport = udp->src_port;
128   u32 sw_if_index, fib_index;
129   u16 *checksum;
130   ip_csum_t csum;
131   nat64_db_t *db = &nm->db[ctx->thread_index];
132
133   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
134   fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
135
136   memset (&saddr, 0, sizeof (saddr));
137   saddr.ip4.as_u32 = ip4->src_address.as_u32;
138   memset (&daddr, 0, sizeof (daddr));
139   daddr.ip4.as_u32 = ip4->dst_address.as_u32;
140
141   ste =
142     nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
143                             fib_index, 0);
144   if (ste)
145     {
146       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
147       if (!bibe)
148         return -1;
149     }
150   else
151     {
152       bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, fib_index, 0);
153
154       if (!bibe)
155         return -1;
156
157       nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
158       ste =
159         nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, sport);
160     }
161
162   nat64_session_reset_timeout (ste, ctx->vm);
163
164   ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
165   ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
166
167   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
168   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
169   udp->dst_port = bibe->in_port;
170
171   if (proto == IP_PROTOCOL_UDP)
172     checksum = &udp->checksum;
173   else
174     checksum = &tcp->checksum;
175   csum = ip_csum_sub_even (*checksum, dport);
176   csum = ip_csum_add_even (csum, udp->dst_port);
177   *checksum = ip_csum_fold (csum);
178
179   vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
180
181   return 0;
182 }
183
184 static int
185 nat64_out2in_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
186 {
187   nat64_main_t *nm = &nat64_main;
188   nat64_out2in_set_ctx_t *ctx = arg;
189   nat64_db_bib_entry_t *bibe;
190   nat64_db_st_entry_t *ste;
191   ip46_address_t saddr, daddr;
192   ip6_address_t ip6_saddr;
193   u32 sw_if_index, fib_index;
194   icmp46_header_t *icmp = ip4_next_header (ip4);
195   nat64_db_t *db = &nm->db[ctx->thread_index];
196
197   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
198   fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
199
200   memset (&saddr, 0, sizeof (saddr));
201   saddr.ip4.as_u32 = ip4->src_address.as_u32;
202   memset (&daddr, 0, sizeof (daddr));
203   daddr.ip4.as_u32 = ip4->dst_address.as_u32;
204
205   if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
206     {
207       u16 out_id = ((u16 *) (icmp))[2];
208       ste =
209         nat64_db_st_entry_find (db, &daddr, &saddr, out_id, 0,
210                                 IP_PROTOCOL_ICMP, fib_index, 0);
211
212       if (ste)
213         {
214           bibe =
215             nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP,
216                                          ste->bibe_index);
217           if (!bibe)
218             return -1;
219         }
220       else
221         {
222           bibe =
223             nat64_db_bib_entry_find (db, &daddr, out_id,
224                                      IP_PROTOCOL_ICMP, fib_index, 0);
225           if (!bibe)
226             return -1;
227
228           nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
229           ste =
230             nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, 0);
231         }
232
233       nat64_session_reset_timeout (ste, ctx->vm);
234
235       ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
236       ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
237
238       ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
239       ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
240       ((u16 *) (icmp))[2] = bibe->in_port;
241
242       vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
243     }
244   else
245     {
246       ip6_header_t *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
247
248       nat64_compose_ip6 (&ip6->src_address, &ip4->src_address,
249                          vnet_buffer (ctx->b)->sw_if_index[VLIB_TX]);
250       ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
251       ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];
252     }
253
254   return 0;
255 }
256
257 static int
258 nat64_out2in_inner_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
259                                 void *arg)
260 {
261   nat64_main_t *nm = &nat64_main;
262   nat64_out2in_set_ctx_t *ctx = arg;
263   nat64_db_bib_entry_t *bibe;
264   nat64_db_st_entry_t *ste;
265   ip46_address_t saddr, daddr;
266   u32 sw_if_index, fib_index;
267   u8 proto = ip4->protocol;
268   nat64_db_t *db = &nm->db[ctx->thread_index];
269
270   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
271   fib_index =
272     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
273
274   memset (&saddr, 0, sizeof (saddr));
275   saddr.ip4.as_u32 = ip4->src_address.as_u32;
276   memset (&daddr, 0, sizeof (daddr));
277   daddr.ip4.as_u32 = ip4->dst_address.as_u32;
278
279   if (proto == IP_PROTOCOL_ICMP6)
280     {
281       icmp46_header_t *icmp = ip4_next_header (ip4);
282       u16 out_id = ((u16 *) (icmp))[2];
283       proto = IP_PROTOCOL_ICMP;
284
285       if (!
286           (icmp->type == ICMP6_echo_request
287            || icmp->type == ICMP6_echo_reply))
288         return -1;
289
290       ste =
291         nat64_db_st_entry_find (db, &saddr, &daddr, out_id, 0, proto,
292                                 fib_index, 0);
293       if (!ste)
294         return -1;
295
296       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
297       if (!bibe)
298         return -1;
299
300       ip6->dst_address.as_u64[0] = ste->in_r_addr.as_u64[0];
301       ip6->dst_address.as_u64[1] = ste->in_r_addr.as_u64[1];
302       ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
303       ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
304       ((u16 *) (icmp))[2] = bibe->in_port;
305
306       vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
307     }
308   else
309     {
310       udp_header_t *udp = ip4_next_header (ip4);
311       tcp_header_t *tcp = ip4_next_header (ip4);
312       u16 dport = udp->dst_port;
313       u16 sport = udp->src_port;
314       u16 *checksum;
315       ip_csum_t csum;
316
317       ste =
318         nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
319                                 fib_index, 0);
320       if (!ste)
321         return -1;
322
323       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
324       if (!bibe)
325         return -1;
326
327       nat64_compose_ip6 (&ip6->dst_address, &daddr.ip4, bibe->fib_index);
328       ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
329       ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
330       udp->src_port = bibe->in_port;
331
332       if (proto == IP_PROTOCOL_UDP)
333         checksum = &udp->checksum;
334       else
335         checksum = &tcp->checksum;
336       if (*checksum)
337         {
338           csum = ip_csum_sub_even (*checksum, sport);
339           csum = ip_csum_add_even (csum, udp->src_port);
340           *checksum = ip_csum_fold (csum);
341         }
342
343       vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
344     }
345
346   return 0;
347 }
348
349 static int
350 nat64_out2in_unk_proto_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
351                                void *arg)
352 {
353   nat64_main_t *nm = &nat64_main;
354   nat64_out2in_set_ctx_t *ctx = arg;
355   nat64_db_bib_entry_t *bibe;
356   nat64_db_st_entry_t *ste;
357   ip46_address_t saddr, daddr;
358   ip6_address_t ip6_saddr;
359   u32 sw_if_index, fib_index;
360   u8 proto = ip4->protocol;
361   nat64_db_t *db = &nm->db[ctx->thread_index];
362
363   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
364   fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
365
366   memset (&saddr, 0, sizeof (saddr));
367   saddr.ip4.as_u32 = ip4->src_address.as_u32;
368   memset (&daddr, 0, sizeof (daddr));
369   daddr.ip4.as_u32 = ip4->dst_address.as_u32;
370
371   ste =
372     nat64_db_st_entry_find (db, &daddr, &saddr, 0, 0, proto, fib_index, 0);
373   if (ste)
374     {
375       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
376       if (!bibe)
377         return -1;
378     }
379   else
380     {
381       bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, fib_index, 0);
382
383       if (!bibe)
384         return -1;
385
386       nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
387       ste = nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, 0);
388     }
389
390   nat64_session_reset_timeout (ste, ctx->vm);
391
392   ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
393   ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
394
395   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
396   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
397
398   vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
399
400   return 0;
401 }
402
403 static uword
404 nat64_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
405                       vlib_frame_t * frame)
406 {
407   u32 n_left_from, *from, *to_next;
408   nat64_out2in_next_t next_index;
409   u32 pkts_processed = 0;
410   u32 thread_index = vlib_get_thread_index ();
411
412   from = vlib_frame_vector_args (frame);
413   n_left_from = frame->n_vectors;
414   next_index = node->cached_next_index;
415   while (n_left_from > 0)
416     {
417       u32 n_left_to_next;
418
419       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
420
421       while (n_left_from > 0 && n_left_to_next > 0)
422         {
423           u32 bi0;
424           vlib_buffer_t *b0;
425           u32 next0;
426           ip4_header_t *ip40;
427           u32 proto0;
428           nat64_out2in_set_ctx_t ctx0;
429           udp_header_t *udp0;
430
431           /* speculatively enqueue b0 to the current next frame */
432           bi0 = from[0];
433           to_next[0] = bi0;
434           from += 1;
435           to_next += 1;
436           n_left_from -= 1;
437           n_left_to_next -= 1;
438
439           b0 = vlib_get_buffer (vm, bi0);
440           ip40 = vlib_buffer_get_current (b0);
441
442           ctx0.b = b0;
443           ctx0.vm = vm;
444           ctx0.thread_index = thread_index;
445
446           next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP;
447
448           proto0 = ip_proto_to_snat_proto (ip40->protocol);
449
450           if (PREDICT_FALSE (proto0 == ~0))
451             {
452               if (ip4_to_ip6 (b0, nat64_out2in_unk_proto_set_cb, &ctx0))
453                 {
454                   next0 = NAT64_OUT2IN_NEXT_DROP;
455                   b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
456                 }
457               goto trace0;
458             }
459
460           if (PREDICT_FALSE (ip4_is_fragment (ip40)))
461             {
462               next0 = NAT64_OUT2IN_NEXT_REASS;
463               goto trace0;
464             }
465
466           if (proto0 == SNAT_PROTOCOL_ICMP)
467             {
468               if (icmp_to_icmp6
469                   (b0, nat64_out2in_icmp_set_cb, &ctx0,
470                    nat64_out2in_inner_icmp_set_cb, &ctx0))
471                 {
472                   next0 = NAT64_OUT2IN_NEXT_DROP;
473                   b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
474                   goto trace0;
475                 }
476             }
477           else
478             {
479               if (ip4_to_ip6_tcp_udp (b0, nat64_out2in_tcp_udp_set_cb, &ctx0))
480                 {
481                   udp0 = ip4_next_header (ip40);
482                   /*
483                    * Send DHCP packets to the ipv4 stack, or we won't
484                    * be able to use dhcp client on the outside interface
485                    */
486                   if ((proto0 == SNAT_PROTOCOL_UDP)
487                       && (udp0->dst_port ==
488                           clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client)))
489                     {
490                       next0 = NAT64_OUT2IN_NEXT_IP4_LOOKUP;
491                       goto trace0;
492                     }
493                   next0 = NAT64_OUT2IN_NEXT_DROP;
494                   b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
495                   goto trace0;
496                 }
497             }
498
499         trace0:
500           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
501                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
502             {
503               nat64_out2in_trace_t *t =
504                 vlib_add_trace (vm, node, b0, sizeof (*t));
505               t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
506               t->next_index = next0;
507             }
508
509           pkts_processed += next0 != NAT64_OUT2IN_NEXT_DROP;
510
511           /* verify speculative enqueue, maybe switch current next frame */
512           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
513                                            n_left_to_next, bi0, next0);
514         }
515       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
516     }
517   vlib_node_increment_counter (vm, nat64_out2in_node.index,
518                                NAT64_OUT2IN_ERROR_OUT2IN_PACKETS,
519                                pkts_processed);
520   return frame->n_vectors;
521 }
522
523 /* *INDENT-OFF* */
524 VLIB_REGISTER_NODE (nat64_out2in_node) = {
525   .function = nat64_out2in_node_fn,
526   .name = "nat64-out2in",
527   .vector_size = sizeof (u32),
528   .format_trace = format_nat64_out2in_trace,
529   .type = VLIB_NODE_TYPE_INTERNAL,
530   .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
531   .error_strings = nat64_out2in_error_strings,
532   .n_next_nodes = NAT64_OUT2IN_N_NEXT,
533   /* edit / add dispositions here */
534   .next_nodes = {
535     [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
536     [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
537     [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
538     [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
539   },
540 };
541 /* *INDENT-ON* */
542
543 VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_node, nat64_out2in_node_fn);
544
/* Context handed to nat64_out2in_frag_set_cb() by the reassembly node. */
typedef struct nat64_out2in_frag_set_ctx_t_
{
  vlib_main_t *vm;              /* passed to nat64_session_reset_timeout() */
  vlib_buffer_t *b;             /* fragment being translated */
  u32 sess_index;               /* session index stored in the reassembly */
  u32 thread_index;             /* selects the per-thread NAT64 db (nm->db[]) */
  u8 proto;                     /* IP protocol of the fragment */
  u8 first_frag;                /* non-zero for the first fragment (carries the L4 header) */
} nat64_out2in_frag_set_ctx_t;
554
555 static int
556 nat64_out2in_frag_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
557 {
558   nat64_main_t *nm = &nat64_main;
559   nat64_out2in_frag_set_ctx_t *ctx = arg;
560   nat64_db_st_entry_t *ste;
561   nat64_db_bib_entry_t *bibe;
562   udp_header_t *udp = ip4_next_header (ip4);
563   ip_csum_t csum;
564   u16 *checksum;
565   nat64_db_t *db = &nm->db[ctx->thread_index];
566
567   ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
568   if (!ste)
569     return -1;
570
571   bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
572   if (!bibe)
573     return -1;
574
575   nat64_session_reset_timeout (ste, ctx->vm);
576
577   if (ctx->first_frag)
578     {
579       udp->dst_port = bibe->in_port;
580
581       if (ip4->protocol == IP_PROTOCOL_UDP)
582         {
583           checksum = &udp->checksum;
584
585           if (!checksum)
586             {
587               u16 udp_len =
588                 clib_host_to_net_u16 (ip4->length) - sizeof (*ip4);
589               csum = ip_incremental_checksum (0, udp, udp_len);
590               csum =
591                 ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
592               csum =
593                 ip_csum_with_carry (csum,
594                                     clib_host_to_net_u16 (IP_PROTOCOL_UDP));
595               csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[0]);
596               csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[1]);
597               csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[0]);
598               csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[1]);
599               *checksum = ~ip_csum_fold (csum);
600             }
601           else
602             {
603               csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
604               csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
605               csum = ip_csum_sub_even (csum, bibe->out_port);
606               csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
607               csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
608               csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
609               csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
610               csum = ip_csum_add_even (csum, bibe->in_port);
611               *checksum = ip_csum_fold (csum);
612             }
613         }
614       else
615         {
616           tcp_header_t *tcp = ip4_next_header (ip4);
617           checksum = &tcp->checksum;
618           csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
619           csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
620           csum = ip_csum_sub_even (csum, bibe->out_port);
621           csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
622           csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
623           csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
624           csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
625           csum = ip_csum_add_even (csum, bibe->in_port);
626           *checksum = ip_csum_fold (csum);
627         }
628
629     }
630
631   ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
632   ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
633
634   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
635   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
636
637   vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
638
639   return 0;
640 }
641
/**
 * @brief Dispatch function of the "nat64-out2in-reass" graph node.
 *
 * Translates IPv4 TCP/UDP fragments to IPv6. The first fragment of a
 * datagram finds or creates the NAT64 session and records its index in the
 * reassembly context; later fragments reuse that index. Fragments arriving
 * before the session is known are cached in the reassembly and, once the
 * first fragment shows up, are fed back through this same loop by
 * rewriting the input frame's buffer-index vector.
 *
 * The OUT2IN_PACKETS counter is bumped once per non-cached packet whose
 * disposition is not "drop".
 *
 * @return number of buffers in the input frame (frame->n_vectors).
 */
static uword
nat64_out2in_reass_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
                            vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  nat64_out2in_next_t next_index;
  u32 pkts_processed = 0;
  /* buffer-index vectors filled in by the reassembly helpers */
  u32 *fragments_to_drop = 0;
  u32 *fragments_to_loopback = 0;
  nat64_main_t *nm = &nat64_main;
  u32 thread_index = vlib_get_thread_index ();

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t *b0;
          u32 next0;
          ip4_header_t *ip40;
          /* set when b0 is kept in the reassembly instead of forwarded */
          u8 cached0 = 0;
          u32 sw_if_index0, fib_index0;
          udp_header_t *udp0;
          nat_reass_ip4_t *reass0;
          ip46_address_t saddr0, daddr0;
          nat64_db_st_entry_t *ste0;
          nat64_db_bib_entry_t *bibe0;
          ip6_address_t ip6_saddr0;
          nat64_out2in_frag_set_ctx_t ctx0;
          nat64_db_t *db = &nm->db[thread_index];

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP;

          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
          fib_index0 =
            fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
                                                 sw_if_index0);

          ctx0.thread_index = thread_index;

          /* NOTE(review): the argument 1 presumably selects the reassembly
           * settings used for NAT64 - confirm in nat_reass.h */
          if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
            {
              next0 = NAT64_OUT2IN_NEXT_DROP;
              b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
              goto trace0;
            }

          ip40 = vlib_buffer_get_current (b0);

          /* only TCP and UDP fragments are translated here */
          if (PREDICT_FALSE (!(ip40->protocol == IP_PROTOCOL_TCP
                               || ip40->protocol == IP_PROTOCOL_UDP)))
            {
              next0 = NAT64_OUT2IN_NEXT_DROP;
              b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
              goto trace0;
            }

          /* ports are read through the UDP view for both protocols */
          udp0 = ip4_next_header (ip40);

          reass0 = nat_ip4_reass_find_or_create (ip40->src_address,
                                                 ip40->dst_address,
                                                 ip40->fragment_id,
                                                 ip40->protocol,
                                                 1, &fragments_to_drop);

          if (PREDICT_FALSE (!reass0))
            {
              next0 = NAT64_OUT2IN_NEXT_DROP;
              b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_REASS];
              goto trace0;
            }

          if (PREDICT_FALSE (ip4_is_first_fragment (ip40)))
            {
              /* first fragment: carries the L4 header, so the session can
               * be looked up (or created) from addresses and ports */
              ctx0.first_frag = 1;

              memset (&saddr0, 0, sizeof (saddr0));
              saddr0.ip4.as_u32 = ip40->src_address.as_u32;
              memset (&daddr0, 0, sizeof (daddr0));
              daddr0.ip4.as_u32 = ip40->dst_address.as_u32;

              ste0 =
                nat64_db_st_entry_find (db, &daddr0, &saddr0,
                                        udp0->dst_port, udp0->src_port,
                                        ip40->protocol, fib_index0, 0);
              if (!ste0)
                {
                  bibe0 =
                    nat64_db_bib_entry_find (db, &daddr0, udp0->dst_port,
                                             ip40->protocol, fib_index0, 0);
                  if (!bibe0)
                    {
                      next0 = NAT64_OUT2IN_NEXT_DROP;
                      b0->error =
                        node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
                      goto trace0;
                    }

                  nat64_compose_ip6 (&ip6_saddr0, &ip40->src_address,
                                     bibe0->fib_index);
                  ste0 =
                    nat64_db_st_entry_create (db, bibe0, &ip6_saddr0,
                                              &saddr0.ip4, udp0->src_port);

                  if (!ste0)
                    {
                      next0 = NAT64_OUT2IN_NEXT_DROP;
                      b0->error =
                        node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
                      goto trace0;
                    }
                }
              /* remember the session so later fragments can use it */
              reass0->sess_index = nat64_db_st_entry_get_index (db, ste0);
              reass0->thread_index = thread_index;

              /* presumably collects the fragments cached so far into
               * fragments_to_loopback - confirm in nat_reass.c */
              nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
            }
          else
            {
              ctx0.first_frag = 0;

              /* session not known yet: keep the buffer in the reassembly
               * until the first fragment arrives */
              if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
                {
                  if (nat_ip4_reass_add_fragment (reass0, bi0))
                    {
                      b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_FRAG];
                      next0 = NAT64_OUT2IN_NEXT_DROP;
                      goto trace0;
                    }
                  cached0 = 1;
                  goto trace0;
                }
            }

          ctx0.sess_index = reass0->sess_index;
          ctx0.proto = ip40->protocol;
          ctx0.vm = vm;
          ctx0.b = b0;

          if (ip4_to_ip6_fragmented (b0, nat64_out2in_frag_set_cb, &ctx0))
            {
              next0 = NAT64_OUT2IN_NEXT_DROP;
              b0->error = node->errors[NAT64_OUT2IN_ERROR_UNKNOWN];
              goto trace0;
            }

        trace0:
          if (PREDICT_FALSE
              ((node->flags & VLIB_NODE_FLAG_TRACE)
               && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              nat64_out2in_reass_trace_t *t =
                vlib_add_trace (vm, node, b0, sizeof (*t));
              t->cached = cached0;
              t->sw_if_index = sw_if_index0;
              t->next_index = next0;
            }

          if (cached0)
            {
              /* undo the speculative enqueue: the buffer stays cached */
              n_left_to_next++;
              to_next--;
            }
          else
            {
              pkts_processed += next0 != NAT64_OUT2IN_NEXT_DROP;

              /* verify speculative enqueue, maybe switch current next frame */
              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                               to_next, n_left_to_next,
                                               bi0, next0);
            }

          /* input exhausted but cached fragments are pending: refill the
           * frame's buffer-index vector and keep looping */
          if (n_left_from == 0 && vec_len (fragments_to_loopback))
            {
              from = vlib_frame_vector_args (frame);
              u32 len = vec_len (fragments_to_loopback);
              if (len <= VLIB_FRAME_SIZE)
                {
                  clib_memcpy (from, fragments_to_loopback,
                               sizeof (u32) * len);
                  n_left_from = len;
                  vec_reset_length (fragments_to_loopback);
                }
              else
                {
                  /* take the last VLIB_FRAME_SIZE entries and shorten the
                   * vector; the rest is picked up on a later pass */
                  clib_memcpy (from,
                               fragments_to_loopback + (len -
                                                        VLIB_FRAME_SIZE),
                               sizeof (u32) * VLIB_FRAME_SIZE);
                  n_left_from = VLIB_FRAME_SIZE;
                  _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
                }
            }
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  vlib_node_increment_counter (vm, nat64_out2in_reass_node.index,
                               NAT64_OUT2IN_ERROR_OUT2IN_PACKETS,
                               pkts_processed);

  /* send everything collected in fragments_to_drop to the drop node with
   * the DROP_FRAGMENT error */
  nat_send_all_to_node (vm, fragments_to_drop, node,
                        &node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT],
                        NAT64_OUT2IN_NEXT_DROP);

  vec_free (fragments_to_drop);
  vec_free (fragments_to_loopback);
  return frame->n_vectors;
}
870
871 /* *INDENT-OFF* */
872 VLIB_REGISTER_NODE (nat64_out2in_reass_node) = {
873   .function = nat64_out2in_reass_node_fn,
874   .name = "nat64-out2in-reass",
875   .vector_size = sizeof (u32),
876   .format_trace = format_nat64_out2in_reass_trace,
877   .type = VLIB_NODE_TYPE_INTERNAL,
878   .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
879   .error_strings = nat64_out2in_error_strings,
880   .n_next_nodes = NAT64_OUT2IN_N_NEXT,
881   /* edit / add dispositions here */
882   .next_nodes = {
883     [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
884     [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
885     [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
886     [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
887   },
888 };
889 /* *INDENT-ON* */
890
891 VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_reass_node,
892                               nat64_out2in_reass_node_fn);
893
894 typedef struct
895 {
896   u32 next_worker_index;
897   u8 do_handoff;
898 } nat64_out2in_handoff_trace_t;
899
900 static u8 *
901 format_nat64_out2in_handoff_trace (u8 * s, va_list * args)
902 {
903   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
904   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
905   nat64_out2in_handoff_trace_t *t =
906     va_arg (*args, nat64_out2in_handoff_trace_t *);
907   char *m;
908
909   m = t->do_handoff ? "next worker" : "same worker";
910   s = format (s, "NAT64-OUT2IN-HANDOFF: %s %d", m, t->next_worker_index);
911
912   return s;
913 }
914
915 static inline uword
916 nat64_out2in_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
917                               vlib_frame_t * frame)
918 {
919   nat64_main_t *nm = &nat64_main;
920   vlib_thread_main_t *tm = vlib_get_thread_main ();
921   u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0;
922   static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
923   static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
924     = 0;
925   vlib_frame_queue_elt_t *hf = 0;
926   vlib_frame_queue_t *fq;
927   vlib_frame_t *f = 0, *d = 0;
928   int i;
929   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
930   u32 next_worker_index = 0;
931   u32 current_worker_index = ~0;
932   u32 thread_index = vlib_get_thread_index ();
933   u32 fq_index;
934   u32 to_node_index;
935
936   fq_index = nm->fq_out2in_index;
937   to_node_index = nat64_out2in_node.index;
938
939   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
940     {
941       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
942
943       vec_validate_init_empty (congested_handoff_queue_by_worker_index,
944                                tm->n_vlib_mains - 1,
945                                (vlib_frame_queue_t *) (~0));
946     }
947
948   from = vlib_frame_vector_args (frame);
949   n_left_from = frame->n_vectors;
950
951   while (n_left_from > 0)
952     {
953       u32 bi0;
954       vlib_buffer_t *b0;
955       ip4_header_t *ip0;
956       u8 do_handoff;
957
958       bi0 = from[0];
959       from += 1;
960       n_left_from -= 1;
961
962       b0 = vlib_get_buffer (vm, bi0);
963
964       ip0 = vlib_buffer_get_current (b0);
965
966       next_worker_index = nat64_get_worker_out2in (ip0);
967
968       if (PREDICT_FALSE (next_worker_index != thread_index))
969         {
970           do_handoff = 1;
971
972           if (next_worker_index != current_worker_index)
973             {
974               fq =
975                 is_vlib_frame_queue_congested (fq_index, next_worker_index,
976                                                30,
977                                                congested_handoff_queue_by_worker_index);
978
979               if (fq)
980                 {
981                   /* if this is 1st frame */
982                   if (!d)
983                     {
984                       d = vlib_get_frame_to_node (vm, nm->error_node_index);
985                       to_next_drop = vlib_frame_vector_args (d);
986                     }
987
988                   to_next_drop[0] = bi0;
989                   to_next_drop += 1;
990                   d->n_vectors++;
991                   goto trace0;
992                 }
993
994               if (hf)
995                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
996
997               hf =
998                 vlib_get_worker_handoff_queue_elt (fq_index,
999                                                    next_worker_index,
1000                                                    handoff_queue_elt_by_worker_index);
1001               n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
1002               to_next_worker = &hf->buffer_index[hf->n_vectors];
1003               current_worker_index = next_worker_index;
1004             }
1005
1006           ASSERT (to_next_worker != 0);
1007
1008           /* enqueue to correct worker thread */
1009           to_next_worker[0] = bi0;
1010           to_next_worker++;
1011           n_left_to_next_worker--;
1012
1013           if (n_left_to_next_worker == 0)
1014             {
1015               hf->n_vectors = VLIB_FRAME_SIZE;
1016               vlib_put_frame_queue_elt (hf);
1017               current_worker_index = ~0;
1018               handoff_queue_elt_by_worker_index[next_worker_index] = 0;
1019               hf = 0;
1020             }
1021         }
1022       else
1023         {
1024           do_handoff = 0;
1025           /* if this is 1st frame */
1026           if (!f)
1027             {
1028               f = vlib_get_frame_to_node (vm, to_node_index);
1029               to_next = vlib_frame_vector_args (f);
1030             }
1031
1032           to_next[0] = bi0;
1033           to_next += 1;
1034           f->n_vectors++;
1035         }
1036
1037     trace0:
1038       if (PREDICT_FALSE
1039           ((node->flags & VLIB_NODE_FLAG_TRACE)
1040            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1041         {
1042           nat64_out2in_handoff_trace_t *t =
1043             vlib_add_trace (vm, node, b0, sizeof (*t));
1044           t->next_worker_index = next_worker_index;
1045           t->do_handoff = do_handoff;
1046         }
1047     }
1048
1049   if (f)
1050     vlib_put_frame_to_node (vm, to_node_index, f);
1051
1052   if (d)
1053     vlib_put_frame_to_node (vm, nm->error_node_index, d);
1054
1055   if (hf)
1056     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
1057
1058   /* Ship frames to the worker nodes */
1059   for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
1060     {
1061       if (handoff_queue_elt_by_worker_index[i])
1062         {
1063           hf = handoff_queue_elt_by_worker_index[i];
1064           /*
1065            * It works better to let the handoff node
1066            * rate-adapt, always ship the handoff queue element.
1067            */
1068           if (1 || hf->n_vectors == hf->last_n_vectors)
1069             {
1070               vlib_put_frame_queue_elt (hf);
1071               handoff_queue_elt_by_worker_index[i] = 0;
1072             }
1073           else
1074             hf->last_n_vectors = hf->n_vectors;
1075         }
1076       congested_handoff_queue_by_worker_index[i] =
1077         (vlib_frame_queue_t *) (~0);
1078     }
1079   hf = 0;
1080   current_worker_index = ~0;
1081   return frame->n_vectors;
1082 }
1083
1084 /* *INDENT-OFF* */
1085 VLIB_REGISTER_NODE (nat64_out2in_handoff_node) = {
1086   .function = nat64_out2in_handoff_node_fn,
1087   .name = "nat64-out2in-handoff",
1088   .vector_size = sizeof (u32),
1089   .format_trace = format_nat64_out2in_handoff_trace,
1090   .type = VLIB_NODE_TYPE_INTERNAL,
1091
1092   .n_next_nodes = 1,
1093
1094   .next_nodes = {
1095     [0] = "error-drop",
1096   },
1097 };
1098 /* *INDENT-ON* */
1099
1100 VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_handoff_node,
1101                               nat64_out2in_handoff_node_fn);
1102 /*
1103  * fd.io coding-style-patch-verification: ON
1104  *
1105  * Local Variables:
1106  * eval: (c-set-style "gnu")
1107  * End:
1108  */