ba35de1e2a1b33af0425a593690bbe7dd0f6ce9e
[vpp.git] / src / plugins / nat / nat64_out2in.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT64 IPv4 to IPv6 translation (otside to inside network)
18  */
19
20 #include <nat/nat64.h>
21 #include <nat/nat_reass.h>
22 #include <nat/nat_inlines.h>
23 #include <vnet/ip/ip4_to_ip6.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp.h>
26
/* Per-packet trace record for the nat64-out2in node. */
typedef struct
{
  u32 sw_if_index;		/* RX interface the packet arrived on */
  u32 next_index;		/* next node index the packet was dispatched to */
} nat64_out2in_trace_t;
32
33 static u8 *
34 format_nat64_out2in_trace (u8 * s, va_list * args)
35 {
36   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
37   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
38   nat64_out2in_trace_t *t = va_arg (*args, nat64_out2in_trace_t *);
39
40   s =
41     format (s, "NAT64-out2in: sw_if_index %d, next index %d", t->sw_if_index,
42             t->next_index);
43
44   return s;
45 }
46
/* Per-packet trace record for the nat64-out2in-reass node. */
typedef struct
{
  u32 sw_if_index;		/* RX interface the fragment arrived on */
  u32 next_index;		/* next node index the fragment was dispatched to */
  u8 cached;			/* 1 if the fragment was cached awaiting the first fragment */
} nat64_out2in_reass_trace_t;
53
54 static u8 *
55 format_nat64_out2in_reass_trace (u8 * s, va_list * args)
56 {
57   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
58   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
59   nat64_out2in_reass_trace_t *t =
60     va_arg (*args, nat64_out2in_reass_trace_t *);
61
62   s =
63     format (s, "NAT64-out2in-reass: sw_if_index %d, next index %d, status %s",
64             t->sw_if_index, t->next_index,
65             t->cached ? "cached" : "translated");
66
67   return s;
68 }
69
vlib_node_registration_t nat64_out2in_node;
vlib_node_registration_t nat64_out2in_reass_node;
vlib_node_registration_t nat64_out2in_handoff_node;

/* Error/counter definitions shared by the out2in and out2in-reass nodes. */
#define foreach_nat64_out2in_error                       \
_(UNSUPPORTED_PROTOCOL, "unsupported protocol")          \
_(OUT2IN_PACKETS, "good out2in packets processed")       \
_(NO_TRANSLATION, "no translation")                      \
_(UNKNOWN, "unknown")                                    \
_(DROP_FRAGMENT, "drop fragment")                        \
_(MAX_REASS, "maximum reassemblies exceeded")            \
_(MAX_FRAG, "maximum fragments per reassembly exceeded") \
_(TCP_PACKETS, "TCP packets")                            \
_(UDP_PACKETS, "UDP packets")                            \
_(ICMP_PACKETS, "ICMP packets")                          \
_(OTHER_PACKETS, "other protocol packets")               \
_(FRAGMENTS, "fragments")                                \
_(CACHED_FRAGMENTS, "cached fragments")                  \
_(PROCESSED_FRAGMENTS, "processed fragments")


typedef enum
{
#define _(sym,str) NAT64_OUT2IN_ERROR_##sym,
  foreach_nat64_out2in_error
#undef _
    NAT64_OUT2IN_N_ERROR,
} nat64_out2in_error_t;

/* Human-readable strings, indexed by nat64_out2in_error_t. */
static char *nat64_out2in_error_strings[] = {
#define _(sym,string) string,
  foreach_nat64_out2in_error
#undef _
};
104
/* Next-node dispositions for the out2in nodes. */
typedef enum
{
  NAT64_OUT2IN_NEXT_IP6_LOOKUP,
  NAT64_OUT2IN_NEXT_IP4_LOOKUP,
  NAT64_OUT2IN_NEXT_DROP,
  NAT64_OUT2IN_NEXT_REASS,
  NAT64_OUT2IN_N_NEXT,
} nat64_out2in_next_t;

/* Context handed to the ip4_to_ip6 translation callbacks. */
typedef struct nat64_out2in_set_ctx_t_
{
  vlib_buffer_t *b;		/* buffer being translated */
  vlib_main_t *vm;		/* for session timeout updates */
  u32 thread_index;		/* selects the per-thread NAT64 db */
} nat64_out2in_set_ctx_t;
120
121 static int
122 nat64_out2in_tcp_udp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
123                              void *arg)
124 {
125   nat64_main_t *nm = &nat64_main;
126   nat64_out2in_set_ctx_t *ctx = arg;
127   nat64_db_bib_entry_t *bibe;
128   nat64_db_st_entry_t *ste;
129   ip46_address_t saddr, daddr;
130   ip6_address_t ip6_saddr;
131   udp_header_t *udp = ip4_next_header (ip4);
132   tcp_header_t *tcp = ip4_next_header (ip4);
133   u8 proto = ip4->protocol;
134   u16 dport = udp->dst_port;
135   u16 sport = udp->src_port;
136   u32 sw_if_index, fib_index;
137   u16 *checksum;
138   ip_csum_t csum;
139   nat64_db_t *db = &nm->db[ctx->thread_index];
140
141   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
142   fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
143
144   clib_memset (&saddr, 0, sizeof (saddr));
145   saddr.ip4.as_u32 = ip4->src_address.as_u32;
146   clib_memset (&daddr, 0, sizeof (daddr));
147   daddr.ip4.as_u32 = ip4->dst_address.as_u32;
148
149   ste =
150     nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
151                             fib_index, 0);
152   if (ste)
153     {
154       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
155       if (!bibe)
156         return -1;
157     }
158   else
159     {
160       bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, fib_index, 0);
161
162       if (!bibe)
163         return -1;
164
165       nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
166       ste =
167         nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, sport);
168     }
169
170   ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
171   ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
172
173   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
174   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
175   udp->dst_port = bibe->in_port;
176
177   if (proto == IP_PROTOCOL_UDP)
178     checksum = &udp->checksum;
179   else
180     {
181       checksum = &tcp->checksum;
182       nat64_tcp_session_set_state (ste, tcp, 0);
183     }
184
185   csum = ip_csum_sub_even (*checksum, dport);
186   csum = ip_csum_add_even (csum, udp->dst_port);
187   *checksum = ip_csum_fold (csum);
188
189   vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
190
191   nat64_session_reset_timeout (ste, ctx->vm);
192
193   return 0;
194 }
195
196 static int
197 nat64_out2in_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
198 {
199   nat64_main_t *nm = &nat64_main;
200   nat64_out2in_set_ctx_t *ctx = arg;
201   nat64_db_bib_entry_t *bibe;
202   nat64_db_st_entry_t *ste;
203   ip46_address_t saddr, daddr;
204   ip6_address_t ip6_saddr;
205   u32 sw_if_index, fib_index;
206   icmp46_header_t *icmp = ip4_next_header (ip4);
207   nat64_db_t *db = &nm->db[ctx->thread_index];
208
209   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
210   fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
211
212   clib_memset (&saddr, 0, sizeof (saddr));
213   saddr.ip4.as_u32 = ip4->src_address.as_u32;
214   clib_memset (&daddr, 0, sizeof (daddr));
215   daddr.ip4.as_u32 = ip4->dst_address.as_u32;
216
217   if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
218     {
219       u16 out_id = ((u16 *) (icmp))[2];
220       ste =
221         nat64_db_st_entry_find (db, &daddr, &saddr, out_id, 0,
222                                 IP_PROTOCOL_ICMP, fib_index, 0);
223
224       if (ste)
225         {
226           bibe =
227             nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP,
228                                          ste->bibe_index);
229           if (!bibe)
230             return -1;
231         }
232       else
233         {
234           bibe =
235             nat64_db_bib_entry_find (db, &daddr, out_id,
236                                      IP_PROTOCOL_ICMP, fib_index, 0);
237           if (!bibe)
238             return -1;
239
240           nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
241           ste =
242             nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, 0);
243         }
244
245       nat64_session_reset_timeout (ste, ctx->vm);
246
247       ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
248       ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
249
250       ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
251       ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
252       ((u16 *) (icmp))[2] = bibe->in_port;
253
254       vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
255     }
256   else
257     {
258       ip6_header_t *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
259
260       nat64_compose_ip6 (&ip6->src_address, &ip4->src_address,
261                          vnet_buffer (ctx->b)->sw_if_index[VLIB_TX]);
262       ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
263       ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];
264     }
265
266   return 0;
267 }
268
/*
 * ip4_to_ip6 translation callback for the packet embedded inside an ICMP
 * error (out2in). The inner packet travels in the opposite direction to
 * the outer one, so the session lookup uses (saddr, daddr) instead of
 * (daddr, saddr). Existing sessions only - never creates one.
 * Returns 0 on success, -1 when no matching session/BIB entry is found.
 */
static int
nat64_out2in_inner_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
				void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_out2in_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  u8 proto = ip4->protocol;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  /* NOTE(review): uses the IPv6 FIB for an ip4 sw_if_index, unlike the
     outer callbacks which use ip4_fib_table_get_index_for_sw_if_index -
     confirm this asymmetry is intentional. */
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  clib_memset (&saddr, 0, sizeof (saddr));
  saddr.ip4.as_u32 = ip4->src_address.as_u32;
  clib_memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ip4->dst_address.as_u32;

  if (proto == IP_PROTOCOL_ICMP6)
    {
      icmp46_header_t *icmp = ip4_next_header (ip4);
      /* Echo identifier sits right after the 4-byte ICMP header. */
      u16 out_id = ((u16 *) (icmp))[2];
      proto = IP_PROTOCOL_ICMP;

      /* Only echo request/reply can be embedded in an ICMP error here. */
      if (!
	  (icmp->type == ICMP6_echo_request
	   || icmp->type == ICMP6_echo_reply))
	return -1;

      ste =
	nat64_db_st_entry_find (db, &saddr, &daddr, out_id, 0, proto,
				fib_index, 0);
      if (!ste)
	return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;

      ip6->dst_address.as_u64[0] = ste->in_r_addr.as_u64[0];
      ip6->dst_address.as_u64[1] = ste->in_r_addr.as_u64[1];
      ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
      ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
      ((u16 *) (icmp))[2] = bibe->in_port;

      vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
    }
  else
    {
      /* UDP and TCP headers start at the same offset; ports alias. */
      udp_header_t *udp = ip4_next_header (ip4);
      tcp_header_t *tcp = ip4_next_header (ip4);
      u16 dport = udp->dst_port;
      u16 sport = udp->src_port;
      u16 *checksum;
      ip_csum_t csum;

      ste =
	nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
				fib_index, 0);
      if (!ste)
	return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
	return -1;

      nat64_compose_ip6 (&ip6->dst_address, &daddr.ip4, bibe->fib_index);
      ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
      ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
      udp->src_port = bibe->in_port;

      if (proto == IP_PROTOCOL_UDP)
	checksum = &udp->checksum;
      else
	checksum = &tcp->checksum;
      /* Embedded L4 header may be truncated; only patch a present checksum. */
      if (*checksum)
	{
	  csum = ip_csum_sub_even (*checksum, sport);
	  csum = ip_csum_add_even (csum, udp->src_port);
	  *checksum = ip_csum_fold (csum);
	}

      vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
    }

  return 0;
}
360
361 static int
362 nat64_out2in_unk_proto_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
363                                void *arg)
364 {
365   nat64_main_t *nm = &nat64_main;
366   nat64_out2in_set_ctx_t *ctx = arg;
367   nat64_db_bib_entry_t *bibe;
368   nat64_db_st_entry_t *ste;
369   ip46_address_t saddr, daddr;
370   ip6_address_t ip6_saddr;
371   u32 sw_if_index, fib_index;
372   u8 proto = ip4->protocol;
373   nat64_db_t *db = &nm->db[ctx->thread_index];
374
375   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
376   fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
377
378   clib_memset (&saddr, 0, sizeof (saddr));
379   saddr.ip4.as_u32 = ip4->src_address.as_u32;
380   clib_memset (&daddr, 0, sizeof (daddr));
381   daddr.ip4.as_u32 = ip4->dst_address.as_u32;
382
383   ste =
384     nat64_db_st_entry_find (db, &daddr, &saddr, 0, 0, proto, fib_index, 0);
385   if (ste)
386     {
387       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
388       if (!bibe)
389         return -1;
390     }
391   else
392     {
393       bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, fib_index, 0);
394
395       if (!bibe)
396         return -1;
397
398       nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
399       ste = nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, 0);
400     }
401
402   nat64_session_reset_timeout (ste, ctx->vm);
403
404   ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
405   ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
406
407   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
408   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
409
410   vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
411
412   return 0;
413 }
414
415 static uword
416 nat64_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
417                       vlib_frame_t * frame)
418 {
419   u32 n_left_from, *from, *to_next;
420   nat64_out2in_next_t next_index;
421   u32 pkts_processed = 0;
422   u32 thread_index = vm->thread_index;
423   u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets =
424     0, fragments = 0;
425
426   from = vlib_frame_vector_args (frame);
427   n_left_from = frame->n_vectors;
428   next_index = node->cached_next_index;
429   while (n_left_from > 0)
430     {
431       u32 n_left_to_next;
432
433       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
434
435       while (n_left_from > 0 && n_left_to_next > 0)
436         {
437           u32 bi0;
438           vlib_buffer_t *b0;
439           u32 next0;
440           ip4_header_t *ip40;
441           u32 proto0;
442           nat64_out2in_set_ctx_t ctx0;
443           udp_header_t *udp0;
444
445           /* speculatively enqueue b0 to the current next frame */
446           bi0 = from[0];
447           to_next[0] = bi0;
448           from += 1;
449           to_next += 1;
450           n_left_from -= 1;
451           n_left_to_next -= 1;
452
453           b0 = vlib_get_buffer (vm, bi0);
454           ip40 = vlib_buffer_get_current (b0);
455
456           ctx0.b = b0;
457           ctx0.vm = vm;
458           ctx0.thread_index = thread_index;
459
460           next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP;
461
462           proto0 = ip_proto_to_snat_proto (ip40->protocol);
463
464           if (PREDICT_FALSE (proto0 == ~0))
465             {
466               if (ip4_to_ip6 (b0, nat64_out2in_unk_proto_set_cb, &ctx0))
467                 {
468                   next0 = NAT64_OUT2IN_NEXT_DROP;
469                   b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
470                 }
471               other_packets++;
472               goto trace0;
473             }
474
475           if (PREDICT_FALSE (ip4_is_fragment (ip40)))
476             {
477               next0 = NAT64_OUT2IN_NEXT_REASS;
478               fragments++;
479               goto trace0;
480             }
481
482           if (proto0 == SNAT_PROTOCOL_ICMP)
483             {
484               icmp_packets++;
485               if (icmp_to_icmp6
486                   (b0, nat64_out2in_icmp_set_cb, &ctx0,
487                    nat64_out2in_inner_icmp_set_cb, &ctx0))
488                 {
489                   next0 = NAT64_OUT2IN_NEXT_DROP;
490                   b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
491                   goto trace0;
492                 }
493             }
494           else
495             {
496               if (proto0 == SNAT_PROTOCOL_TCP)
497                 tcp_packets++;
498               else
499                 udp_packets++;
500
501               if (ip4_to_ip6_tcp_udp (b0, nat64_out2in_tcp_udp_set_cb, &ctx0))
502                 {
503                   udp0 = ip4_next_header (ip40);
504                   /*
505                    * Send DHCP packets to the ipv4 stack, or we won't
506                    * be able to use dhcp client on the outside interface
507                    */
508                   if ((proto0 == SNAT_PROTOCOL_UDP)
509                       && (udp0->dst_port ==
510                           clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client)))
511                     {
512                       next0 = NAT64_OUT2IN_NEXT_IP4_LOOKUP;
513                       goto trace0;
514                     }
515                   next0 = NAT64_OUT2IN_NEXT_DROP;
516                   b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
517                   goto trace0;
518                 }
519             }
520
521         trace0:
522           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
523                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
524             {
525               nat64_out2in_trace_t *t =
526                 vlib_add_trace (vm, node, b0, sizeof (*t));
527               t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
528               t->next_index = next0;
529             }
530
531           pkts_processed += next0 == NAT64_OUT2IN_NEXT_IP6_LOOKUP;
532
533           /* verify speculative enqueue, maybe switch current next frame */
534           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
535                                            n_left_to_next, bi0, next0);
536         }
537       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
538     }
539   vlib_node_increment_counter (vm, nat64_out2in_node.index,
540                                NAT64_OUT2IN_ERROR_OUT2IN_PACKETS,
541                                pkts_processed);
542   vlib_node_increment_counter (vm, nat64_out2in_node.index,
543                                NAT64_OUT2IN_ERROR_TCP_PACKETS, tcp_packets);
544   vlib_node_increment_counter (vm, nat64_out2in_node.index,
545                                NAT64_OUT2IN_ERROR_UDP_PACKETS, tcp_packets);
546   vlib_node_increment_counter (vm, nat64_out2in_node.index,
547                                NAT64_OUT2IN_ERROR_ICMP_PACKETS, icmp_packets);
548   vlib_node_increment_counter (vm, nat64_out2in_node.index,
549                                NAT64_OUT2IN_ERROR_OTHER_PACKETS,
550                                other_packets);
551   vlib_node_increment_counter (vm, nat64_out2in_node.index,
552                                NAT64_OUT2IN_ERROR_FRAGMENTS, fragments);
553
554   return frame->n_vectors;
555 }
556
/* Graph-node registration for nat64-out2in. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_out2in_node) = {
  .function = nat64_out2in_node_fn,
  .name = "nat64-out2in",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_out2in_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
  .error_strings = nat64_out2in_error_strings,
  .n_next_nodes = NAT64_OUT2IN_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
    [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_node, nat64_out2in_node_fn);
578
/* Context handed to the fragment translation callback. */
typedef struct nat64_out2in_frag_set_ctx_t_
{
  vlib_main_t *vm;		/* for session timeout updates */
  vlib_buffer_t *b;		/* buffer being translated */
  u32 sess_index;		/* session looked up from the first fragment */
  u32 thread_index;		/* selects the per-thread NAT64 db */
  u8 proto;			/* IP protocol of the reassembled flow */
  u8 first_frag;		/* 1 if this buffer carries the L4 header */
} nat64_out2in_frag_set_ctx_t;
588
589 static int
590 nat64_out2in_frag_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
591 {
592   nat64_main_t *nm = &nat64_main;
593   nat64_out2in_frag_set_ctx_t *ctx = arg;
594   nat64_db_st_entry_t *ste;
595   nat64_db_bib_entry_t *bibe;
596   udp_header_t *udp = ip4_next_header (ip4);
597   ip_csum_t csum;
598   u16 *checksum;
599   nat64_db_t *db = &nm->db[ctx->thread_index];
600
601   ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
602   if (!ste)
603     return -1;
604
605   bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
606   if (!bibe)
607     return -1;
608
609   if (ctx->first_frag)
610     {
611       udp->dst_port = bibe->in_port;
612
613       if (ip4->protocol == IP_PROTOCOL_UDP)
614         {
615           checksum = &udp->checksum;
616
617           if (!checksum)
618             {
619               u16 udp_len =
620                 clib_host_to_net_u16 (ip4->length) - sizeof (*ip4);
621               csum = ip_incremental_checksum (0, udp, udp_len);
622               csum =
623                 ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
624               csum =
625                 ip_csum_with_carry (csum,
626                                     clib_host_to_net_u16 (IP_PROTOCOL_UDP));
627               csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[0]);
628               csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[1]);
629               csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[0]);
630               csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[1]);
631               *checksum = ~ip_csum_fold (csum);
632             }
633           else
634             {
635               csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
636               csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
637               csum = ip_csum_sub_even (csum, bibe->out_port);
638               csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
639               csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
640               csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
641               csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
642               csum = ip_csum_add_even (csum, bibe->in_port);
643               *checksum = ip_csum_fold (csum);
644             }
645         }
646       else
647         {
648           tcp_header_t *tcp = ip4_next_header (ip4);
649           nat64_tcp_session_set_state (ste, tcp, 0);
650           checksum = &tcp->checksum;
651           csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
652           csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
653           csum = ip_csum_sub_even (csum, bibe->out_port);
654           csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
655           csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
656           csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
657           csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
658           csum = ip_csum_add_even (csum, bibe->in_port);
659           *checksum = ip_csum_fold (csum);
660         }
661
662     }
663
664   ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
665   ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
666
667   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
668   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
669
670   vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
671
672   nat64_session_reset_timeout (ste, ctx->vm);
673
674   return 0;
675 }
676
/*
 * nat64-out2in-reass node function.
 *
 * Virtual (translation-only) reassembly: the first fragment resolves the
 * NAT64 session and stores its index in the reassembly context; non-first
 * fragments arriving before the first one are cached and looped back into
 * this node once the session is known. Fragments are translated
 * individually - the packet is never physically reassembled.
 */
static uword
nat64_out2in_reass_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
			    vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  nat64_out2in_next_t next_index;
  u32 pkts_processed = 0, cached_fragments = 0;
  u32 *fragments_to_drop = 0;
  u32 *fragments_to_loopback = 0;
  nat64_main_t *nm = &nat64_main;
  u32 thread_index = vm->thread_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0;
	  vlib_buffer_t *b0;
	  u32 next0;
	  ip4_header_t *ip40;
	  u8 cached0 = 0;
	  u32 sw_if_index0, fib_index0;
	  udp_header_t *udp0;
	  nat_reass_ip4_t *reass0;
	  ip46_address_t saddr0, daddr0;
	  nat64_db_st_entry_t *ste0;
	  nat64_db_bib_entry_t *bibe0;
	  ip6_address_t ip6_saddr0;
	  nat64_out2in_frag_set_ctx_t ctx0;
	  nat64_db_t *db = &nm->db[thread_index];

	  /* speculatively enqueue b0 to the current next frame */
	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  b0 = vlib_get_buffer (vm, bi0);
	  next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP;

	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
	  fib_index0 =
	    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
						 sw_if_index0);

	  ctx0.thread_index = thread_index;

	  /* Global "drop fragments" policy check. */
	  if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
	    {
	      next0 = NAT64_OUT2IN_NEXT_DROP;
	      b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
	      goto trace0;
	    }

	  ip40 = vlib_buffer_get_current (b0);

	  /* Only TCP/UDP fragments are translatable here. */
	  if (PREDICT_FALSE (!(ip40->protocol == IP_PROTOCOL_TCP
			       || ip40->protocol == IP_PROTOCOL_UDP)))
	    {
	      next0 = NAT64_OUT2IN_NEXT_DROP;
	      b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
	      goto trace0;
	    }

	  udp0 = ip4_next_header (ip40);

	  /* Find or create the per-datagram reassembly context. */
	  reass0 = nat_ip4_reass_find_or_create (ip40->src_address,
						 ip40->dst_address,
						 ip40->fragment_id,
						 ip40->protocol,
						 1, &fragments_to_drop);

	  if (PREDICT_FALSE (!reass0))
	    {
	      next0 = NAT64_OUT2IN_NEXT_DROP;
	      b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_REASS];
	      goto trace0;
	    }

	  if (PREDICT_FALSE (ip4_is_first_fragment (ip40)))
	    {
	      /* First fragment carries the L4 header: resolve the session
	         and remember it for the remaining fragments. */
	      ctx0.first_frag = 1;

	      clib_memset (&saddr0, 0, sizeof (saddr0));
	      saddr0.ip4.as_u32 = ip40->src_address.as_u32;
	      clib_memset (&daddr0, 0, sizeof (daddr0));
	      daddr0.ip4.as_u32 = ip40->dst_address.as_u32;

	      ste0 =
		nat64_db_st_entry_find (db, &daddr0, &saddr0,
					udp0->dst_port, udp0->src_port,
					ip40->protocol, fib_index0, 0);
	      if (!ste0)
		{
		  bibe0 =
		    nat64_db_bib_entry_find (db, &daddr0, udp0->dst_port,
					     ip40->protocol, fib_index0, 0);
		  if (!bibe0)
		    {
		      next0 = NAT64_OUT2IN_NEXT_DROP;
		      b0->error =
			node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
		      goto trace0;
		    }

		  nat64_compose_ip6 (&ip6_saddr0, &ip40->src_address,
				     bibe0->fib_index);
		  ste0 =
		    nat64_db_st_entry_create (db, bibe0, &ip6_saddr0,
					      &saddr0.ip4, udp0->src_port);

		  if (!ste0)
		    {
		      next0 = NAT64_OUT2IN_NEXT_DROP;
		      b0->error =
			node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
		      goto trace0;
		    }
		}
	      reass0->sess_index = nat64_db_st_entry_get_index (db, ste0);
	      reass0->thread_index = thread_index;

	      /* Release fragments that were cached before this one. */
	      nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
	    }
	  else
	    {
	      ctx0.first_frag = 0;

	      /* Session unknown yet: cache this fragment until the first
	         fragment arrives. */
	      if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
		{
		  if (nat_ip4_reass_add_fragment
		      (reass0, bi0, &fragments_to_drop))
		    {
		      b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_FRAG];
		      next0 = NAT64_OUT2IN_NEXT_DROP;
		      goto trace0;
		    }
		  cached0 = 1;
		  goto trace0;
		}
	    }

	  ctx0.sess_index = reass0->sess_index;
	  ctx0.proto = ip40->protocol;
	  ctx0.vm = vm;
	  ctx0.b = b0;

	  if (ip4_to_ip6_fragmented (b0, nat64_out2in_frag_set_cb, &ctx0))
	    {
	      next0 = NAT64_OUT2IN_NEXT_DROP;
	      b0->error = node->errors[NAT64_OUT2IN_ERROR_UNKNOWN];
	      goto trace0;
	    }

	trace0:
	  if (PREDICT_FALSE
	      ((node->flags & VLIB_NODE_FLAG_TRACE)
	       && (b0->flags & VLIB_BUFFER_IS_TRACED)))
	    {
	      nat64_out2in_reass_trace_t *t =
		vlib_add_trace (vm, node, b0, sizeof (*t));
	      t->cached = cached0;
	      t->sw_if_index = sw_if_index0;
	      t->next_index = next0;
	    }

	  if (cached0)
	    {
	      /* Cached fragment is not forwarded: undo the speculative
	         enqueue. */
	      n_left_to_next++;
	      to_next--;
	      cached_fragments++;
	    }
	  else
	    {
	      pkts_processed += next0 != NAT64_OUT2IN_NEXT_DROP;

	      /* verify speculative enqueue, maybe switch current next frame */
	      vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					       to_next, n_left_to_next,
					       bi0, next0);
	    }

	  /* Frame drained: refill it with fragments released from the
	     cache so they are processed in this same dispatch. */
	  if (n_left_from == 0 && vec_len (fragments_to_loopback))
	    {
	      from = vlib_frame_vector_args (frame);
	      u32 len = vec_len (fragments_to_loopback);
	      if (len <= VLIB_FRAME_SIZE)
		{
		  clib_memcpy_fast (from, fragments_to_loopback,
				    sizeof (u32) * len);
		  n_left_from = len;
		  vec_reset_length (fragments_to_loopback);
		}
	      else
		{
		  clib_memcpy_fast (from, fragments_to_loopback +
				    (len - VLIB_FRAME_SIZE),
				    sizeof (u32) * VLIB_FRAME_SIZE);
		  n_left_from = VLIB_FRAME_SIZE;
		  _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
		}
	    }
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  vlib_node_increment_counter (vm, nat64_out2in_reass_node.index,
			       NAT64_OUT2IN_ERROR_PROCESSED_FRAGMENTS,
			       pkts_processed);
  vlib_node_increment_counter (vm, nat64_out2in_reass_node.index,
			       NAT64_OUT2IN_ERROR_CACHED_FRAGMENTS,
			       cached_fragments);

  nat_send_all_to_node (vm, fragments_to_drop, node,
			&node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT],
			NAT64_OUT2IN_NEXT_DROP);

  vec_free (fragments_to_drop);
  vec_free (fragments_to_loopback);
  return frame->n_vectors;
}
909
/* *INDENT-OFF* */
/** Graph-node registration for the NAT64 out2in fragment-reassembly node.
 *  Shares the error-string table and next-node dispositions with the main
 *  out2in node; NAT64_OUT2IN_NEXT_REASS loops fragments back to this node. */
VLIB_REGISTER_NODE (nat64_out2in_reass_node) = {
  .function = nat64_out2in_reass_node_fn,
  .name = "nat64-out2in-reass",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_out2in_reass_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
  .error_strings = nat64_out2in_error_strings,
  .n_next_nodes = NAT64_OUT2IN_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
    [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
  },
};
/* *INDENT-ON* */

/* Generate per-CPU-architecture dispatch variants of the node function. */
VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_reass_node,
			      nat64_out2in_reass_node_fn);
932
/* Per-packet dispositions counted by the out2in handoff node, expanded via
 * X-macro into both the error enum and the counter-name string table below. */
#define foreach_nat64_out2in_handoff_error                       \
_(CONGESTION_DROP, "congestion drop")                            \
_(SAME_WORKER, "same worker")                                    \
_(DO_HANDOFF, "do handoff")
937
/** Counter indices generated from foreach_nat64_out2in_handoff_error. */
typedef enum
{
#define _(sym,str) NAT64_OUT2IN_HANDOFF_ERROR_##sym,
  foreach_nat64_out2in_handoff_error
#undef _
    NAT64_OUT2IN_HANDOFF_N_ERROR,	/**< total number of counters */
} nat64_out2in_handoff_error_t;
945
/* Human-readable counter names, index-matched to
 * nat64_out2in_handoff_error_t; passed to the node registration below. */
static char *nat64_out2in_handoff_error_strings[] = {
#define _(sym,string) string,
  foreach_nat64_out2in_handoff_error
#undef _
};
951
/** Per-packet trace record for the out2in handoff node. */
typedef struct
{
  u32 next_worker_index;	/**< thread index the buffer was dispatched to */
} nat64_out2in_handoff_trace_t;
956
957 static u8 *
958 format_nat64_out2in_handoff_trace (u8 * s, va_list * args)
959 {
960   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
961   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
962   nat64_out2in_handoff_trace_t *t =
963     va_arg (*args, nat64_out2in_handoff_trace_t *);
964
965   s =
966     format (s, "NAT64-OUT2IN-HANDOFF: next-worker %d", t->next_worker_index);
967
968   return s;
969 }
970
/**
 * @brief NAT64 out2in worker-handoff node function.
 *
 * For each IPv4 buffer in the frame, computes the worker thread that owns
 * the session (nat64_get_worker_out2in) and enqueues the buffer to that
 * thread's out2in frame queue.  Counts same-worker vs. handed-off packets
 * and congestion drops.
 */
static inline uword
nat64_out2in_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
			      vlib_frame_t * frame)
{
  nat64_main_t *nm = &nat64_main;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u32 n_enq, n_left_from, *from;
  u16 thread_indices[VLIB_FRAME_SIZE], *ti;
  u32 fq_index;
  u32 thread_index = vm->thread_index;
  u32 do_handoff = 0, same_worker = 0;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  vlib_get_buffers (vm, from, bufs, n_left_from);

  b = bufs;
  ti = thread_indices;

  fq_index = nm->fq_out2in_index;

  /* First pass: compute the destination thread for every buffer. */
  while (n_left_from > 0)
    {
      ip4_header_t *ip0;

      ip0 = vlib_buffer_get_current (b[0]);
      ti[0] = nat64_get_worker_out2in (ip0);

      /* Counters only; the actual dispatch happens in one batch below. */
      if (ti[0] != thread_index)
	do_handoff++;
      else
	same_worker++;

      if (PREDICT_FALSE
	  ((node->flags & VLIB_NODE_FLAG_TRACE)
	   && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
	{
	  nat64_out2in_handoff_trace_t *t =
	    vlib_add_trace (vm, node, b[0], sizeof (*t));
	  t->next_worker_index = ti[0];
	}

      n_left_from -= 1;
      ti += 1;
      b += 1;
    }

  /* Batch-enqueue all buffers to their target threads.  The trailing 1
   * requests drop-on-congestion, which is why n_enq can be < n_vectors
   * and is accounted as CONGESTION_DROP below. */
  n_enq =
    vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
				   frame->n_vectors, 1);

  if (n_enq < frame->n_vectors)
    vlib_node_increment_counter (vm, node->node_index,
				 NAT64_OUT2IN_HANDOFF_ERROR_CONGESTION_DROP,
				 frame->n_vectors - n_enq);
  vlib_node_increment_counter (vm, node->node_index,
			       NAT64_OUT2IN_HANDOFF_ERROR_SAME_WORKER,
			       same_worker);
  vlib_node_increment_counter (vm, node->node_index,
			       NAT64_OUT2IN_HANDOFF_ERROR_DO_HANDOFF,
			       do_handoff);

  return frame->n_vectors;
}
1035
/* *INDENT-OFF* */
/** Graph-node registration for the NAT64 out2in worker-handoff node.
 *  Only next node is error-drop: successfully handed-off buffers leave via
 *  the frame queue, not via a next-node arc. */
VLIB_REGISTER_NODE (nat64_out2in_handoff_node) = {
  .function = nat64_out2in_handoff_node_fn,
  .name = "nat64-out2in-handoff",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_out2in_handoff_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN(nat64_out2in_handoff_error_strings),
  .error_strings = nat64_out2in_handoff_error_strings,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};
/* *INDENT-ON* */

/* Generate per-CPU-architecture dispatch variants of the node function. */
VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_handoff_node,
			      nat64_out2in_handoff_node_fn);
1056 /*
1057  * fd.io coding-style-patch-verification: ON
1058  *
1059  * Local Variables:
1060  * eval: (c-set-style "gnu")
1061  * End:
1062  */