NAT: VPP-1552 code migration from old multiarch scheme
src/plugins/nat/nat64_out2in.c
/*
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @file
 * @brief NAT64 IPv4 to IPv6 translation (outside to inside network)
 */

#include <nat/nat64.h>
#include <nat/nat_reass.h>
#include <nat/nat_inlines.h>
#include <vnet/ip/ip4_to_ip6.h>
#include <vnet/fib/ip4_fib.h>
#include <vnet/udp/udp.h>

typedef struct
{
  u32 sw_if_index;
  u32 next_index;
} nat64_out2in_trace_t;

static u8 *
format_nat64_out2in_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  nat64_out2in_trace_t *t = va_arg (*args, nat64_out2in_trace_t *);

  s =
    format (s, "NAT64-out2in: sw_if_index %d, next index %d", t->sw_if_index,
            t->next_index);

  return s;
}

typedef struct
{
  u32 sw_if_index;
  u32 next_index;
  u8 cached;
} nat64_out2in_reass_trace_t;

static u8 *
format_nat64_out2in_reass_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  nat64_out2in_reass_trace_t *t =
    va_arg (*args, nat64_out2in_reass_trace_t *);

  s =
    format (s, "NAT64-out2in-reass: sw_if_index %d, next index %d, status %s",
            t->sw_if_index, t->next_index,
            t->cached ? "cached" : "translated");

  return s;
}


#define foreach_nat64_out2in_error                       \
_(UNSUPPORTED_PROTOCOL, "unsupported protocol")          \
_(OUT2IN_PACKETS, "good out2in packets processed")       \
_(NO_TRANSLATION, "no translation")                      \
_(UNKNOWN, "unknown")                                    \
_(DROP_FRAGMENT, "drop fragment")                        \
_(MAX_REASS, "maximum reassemblies exceeded")            \
_(MAX_FRAG, "maximum fragments per reassembly exceeded") \
_(TCP_PACKETS, "TCP packets")                            \
_(UDP_PACKETS, "UDP packets")                            \
_(ICMP_PACKETS, "ICMP packets")                          \
_(OTHER_PACKETS, "other protocol packets")               \
_(FRAGMENTS, "fragments")                                \
_(CACHED_FRAGMENTS, "cached fragments")                  \
_(PROCESSED_FRAGMENTS, "processed fragments")


typedef enum
{
#define _(sym,str) NAT64_OUT2IN_ERROR_##sym,
  foreach_nat64_out2in_error
#undef _
    NAT64_OUT2IN_N_ERROR,
} nat64_out2in_error_t;

static char *nat64_out2in_error_strings[] = {
#define _(sym,string) string,
  foreach_nat64_out2in_error
#undef _
};

typedef enum
{
  NAT64_OUT2IN_NEXT_IP6_LOOKUP,
  NAT64_OUT2IN_NEXT_IP4_LOOKUP,
  NAT64_OUT2IN_NEXT_DROP,
  NAT64_OUT2IN_NEXT_REASS,
  NAT64_OUT2IN_N_NEXT,
} nat64_out2in_next_t;

typedef struct nat64_out2in_set_ctx_t_
{
  vlib_buffer_t *b;
  vlib_main_t *vm;
  u32 thread_index;
} nat64_out2in_set_ctx_t;

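/*
 * ip4_to_ip6_tcp_udp() callback: find the BIB entry for the outside
 * destination, create the session entry on first contact, and rewrite the
 * IPv6 addresses, destination port and TCP/UDP checksum accordingly.
 */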
static int
nat64_out2in_tcp_udp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
                             void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_out2in_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  ip6_address_t ip6_saddr;
  udp_header_t *udp = ip4_next_header (ip4);
  tcp_header_t *tcp = ip4_next_header (ip4);
  u8 proto = ip4->protocol;
  u16 dport = udp->dst_port;
  u16 sport = udp->src_port;
  u32 sw_if_index, fib_index;
  u16 *checksum;
  ip_csum_t csum;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);

  clib_memset (&saddr, 0, sizeof (saddr));
  saddr.ip4.as_u32 = ip4->src_address.as_u32;
  clib_memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ip4->dst_address.as_u32;

  ste =
    nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
                            fib_index, 0);
  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
        return -1;
    }
  else
    {
      bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, fib_index, 0);

      if (!bibe)
        return -1;

      nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
      ste =
        nat64_db_st_entry_create (ctx->thread_index, db, bibe, &ip6_saddr,
                                  &saddr.ip4, sport);

      if (!ste)
        return -1;

      vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
                               db->st.st_entries_num);
    }

  ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
  ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];

  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
  udp->dst_port = bibe->in_port;

  if (proto == IP_PROTOCOL_UDP)
    checksum = &udp->checksum;
  else
    {
      checksum = &tcp->checksum;
      nat64_tcp_session_set_state (ste, tcp, 0);
    }

  csum = ip_csum_sub_even (*checksum, dport);
  csum = ip_csum_add_even (csum, udp->dst_port);
  *checksum = ip_csum_fold (csum);

  vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;

  nat64_session_reset_timeout (ste, ctx->vm);

  return 0;
}

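/*
 * icmp_to_icmp6() callback for the outer ICMP header: echo request/reply use
 * the ICMP identifier as the BIB "port"; for other ICMP types the IPv6
 * destination is taken from the source address of the embedded packet.
 */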
static int
nat64_out2in_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_out2in_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  ip6_address_t ip6_saddr;
  u32 sw_if_index, fib_index;
  icmp46_header_t *icmp = ip4_next_header (ip4);
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);

  clib_memset (&saddr, 0, sizeof (saddr));
  saddr.ip4.as_u32 = ip4->src_address.as_u32;
  clib_memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ip4->dst_address.as_u32;

  if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
    {
      u16 out_id = ((u16 *) (icmp))[2];
      ste =
        nat64_db_st_entry_find (db, &daddr, &saddr, out_id, 0,
                                IP_PROTOCOL_ICMP, fib_index, 0);

      if (ste)
        {
          bibe =
            nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP,
                                         ste->bibe_index);
          if (!bibe)
            return -1;
        }
      else
        {
          bibe =
            nat64_db_bib_entry_find (db, &daddr, out_id,
                                     IP_PROTOCOL_ICMP, fib_index, 0);
          if (!bibe)
            return -1;

          nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
          ste =
            nat64_db_st_entry_create (ctx->thread_index, db,
                                      bibe, &ip6_saddr, &saddr.ip4, 0);

          if (!ste)
            return -1;

          vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
                                   db->st.st_entries_num);
        }

      nat64_session_reset_timeout (ste, ctx->vm);

      ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
      ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];

      ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
      ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
      ((u16 *) (icmp))[2] = bibe->in_port;

      vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
    }
  else
    {
      ip6_header_t *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);

      nat64_compose_ip6 (&ip6->src_address, &ip4->src_address,
                         vnet_buffer (ctx->b)->sw_if_index[VLIB_TX]);
      ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
      ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];
    }

  return 0;
}

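/*
 * icmp_to_icmp6() callback for the packet embedded in an ICMP error: the
 * session lookup direction is reversed with respect to the outer header.
 */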
static int
nat64_out2in_inner_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
                                void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_out2in_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  u8 proto = ip4->protocol;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  clib_memset (&saddr, 0, sizeof (saddr));
  saddr.ip4.as_u32 = ip4->src_address.as_u32;
  clib_memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ip4->dst_address.as_u32;

  if (proto == IP_PROTOCOL_ICMP6)
    {
      icmp46_header_t *icmp = ip4_next_header (ip4);
      u16 out_id = ((u16 *) (icmp))[2];
      proto = IP_PROTOCOL_ICMP;

      if (!
          (icmp->type == ICMP6_echo_request
           || icmp->type == ICMP6_echo_reply))
        return -1;

      ste =
        nat64_db_st_entry_find (db, &saddr, &daddr, out_id, 0, proto,
                                fib_index, 0);
      if (!ste)
        return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
        return -1;

      ip6->dst_address.as_u64[0] = ste->in_r_addr.as_u64[0];
      ip6->dst_address.as_u64[1] = ste->in_r_addr.as_u64[1];
      ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
      ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
      ((u16 *) (icmp))[2] = bibe->in_port;

      vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
    }
  else
    {
      udp_header_t *udp = ip4_next_header (ip4);
      tcp_header_t *tcp = ip4_next_header (ip4);
      u16 dport = udp->dst_port;
      u16 sport = udp->src_port;
      u16 *checksum;
      ip_csum_t csum;

      ste =
        nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
                                fib_index, 0);
      if (!ste)
        return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
        return -1;

      nat64_compose_ip6 (&ip6->dst_address, &daddr.ip4, bibe->fib_index);
      ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
      ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
      udp->src_port = bibe->in_port;

      if (proto == IP_PROTOCOL_UDP)
        checksum = &udp->checksum;
      else
        checksum = &tcp->checksum;
      if (*checksum)
        {
          csum = ip_csum_sub_even (*checksum, sport);
          csum = ip_csum_add_even (csum, udp->src_port);
          *checksum = ip_csum_fold (csum);
        }

      vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
    }

  return 0;
}

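/*
 * Callback for protocols other than TCP/UDP/ICMP: BIB and session entries
 * are keyed on addresses only (zero ports).
 */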
static int
nat64_out2in_unk_proto_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
                               void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_out2in_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  ip6_address_t ip6_saddr;
  u32 sw_if_index, fib_index;
  u8 proto = ip4->protocol;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);

  clib_memset (&saddr, 0, sizeof (saddr));
  saddr.ip4.as_u32 = ip4->src_address.as_u32;
  clib_memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ip4->dst_address.as_u32;

  ste =
    nat64_db_st_entry_find (db, &daddr, &saddr, 0, 0, proto, fib_index, 0);
  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
        return -1;
    }
  else
    {
      bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, fib_index, 0);

      if (!bibe)
        return -1;

      nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
      ste = nat64_db_st_entry_create (ctx->thread_index, db,
                                      bibe, &ip6_saddr, &saddr.ip4, 0);

      if (!ste)
        return -1;

      vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
                               db->st.st_entries_num);
    }

  nat64_session_reset_timeout (ste, ctx->vm);

  ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
  ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];

  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];

  vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;

  return 0;
}

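/*
 * Main out2in node: IPv4 fragments are sent to the reassembly node, ICMP is
 * handled via icmp_to_icmp6(), unknown protocols via the unk_proto callback,
 * and TCP/UDP via ip4_to_ip6_tcp_udp(); DHCP replies that have no
 * translation are passed back to the IPv4 stack.
 */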
VLIB_NODE_FN (nat64_out2in_node) (vlib_main_t * vm,
                                  vlib_node_runtime_t * node,
                                  vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  nat64_out2in_next_t next_index;
  nat64_main_t *nm = &nat64_main;
  u32 pkts_processed = 0;
  u32 thread_index = vm->thread_index;
  u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets =
    0, fragments = 0;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t *b0;
          u32 next0;
          ip4_header_t *ip40;
          u32 proto0;
          nat64_out2in_set_ctx_t ctx0;
          udp_header_t *udp0;

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          ip40 = vlib_buffer_get_current (b0);

          ctx0.b = b0;
          ctx0.vm = vm;
          ctx0.thread_index = thread_index;

          next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP;

          proto0 = ip_proto_to_snat_proto (ip40->protocol);

          if (PREDICT_FALSE (proto0 == ~0))
            {
              if (ip4_to_ip6 (b0, nat64_out2in_unk_proto_set_cb, &ctx0))
                {
                  next0 = NAT64_OUT2IN_NEXT_DROP;
                  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
                }
              other_packets++;
              goto trace0;
            }

          if (PREDICT_FALSE (ip4_is_fragment (ip40)))
            {
              next0 = NAT64_OUT2IN_NEXT_REASS;
              fragments++;
              goto trace0;
            }

          if (proto0 == SNAT_PROTOCOL_ICMP)
            {
              icmp_packets++;
              if (icmp_to_icmp6
                  (b0, nat64_out2in_icmp_set_cb, &ctx0,
                   nat64_out2in_inner_icmp_set_cb, &ctx0))
                {
                  next0 = NAT64_OUT2IN_NEXT_DROP;
                  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
                  goto trace0;
                }
            }
          else
            {
              if (proto0 == SNAT_PROTOCOL_TCP)
                tcp_packets++;
              else
                udp_packets++;

              if (ip4_to_ip6_tcp_udp (b0, nat64_out2in_tcp_udp_set_cb, &ctx0))
                {
                  udp0 = ip4_next_header (ip40);
                  /*
                   * Send DHCP packets to the ipv4 stack, or we won't
                   * be able to use dhcp client on the outside interface
                   */
                  if ((proto0 == SNAT_PROTOCOL_UDP)
                      && (udp0->dst_port ==
                          clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client)))
                    {
                      next0 = NAT64_OUT2IN_NEXT_IP4_LOOKUP;
                      goto trace0;
                    }
                  next0 = NAT64_OUT2IN_NEXT_DROP;
                  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
                  goto trace0;
                }
            }

        trace0:
          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              nat64_out2in_trace_t *t =
                vlib_add_trace (vm, node, b0, sizeof (*t));
              t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
              t->next_index = next0;
            }

          pkts_processed += next0 == NAT64_OUT2IN_NEXT_IP6_LOOKUP;

          /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                           n_left_to_next, bi0, next0);
        }
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  vlib_node_increment_counter (vm, nm->out2in_node_index,
                               NAT64_OUT2IN_ERROR_OUT2IN_PACKETS,
                               pkts_processed);
  vlib_node_increment_counter (vm, nm->out2in_node_index,
                               NAT64_OUT2IN_ERROR_TCP_PACKETS, tcp_packets);
  vlib_node_increment_counter (vm, nm->out2in_node_index,
                               NAT64_OUT2IN_ERROR_UDP_PACKETS, udp_packets);
  vlib_node_increment_counter (vm, nm->out2in_node_index,
                               NAT64_OUT2IN_ERROR_ICMP_PACKETS, icmp_packets);
  vlib_node_increment_counter (vm, nm->out2in_node_index,
                               NAT64_OUT2IN_ERROR_OTHER_PACKETS,
                               other_packets);
  vlib_node_increment_counter (vm, nm->out2in_node_index,
                               NAT64_OUT2IN_ERROR_FRAGMENTS, fragments);

  return frame->n_vectors;
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_out2in_node) = {
  .name = "nat64-out2in",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_out2in_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
  .error_strings = nat64_out2in_error_strings,
  .n_next_nodes = NAT64_OUT2IN_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
    [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
  },
};
/* *INDENT-ON* */

typedef struct nat64_out2in_frag_set_ctx_t_
{
  vlib_main_t *vm;
  vlib_buffer_t *b;
  u32 sess_index;
  u32 thread_index;
  u8 proto;
  u8 first_frag;
} nat64_out2in_frag_set_ctx_t;

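/*
 * ip4_to_ip6_fragmented() callback: only the first fragment carries the L4
 * header, so the destination port and checksum are rewritten only when
 * first_frag is set; a zero IPv4 UDP checksum is recomputed from scratch
 * because the UDP checksum is mandatory over IPv6.
 */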
static int
nat64_out2in_frag_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_out2in_frag_set_ctx_t *ctx = arg;
  nat64_db_st_entry_t *ste;
  nat64_db_bib_entry_t *bibe;
  udp_header_t *udp = ip4_next_header (ip4);
  ip_csum_t csum;
  u16 *checksum;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
  if (!bibe)
    return -1;

  if (ctx->first_frag)
    {
      udp->dst_port = bibe->in_port;

      if (ip4->protocol == IP_PROTOCOL_UDP)
        {
          checksum = &udp->checksum;

          /* a zero UDP checksum must be computed for the IPv6 side */
          if (!*checksum)
            {
              u16 udp_len =
                clib_host_to_net_u16 (ip4->length) - sizeof (*ip4);
              csum = ip_incremental_checksum (0, udp, udp_len);
              csum =
                ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
              csum =
                ip_csum_with_carry (csum,
                                    clib_host_to_net_u16 (IP_PROTOCOL_UDP));
              csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[0]);
              csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[1]);
              csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[0]);
              csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[1]);
              *checksum = ~ip_csum_fold (csum);
            }
          else
            {
              csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
              csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
              csum = ip_csum_sub_even (csum, bibe->out_port);
              csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
              csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
              csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
              csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
              csum = ip_csum_add_even (csum, bibe->in_port);
              *checksum = ip_csum_fold (csum);
            }
        }
      else
        {
          tcp_header_t *tcp = ip4_next_header (ip4);
          nat64_tcp_session_set_state (ste, tcp, 0);
          checksum = &tcp->checksum;
          csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
          csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
          csum = ip_csum_sub_even (csum, bibe->out_port);
          csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
          csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
          csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
          csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
          csum = ip_csum_add_even (csum, bibe->in_port);
          *checksum = ip_csum_fold (csum);
        }

    }

  ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
  ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];

  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];

  vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;

  nat64_session_reset_timeout (ste, ctx->vm);

  return 0;
}

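/*
 * Reassembly node: the first fragment creates the session and stores its
 * index in the reassembly context; out-of-order fragments are cached and
 * looped back through the node once the session index is known.
 */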
VLIB_NODE_FN (nat64_out2in_reass_node) (vlib_main_t * vm,
                                        vlib_node_runtime_t * node,
                                        vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  nat64_out2in_next_t next_index;
  u32 pkts_processed = 0, cached_fragments = 0;
  u32 *fragments_to_drop = 0;
  u32 *fragments_to_loopback = 0;
  nat64_main_t *nm = &nat64_main;
  u32 thread_index = vm->thread_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t *b0;
          u32 next0;
          ip4_header_t *ip40;
          u8 cached0 = 0;
          u32 sw_if_index0, fib_index0;
          udp_header_t *udp0;
          nat_reass_ip4_t *reass0;
          ip46_address_t saddr0, daddr0;
          nat64_db_st_entry_t *ste0;
          nat64_db_bib_entry_t *bibe0;
          ip6_address_t ip6_saddr0;
          nat64_out2in_frag_set_ctx_t ctx0;
          nat64_db_t *db = &nm->db[thread_index];

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP;

          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
          fib_index0 =
            fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
                                                 sw_if_index0);

          ctx0.thread_index = thread_index;

          if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
            {
              next0 = NAT64_OUT2IN_NEXT_DROP;
              b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
              goto trace0;
            }

          ip40 = vlib_buffer_get_current (b0);

          if (PREDICT_FALSE (!(ip40->protocol == IP_PROTOCOL_TCP
                               || ip40->protocol == IP_PROTOCOL_UDP)))
            {
              next0 = NAT64_OUT2IN_NEXT_DROP;
              b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
              goto trace0;
            }

          udp0 = ip4_next_header (ip40);

          reass0 = nat_ip4_reass_find_or_create (ip40->src_address,
                                                 ip40->dst_address,
                                                 ip40->fragment_id,
                                                 ip40->protocol,
                                                 1, &fragments_to_drop);

          if (PREDICT_FALSE (!reass0))
            {
              next0 = NAT64_OUT2IN_NEXT_DROP;
              b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_REASS];
              goto trace0;
            }

          if (PREDICT_FALSE (ip4_is_first_fragment (ip40)))
            {
              ctx0.first_frag = 1;

              clib_memset (&saddr0, 0, sizeof (saddr0));
              saddr0.ip4.as_u32 = ip40->src_address.as_u32;
              clib_memset (&daddr0, 0, sizeof (daddr0));
              daddr0.ip4.as_u32 = ip40->dst_address.as_u32;

              ste0 =
                nat64_db_st_entry_find (db, &daddr0, &saddr0,
                                        udp0->dst_port, udp0->src_port,
                                        ip40->protocol, fib_index0, 0);
              if (!ste0)
                {
                  bibe0 =
                    nat64_db_bib_entry_find (db, &daddr0, udp0->dst_port,
                                             ip40->protocol, fib_index0, 0);
                  if (!bibe0)
                    {
                      next0 = NAT64_OUT2IN_NEXT_DROP;
                      b0->error =
                        node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
                      goto trace0;
                    }

                  nat64_compose_ip6 (&ip6_saddr0, &ip40->src_address,
                                     bibe0->fib_index);
                  ste0 =
                    nat64_db_st_entry_create (thread_index,
                                              db, bibe0, &ip6_saddr0,
                                              &saddr0.ip4, udp0->src_port);

                  if (!ste0)
                    {
                      next0 = NAT64_OUT2IN_NEXT_DROP;
                      b0->error =
                        node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
                      goto trace0;
                    }

                  vlib_set_simple_counter (&nm->total_sessions, thread_index,
                                           0, db->st.st_entries_num);
                }
              reass0->sess_index = nat64_db_st_entry_get_index (db, ste0);
              reass0->thread_index = thread_index;

              nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
            }
          else
            {
              ctx0.first_frag = 0;

              if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
                {
                  if (nat_ip4_reass_add_fragment
                      (thread_index, reass0, bi0, &fragments_to_drop))
                    {
                      b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_FRAG];
                      next0 = NAT64_OUT2IN_NEXT_DROP;
                      goto trace0;
                    }
                  cached0 = 1;
                  goto trace0;
                }
            }

          ctx0.sess_index = reass0->sess_index;
          ctx0.proto = ip40->protocol;
          ctx0.vm = vm;
          ctx0.b = b0;

          if (ip4_to_ip6_fragmented (b0, nat64_out2in_frag_set_cb, &ctx0))
            {
              next0 = NAT64_OUT2IN_NEXT_DROP;
              b0->error = node->errors[NAT64_OUT2IN_ERROR_UNKNOWN];
              goto trace0;
            }

        trace0:
          if (PREDICT_FALSE
              ((node->flags & VLIB_NODE_FLAG_TRACE)
               && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              nat64_out2in_reass_trace_t *t =
                vlib_add_trace (vm, node, b0, sizeof (*t));
              t->cached = cached0;
              t->sw_if_index = sw_if_index0;
              t->next_index = next0;
            }

          if (cached0)
            {
              n_left_to_next++;
              to_next--;
              cached_fragments++;
            }
          else
            {
              pkts_processed += next0 != NAT64_OUT2IN_NEXT_DROP;

              /* verify speculative enqueue, maybe switch current next frame */
              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                               to_next, n_left_to_next,
                                               bi0, next0);
            }

          if (n_left_from == 0 && vec_len (fragments_to_loopback))
            {
              from = vlib_frame_vector_args (frame);
              u32 len = vec_len (fragments_to_loopback);
              if (len <= VLIB_FRAME_SIZE)
                {
                  clib_memcpy_fast (from, fragments_to_loopback,
                                    sizeof (u32) * len);
                  n_left_from = len;
                  vec_reset_length (fragments_to_loopback);
                }
              else
                {
                  clib_memcpy_fast (from, fragments_to_loopback +
                                    (len - VLIB_FRAME_SIZE),
                                    sizeof (u32) * VLIB_FRAME_SIZE);
                  n_left_from = VLIB_FRAME_SIZE;
                  _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
                }
            }
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  vlib_node_increment_counter (vm, nm->out2in_reass_node_index,
                               NAT64_OUT2IN_ERROR_PROCESSED_FRAGMENTS,
                               pkts_processed);
  vlib_node_increment_counter (vm, nm->out2in_reass_node_index,
                               NAT64_OUT2IN_ERROR_CACHED_FRAGMENTS,
                               cached_fragments);

  nat_send_all_to_node (vm, fragments_to_drop, node,
                        &node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT],
                        NAT64_OUT2IN_NEXT_DROP);

  vec_free (fragments_to_drop);
  vec_free (fragments_to_loopback);
  return frame->n_vectors;
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_out2in_reass_node) = {
  .name = "nat64-out2in-reass",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_out2in_reass_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
  .error_strings = nat64_out2in_error_strings,
  .n_next_nodes = NAT64_OUT2IN_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
    [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
  },
};
/* *INDENT-ON* */

#define foreach_nat64_out2in_handoff_error                       \
_(CONGESTION_DROP, "congestion drop")                            \
_(SAME_WORKER, "same worker")                                    \
_(DO_HANDOFF, "do handoff")

typedef enum
{
#define _(sym,str) NAT64_OUT2IN_HANDOFF_ERROR_##sym,
  foreach_nat64_out2in_handoff_error
#undef _
    NAT64_OUT2IN_HANDOFF_N_ERROR,
} nat64_out2in_handoff_error_t;

static char *nat64_out2in_handoff_error_strings[] = {
#define _(sym,string) string,
  foreach_nat64_out2in_handoff_error
#undef _
};

typedef struct
{
  u32 next_worker_index;
} nat64_out2in_handoff_trace_t;

static u8 *
format_nat64_out2in_handoff_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  nat64_out2in_handoff_trace_t *t =
    va_arg (*args, nat64_out2in_handoff_trace_t *);

  s =
    format (s, "NAT64-OUT2IN-HANDOFF: next-worker %d", t->next_worker_index);

  return s;
}

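/*
 * Worker handoff node: nat64_get_worker_out2in() picks the owning thread for
 * each packet and buffers are enqueued to that thread's out2in frame queue.
 */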
VLIB_NODE_FN (nat64_out2in_handoff_node) (vlib_main_t * vm,
                                          vlib_node_runtime_t * node,
                                          vlib_frame_t * frame)
{
  nat64_main_t *nm = &nat64_main;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u32 n_enq, n_left_from, *from;
  u16 thread_indices[VLIB_FRAME_SIZE], *ti;
  u32 fq_index;
  u32 thread_index = vm->thread_index;
  u32 do_handoff = 0, same_worker = 0;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  vlib_get_buffers (vm, from, bufs, n_left_from);

  b = bufs;
  ti = thread_indices;

  fq_index = nm->fq_out2in_index;

  while (n_left_from > 0)
    {
      ip4_header_t *ip0;

      ip0 = vlib_buffer_get_current (b[0]);
      ti[0] = nat64_get_worker_out2in (ip0);

      if (ti[0] != thread_index)
        do_handoff++;
      else
        same_worker++;

      if (PREDICT_FALSE
          ((node->flags & VLIB_NODE_FLAG_TRACE)
           && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
        {
          nat64_out2in_handoff_trace_t *t =
            vlib_add_trace (vm, node, b[0], sizeof (*t));
          t->next_worker_index = ti[0];
        }

      n_left_from -= 1;
      ti += 1;
      b += 1;
    }

  n_enq =
    vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
                                   frame->n_vectors, 1);

  if (n_enq < frame->n_vectors)
    vlib_node_increment_counter (vm, node->node_index,
                                 NAT64_OUT2IN_HANDOFF_ERROR_CONGESTION_DROP,
                                 frame->n_vectors - n_enq);
  vlib_node_increment_counter (vm, node->node_index,
                               NAT64_OUT2IN_HANDOFF_ERROR_SAME_WORKER,
                               same_worker);
  vlib_node_increment_counter (vm, node->node_index,
                               NAT64_OUT2IN_HANDOFF_ERROR_DO_HANDOFF,
                               do_handoff);

  return frame->n_vectors;
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_out2in_handoff_node) = {
  .name = "nat64-out2in-handoff",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_out2in_handoff_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN(nat64_out2in_handoff_error_strings),
  .error_strings = nat64_out2in_handoff_error_strings,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};
/* *INDENT-ON* */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */