/*
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @file
 * @brief NAT64 IPv4 to IPv6 translation (outside to inside network)
 */

#include <nat/nat64.h>
#include <nat/nat_reass.h>
#include <nat/nat_inlines.h>
#include <vnet/ip/ip4_to_ip6.h>
#include <vnet/fib/ip4_fib.h>
#include <vnet/udp/udp.h>

typedef struct
{
  u32 sw_if_index;
  u32 next_index;
} nat64_out2in_trace_t;

static u8 *
format_nat64_out2in_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  nat64_out2in_trace_t *t = va_arg (*args, nat64_out2in_trace_t *);

  s =
    format (s, "NAT64-out2in: sw_if_index %d, next index %d", t->sw_if_index,
            t->next_index);

  return s;
}

typedef struct
{
  u32 sw_if_index;
  u32 next_index;
  u8 cached;
} nat64_out2in_reass_trace_t;

static u8 *
format_nat64_out2in_reass_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  nat64_out2in_reass_trace_t *t =
    va_arg (*args, nat64_out2in_reass_trace_t *);

  s =
    format (s, "NAT64-out2in-reass: sw_if_index %d, next index %d, status %s",
            t->sw_if_index, t->next_index,
            t->cached ? "cached" : "translated");

  return s;
}

vlib_node_registration_t nat64_out2in_node;
vlib_node_registration_t nat64_out2in_reass_node;
vlib_node_registration_t nat64_out2in_handoff_node;

#define foreach_nat64_out2in_error                       \
_(UNSUPPORTED_PROTOCOL, "unsupported protocol")          \
_(OUT2IN_PACKETS, "good out2in packets processed")       \
_(NO_TRANSLATION, "no translation")                      \
_(UNKNOWN, "unknown")                                    \
_(DROP_FRAGMENT, "drop fragment")                        \
_(MAX_REASS, "maximum reassemblies exceeded")            \
_(MAX_FRAG, "maximum fragments per reassembly exceeded") \
_(TCP_PACKETS, "TCP packets")                            \
_(UDP_PACKETS, "UDP packets")                            \
_(ICMP_PACKETS, "ICMP packets")                          \
_(OTHER_PACKETS, "other protocol packets")               \
_(FRAGMENTS, "fragments")                                \
_(CACHED_FRAGMENTS, "cached fragments")                  \
_(PROCESSED_FRAGMENTS, "processed fragments")


typedef enum
{
#define _(sym,str) NAT64_OUT2IN_ERROR_##sym,
  foreach_nat64_out2in_error
#undef _
    NAT64_OUT2IN_N_ERROR,
} nat64_out2in_error_t;

static char *nat64_out2in_error_strings[] = {
#define _(sym,string) string,
  foreach_nat64_out2in_error
#undef _
};

typedef enum
{
  NAT64_OUT2IN_NEXT_IP6_LOOKUP,
  NAT64_OUT2IN_NEXT_IP4_LOOKUP,
  NAT64_OUT2IN_NEXT_DROP,
  NAT64_OUT2IN_NEXT_REASS,
  NAT64_OUT2IN_N_NEXT,
} nat64_out2in_next_t;

typedef struct nat64_out2in_set_ctx_t_
{
  vlib_buffer_t *b;
  vlib_main_t *vm;
  u32 thread_index;
} nat64_out2in_set_ctx_t;

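/**
 * @brief Rewrite an outside IPv4 TCP/UDP packet to its inside IPv6 form.
 *
 * Looks up the session (and, if needed, the static BIB entry) in the
 * per-worker NAT64 db, creating the session on demand, then rewrites the
 * IPv6 addresses and destination port, incrementally updates the L4
 * checksum and resets the session timeout.
 */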
static int
nat64_out2in_tcp_udp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
                             void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_out2in_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  ip6_address_t ip6_saddr;
  udp_header_t *udp = ip4_next_header (ip4);
  tcp_header_t *tcp = ip4_next_header (ip4);
  u8 proto = ip4->protocol;
  u16 dport = udp->dst_port;
  u16 sport = udp->src_port;
  u32 sw_if_index, fib_index;
  u16 *checksum;
  ip_csum_t csum;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);

  clib_memset (&saddr, 0, sizeof (saddr));
  saddr.ip4.as_u32 = ip4->src_address.as_u32;
  clib_memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ip4->dst_address.as_u32;

  ste =
    nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
                            fib_index, 0);
  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
        return -1;
    }
  else
    {
      bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, fib_index, 0);

      if (!bibe)
        return -1;

      nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
      ste =
        nat64_db_st_entry_create (ctx->thread_index, db, bibe, &ip6_saddr,
                                  &saddr.ip4, sport);

      if (!ste)
        return -1;

      vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
                               db->st.st_entries_num);
    }

  ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
  ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];

  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
  udp->dst_port = bibe->in_port;

  if (proto == IP_PROTOCOL_UDP)
    checksum = &udp->checksum;
  else
    {
      checksum = &tcp->checksum;
      nat64_tcp_session_set_state (ste, tcp, 0);
    }

  csum = ip_csum_sub_even (*checksum, dport);
  csum = ip_csum_add_even (csum, udp->dst_port);
  *checksum = ip_csum_fold (csum);

  vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;

  nat64_session_reset_timeout (ste, ctx->vm);

  return 0;
}

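/**
 * @brief Rewrite an outside ICMPv4 packet to its inside ICMPv6 form.
 *
 * Echo request/reply is matched on the outside ICMP identifier against the
 * per-worker db (creating the session if only a BIB entry exists); for
 * error messages the outer addresses are derived from the embedded packet.
 */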
static int
nat64_out2in_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_out2in_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  ip6_address_t ip6_saddr;
  u32 sw_if_index, fib_index;
  icmp46_header_t *icmp = ip4_next_header (ip4);
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);

  clib_memset (&saddr, 0, sizeof (saddr));
  saddr.ip4.as_u32 = ip4->src_address.as_u32;
  clib_memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ip4->dst_address.as_u32;

  if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
    {
      u16 out_id = ((u16 *) (icmp))[2];
      ste =
        nat64_db_st_entry_find (db, &daddr, &saddr, out_id, 0,
                                IP_PROTOCOL_ICMP, fib_index, 0);

      if (ste)
        {
          bibe =
            nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP,
                                         ste->bibe_index);
          if (!bibe)
            return -1;
        }
      else
        {
          bibe =
            nat64_db_bib_entry_find (db, &daddr, out_id,
                                     IP_PROTOCOL_ICMP, fib_index, 0);
          if (!bibe)
            return -1;

          nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
          ste =
            nat64_db_st_entry_create (ctx->thread_index, db,
                                      bibe, &ip6_saddr, &saddr.ip4, 0);

          if (!ste)
            return -1;

          vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
                                   db->st.st_entries_num);
        }

      nat64_session_reset_timeout (ste, ctx->vm);

      ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
      ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];

      ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
      ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
      ((u16 *) (icmp))[2] = bibe->in_port;

      vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
    }
  else
    {
      ip6_header_t *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);

      nat64_compose_ip6 (&ip6->src_address, &ip4->src_address,
                         vnet_buffer (ctx->b)->sw_if_index[VLIB_TX]);
      ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
      ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];
    }

  return 0;
}

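/**
 * @brief Rewrite the packet embedded in an outside ICMPv4 error message.
 *
 * The inner IPv4 header is translated back to the original inside IPv6
 * flow by looking up the existing session; no new state is created here.
 */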
static int
nat64_out2in_inner_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
                                void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_out2in_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  u32 sw_if_index, fib_index;
  u8 proto = ip4->protocol;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index =
    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);

  clib_memset (&saddr, 0, sizeof (saddr));
  saddr.ip4.as_u32 = ip4->src_address.as_u32;
  clib_memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ip4->dst_address.as_u32;

  if (proto == IP_PROTOCOL_ICMP6)
    {
      icmp46_header_t *icmp = ip4_next_header (ip4);
      u16 out_id = ((u16 *) (icmp))[2];
      proto = IP_PROTOCOL_ICMP;

      if (!
          (icmp->type == ICMP6_echo_request
           || icmp->type == ICMP6_echo_reply))
        return -1;

      ste =
        nat64_db_st_entry_find (db, &saddr, &daddr, out_id, 0, proto,
                                fib_index, 0);
      if (!ste)
        return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
        return -1;

      ip6->dst_address.as_u64[0] = ste->in_r_addr.as_u64[0];
      ip6->dst_address.as_u64[1] = ste->in_r_addr.as_u64[1];
      ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
      ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
      ((u16 *) (icmp))[2] = bibe->in_port;

      vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
    }
  else
    {
      udp_header_t *udp = ip4_next_header (ip4);
      tcp_header_t *tcp = ip4_next_header (ip4);
      u16 dport = udp->dst_port;
      u16 sport = udp->src_port;
      u16 *checksum;
      ip_csum_t csum;

      ste =
        nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
                                fib_index, 0);
      if (!ste)
        return -1;

      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
        return -1;

      nat64_compose_ip6 (&ip6->dst_address, &daddr.ip4, bibe->fib_index);
      ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
      ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
      udp->src_port = bibe->in_port;

      if (proto == IP_PROTOCOL_UDP)
        checksum = &udp->checksum;
      else
        checksum = &tcp->checksum;
      if (*checksum)
        {
          csum = ip_csum_sub_even (*checksum, sport);
          csum = ip_csum_add_even (csum, udp->src_port);
          *checksum = ip_csum_fold (csum);
        }

      vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
    }

  return 0;
}

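/**
 * @brief Rewrite an outside IPv4 packet of an unknown L4 protocol.
 *
 * Protocols without ports are matched on addresses only; a session is
 * created on demand from the static BIB entry.
 */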
static int
nat64_out2in_unk_proto_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
                               void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_out2in_set_ctx_t *ctx = arg;
  nat64_db_bib_entry_t *bibe;
  nat64_db_st_entry_t *ste;
  ip46_address_t saddr, daddr;
  ip6_address_t ip6_saddr;
  u32 sw_if_index, fib_index;
  u8 proto = ip4->protocol;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
  fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);

  clib_memset (&saddr, 0, sizeof (saddr));
  saddr.ip4.as_u32 = ip4->src_address.as_u32;
  clib_memset (&daddr, 0, sizeof (daddr));
  daddr.ip4.as_u32 = ip4->dst_address.as_u32;

  ste =
    nat64_db_st_entry_find (db, &daddr, &saddr, 0, 0, proto, fib_index, 0);
  if (ste)
    {
      bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
      if (!bibe)
        return -1;
    }
  else
    {
      bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, fib_index, 0);

      if (!bibe)
        return -1;

      nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
      ste = nat64_db_st_entry_create (ctx->thread_index, db,
                                      bibe, &ip6_saddr, &saddr.ip4, 0);

      if (!ste)
        return -1;

      vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
                               db->st.st_entries_num);
    }

  nat64_session_reset_timeout (ste, ctx->vm);

  ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
  ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];

  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];

  vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;

  return 0;
}

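/**
 * @brief NAT64 out2in node: translate outside IPv4 packets to inside IPv6.
 *
 * Fragments are sent to the reassembly node, unsupported protocols take the
 * unknown-protocol path, and DHCP client replies are punted to ip4-lookup.
 */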
static uword
nat64_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
                      vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  nat64_out2in_next_t next_index;
  u32 pkts_processed = 0;
  u32 thread_index = vm->thread_index;
  u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets =
    0, fragments = 0;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t *b0;
          u32 next0;
          ip4_header_t *ip40;
          u32 proto0;
          nat64_out2in_set_ctx_t ctx0;
          udp_header_t *udp0;

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          ip40 = vlib_buffer_get_current (b0);

          ctx0.b = b0;
          ctx0.vm = vm;
          ctx0.thread_index = thread_index;

          next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP;

          proto0 = ip_proto_to_snat_proto (ip40->protocol);

          if (PREDICT_FALSE (proto0 == ~0))
            {
              if (ip4_to_ip6 (b0, nat64_out2in_unk_proto_set_cb, &ctx0))
                {
                  next0 = NAT64_OUT2IN_NEXT_DROP;
                  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
                }
              other_packets++;
              goto trace0;
            }

          if (PREDICT_FALSE (ip4_is_fragment (ip40)))
            {
              next0 = NAT64_OUT2IN_NEXT_REASS;
              fragments++;
              goto trace0;
            }

          if (proto0 == SNAT_PROTOCOL_ICMP)
            {
              icmp_packets++;
              if (icmp_to_icmp6
                  (b0, nat64_out2in_icmp_set_cb, &ctx0,
                   nat64_out2in_inner_icmp_set_cb, &ctx0))
                {
                  next0 = NAT64_OUT2IN_NEXT_DROP;
                  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
                  goto trace0;
                }
            }
          else
            {
              if (proto0 == SNAT_PROTOCOL_TCP)
                tcp_packets++;
              else
                udp_packets++;

              if (ip4_to_ip6_tcp_udp (b0, nat64_out2in_tcp_udp_set_cb, &ctx0))
                {
                  udp0 = ip4_next_header (ip40);
                  /*
                   * Send DHCP packets to the ipv4 stack, or we won't
                   * be able to use dhcp client on the outside interface
                   */
                  if ((proto0 == SNAT_PROTOCOL_UDP)
                      && (udp0->dst_port ==
                          clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client)))
                    {
                      next0 = NAT64_OUT2IN_NEXT_IP4_LOOKUP;
                      goto trace0;
                    }
                  next0 = NAT64_OUT2IN_NEXT_DROP;
                  b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
                  goto trace0;
                }
            }

        trace0:
          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              nat64_out2in_trace_t *t =
                vlib_add_trace (vm, node, b0, sizeof (*t));
              t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
              t->next_index = next0;
            }

          pkts_processed += next0 == NAT64_OUT2IN_NEXT_IP6_LOOKUP;

          /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                           n_left_to_next, bi0, next0);
        }
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  vlib_node_increment_counter (vm, nat64_out2in_node.index,
                               NAT64_OUT2IN_ERROR_OUT2IN_PACKETS,
                               pkts_processed);
  vlib_node_increment_counter (vm, nat64_out2in_node.index,
                               NAT64_OUT2IN_ERROR_TCP_PACKETS, tcp_packets);
  vlib_node_increment_counter (vm, nat64_out2in_node.index,
                               NAT64_OUT2IN_ERROR_UDP_PACKETS, udp_packets);
  vlib_node_increment_counter (vm, nat64_out2in_node.index,
                               NAT64_OUT2IN_ERROR_ICMP_PACKETS, icmp_packets);
  vlib_node_increment_counter (vm, nat64_out2in_node.index,
                               NAT64_OUT2IN_ERROR_OTHER_PACKETS,
                               other_packets);
  vlib_node_increment_counter (vm, nat64_out2in_node.index,
                               NAT64_OUT2IN_ERROR_FRAGMENTS, fragments);

  return frame->n_vectors;
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_out2in_node) = {
  .function = nat64_out2in_node_fn,
  .name = "nat64-out2in",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_out2in_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
  .error_strings = nat64_out2in_error_strings,
  .n_next_nodes = NAT64_OUT2IN_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
    [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_node, nat64_out2in_node_fn);

typedef struct nat64_out2in_frag_set_ctx_t_
{
  vlib_main_t *vm;
  vlib_buffer_t *b;
  u32 sess_index;
  u32 thread_index;
  u8 proto;
  u8 first_frag;
} nat64_out2in_frag_set_ctx_t;

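/**
 * @brief Rewrite an outside IPv4 fragment to its inside IPv6 form.
 *
 * Uses the session found (or created) for the first fragment; the L4 port
 * and checksum are only rewritten on the fragment carrying the L4 header.
 */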
static int
nat64_out2in_frag_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
{
  nat64_main_t *nm = &nat64_main;
  nat64_out2in_frag_set_ctx_t *ctx = arg;
  nat64_db_st_entry_t *ste;
  nat64_db_bib_entry_t *bibe;
  udp_header_t *udp = ip4_next_header (ip4);
  ip_csum_t csum;
  u16 *checksum;
  nat64_db_t *db = &nm->db[ctx->thread_index];

  ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
  if (!ste)
    return -1;

  bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
  if (!bibe)
    return -1;

  if (ctx->first_frag)
    {
      udp->dst_port = bibe->in_port;

      if (ip4->protocol == IP_PROTOCOL_UDP)
        {
          checksum = &udp->checksum;

          /* UDP checksum is optional over IPv4 but mandatory over IPv6 */
          if (!*checksum)
            {
              u16 udp_len =
                clib_host_to_net_u16 (ip4->length) - sizeof (*ip4);
              csum = ip_incremental_checksum (0, udp, udp_len);
              csum =
                ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
              csum =
                ip_csum_with_carry (csum,
                                    clib_host_to_net_u16 (IP_PROTOCOL_UDP));
              csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[0]);
              csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[1]);
              csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[0]);
              csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[1]);
              *checksum = ~ip_csum_fold (csum);
            }
          else
            {
              csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
              csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
              csum = ip_csum_sub_even (csum, bibe->out_port);
              csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
              csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
              csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
              csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
              csum = ip_csum_add_even (csum, bibe->in_port);
              *checksum = ip_csum_fold (csum);
            }
        }
      else
        {
          tcp_header_t *tcp = ip4_next_header (ip4);
          nat64_tcp_session_set_state (ste, tcp, 0);
          checksum = &tcp->checksum;
          csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
          csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
          csum = ip_csum_sub_even (csum, bibe->out_port);
          csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
          csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
          csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
          csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
          csum = ip_csum_add_even (csum, bibe->in_port);
          *checksum = ip_csum_fold (csum);
        }

    }

  ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
  ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];

  ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
  ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];

  vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;

  nat64_session_reset_timeout (ste, ctx->vm);

  return 0;
}

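/**
 * @brief NAT64 out2in reassembly node.
 *
 * The first fragment of a reassembly resolves (or creates) the session;
 * later fragments reuse it. Fragments that arrive before the first one are
 * cached and looped back through the node once the session is known.
 */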
static uword
nat64_out2in_reass_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
                            vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  nat64_out2in_next_t next_index;
  u32 pkts_processed = 0, cached_fragments = 0;
  u32 *fragments_to_drop = 0;
  u32 *fragments_to_loopback = 0;
  nat64_main_t *nm = &nat64_main;
  u32 thread_index = vm->thread_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t *b0;
          u32 next0;
          ip4_header_t *ip40;
          u8 cached0 = 0;
          u32 sw_if_index0, fib_index0;
          udp_header_t *udp0;
          nat_reass_ip4_t *reass0;
          ip46_address_t saddr0, daddr0;
          nat64_db_st_entry_t *ste0;
          nat64_db_bib_entry_t *bibe0;
          ip6_address_t ip6_saddr0;
          nat64_out2in_frag_set_ctx_t ctx0;
          nat64_db_t *db = &nm->db[thread_index];

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP;

          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
          fib_index0 =
            fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
                                                 sw_if_index0);

          ctx0.thread_index = thread_index;

          if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
            {
              next0 = NAT64_OUT2IN_NEXT_DROP;
              b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
              goto trace0;
            }

          ip40 = vlib_buffer_get_current (b0);

          if (PREDICT_FALSE (!(ip40->protocol == IP_PROTOCOL_TCP
                               || ip40->protocol == IP_PROTOCOL_UDP)))
            {
              next0 = NAT64_OUT2IN_NEXT_DROP;
              b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
              goto trace0;
            }

          udp0 = ip4_next_header (ip40);

          reass0 = nat_ip4_reass_find_or_create (ip40->src_address,
                                                 ip40->dst_address,
                                                 ip40->fragment_id,
                                                 ip40->protocol,
                                                 1, &fragments_to_drop);

          if (PREDICT_FALSE (!reass0))
            {
              next0 = NAT64_OUT2IN_NEXT_DROP;
              b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_REASS];
              goto trace0;
            }

          if (PREDICT_FALSE (ip4_is_first_fragment (ip40)))
            {
              ctx0.first_frag = 1;

              clib_memset (&saddr0, 0, sizeof (saddr0));
              saddr0.ip4.as_u32 = ip40->src_address.as_u32;
              clib_memset (&daddr0, 0, sizeof (daddr0));
              daddr0.ip4.as_u32 = ip40->dst_address.as_u32;

              ste0 =
                nat64_db_st_entry_find (db, &daddr0, &saddr0,
                                        udp0->dst_port, udp0->src_port,
                                        ip40->protocol, fib_index0, 0);
              if (!ste0)
                {
                  bibe0 =
                    nat64_db_bib_entry_find (db, &daddr0, udp0->dst_port,
                                             ip40->protocol, fib_index0, 0);
                  if (!bibe0)
                    {
                      next0 = NAT64_OUT2IN_NEXT_DROP;
                      b0->error =
                        node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
                      goto trace0;
                    }

                  nat64_compose_ip6 (&ip6_saddr0, &ip40->src_address,
                                     bibe0->fib_index);
                  ste0 =
                    nat64_db_st_entry_create (thread_index,
                                              db, bibe0, &ip6_saddr0,
                                              &saddr0.ip4, udp0->src_port);

                  if (!ste0)
                    {
                      next0 = NAT64_OUT2IN_NEXT_DROP;
                      b0->error =
                        node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
                      goto trace0;
                    }

                  vlib_set_simple_counter (&nm->total_sessions, thread_index,
                                           0, db->st.st_entries_num);
                }
              reass0->sess_index = nat64_db_st_entry_get_index (db, ste0);
              reass0->thread_index = thread_index;

              nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
            }
          else
            {
              ctx0.first_frag = 0;

              if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
                {
                  if (nat_ip4_reass_add_fragment
                      (thread_index, reass0, bi0, &fragments_to_drop))
                    {
                      b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_FRAG];
                      next0 = NAT64_OUT2IN_NEXT_DROP;
                      goto trace0;
                    }
                  cached0 = 1;
                  goto trace0;
                }
            }

          ctx0.sess_index = reass0->sess_index;
          ctx0.proto = ip40->protocol;
          ctx0.vm = vm;
          ctx0.b = b0;

          if (ip4_to_ip6_fragmented (b0, nat64_out2in_frag_set_cb, &ctx0))
            {
              next0 = NAT64_OUT2IN_NEXT_DROP;
              b0->error = node->errors[NAT64_OUT2IN_ERROR_UNKNOWN];
              goto trace0;
            }

        trace0:
          if (PREDICT_FALSE
              ((node->flags & VLIB_NODE_FLAG_TRACE)
               && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              nat64_out2in_reass_trace_t *t =
                vlib_add_trace (vm, node, b0, sizeof (*t));
              t->cached = cached0;
              t->sw_if_index = sw_if_index0;
              t->next_index = next0;
            }

          if (cached0)
            {
              n_left_to_next++;
              to_next--;
              cached_fragments++;
            }
          else
            {
              pkts_processed += next0 != NAT64_OUT2IN_NEXT_DROP;

              /* verify speculative enqueue, maybe switch current next frame */
              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                               to_next, n_left_to_next,
                                               bi0, next0);
            }

          if (n_left_from == 0 && vec_len (fragments_to_loopback))
            {
              from = vlib_frame_vector_args (frame);
              u32 len = vec_len (fragments_to_loopback);
              if (len <= VLIB_FRAME_SIZE)
                {
                  clib_memcpy_fast (from, fragments_to_loopback,
                                    sizeof (u32) * len);
                  n_left_from = len;
                  vec_reset_length (fragments_to_loopback);
                }
              else
                {
                  clib_memcpy_fast (from, fragments_to_loopback +
                                    (len - VLIB_FRAME_SIZE),
                                    sizeof (u32) * VLIB_FRAME_SIZE);
                  n_left_from = VLIB_FRAME_SIZE;
                  _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
                }
            }
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  vlib_node_increment_counter (vm, nat64_out2in_reass_node.index,
                               NAT64_OUT2IN_ERROR_PROCESSED_FRAGMENTS,
                               pkts_processed);
  vlib_node_increment_counter (vm, nat64_out2in_reass_node.index,
                               NAT64_OUT2IN_ERROR_CACHED_FRAGMENTS,
                               cached_fragments);

  nat_send_all_to_node (vm, fragments_to_drop, node,
                        &node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT],
                        NAT64_OUT2IN_NEXT_DROP);

  vec_free (fragments_to_drop);
  vec_free (fragments_to_loopback);
  return frame->n_vectors;
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_out2in_reass_node) = {
  .function = nat64_out2in_reass_node_fn,
  .name = "nat64-out2in-reass",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_out2in_reass_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
  .error_strings = nat64_out2in_error_strings,
  .n_next_nodes = NAT64_OUT2IN_N_NEXT,
  /* edit / add dispositions here */
  .next_nodes = {
    [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
    [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
    [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_reass_node,
                              nat64_out2in_reass_node_fn);

#define foreach_nat64_out2in_handoff_error                       \
_(CONGESTION_DROP, "congestion drop")                            \
_(SAME_WORKER, "same worker")                                    \
_(DO_HANDOFF, "do handoff")

typedef enum
{
#define _(sym,str) NAT64_OUT2IN_HANDOFF_ERROR_##sym,
  foreach_nat64_out2in_handoff_error
#undef _
    NAT64_OUT2IN_HANDOFF_N_ERROR,
} nat64_out2in_handoff_error_t;

static char *nat64_out2in_handoff_error_strings[] = {
#define _(sym,string) string,
  foreach_nat64_out2in_handoff_error
#undef _
};

typedef struct
{
  u32 next_worker_index;
} nat64_out2in_handoff_trace_t;

static u8 *
format_nat64_out2in_handoff_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  nat64_out2in_handoff_trace_t *t =
    va_arg (*args, nat64_out2in_handoff_trace_t *);

  s =
    format (s, "NAT64-OUT2IN-HANDOFF: next-worker %d", t->next_worker_index);

  return s;
}

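/**
 * @brief NAT64 out2in worker handoff node.
 *
 * Computes the owning worker for each outside IPv4 packet and hands the
 * buffer off to that worker's frame queue, counting same-worker,
 * handed-off and congestion-dropped packets.
 */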
static inline uword
nat64_out2in_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
                              vlib_frame_t * frame)
{
  nat64_main_t *nm = &nat64_main;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u32 n_enq, n_left_from, *from;
  u16 thread_indices[VLIB_FRAME_SIZE], *ti;
  u32 fq_index;
  u32 thread_index = vm->thread_index;
  u32 do_handoff = 0, same_worker = 0;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  vlib_get_buffers (vm, from, bufs, n_left_from);

  b = bufs;
  ti = thread_indices;

  fq_index = nm->fq_out2in_index;

  while (n_left_from > 0)
    {
      ip4_header_t *ip0;

      ip0 = vlib_buffer_get_current (b[0]);
      ti[0] = nat64_get_worker_out2in (ip0);

      if (ti[0] != thread_index)
        do_handoff++;
      else
        same_worker++;

      if (PREDICT_FALSE
          ((node->flags & VLIB_NODE_FLAG_TRACE)
           && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
        {
          nat64_out2in_handoff_trace_t *t =
            vlib_add_trace (vm, node, b[0], sizeof (*t));
          t->next_worker_index = ti[0];
        }

      n_left_from -= 1;
      ti += 1;
      b += 1;
    }

  n_enq =
    vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
                                   frame->n_vectors, 1);

  if (n_enq < frame->n_vectors)
    vlib_node_increment_counter (vm, node->node_index,
                                 NAT64_OUT2IN_HANDOFF_ERROR_CONGESTION_DROP,
                                 frame->n_vectors - n_enq);
  vlib_node_increment_counter (vm, node->node_index,
                               NAT64_OUT2IN_HANDOFF_ERROR_SAME_WORKER,
                               same_worker);
  vlib_node_increment_counter (vm, node->node_index,
                               NAT64_OUT2IN_HANDOFF_ERROR_DO_HANDOFF,
                               do_handoff);

  return frame->n_vectors;
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_out2in_handoff_node) = {
  .function = nat64_out2in_handoff_node_fn,
  .name = "nat64-out2in-handoff",
  .vector_size = sizeof (u32),
  .format_trace = format_nat64_out2in_handoff_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN(nat64_out2in_handoff_error_strings),
  .error_strings = nat64_out2in_handoff_error_strings,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_handoff_node,
                              nat64_out2in_handoff_node_fn);
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */