NAT: total users and sessions gauges (VPP-1484)
[vpp.git] / src / plugins / nat / nat64_out2in.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT64 IPv4 to IPv6 translation (outside to inside network)
18  */
19
20 #include <nat/nat64.h>
21 #include <nat/nat_reass.h>
22 #include <nat/nat_inlines.h>
23 #include <vnet/ip/ip4_to_ip6.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vnet/udp/udp.h>
26
27 typedef struct
28 {
29   u32 sw_if_index;
30   u32 next_index;
31 } nat64_out2in_trace_t;
32
33 static u8 *
34 format_nat64_out2in_trace (u8 * s, va_list * args)
35 {
36   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
37   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
38   nat64_out2in_trace_t *t = va_arg (*args, nat64_out2in_trace_t *);
39
40   s =
41     format (s, "NAT64-out2in: sw_if_index %d, next index %d", t->sw_if_index,
42             t->next_index);
43
44   return s;
45 }
46
47 typedef struct
48 {
49   u32 sw_if_index;
50   u32 next_index;
51   u8 cached;
52 } nat64_out2in_reass_trace_t;
53
54 static u8 *
55 format_nat64_out2in_reass_trace (u8 * s, va_list * args)
56 {
57   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
58   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
59   nat64_out2in_reass_trace_t *t =
60     va_arg (*args, nat64_out2in_reass_trace_t *);
61
62   s =
63     format (s, "NAT64-out2in-reass: sw_if_index %d, next index %d, status %s",
64             t->sw_if_index, t->next_index,
65             t->cached ? "cached" : "translated");
66
67   return s;
68 }
69
70 vlib_node_registration_t nat64_out2in_node;
71 vlib_node_registration_t nat64_out2in_reass_node;
72 vlib_node_registration_t nat64_out2in_handoff_node;
73
/*
 * X-macro listing every error/counter of the NAT64 out2in nodes as
 * _(SYMBOL, "description").  The order of the entries defines the numeric
 * counter indices, so new entries must keep the existing order intact.
 */
#define foreach_nat64_out2in_error                              \
  _(UNSUPPORTED_PROTOCOL, "unsupported protocol")               \
  _(OUT2IN_PACKETS, "good out2in packets processed")            \
  _(NO_TRANSLATION, "no translation")                           \
  _(UNKNOWN, "unknown")                                         \
  _(DROP_FRAGMENT, "drop fragment")                             \
  _(MAX_REASS, "maximum reassemblies exceeded")                 \
  _(MAX_FRAG, "maximum fragments per reassembly exceeded")      \
  _(TCP_PACKETS, "TCP packets")                                 \
  _(UDP_PACKETS, "UDP packets")                                 \
  _(ICMP_PACKETS, "ICMP packets")                               \
  _(OTHER_PACKETS, "other protocol packets")                    \
  _(FRAGMENTS, "fragments")                                     \
  _(CACHED_FRAGMENTS, "cached fragments")                       \
  _(PROCESSED_FRAGMENTS, "processed fragments")
89
90
91 typedef enum
92 {
93 #define _(sym,str) NAT64_OUT2IN_ERROR_##sym,
94   foreach_nat64_out2in_error
95 #undef _
96     NAT64_OUT2IN_N_ERROR,
97 } nat64_out2in_error_t;
98
99 static char *nat64_out2in_error_strings[] = {
100 #define _(sym,string) string,
101   foreach_nat64_out2in_error
102 #undef _
103 };
104
/** Next-node dispositions of the NAT64 out2in nodes. */
typedef enum
{
  /** Hand the packet to "ip6-lookup" */
  NAT64_OUT2IN_NEXT_IP6_LOOKUP = 0,
  /** Hand the packet to "ip4-lookup" */
  NAT64_OUT2IN_NEXT_IP4_LOOKUP,
  /** Hand the packet to "error-drop" */
  NAT64_OUT2IN_NEXT_DROP,
  /** Hand the packet to "nat64-out2in-reass" */
  NAT64_OUT2IN_NEXT_REASS,
  /** Number of dispositions */
  NAT64_OUT2IN_N_NEXT,
} nat64_out2in_next_t;
113
114 typedef struct nat64_out2in_set_ctx_t_
115 {
116   vlib_buffer_t *b;
117   vlib_main_t *vm;
118   u32 thread_index;
119 } nat64_out2in_set_ctx_t;
120
121 static int
122 nat64_out2in_tcp_udp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
123                              void *arg)
124 {
125   nat64_main_t *nm = &nat64_main;
126   nat64_out2in_set_ctx_t *ctx = arg;
127   nat64_db_bib_entry_t *bibe;
128   nat64_db_st_entry_t *ste;
129   ip46_address_t saddr, daddr;
130   ip6_address_t ip6_saddr;
131   udp_header_t *udp = ip4_next_header (ip4);
132   tcp_header_t *tcp = ip4_next_header (ip4);
133   u8 proto = ip4->protocol;
134   u16 dport = udp->dst_port;
135   u16 sport = udp->src_port;
136   u32 sw_if_index, fib_index;
137   u16 *checksum;
138   ip_csum_t csum;
139   nat64_db_t *db = &nm->db[ctx->thread_index];
140
141   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
142   fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
143
144   clib_memset (&saddr, 0, sizeof (saddr));
145   saddr.ip4.as_u32 = ip4->src_address.as_u32;
146   clib_memset (&daddr, 0, sizeof (daddr));
147   daddr.ip4.as_u32 = ip4->dst_address.as_u32;
148
149   ste =
150     nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto,
151                             fib_index, 0);
152   if (ste)
153     {
154       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
155       if (!bibe)
156         return -1;
157     }
158   else
159     {
160       bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, fib_index, 0);
161
162       if (!bibe)
163         return -1;
164
165       nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
166       ste =
167         nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, sport);
168
169       if (!ste)
170         return -1;
171
172       vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
173                                db->st.st_entries_num);
174     }
175
176   ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
177   ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
178
179   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
180   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
181   udp->dst_port = bibe->in_port;
182
183   if (proto == IP_PROTOCOL_UDP)
184     checksum = &udp->checksum;
185   else
186     {
187       checksum = &tcp->checksum;
188       nat64_tcp_session_set_state (ste, tcp, 0);
189     }
190
191   csum = ip_csum_sub_even (*checksum, dport);
192   csum = ip_csum_add_even (csum, udp->dst_port);
193   *checksum = ip_csum_fold (csum);
194
195   vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
196
197   nat64_session_reset_timeout (ste, ctx->vm);
198
199   return 0;
200 }
201
202 static int
203 nat64_out2in_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
204 {
205   nat64_main_t *nm = &nat64_main;
206   nat64_out2in_set_ctx_t *ctx = arg;
207   nat64_db_bib_entry_t *bibe;
208   nat64_db_st_entry_t *ste;
209   ip46_address_t saddr, daddr;
210   ip6_address_t ip6_saddr;
211   u32 sw_if_index, fib_index;
212   icmp46_header_t *icmp = ip4_next_header (ip4);
213   nat64_db_t *db = &nm->db[ctx->thread_index];
214
215   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
216   fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
217
218   clib_memset (&saddr, 0, sizeof (saddr));
219   saddr.ip4.as_u32 = ip4->src_address.as_u32;
220   clib_memset (&daddr, 0, sizeof (daddr));
221   daddr.ip4.as_u32 = ip4->dst_address.as_u32;
222
223   if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
224     {
225       u16 out_id = ((u16 *) (icmp))[2];
226       ste =
227         nat64_db_st_entry_find (db, &daddr, &saddr, out_id, 0,
228                                 IP_PROTOCOL_ICMP, fib_index, 0);
229
230       if (ste)
231         {
232           bibe =
233             nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP,
234                                          ste->bibe_index);
235           if (!bibe)
236             return -1;
237         }
238       else
239         {
240           bibe =
241             nat64_db_bib_entry_find (db, &daddr, out_id,
242                                      IP_PROTOCOL_ICMP, fib_index, 0);
243           if (!bibe)
244             return -1;
245
246           nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
247           ste =
248             nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, 0);
249
250           if (!ste)
251             return -1;
252
253           vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
254                                    db->st.st_entries_num);
255         }
256
257       nat64_session_reset_timeout (ste, ctx->vm);
258
259       ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
260       ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
261
262       ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
263       ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
264       ((u16 *) (icmp))[2] = bibe->in_port;
265
266       vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
267     }
268   else
269     {
270       ip6_header_t *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
271
272       nat64_compose_ip6 (&ip6->src_address, &ip4->src_address,
273                          vnet_buffer (ctx->b)->sw_if_index[VLIB_TX]);
274       ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0];
275       ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1];
276     }
277
278   return 0;
279 }
280
281 static int
282 nat64_out2in_inner_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
283                                 void *arg)
284 {
285   nat64_main_t *nm = &nat64_main;
286   nat64_out2in_set_ctx_t *ctx = arg;
287   nat64_db_bib_entry_t *bibe;
288   nat64_db_st_entry_t *ste;
289   ip46_address_t saddr, daddr;
290   u32 sw_if_index, fib_index;
291   u8 proto = ip4->protocol;
292   nat64_db_t *db = &nm->db[ctx->thread_index];
293
294   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
295   fib_index =
296     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
297
298   clib_memset (&saddr, 0, sizeof (saddr));
299   saddr.ip4.as_u32 = ip4->src_address.as_u32;
300   clib_memset (&daddr, 0, sizeof (daddr));
301   daddr.ip4.as_u32 = ip4->dst_address.as_u32;
302
303   if (proto == IP_PROTOCOL_ICMP6)
304     {
305       icmp46_header_t *icmp = ip4_next_header (ip4);
306       u16 out_id = ((u16 *) (icmp))[2];
307       proto = IP_PROTOCOL_ICMP;
308
309       if (!
310           (icmp->type == ICMP6_echo_request
311            || icmp->type == ICMP6_echo_reply))
312         return -1;
313
314       ste =
315         nat64_db_st_entry_find (db, &saddr, &daddr, out_id, 0, proto,
316                                 fib_index, 0);
317       if (!ste)
318         return -1;
319
320       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
321       if (!bibe)
322         return -1;
323
324       ip6->dst_address.as_u64[0] = ste->in_r_addr.as_u64[0];
325       ip6->dst_address.as_u64[1] = ste->in_r_addr.as_u64[1];
326       ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
327       ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
328       ((u16 *) (icmp))[2] = bibe->in_port;
329
330       vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
331     }
332   else
333     {
334       udp_header_t *udp = ip4_next_header (ip4);
335       tcp_header_t *tcp = ip4_next_header (ip4);
336       u16 dport = udp->dst_port;
337       u16 sport = udp->src_port;
338       u16 *checksum;
339       ip_csum_t csum;
340
341       ste =
342         nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
343                                 fib_index, 0);
344       if (!ste)
345         return -1;
346
347       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
348       if (!bibe)
349         return -1;
350
351       nat64_compose_ip6 (&ip6->dst_address, &daddr.ip4, bibe->fib_index);
352       ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0];
353       ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1];
354       udp->src_port = bibe->in_port;
355
356       if (proto == IP_PROTOCOL_UDP)
357         checksum = &udp->checksum;
358       else
359         checksum = &tcp->checksum;
360       if (*checksum)
361         {
362           csum = ip_csum_sub_even (*checksum, sport);
363           csum = ip_csum_add_even (csum, udp->src_port);
364           *checksum = ip_csum_fold (csum);
365         }
366
367       vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
368     }
369
370   return 0;
371 }
372
373 static int
374 nat64_out2in_unk_proto_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
375                                void *arg)
376 {
377   nat64_main_t *nm = &nat64_main;
378   nat64_out2in_set_ctx_t *ctx = arg;
379   nat64_db_bib_entry_t *bibe;
380   nat64_db_st_entry_t *ste;
381   ip46_address_t saddr, daddr;
382   ip6_address_t ip6_saddr;
383   u32 sw_if_index, fib_index;
384   u8 proto = ip4->protocol;
385   nat64_db_t *db = &nm->db[ctx->thread_index];
386
387   sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
388   fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
389
390   clib_memset (&saddr, 0, sizeof (saddr));
391   saddr.ip4.as_u32 = ip4->src_address.as_u32;
392   clib_memset (&daddr, 0, sizeof (daddr));
393   daddr.ip4.as_u32 = ip4->dst_address.as_u32;
394
395   ste =
396     nat64_db_st_entry_find (db, &daddr, &saddr, 0, 0, proto, fib_index, 0);
397   if (ste)
398     {
399       bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index);
400       if (!bibe)
401         return -1;
402     }
403   else
404     {
405       bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, fib_index, 0);
406
407       if (!bibe)
408         return -1;
409
410       nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
411       ste = nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, 0);
412
413       if (!ste)
414         return -1;
415
416       vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
417                                db->st.st_entries_num);
418     }
419
420   nat64_session_reset_timeout (ste, ctx->vm);
421
422   ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
423   ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
424
425   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
426   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
427
428   vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
429
430   return 0;
431 }
432
433 static uword
434 nat64_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
435                       vlib_frame_t * frame)
436 {
437   u32 n_left_from, *from, *to_next;
438   nat64_out2in_next_t next_index;
439   u32 pkts_processed = 0;
440   u32 thread_index = vm->thread_index;
441   u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets =
442     0, fragments = 0;
443
444   from = vlib_frame_vector_args (frame);
445   n_left_from = frame->n_vectors;
446   next_index = node->cached_next_index;
447   while (n_left_from > 0)
448     {
449       u32 n_left_to_next;
450
451       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
452
453       while (n_left_from > 0 && n_left_to_next > 0)
454         {
455           u32 bi0;
456           vlib_buffer_t *b0;
457           u32 next0;
458           ip4_header_t *ip40;
459           u32 proto0;
460           nat64_out2in_set_ctx_t ctx0;
461           udp_header_t *udp0;
462
463           /* speculatively enqueue b0 to the current next frame */
464           bi0 = from[0];
465           to_next[0] = bi0;
466           from += 1;
467           to_next += 1;
468           n_left_from -= 1;
469           n_left_to_next -= 1;
470
471           b0 = vlib_get_buffer (vm, bi0);
472           ip40 = vlib_buffer_get_current (b0);
473
474           ctx0.b = b0;
475           ctx0.vm = vm;
476           ctx0.thread_index = thread_index;
477
478           next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP;
479
480           proto0 = ip_proto_to_snat_proto (ip40->protocol);
481
482           if (PREDICT_FALSE (proto0 == ~0))
483             {
484               if (ip4_to_ip6 (b0, nat64_out2in_unk_proto_set_cb, &ctx0))
485                 {
486                   next0 = NAT64_OUT2IN_NEXT_DROP;
487                   b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
488                 }
489               other_packets++;
490               goto trace0;
491             }
492
493           if (PREDICT_FALSE (ip4_is_fragment (ip40)))
494             {
495               next0 = NAT64_OUT2IN_NEXT_REASS;
496               fragments++;
497               goto trace0;
498             }
499
500           if (proto0 == SNAT_PROTOCOL_ICMP)
501             {
502               icmp_packets++;
503               if (icmp_to_icmp6
504                   (b0, nat64_out2in_icmp_set_cb, &ctx0,
505                    nat64_out2in_inner_icmp_set_cb, &ctx0))
506                 {
507                   next0 = NAT64_OUT2IN_NEXT_DROP;
508                   b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
509                   goto trace0;
510                 }
511             }
512           else
513             {
514               if (proto0 == SNAT_PROTOCOL_TCP)
515                 tcp_packets++;
516               else
517                 udp_packets++;
518
519               if (ip4_to_ip6_tcp_udp (b0, nat64_out2in_tcp_udp_set_cb, &ctx0))
520                 {
521                   udp0 = ip4_next_header (ip40);
522                   /*
523                    * Send DHCP packets to the ipv4 stack, or we won't
524                    * be able to use dhcp client on the outside interface
525                    */
526                   if ((proto0 == SNAT_PROTOCOL_UDP)
527                       && (udp0->dst_port ==
528                           clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client)))
529                     {
530                       next0 = NAT64_OUT2IN_NEXT_IP4_LOOKUP;
531                       goto trace0;
532                     }
533                   next0 = NAT64_OUT2IN_NEXT_DROP;
534                   b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
535                   goto trace0;
536                 }
537             }
538
539         trace0:
540           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
541                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
542             {
543               nat64_out2in_trace_t *t =
544                 vlib_add_trace (vm, node, b0, sizeof (*t));
545               t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
546               t->next_index = next0;
547             }
548
549           pkts_processed += next0 == NAT64_OUT2IN_NEXT_IP6_LOOKUP;
550
551           /* verify speculative enqueue, maybe switch current next frame */
552           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
553                                            n_left_to_next, bi0, next0);
554         }
555       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
556     }
557   vlib_node_increment_counter (vm, nat64_out2in_node.index,
558                                NAT64_OUT2IN_ERROR_OUT2IN_PACKETS,
559                                pkts_processed);
560   vlib_node_increment_counter (vm, nat64_out2in_node.index,
561                                NAT64_OUT2IN_ERROR_TCP_PACKETS, tcp_packets);
562   vlib_node_increment_counter (vm, nat64_out2in_node.index,
563                                NAT64_OUT2IN_ERROR_UDP_PACKETS, tcp_packets);
564   vlib_node_increment_counter (vm, nat64_out2in_node.index,
565                                NAT64_OUT2IN_ERROR_ICMP_PACKETS, icmp_packets);
566   vlib_node_increment_counter (vm, nat64_out2in_node.index,
567                                NAT64_OUT2IN_ERROR_OTHER_PACKETS,
568                                other_packets);
569   vlib_node_increment_counter (vm, nat64_out2in_node.index,
570                                NAT64_OUT2IN_ERROR_FRAGMENTS, fragments);
571
572   return frame->n_vectors;
573 }
574
575 /* *INDENT-OFF* */
576 VLIB_REGISTER_NODE (nat64_out2in_node) = {
577   .function = nat64_out2in_node_fn,
578   .name = "nat64-out2in",
579   .vector_size = sizeof (u32),
580   .format_trace = format_nat64_out2in_trace,
581   .type = VLIB_NODE_TYPE_INTERNAL,
582   .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
583   .error_strings = nat64_out2in_error_strings,
584   .n_next_nodes = NAT64_OUT2IN_N_NEXT,
585   /* edit / add dispositions here */
586   .next_nodes = {
587     [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
588     [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
589     [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
590     [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
591   },
592 };
593 /* *INDENT-ON* */
594
595 VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_node, nat64_out2in_node_fn);
596
597 typedef struct nat64_out2in_frag_set_ctx_t_
598 {
599   vlib_main_t *vm;
600   vlib_buffer_t *b;
601   u32 sess_index;
602   u32 thread_index;
603   u8 proto;
604   u8 first_frag;
605 } nat64_out2in_frag_set_ctx_t;
606
607 static int
608 nat64_out2in_frag_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
609 {
610   nat64_main_t *nm = &nat64_main;
611   nat64_out2in_frag_set_ctx_t *ctx = arg;
612   nat64_db_st_entry_t *ste;
613   nat64_db_bib_entry_t *bibe;
614   udp_header_t *udp = ip4_next_header (ip4);
615   ip_csum_t csum;
616   u16 *checksum;
617   nat64_db_t *db = &nm->db[ctx->thread_index];
618
619   ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
620   if (!ste)
621     return -1;
622
623   bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
624   if (!bibe)
625     return -1;
626
627   if (ctx->first_frag)
628     {
629       udp->dst_port = bibe->in_port;
630
631       if (ip4->protocol == IP_PROTOCOL_UDP)
632         {
633           checksum = &udp->checksum;
634
635           if (!checksum)
636             {
637               u16 udp_len =
638                 clib_host_to_net_u16 (ip4->length) - sizeof (*ip4);
639               csum = ip_incremental_checksum (0, udp, udp_len);
640               csum =
641                 ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
642               csum =
643                 ip_csum_with_carry (csum,
644                                     clib_host_to_net_u16 (IP_PROTOCOL_UDP));
645               csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[0]);
646               csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[1]);
647               csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[0]);
648               csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[1]);
649               *checksum = ~ip_csum_fold (csum);
650             }
651           else
652             {
653               csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
654               csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
655               csum = ip_csum_sub_even (csum, bibe->out_port);
656               csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
657               csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
658               csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
659               csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
660               csum = ip_csum_add_even (csum, bibe->in_port);
661               *checksum = ip_csum_fold (csum);
662             }
663         }
664       else
665         {
666           tcp_header_t *tcp = ip4_next_header (ip4);
667           nat64_tcp_session_set_state (ste, tcp, 0);
668           checksum = &tcp->checksum;
669           csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
670           csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
671           csum = ip_csum_sub_even (csum, bibe->out_port);
672           csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
673           csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
674           csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
675           csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
676           csum = ip_csum_add_even (csum, bibe->in_port);
677           *checksum = ip_csum_fold (csum);
678         }
679
680     }
681
682   ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
683   ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
684
685   ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
686   ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
687
688   vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
689
690   nat64_session_reset_timeout (ste, ctx->vm);
691
692   return 0;
693 }
694
695 static uword
696 nat64_out2in_reass_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
697                             vlib_frame_t * frame)
698 {
699   u32 n_left_from, *from, *to_next;
700   nat64_out2in_next_t next_index;
701   u32 pkts_processed = 0, cached_fragments = 0;
702   u32 *fragments_to_drop = 0;
703   u32 *fragments_to_loopback = 0;
704   nat64_main_t *nm = &nat64_main;
705   u32 thread_index = vm->thread_index;
706
707   from = vlib_frame_vector_args (frame);
708   n_left_from = frame->n_vectors;
709   next_index = node->cached_next_index;
710
711   while (n_left_from > 0)
712     {
713       u32 n_left_to_next;
714
715       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
716
717       while (n_left_from > 0 && n_left_to_next > 0)
718         {
719           u32 bi0;
720           vlib_buffer_t *b0;
721           u32 next0;
722           ip4_header_t *ip40;
723           u8 cached0 = 0;
724           u32 sw_if_index0, fib_index0;
725           udp_header_t *udp0;
726           nat_reass_ip4_t *reass0;
727           ip46_address_t saddr0, daddr0;
728           nat64_db_st_entry_t *ste0;
729           nat64_db_bib_entry_t *bibe0;
730           ip6_address_t ip6_saddr0;
731           nat64_out2in_frag_set_ctx_t ctx0;
732           nat64_db_t *db = &nm->db[thread_index];
733
734           /* speculatively enqueue b0 to the current next frame */
735           bi0 = from[0];
736           to_next[0] = bi0;
737           from += 1;
738           to_next += 1;
739           n_left_from -= 1;
740           n_left_to_next -= 1;
741
742           b0 = vlib_get_buffer (vm, bi0);
743           next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP;
744
745           sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
746           fib_index0 =
747             fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
748                                                  sw_if_index0);
749
750           ctx0.thread_index = thread_index;
751
752           if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
753             {
754               next0 = NAT64_OUT2IN_NEXT_DROP;
755               b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
756               goto trace0;
757             }
758
759           ip40 = vlib_buffer_get_current (b0);
760
761           if (PREDICT_FALSE (!(ip40->protocol == IP_PROTOCOL_TCP
762                                || ip40->protocol == IP_PROTOCOL_UDP)))
763             {
764               next0 = NAT64_OUT2IN_NEXT_DROP;
765               b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
766               goto trace0;
767             }
768
769           udp0 = ip4_next_header (ip40);
770
771           reass0 = nat_ip4_reass_find_or_create (ip40->src_address,
772                                                  ip40->dst_address,
773                                                  ip40->fragment_id,
774                                                  ip40->protocol,
775                                                  1, &fragments_to_drop);
776
777           if (PREDICT_FALSE (!reass0))
778             {
779               next0 = NAT64_OUT2IN_NEXT_DROP;
780               b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_REASS];
781               goto trace0;
782             }
783
784           if (PREDICT_FALSE (ip4_is_first_fragment (ip40)))
785             {
786               ctx0.first_frag = 1;
787
788               clib_memset (&saddr0, 0, sizeof (saddr0));
789               saddr0.ip4.as_u32 = ip40->src_address.as_u32;
790               clib_memset (&daddr0, 0, sizeof (daddr0));
791               daddr0.ip4.as_u32 = ip40->dst_address.as_u32;
792
793               ste0 =
794                 nat64_db_st_entry_find (db, &daddr0, &saddr0,
795                                         udp0->dst_port, udp0->src_port,
796                                         ip40->protocol, fib_index0, 0);
797               if (!ste0)
798                 {
799                   bibe0 =
800                     nat64_db_bib_entry_find (db, &daddr0, udp0->dst_port,
801                                              ip40->protocol, fib_index0, 0);
802                   if (!bibe0)
803                     {
804                       next0 = NAT64_OUT2IN_NEXT_DROP;
805                       b0->error =
806                         node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
807                       goto trace0;
808                     }
809
810                   nat64_compose_ip6 (&ip6_saddr0, &ip40->src_address,
811                                      bibe0->fib_index);
812                   ste0 =
813                     nat64_db_st_entry_create (db, bibe0, &ip6_saddr0,
814                                               &saddr0.ip4, udp0->src_port);
815
816                   if (!ste0)
817                     {
818                       next0 = NAT64_OUT2IN_NEXT_DROP;
819                       b0->error =
820                         node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
821                       goto trace0;
822                     }
823
824                   vlib_set_simple_counter (&nm->total_sessions, thread_index,
825                                            0, db->st.st_entries_num);
826                 }
827               reass0->sess_index = nat64_db_st_entry_get_index (db, ste0);
828               reass0->thread_index = thread_index;
829
830               nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
831             }
832           else
833             {
834               ctx0.first_frag = 0;
835
836               if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
837                 {
838                   if (nat_ip4_reass_add_fragment
839                       (reass0, bi0, &fragments_to_drop))
840                     {
841                       b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_FRAG];
842                       next0 = NAT64_OUT2IN_NEXT_DROP;
843                       goto trace0;
844                     }
845                   cached0 = 1;
846                   goto trace0;
847                 }
848             }
849
850           ctx0.sess_index = reass0->sess_index;
851           ctx0.proto = ip40->protocol;
852           ctx0.vm = vm;
853           ctx0.b = b0;
854
855           if (ip4_to_ip6_fragmented (b0, nat64_out2in_frag_set_cb, &ctx0))
856             {
857               next0 = NAT64_OUT2IN_NEXT_DROP;
858               b0->error = node->errors[NAT64_OUT2IN_ERROR_UNKNOWN];
859               goto trace0;
860             }
861
862         trace0:
863           if (PREDICT_FALSE
864               ((node->flags & VLIB_NODE_FLAG_TRACE)
865                && (b0->flags & VLIB_BUFFER_IS_TRACED)))
866             {
867               nat64_out2in_reass_trace_t *t =
868                 vlib_add_trace (vm, node, b0, sizeof (*t));
869               t->cached = cached0;
870               t->sw_if_index = sw_if_index0;
871               t->next_index = next0;
872             }
873
874           if (cached0)
875             {
876               n_left_to_next++;
877               to_next--;
878               cached_fragments++;
879             }
880           else
881             {
882               pkts_processed += next0 != NAT64_OUT2IN_NEXT_DROP;
883
884               /* verify speculative enqueue, maybe switch current next frame */
885               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
886                                                to_next, n_left_to_next,
887                                                bi0, next0);
888             }
889
890           if (n_left_from == 0 && vec_len (fragments_to_loopback))
891             {
892               from = vlib_frame_vector_args (frame);
893               u32 len = vec_len (fragments_to_loopback);
894               if (len <= VLIB_FRAME_SIZE)
895                 {
896                   clib_memcpy_fast (from, fragments_to_loopback,
897                                     sizeof (u32) * len);
898                   n_left_from = len;
899                   vec_reset_length (fragments_to_loopback);
900                 }
901               else
902                 {
903                   clib_memcpy_fast (from, fragments_to_loopback +
904                                     (len - VLIB_FRAME_SIZE),
905                                     sizeof (u32) * VLIB_FRAME_SIZE);
906                   n_left_from = VLIB_FRAME_SIZE;
907                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
908                 }
909             }
910         }
911
912       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
913     }
914
915   vlib_node_increment_counter (vm, nat64_out2in_reass_node.index,
916                                NAT64_OUT2IN_ERROR_PROCESSED_FRAGMENTS,
917                                pkts_processed);
918   vlib_node_increment_counter (vm, nat64_out2in_reass_node.index,
919                                NAT64_OUT2IN_ERROR_CACHED_FRAGMENTS,
920                                cached_fragments);
921
922   nat_send_all_to_node (vm, fragments_to_drop, node,
923                         &node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT],
924                         NAT64_OUT2IN_NEXT_DROP);
925
926   vec_free (fragments_to_drop);
927   vec_free (fragments_to_loopback);
928   return frame->n_vectors;
929 }
930
931 /* *INDENT-OFF* */
932 VLIB_REGISTER_NODE (nat64_out2in_reass_node) = {
933   .function = nat64_out2in_reass_node_fn,
934   .name = "nat64-out2in-reass",
935   .vector_size = sizeof (u32),
936   .format_trace = format_nat64_out2in_reass_trace,
937   .type = VLIB_NODE_TYPE_INTERNAL,
938   .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
939   .error_strings = nat64_out2in_error_strings,
940   .n_next_nodes = NAT64_OUT2IN_N_NEXT,
941   /* edit / add dispositions here */
942   .next_nodes = {
943     [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
944     [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
945     [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
946     [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
947   },
948 };
949 /* *INDENT-ON* */
950
951 VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_reass_node,
952                               nat64_out2in_reass_node_fn);
953
/*
 * X-macro list of the handoff node counters: _(SYMBOL, "description").
 * The entry order is significant: it fixes both the enum values and the
 * positions in the description string table generated from this list.
 */
#define foreach_nat64_out2in_handoff_error      \
  _(CONGESTION_DROP, "congestion drop")         \
  _(SAME_WORKER, "same worker")                 \
  _(DO_HANDOFF, "do handoff")
958
959 typedef enum
960 {
961 #define _(sym,str) NAT64_OUT2IN_HANDOFF_ERROR_##sym,
962   foreach_nat64_out2in_handoff_error
963 #undef _
964     NAT64_OUT2IN_HANDOFF_N_ERROR,
965 } nat64_out2in_handoff_error_t;
966
967 static char *nat64_out2in_handoff_error_strings[] = {
968 #define _(sym,string) string,
969   foreach_nat64_out2in_handoff_error
970 #undef _
971 };
972
973 typedef struct
974 {
975   u32 next_worker_index;
976 } nat64_out2in_handoff_trace_t;
977
978 static u8 *
979 format_nat64_out2in_handoff_trace (u8 * s, va_list * args)
980 {
981   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
982   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
983   nat64_out2in_handoff_trace_t *t =
984     va_arg (*args, nat64_out2in_handoff_trace_t *);
985
986   s =
987     format (s, "NAT64-OUT2IN-HANDOFF: next-worker %d", t->next_worker_index);
988
989   return s;
990 }
991
992 static inline uword
993 nat64_out2in_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
994                               vlib_frame_t * frame)
995 {
996   nat64_main_t *nm = &nat64_main;
997   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
998   u32 n_enq, n_left_from, *from;
999   u16 thread_indices[VLIB_FRAME_SIZE], *ti;
1000   u32 fq_index;
1001   u32 thread_index = vm->thread_index;
1002   u32 do_handoff = 0, same_worker = 0;
1003
1004   from = vlib_frame_vector_args (frame);
1005   n_left_from = frame->n_vectors;
1006   vlib_get_buffers (vm, from, bufs, n_left_from);
1007
1008   b = bufs;
1009   ti = thread_indices;
1010
1011   fq_index = nm->fq_out2in_index;
1012
1013   while (n_left_from > 0)
1014     {
1015       ip4_header_t *ip0;
1016
1017       ip0 = vlib_buffer_get_current (b[0]);
1018       ti[0] = nat64_get_worker_out2in (ip0);
1019
1020       if (ti[0] != thread_index)
1021         do_handoff++;
1022       else
1023         same_worker++;
1024
1025       if (PREDICT_FALSE
1026           ((node->flags & VLIB_NODE_FLAG_TRACE)
1027            && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
1028         {
1029           nat64_out2in_handoff_trace_t *t =
1030             vlib_add_trace (vm, node, b[0], sizeof (*t));
1031           t->next_worker_index = ti[0];
1032         }
1033
1034       n_left_from -= 1;
1035       ti += 1;
1036       b += 1;
1037     }
1038
1039   n_enq =
1040     vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
1041                                    frame->n_vectors, 1);
1042
1043   if (n_enq < frame->n_vectors)
1044     vlib_node_increment_counter (vm, node->node_index,
1045                                  NAT64_OUT2IN_HANDOFF_ERROR_CONGESTION_DROP,
1046                                  frame->n_vectors - n_enq);
1047   vlib_node_increment_counter (vm, node->node_index,
1048                                NAT64_OUT2IN_HANDOFF_ERROR_SAME_WORKER,
1049                                same_worker);
1050   vlib_node_increment_counter (vm, node->node_index,
1051                                NAT64_OUT2IN_HANDOFF_ERROR_DO_HANDOFF,
1052                                do_handoff);
1053
1054   return frame->n_vectors;
1055 }
1056
1057 /* *INDENT-OFF* */
1058 VLIB_REGISTER_NODE (nat64_out2in_handoff_node) = {
1059   .function = nat64_out2in_handoff_node_fn,
1060   .name = "nat64-out2in-handoff",
1061   .vector_size = sizeof (u32),
1062   .format_trace = format_nat64_out2in_handoff_trace,
1063   .type = VLIB_NODE_TYPE_INTERNAL,
1064   .n_errors = ARRAY_LEN(nat64_out2in_handoff_error_strings),
1065   .error_strings = nat64_out2in_handoff_error_strings,
1066
1067   .n_next_nodes = 1,
1068
1069   .next_nodes = {
1070     [0] = "error-drop",
1071   },
1072 };
1073 /* *INDENT-ON* */
1074
1075 VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_handoff_node,
1076                               nat64_out2in_handoff_node_fn);
1077 /*
1078  * fd.io coding-style-patch-verification: ON
1079  *
1080  * Local Variables:
1081  * eval: (c-set-style "gnu")
1082  * End:
1083  */