NAT: total users and sessions gauges (VPP-1484)
[vpp.git] / src / plugins / nat / nat64.c
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT64 implementation
18  */
19
20 #include <nat/nat64.h>
21 #include <nat/nat64_db.h>
22 #include <nat/nat_reass.h>
23 #include <nat/nat_inlines.h>
24 #include <vnet/fib/ip4_fib.h>
25 #include <vppinfra/crc32.h>
26
27
/* Global NAT64 state; the functions in this file access it via &nat64_main. */
nat64_main_t nat64_main;
29
30 /* *INDENT-OFF* */
31
32 /* Hook up input features */
33 VNET_FEATURE_INIT (nat64_in2out, static) = {
34   .arc_name = "ip6-unicast",
35   .node_name = "nat64-in2out",
36   .runs_before = VNET_FEATURES ("ip6-lookup"),
37 };
38 VNET_FEATURE_INIT (nat64_out2in, static) = {
39   .arc_name = "ip4-unicast",
40   .node_name = "nat64-out2in",
41   .runs_before = VNET_FEATURES ("ip4-lookup"),
42 };
43 VNET_FEATURE_INIT (nat64_in2out_handoff, static) = {
44   .arc_name = "ip6-unicast",
45   .node_name = "nat64-in2out-handoff",
46   .runs_before = VNET_FEATURES ("ip6-lookup"),
47 };
48 VNET_FEATURE_INIT (nat64_out2in_handoff, static) = {
49   .arc_name = "ip4-unicast",
50   .node_name = "nat64-out2in-handoff",
51   .runs_before = VNET_FEATURES ("ip4-lookup"),
52 };
53
54
55 static u8 well_known_prefix[] = {
56   0x00, 0x64, 0xff, 0x9b,
57   0x00, 0x00, 0x00, 0x00,
58   0x00, 0x00, 0x00, 0x00,
59   0x00, 0x00, 0x00, 0x00
60 };
61
62 /* *INDENT-ON* */
63
64 static void
65 nat64_ip4_add_del_interface_address_cb (ip4_main_t * im, uword opaque,
66                                         u32 sw_if_index,
67                                         ip4_address_t * address,
68                                         u32 address_length,
69                                         u32 if_address_index, u32 is_delete)
70 {
71   nat64_main_t *nm = &nat64_main;
72   int i, j;
73
74   for (i = 0; i < vec_len (nm->auto_add_sw_if_indices); i++)
75     {
76       if (sw_if_index == nm->auto_add_sw_if_indices[i])
77         {
78           if (!is_delete)
79             {
80               /* Don't trip over lease renewal, static config */
81               for (j = 0; j < vec_len (nm->addr_pool); j++)
82                 if (nm->addr_pool[j].addr.as_u32 == address->as_u32)
83                   return;
84
85               (void) nat64_add_del_pool_addr (address, ~0, 1);
86               return;
87             }
88           else
89             {
90               (void) nat64_add_del_pool_addr (address, ~0, 0);
91               return;
92             }
93         }
94     }
95 }
96
97 u32
98 nat64_get_worker_in2out (ip6_address_t * addr)
99 {
100   nat64_main_t *nm = &nat64_main;
101   snat_main_t *sm = nm->sm;
102   u32 next_worker_index = nm->sm->first_worker_index;
103   u32 hash;
104
105 #ifdef clib_crc32c_uses_intrinsics
106   hash = clib_crc32c ((u8 *) addr->as_u32, 16);
107 #else
108   u64 tmp = addr->as_u64[0] ^ addr->as_u64[1];
109   hash = clib_xxhash (tmp);
110 #endif
111
112   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
113     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
114   else
115     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
116
117   return next_worker_index;
118 }
119
120 u32
121 nat64_get_worker_out2in (ip4_header_t * ip)
122 {
123   nat64_main_t *nm = &nat64_main;
124   snat_main_t *sm = nm->sm;
125   udp_header_t *udp;
126   u16 port;
127   u32 proto;
128
129   proto = ip_proto_to_snat_proto (ip->protocol);
130   udp = ip4_next_header (ip);
131   port = udp->dst_port;
132
133   /* fragments */
134   if (PREDICT_FALSE (ip4_is_fragment (ip)))
135     {
136       if (PREDICT_FALSE (nat_reass_is_drop_frag (0)))
137         return vlib_get_thread_index ();
138
139       if (PREDICT_TRUE (!ip4_is_first_fragment (ip)))
140         {
141           nat_reass_ip4_t *reass;
142
143           reass = nat_ip4_reass_find (ip->src_address, ip->dst_address,
144                                       ip->fragment_id, ip->protocol);
145
146           if (reass && (reass->thread_index != (u32) ~ 0))
147             return reass->thread_index;
148           else
149             return vlib_get_thread_index ();
150         }
151     }
152
153   /* unknown protocol */
154   if (PREDICT_FALSE (proto == ~0))
155     {
156       nat64_db_t *db;
157       ip46_address_t daddr;
158       nat64_db_bib_entry_t *bibe;
159
160       clib_memset (&daddr, 0, sizeof (daddr));
161       daddr.ip4.as_u32 = ip->dst_address.as_u32;
162
163       /* *INDENT-OFF* */
164       vec_foreach (db, nm->db)
165         {
166           bibe = nat64_db_bib_entry_find (db, &daddr, 0, ip->protocol, 0, 0);
167           if (bibe)
168             return (u32) (db - nm->db);
169         }
170       /* *INDENT-ON* */
171       return vlib_get_thread_index ();
172     }
173
174   /* ICMP */
175   if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
176     {
177       icmp46_header_t *icmp = (icmp46_header_t *) udp;
178       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
179       if (!icmp_is_error_message (icmp))
180         port = echo->identifier;
181       else
182         {
183           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
184           proto = ip_proto_to_snat_proto (inner_ip->protocol);
185           void *l4_header = ip4_next_header (inner_ip);
186           switch (proto)
187             {
188             case SNAT_PROTOCOL_ICMP:
189               icmp = (icmp46_header_t *) l4_header;
190               echo = (icmp_echo_header_t *) (icmp + 1);
191               port = echo->identifier;
192               break;
193             case SNAT_PROTOCOL_UDP:
194             case SNAT_PROTOCOL_TCP:
195               port = ((tcp_udp_header_t *) l4_header)->src_port;
196               break;
197             default:
198               return vlib_get_thread_index ();
199             }
200         }
201     }
202
203   /* worker by outside port  (TCP/UDP) */
204   port = clib_net_to_host_u16 (port);
205   if (port > 1024)
206     return nm->sm->first_worker_index + ((port - 1024) / sm->port_per_thread);
207
208   return vlib_get_thread_index ();
209 }
210
211 clib_error_t *
212 nat64_init (vlib_main_t * vm)
213 {
214   nat64_main_t *nm = &nat64_main;
215   vlib_thread_main_t *tm = vlib_get_thread_main ();
216   ip4_add_del_interface_address_callback_t cb4;
217   ip4_main_t *im = &ip4_main;
218   vlib_node_t *error_drop_node =
219     vlib_get_node_by_name (vm, (u8 *) "error-drop");
220
221   vec_validate (nm->db, tm->n_vlib_mains - 1);
222
223   nm->sm = &snat_main;
224
225   nm->fq_in2out_index = ~0;
226   nm->fq_out2in_index = ~0;
227   nm->error_node_index = error_drop_node->index;
228
229   /* set session timeouts to default values */
230   nm->udp_timeout = SNAT_UDP_TIMEOUT;
231   nm->icmp_timeout = SNAT_ICMP_TIMEOUT;
232   nm->tcp_trans_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
233   nm->tcp_est_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;
234
235   nm->total_enabled_count = 0;
236
237   /* Set up the interface address add/del callback */
238   cb4.function = nat64_ip4_add_del_interface_address_cb;
239   cb4.function_opaque = 0;
240   vec_add1 (im->add_del_interface_address_callbacks, cb4);
241   nm->ip4_main = im;
242
243   /* Init counters */
244   nm->total_bibs.name = "total-bibs";
245   nm->total_bibs.stat_segment_name = "/nat64/total-bibs";
246   vlib_validate_simple_counter (&nm->total_bibs, 0);
247   vlib_zero_simple_counter (&nm->total_bibs, 0);
248   nm->total_sessions.name = "total-sessions";
249   nm->total_sessions.stat_segment_name = "/nat64/total-sessions";
250   vlib_validate_simple_counter (&nm->total_sessions, 0);
251   vlib_zero_simple_counter (&nm->total_sessions, 0);
252
253   return 0;
254 }
255
256 static void nat64_free_out_addr_and_port (struct nat64_db_s *db,
257                                           ip4_address_t * addr, u16 port,
258                                           u8 protocol);
259
260 void
261 nat64_set_hash (u32 bib_buckets, u32 bib_memory_size, u32 st_buckets,
262                 u32 st_memory_size)
263 {
264   nat64_main_t *nm = &nat64_main;
265   nat64_db_t *db;
266
267   nm->bib_buckets = bib_buckets;
268   nm->bib_memory_size = bib_memory_size;
269   nm->st_buckets = st_buckets;
270   nm->st_memory_size = st_memory_size;
271
272   /* *INDENT-OFF* */
273   vec_foreach (db, nm->db)
274     {
275       if (nat64_db_init (db, bib_buckets, bib_memory_size, st_buckets,
276                          st_memory_size, nat64_free_out_addr_and_port))
277         nat_log_err ("NAT64 DB init failed");
278     }
279   /* *INDENT-ON* */
280 }
281
282 int
283 nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add)
284 {
285   nat64_main_t *nm = &nat64_main;
286   snat_address_t *a = 0;
287   snat_interface_t *interface;
288   int i;
289   nat64_db_t *db;
290   vlib_thread_main_t *tm = vlib_get_thread_main ();
291
292   /* Check if address already exists */
293   for (i = 0; i < vec_len (nm->addr_pool); i++)
294     {
295       if (nm->addr_pool[i].addr.as_u32 == addr->as_u32)
296         {
297           a = nm->addr_pool + i;
298           break;
299         }
300     }
301
302   if (is_add)
303     {
304       if (a)
305         return VNET_API_ERROR_VALUE_EXIST;
306
307       vec_add2 (nm->addr_pool, a, 1);
308       a->addr = *addr;
309       a->fib_index = ~0;
310       if (vrf_id != ~0)
311         a->fib_index =
312           fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
313                                              FIB_SOURCE_PLUGIN_HI);
314 #define _(N, id, n, s) \
315       clib_bitmap_alloc (a->busy_##n##_port_bitmap, 65535); \
316       a->busy_##n##_ports = 0; \
317       vec_validate_init_empty (a->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
318       foreach_snat_protocol
319 #undef _
320     }
321   else
322     {
323       if (!a)
324         return VNET_API_ERROR_NO_SUCH_ENTRY;
325
326       if (a->fib_index != ~0)
327         fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6,
328                           FIB_SOURCE_PLUGIN_HI);
329       /* Delete sessions using address */
330         /* *INDENT-OFF* */
331         vec_foreach (db, nm->db)
332           {
333             nat64_db_free_out_addr (db, &a->addr);
334             vlib_set_simple_counter (&nm->total_bibs, db - nm->db, 0,
335                                      db->bib.bib_entries_num);
336             vlib_set_simple_counter (&nm->total_sessions, db - nm->db, 0,
337                                      db->st.st_entries_num);
338           }
339 #define _(N, id, n, s) \
340       clib_bitmap_free (a->busy_##n##_port_bitmap);
341       foreach_snat_protocol
342 #undef _
343         /* *INDENT-ON* */
344       vec_del1 (nm->addr_pool, i);
345     }
346
347   /* Add/del external address to FIB */
348   /* *INDENT-OFF* */
349   pool_foreach (interface, nm->interfaces,
350   ({
351     if (nat_interface_is_inside(interface))
352       continue;
353
354     snat_add_del_addr_to_fib (addr, 32, interface->sw_if_index, is_add);
355     break;
356   }));
357   /* *INDENT-ON* */
358
359   return 0;
360 }
361
362 void
363 nat64_pool_addr_walk (nat64_pool_addr_walk_fn_t fn, void *ctx)
364 {
365   nat64_main_t *nm = &nat64_main;
366   snat_address_t *a = 0;
367
368   /* *INDENT-OFF* */
369   vec_foreach (a, nm->addr_pool)
370     {
371       if (fn (a, ctx))
372         break;
373     };
374   /* *INDENT-ON* */
375 }
376
377 int
378 nat64_add_interface_address (u32 sw_if_index, int is_add)
379 {
380   nat64_main_t *nm = &nat64_main;
381   ip4_main_t *ip4_main = nm->ip4_main;
382   ip4_address_t *first_int_addr;
383   int i;
384
385   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);
386
387   for (i = 0; i < vec_len (nm->auto_add_sw_if_indices); i++)
388     {
389       if (nm->auto_add_sw_if_indices[i] == sw_if_index)
390         {
391           if (is_add)
392             return VNET_API_ERROR_VALUE_EXIST;
393           else
394             {
395               /* if have address remove it */
396               if (first_int_addr)
397                 (void) nat64_add_del_pool_addr (first_int_addr, ~0, 0);
398
399               vec_del1 (nm->auto_add_sw_if_indices, i);
400               return 0;
401             }
402         }
403     }
404
405   if (!is_add)
406     return VNET_API_ERROR_NO_SUCH_ENTRY;
407
408   /* add to the auto-address list */
409   vec_add1 (nm->auto_add_sw_if_indices, sw_if_index);
410
411   /* If the address is already bound - or static - add it now */
412   if (first_int_addr)
413     (void) nat64_add_del_pool_addr (first_int_addr, ~0, 1);
414
415   return 0;
416 }
417
418 int
419 nat64_add_del_interface (u32 sw_if_index, u8 is_inside, u8 is_add)
420 {
421   nat64_main_t *nm = &nat64_main;
422   snat_interface_t *interface = 0, *i;
423   snat_address_t *ap;
424   const char *feature_name, *arc_name;
425
426   /* Check if interface already exists */
427   /* *INDENT-OFF* */
428   pool_foreach (i, nm->interfaces,
429   ({
430     if (i->sw_if_index == sw_if_index)
431       {
432         interface = i;
433         break;
434       }
435   }));
436   /* *INDENT-ON* */
437
438   if (is_add)
439     {
440       if (interface)
441         goto set_flags;
442
443       pool_get (nm->interfaces, interface);
444       interface->sw_if_index = sw_if_index;
445       interface->flags = 0;
446     set_flags:
447       if (is_inside)
448         interface->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
449       else
450         interface->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
451
452       nm->total_enabled_count++;
453       vlib_process_signal_event (nm->sm->vlib_main,
454                                  nm->nat64_expire_walk_node_index,
455                                  NAT64_CLEANER_RESCHEDULE, 0);
456
457     }
458   else
459     {
460       if (!interface)
461         return VNET_API_ERROR_NO_SUCH_ENTRY;
462
463       if ((nat_interface_is_inside (interface)
464            && nat_interface_is_outside (interface)))
465         interface->flags &=
466           is_inside ? ~NAT_INTERFACE_FLAG_IS_INSIDE :
467           ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
468       else
469         pool_put (nm->interfaces, interface);
470
471       nm->total_enabled_count--;
472     }
473
474   if (!is_inside)
475     {
476       /* *INDENT-OFF* */
477       vec_foreach (ap, nm->addr_pool)
478         snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, is_add);
479       /* *INDENT-ON* */
480     }
481
482   if (nm->sm->num_workers > 1)
483     {
484       feature_name =
485         is_inside ? "nat64-in2out-handoff" : "nat64-out2in-handoff";
486       if (nm->fq_in2out_index == ~0)
487         nm->fq_in2out_index =
488           vlib_frame_queue_main_init (nat64_in2out_node.index, 0);
489       if (nm->fq_out2in_index == ~0)
490         nm->fq_out2in_index =
491           vlib_frame_queue_main_init (nat64_out2in_node.index, 0);
492     }
493   else
494     feature_name = is_inside ? "nat64-in2out" : "nat64-out2in";
495
496   arc_name = is_inside ? "ip6-unicast" : "ip4-unicast";
497
498   return vnet_feature_enable_disable (arc_name, feature_name, sw_if_index,
499                                       is_add, 0, 0);
500 }
501
502 void
503 nat64_interfaces_walk (nat64_interface_walk_fn_t fn, void *ctx)
504 {
505   nat64_main_t *nm = &nat64_main;
506   snat_interface_t *i = 0;
507
508   /* *INDENT-OFF* */
509   pool_foreach (i, nm->interfaces,
510   ({
511     if (fn (i, ctx))
512       break;
513   }));
514   /* *INDENT-ON* */
515 }
516
517 int
518 nat64_alloc_out_addr_and_port (u32 fib_index, snat_protocol_t proto,
519                                ip4_address_t * addr, u16 * port,
520                                u32 thread_index)
521 {
522   nat64_main_t *nm = &nat64_main;
523   snat_main_t *sm = nm->sm;
524   snat_session_key_t k;
525   u32 worker_index = 0;
526   int rv;
527
528   k.protocol = proto;
529
530   if (sm->num_workers > 1)
531     worker_index = thread_index - sm->first_worker_index;
532
533   rv =
534     sm->alloc_addr_and_port (nm->addr_pool, fib_index, thread_index, &k,
535                              sm->port_per_thread, worker_index);
536
537   if (!rv)
538     {
539       *port = k.port;
540       addr->as_u32 = k.addr.as_u32;
541     }
542
543   return rv;
544 }
545
546 static void
547 nat64_free_out_addr_and_port (struct nat64_db_s *db, ip4_address_t * addr,
548                               u16 port, u8 protocol)
549 {
550   nat64_main_t *nm = &nat64_main;
551   int i;
552   snat_address_t *a;
553   u32 thread_index = db - nm->db;
554   snat_protocol_t proto = ip_proto_to_snat_proto (protocol);
555   u16 port_host_byte_order = clib_net_to_host_u16 (port);
556
557   for (i = 0; i < vec_len (nm->addr_pool); i++)
558     {
559       a = nm->addr_pool + i;
560       if (addr->as_u32 != a->addr.as_u32)
561         continue;
562       switch (proto)
563         {
564 #define _(N, j, n, s) \
565         case SNAT_PROTOCOL_##N: \
566           ASSERT (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \
567                   port_host_byte_order) == 1); \
568           clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, port, 0); \
569           a->busy_##n##_ports--; \
570           a->busy_##n##_ports_per_thread[thread_index]--; \
571           break;
572           foreach_snat_protocol
573 #undef _
574         default:
575           nat_log_notice ("unknown protocol");
576           return;
577         }
578       break;
579     }
580 }
581
582 /**
583  * @brief Add/delete static BIB entry in worker thread.
584  */
585 static uword
586 nat64_static_bib_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
587                             vlib_frame_t * f)
588 {
589   nat64_main_t *nm = &nat64_main;
590   u32 thread_index = vm->thread_index;
591   nat64_db_t *db = &nm->db[thread_index];
592   nat64_static_bib_to_update_t *static_bib;
593   nat64_db_bib_entry_t *bibe;
594   ip46_address_t addr;
595
596   /* *INDENT-OFF* */
597   pool_foreach (static_bib, nm->static_bibs,
598   ({
599     if ((static_bib->thread_index != thread_index) || (static_bib->done))
600       continue;
601
602     if (static_bib->is_add)
603       {
604           (void) nat64_db_bib_entry_create (db, &static_bib->in_addr,
605                                             &static_bib->out_addr,
606                                             static_bib->in_port,
607                                             static_bib->out_port,
608                                             static_bib->fib_index,
609                                             static_bib->proto, 1);
610           vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
611                                    db->bib.bib_entries_num);
612       }
613     else
614       {
615         addr.as_u64[0] = static_bib->in_addr.as_u64[0];
616         addr.as_u64[1] = static_bib->in_addr.as_u64[1];
617         bibe = nat64_db_bib_entry_find (db, &addr, static_bib->in_port,
618                                         static_bib->proto,
619                                         static_bib->fib_index, 1);
620         if (bibe)
621           {
622             nat64_db_bib_entry_free (db, bibe);
623             vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
624                                      db->bib.bib_entries_num);
625             vlib_set_simple_counter (&nm->total_sessions, thread_index, 0,
626                                      db->st.st_entries_num);
627           }
628       }
629
630       static_bib->done = 1;
631   }));
632   /* *INDENT-ON* */
633
634   return 0;
635 }
636
637 static vlib_node_registration_t nat64_static_bib_worker_node;
638
639 /* *INDENT-OFF* */
640 VLIB_REGISTER_NODE (nat64_static_bib_worker_node, static) = {
641     .function = nat64_static_bib_worker_fn,
642     .type = VLIB_NODE_TYPE_INPUT,
643     .state = VLIB_NODE_STATE_INTERRUPT,
644     .name = "nat64-static-bib-worker",
645 };
646 /* *INDENT-ON* */
647
648 int
649 nat64_add_del_static_bib_entry (ip6_address_t * in_addr,
650                                 ip4_address_t * out_addr, u16 in_port,
651                                 u16 out_port, u8 proto, u32 vrf_id, u8 is_add)
652 {
653   nat64_main_t *nm = &nat64_main;
654   nat64_db_bib_entry_t *bibe;
655   u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
656                                                      FIB_SOURCE_PLUGIN_HI);
657   snat_protocol_t p = ip_proto_to_snat_proto (proto);
658   ip46_address_t addr;
659   int i;
660   snat_address_t *a;
661   u32 thread_index = 0;
662   nat64_db_t *db;
663   nat64_static_bib_to_update_t *static_bib;
664   vlib_main_t *worker_vm;
665   u32 *to_be_free = 0, *index;
666
667   if (nm->sm->num_workers > 1)
668     {
669       thread_index = nat64_get_worker_in2out (in_addr);
670       db = &nm->db[thread_index];
671     }
672   else
673     db = &nm->db[nm->sm->num_workers];
674
675   addr.as_u64[0] = in_addr->as_u64[0];
676   addr.as_u64[1] = in_addr->as_u64[1];
677   bibe =
678     nat64_db_bib_entry_find (db, &addr, clib_host_to_net_u16 (in_port),
679                              proto, fib_index, 1);
680
681   if (is_add)
682     {
683       if (bibe)
684         return VNET_API_ERROR_VALUE_EXIST;
685
686       /* outside port must be assigned to same thread as internall address */
687       if ((out_port > 1024) && (nm->sm->num_workers > 1))
688         {
689           if (thread_index != ((out_port - 1024) / nm->sm->port_per_thread))
690             return VNET_API_ERROR_INVALID_VALUE_2;
691         }
692
693       for (i = 0; i < vec_len (nm->addr_pool); i++)
694         {
695           a = nm->addr_pool + i;
696           if (out_addr->as_u32 != a->addr.as_u32)
697             continue;
698           switch (p)
699             {
700 #define _(N, j, n, s) \
701             case SNAT_PROTOCOL_##N: \
702               if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \
703                                             out_port)) \
704                 return VNET_API_ERROR_INVALID_VALUE; \
705               clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \
706                                         out_port, 1); \
707               if (out_port > 1024) \
708                 { \
709                   a->busy_##n##_ports++; \
710                   a->busy_##n##_ports_per_thread[thread_index]++; \
711                 } \
712               break;
713               foreach_snat_protocol
714 #undef _
715             default:
716               clib_memset (&addr, 0, sizeof (addr));
717               addr.ip4.as_u32 = out_addr->as_u32;
718               if (nat64_db_bib_entry_find (db, &addr, 0, proto, fib_index, 0))
719                 return VNET_API_ERROR_INVALID_VALUE;
720             }
721           break;
722         }
723       if (!nm->sm->num_workers)
724         {
725           bibe =
726             nat64_db_bib_entry_create (db, in_addr, out_addr,
727                                        clib_host_to_net_u16 (in_port),
728                                        clib_host_to_net_u16 (out_port),
729                                        fib_index, proto, 1);
730           if (!bibe)
731             return VNET_API_ERROR_UNSPECIFIED;
732
733           vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
734                                    db->bib.bib_entries_num);
735         }
736     }
737   else
738     {
739       if (!bibe)
740         return VNET_API_ERROR_NO_SUCH_ENTRY;
741
742       if (!nm->sm->num_workers)
743         {
744           nat64_db_bib_entry_free (db, bibe);
745           vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
746                                    db->bib.bib_entries_num);
747         }
748     }
749
750   if (nm->sm->num_workers)
751     {
752       /* *INDENT-OFF* */
753       pool_foreach (static_bib, nm->static_bibs,
754       ({
755         if (static_bib->done)
756           vec_add1 (to_be_free, static_bib - nm->static_bibs);
757       }));
758       vec_foreach (index, to_be_free)
759         pool_put_index (nm->static_bibs, index[0]);
760       /* *INDENT-ON* */
761       vec_free (to_be_free);
762       pool_get (nm->static_bibs, static_bib);
763       static_bib->in_addr.as_u64[0] = in_addr->as_u64[0];
764       static_bib->in_addr.as_u64[1] = in_addr->as_u64[1];
765       static_bib->in_port = clib_host_to_net_u16 (in_port);
766       static_bib->out_addr.as_u32 = out_addr->as_u32;
767       static_bib->out_port = clib_host_to_net_u16 (out_port);
768       static_bib->fib_index = fib_index;
769       static_bib->proto = proto;
770       static_bib->is_add = is_add;
771       static_bib->thread_index = thread_index;
772       static_bib->done = 0;
773       worker_vm = vlib_mains[thread_index];
774       if (worker_vm)
775         vlib_node_set_interrupt_pending (worker_vm,
776                                          nat64_static_bib_worker_node.index);
777       else
778         return VNET_API_ERROR_UNSPECIFIED;
779     }
780
781   return 0;
782 }
783
784 int
785 nat64_set_udp_timeout (u32 timeout)
786 {
787   nat64_main_t *nm = &nat64_main;
788
789   if (timeout == 0)
790     nm->udp_timeout = SNAT_UDP_TIMEOUT;
791   else
792     nm->udp_timeout = timeout;
793
794   return 0;
795 }
796
797 u32
798 nat64_get_udp_timeout (void)
799 {
800   nat64_main_t *nm = &nat64_main;
801
802   return nm->udp_timeout;
803 }
804
805 int
806 nat64_set_icmp_timeout (u32 timeout)
807 {
808   nat64_main_t *nm = &nat64_main;
809
810   if (timeout == 0)
811     nm->icmp_timeout = SNAT_ICMP_TIMEOUT;
812   else
813     nm->icmp_timeout = timeout;
814
815   return 0;
816 }
817
818 u32
819 nat64_get_icmp_timeout (void)
820 {
821   nat64_main_t *nm = &nat64_main;
822
823   return nm->icmp_timeout;
824 }
825
826 int
827 nat64_set_tcp_timeouts (u32 trans, u32 est)
828 {
829   nat64_main_t *nm = &nat64_main;
830
831   if (trans == 0)
832     nm->tcp_trans_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
833   else
834     nm->tcp_trans_timeout = trans;
835
836   if (est == 0)
837     nm->tcp_est_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;
838   else
839     nm->tcp_est_timeout = est;
840
841   return 0;
842 }
843
844 u32
845 nat64_get_tcp_trans_timeout (void)
846 {
847   nat64_main_t *nm = &nat64_main;
848
849   return nm->tcp_trans_timeout;
850 }
851
852 u32
853 nat64_get_tcp_est_timeout (void)
854 {
855   nat64_main_t *nm = &nat64_main;
856
857   return nm->tcp_est_timeout;
858 }
859
860 void
861 nat64_session_reset_timeout (nat64_db_st_entry_t * ste, vlib_main_t * vm)
862 {
863   nat64_main_t *nm = &nat64_main;
864   u32 now = (u32) vlib_time_now (vm);
865
866   switch (ip_proto_to_snat_proto (ste->proto))
867     {
868     case SNAT_PROTOCOL_ICMP:
869       ste->expire = now + nm->icmp_timeout;
870       return;
871     case SNAT_PROTOCOL_TCP:
872       {
873         switch (ste->tcp_state)
874           {
875           case NAT64_TCP_STATE_V4_INIT:
876           case NAT64_TCP_STATE_V6_INIT:
877           case NAT64_TCP_STATE_V4_FIN_RCV:
878           case NAT64_TCP_STATE_V6_FIN_RCV:
879           case NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV:
880           case NAT64_TCP_STATE_TRANS:
881             ste->expire = now + nm->tcp_trans_timeout;
882             return;
883           case NAT64_TCP_STATE_ESTABLISHED:
884             ste->expire = now + nm->tcp_est_timeout;
885             return;
886           default:
887             return;
888           }
889       }
890     case SNAT_PROTOCOL_UDP:
891       ste->expire = now + nm->udp_timeout;
892       return;
893     default:
894       ste->expire = now + nm->udp_timeout;
895       return;
896     }
897 }
898
899 void
900 nat64_tcp_session_set_state (nat64_db_st_entry_t * ste, tcp_header_t * tcp,
901                              u8 is_ip6)
902 {
903   switch (ste->tcp_state)
904     {
905     case NAT64_TCP_STATE_CLOSED:
906       {
907         if (tcp->flags & TCP_FLAG_SYN)
908           {
909             if (is_ip6)
910               ste->tcp_state = NAT64_TCP_STATE_V6_INIT;
911             else
912               ste->tcp_state = NAT64_TCP_STATE_V4_INIT;
913           }
914         return;
915       }
916     case NAT64_TCP_STATE_V4_INIT:
917       {
918         if (is_ip6 && (tcp->flags & TCP_FLAG_SYN))
919           ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED;
920         return;
921       }
922     case NAT64_TCP_STATE_V6_INIT:
923       {
924         if (!is_ip6 && (tcp->flags & TCP_FLAG_SYN))
925           ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED;
926         return;
927       }
928     case NAT64_TCP_STATE_ESTABLISHED:
929       {
930         if (tcp->flags & TCP_FLAG_FIN)
931           {
932             if (is_ip6)
933               ste->tcp_state = NAT64_TCP_STATE_V6_FIN_RCV;
934             else
935               ste->tcp_state = NAT64_TCP_STATE_V4_FIN_RCV;
936           }
937         else if (tcp->flags & TCP_FLAG_RST)
938           {
939             ste->tcp_state = NAT64_TCP_STATE_TRANS;
940           }
941         return;
942       }
943     case NAT64_TCP_STATE_V4_FIN_RCV:
944       {
945         if (is_ip6 && (tcp->flags & TCP_FLAG_FIN))
946           ste->tcp_state = NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV;
947         return;
948       }
949     case NAT64_TCP_STATE_V6_FIN_RCV:
950       {
951         if (!is_ip6 && (tcp->flags & TCP_FLAG_FIN))
952           ste->tcp_state = NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV;
953         return;
954       }
955     case NAT64_TCP_STATE_TRANS:
956       {
957         if (!(tcp->flags & TCP_FLAG_RST))
958           ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED;
959         return;
960       }
961     default:
962       return;
963     }
964 }
965
966 int
967 nat64_add_del_prefix (ip6_address_t * prefix, u8 plen, u32 vrf_id, u8 is_add)
968 {
969   nat64_main_t *nm = &nat64_main;
970   nat64_prefix_t *p = 0;
971   int i;
972
973   /* Verify prefix length */
974   if (plen != 32 && plen != 40 && plen != 48 && plen != 56 && plen != 64
975       && plen != 96)
976     return VNET_API_ERROR_INVALID_VALUE;
977
978   /* Check if tenant already have prefix */
979   for (i = 0; i < vec_len (nm->pref64); i++)
980     {
981       if (nm->pref64[i].vrf_id == vrf_id)
982         {
983           p = nm->pref64 + i;
984           break;
985         }
986     }
987
988   if (is_add)
989     {
990       if (!p)
991         {
992           vec_add2 (nm->pref64, p, 1);
993           p->fib_index =
994             fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
995                                                FIB_SOURCE_PLUGIN_HI);
996           p->vrf_id = vrf_id;
997         }
998
999       p->prefix.as_u64[0] = prefix->as_u64[0];
1000       p->prefix.as_u64[1] = prefix->as_u64[1];
1001       p->plen = plen;
1002     }
1003   else
1004     {
1005       if (!p)
1006         return VNET_API_ERROR_NO_SUCH_ENTRY;
1007
1008       vec_del1 (nm->pref64, i);
1009     }
1010
1011   return 0;
1012 }
1013
1014 void
1015 nat64_prefix_walk (nat64_prefix_walk_fn_t fn, void *ctx)
1016 {
1017   nat64_main_t *nm = &nat64_main;
1018   nat64_prefix_t *p = 0;
1019
1020   /* *INDENT-OFF* */
1021   vec_foreach (p, nm->pref64)
1022     {
1023       if (fn (p, ctx))
1024         break;
1025     };
1026   /* *INDENT-ON* */
1027 }
1028
1029 void
1030 nat64_compose_ip6 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
1031 {
1032   nat64_main_t *nm = &nat64_main;
1033   nat64_prefix_t *p, *gp = 0, *prefix = 0;
1034
1035   /* *INDENT-OFF* */
1036   vec_foreach (p, nm->pref64)
1037     {
1038       if (p->fib_index == fib_index)
1039         {
1040           prefix = p;
1041           break;
1042         }
1043
1044       if (p->fib_index == 0)
1045         gp = p;
1046     };
1047   /* *INDENT-ON* */
1048
1049   if (!prefix)
1050     prefix = gp;
1051
1052   if (prefix)
1053     {
1054       clib_memcpy_fast (ip6, &p->prefix, sizeof (ip6_address_t));
1055       switch (p->plen)
1056         {
1057         case 32:
1058           ip6->as_u32[1] = ip4->as_u32;
1059           break;
1060         case 40:
1061           ip6->as_u8[5] = ip4->as_u8[0];
1062           ip6->as_u8[6] = ip4->as_u8[1];
1063           ip6->as_u8[7] = ip4->as_u8[2];
1064           ip6->as_u8[9] = ip4->as_u8[3];
1065           break;
1066         case 48:
1067           ip6->as_u8[6] = ip4->as_u8[0];
1068           ip6->as_u8[7] = ip4->as_u8[1];
1069           ip6->as_u8[9] = ip4->as_u8[2];
1070           ip6->as_u8[10] = ip4->as_u8[3];
1071           break;
1072         case 56:
1073           ip6->as_u8[7] = ip4->as_u8[0];
1074           ip6->as_u8[9] = ip4->as_u8[1];
1075           ip6->as_u8[10] = ip4->as_u8[2];
1076           ip6->as_u8[11] = ip4->as_u8[3];
1077           break;
1078         case 64:
1079           ip6->as_u8[9] = ip4->as_u8[0];
1080           ip6->as_u8[10] = ip4->as_u8[1];
1081           ip6->as_u8[11] = ip4->as_u8[2];
1082           ip6->as_u8[12] = ip4->as_u8[3];
1083           break;
1084         case 96:
1085           ip6->as_u32[3] = ip4->as_u32;
1086           break;
1087         default:
1088           nat_log_notice ("invalid prefix length");
1089           break;
1090         }
1091     }
1092   else
1093     {
1094       clib_memcpy_fast (ip6, well_known_prefix, sizeof (ip6_address_t));
1095       ip6->as_u32[3] = ip4->as_u32;
1096     }
1097 }
1098
1099 void
1100 nat64_extract_ip4 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
1101 {
1102   nat64_main_t *nm = &nat64_main;
1103   nat64_prefix_t *p, *gp = 0;
1104   u8 plen = 0;
1105
1106   /* *INDENT-OFF* */
1107   vec_foreach (p, nm->pref64)
1108     {
1109       if (p->fib_index == fib_index)
1110         {
1111           plen = p->plen;
1112           break;
1113         }
1114
1115       if (p->vrf_id == 0)
1116         gp = p;
1117     };
1118   /* *INDENT-ON* */
1119
1120   if (!plen)
1121     {
1122       if (gp)
1123         plen = gp->plen;
1124       else
1125         plen = 96;
1126     }
1127
1128   switch (plen)
1129     {
1130     case 32:
1131       ip4->as_u32 = ip6->as_u32[1];
1132       break;
1133     case 40:
1134       ip4->as_u8[0] = ip6->as_u8[5];
1135       ip4->as_u8[1] = ip6->as_u8[6];
1136       ip4->as_u8[2] = ip6->as_u8[7];
1137       ip4->as_u8[3] = ip6->as_u8[9];
1138       break;
1139     case 48:
1140       ip4->as_u8[0] = ip6->as_u8[6];
1141       ip4->as_u8[1] = ip6->as_u8[7];
1142       ip4->as_u8[2] = ip6->as_u8[9];
1143       ip4->as_u8[3] = ip6->as_u8[10];
1144       break;
1145     case 56:
1146       ip4->as_u8[0] = ip6->as_u8[7];
1147       ip4->as_u8[1] = ip6->as_u8[9];
1148       ip4->as_u8[2] = ip6->as_u8[10];
1149       ip4->as_u8[3] = ip6->as_u8[11];
1150       break;
1151     case 64:
1152       ip4->as_u8[0] = ip6->as_u8[9];
1153       ip4->as_u8[1] = ip6->as_u8[10];
1154       ip4->as_u8[2] = ip6->as_u8[11];
1155       ip4->as_u8[3] = ip6->as_u8[12];
1156       break;
1157     case 96:
1158       ip4->as_u32 = ip6->as_u32[3];
1159       break;
1160     default:
1161       nat_log_notice ("invalid prefix length");
1162       break;
1163     }
1164 }
1165
1166 /**
1167  * @brief Per worker process checking expire time for NAT64 sessions.
1168  */
1169 static uword
1170 nat64_expire_worker_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
1171                              vlib_frame_t * f)
1172 {
1173   nat64_main_t *nm = &nat64_main;
1174   u32 thread_index = vm->thread_index;
1175   nat64_db_t *db = &nm->db[thread_index];
1176   u32 now = (u32) vlib_time_now (vm);
1177
1178   nad64_db_st_free_expired (db, now);
1179   vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
1180                            db->bib.bib_entries_num);
1181   vlib_set_simple_counter (&nm->total_sessions, thread_index, 0,
1182                            db->st.st_entries_num);
1183
1184   return 0;
1185 }
1186
1187 static vlib_node_registration_t nat64_expire_worker_walk_node;
1188
1189 /* *INDENT-OFF* */
1190 VLIB_REGISTER_NODE (nat64_expire_worker_walk_node, static) = {
1191     .function = nat64_expire_worker_walk_fn,
1192     .type = VLIB_NODE_TYPE_INPUT,
1193     .state = VLIB_NODE_STATE_INTERRUPT,
1194     .name = "nat64-expire-worker-walk",
1195 };
1196 /* *INDENT-ON* */
1197
1198 static vlib_node_registration_t nat64_expire_walk_node;
1199
1200 /**
1201  * @brief Centralized process to drive per worker expire walk.
1202  */
1203 static uword
1204 nat64_expire_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
1205                       vlib_frame_t * f)
1206 {
1207   nat64_main_t *nm = &nat64_main;
1208   vlib_main_t **worker_vms = 0, *worker_vm;
1209   int i;
1210   uword event_type, *event_data = 0;
1211
1212   nm->nat64_expire_walk_node_index = nat64_expire_walk_node.index;
1213
1214   if (vec_len (vlib_mains) == 0)
1215     vec_add1 (worker_vms, vm);
1216   else
1217     {
1218       for (i = 0; i < vec_len (vlib_mains); i++)
1219         {
1220           worker_vm = vlib_mains[i];
1221           if (worker_vm)
1222             vec_add1 (worker_vms, worker_vm);
1223         }
1224     }
1225
1226   while (1)
1227     {
1228       if (nm->total_enabled_count)
1229         {
1230           vlib_process_wait_for_event_or_clock (vm, 10.0);
1231           event_type = vlib_process_get_events (vm, &event_data);
1232         }
1233       else
1234         {
1235           vlib_process_wait_for_event (vm);
1236           event_type = vlib_process_get_events (vm, &event_data);
1237         }
1238
1239       switch (event_type)
1240         {
1241         case ~0:
1242           break;
1243         case NAT64_CLEANER_RESCHEDULE:
1244           break;
1245         default:
1246           nat_log_notice ("unknown event %u", event_type);
1247           break;
1248         }
1249
1250       for (i = 0; i < vec_len (worker_vms); i++)
1251         {
1252           worker_vm = worker_vms[i];
1253           vlib_node_set_interrupt_pending (worker_vm,
1254                                            nat64_expire_worker_walk_node.index);
1255         }
1256     }
1257
1258   return 0;
1259 }
1260
1261 /* *INDENT-OFF* */
1262 VLIB_REGISTER_NODE (nat64_expire_walk_node, static) = {
1263     .function = nat64_expire_walk_fn,
1264     .type = VLIB_NODE_TYPE_PROCESS,
1265     .name = "nat64-expire-walk",
1266 };
1267 /* *INDENT-ON* */
1268
1269 /*
1270  * fd.io coding-style-patch-verification: ON
1271  *
1272  * Local Variables:
1273  * eval: (c-set-style "gnu")
1274  * End:
1275  */