1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief NAT64 implementation
18  */
19
20 #include <nat/nat64.h>
21 #include <nat/nat64_db.h>
22 #include <nat/nat_inlines.h>
23 #include <vnet/fib/ip4_fib.h>
24 #include <vppinfra/crc32.h>
25 #include <vnet/ip/reass/ip4_sv_reass.h>
26 #include <vnet/ip/reass/ip6_sv_reass.h>
27
28
29 nat64_main_t nat64_main;
30
31 /* *INDENT-OFF* */
32
33 /* Hook up input features */
34 VNET_FEATURE_INIT (nat64_in2out, static) = {
35   .arc_name = "ip6-unicast",
36   .node_name = "nat64-in2out",
37   .runs_before = VNET_FEATURES ("ip6-lookup"),
38   .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"),
39 };
40 VNET_FEATURE_INIT (nat64_out2in, static) = {
41   .arc_name = "ip4-unicast",
42   .node_name = "nat64-out2in",
43   .runs_before = VNET_FEATURES ("ip4-lookup"),
44   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"),
45 };
46 VNET_FEATURE_INIT (nat64_in2out_handoff, static) = {
47   .arc_name = "ip6-unicast",
48   .node_name = "nat64-in2out-handoff",
49   .runs_before = VNET_FEATURES ("ip6-lookup"),
50   .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"),
51 };
52 VNET_FEATURE_INIT (nat64_out2in_handoff, static) = {
53   .arc_name = "ip4-unicast",
54   .node_name = "nat64-out2in-handoff",
55   .runs_before = VNET_FEATURES ("ip4-lookup"),
56   .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"),
57 };
58
59
60 static u8 well_known_prefix[] = {
61   0x00, 0x64, 0xff, 0x9b,
62   0x00, 0x00, 0x00, 0x00,
63   0x00, 0x00, 0x00, 0x00,
64   0x00, 0x00, 0x00, 0x00
65 };
66
67 /* *INDENT-ON* */
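
/*
 * Illustrative sketch (not compiled): well_known_prefix above is the RFC 6052
 * well-known prefix 64:ff9b::/96.  When no per-VRF prefix is configured,
 * nat64_compose_ip6 () below embeds the IPv4 address in the last 32 bits,
 * e.g. 192.0.2.1 maps to 64:ff9b::c000:201.  The addresses here are
 * documentation examples only.
 */
#if 0
  ip4_address_t ip4 = { .as_u8 = { 192, 0, 2, 1 } };
  ip6_address_t ip6;

  clib_memcpy_fast (&ip6, well_known_prefix, sizeof (ip6_address_t));
  ip6.as_u32[3] = ip4.as_u32;	/* 64:ff9b::c000:0201 */
#endif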
68
69 static void
70 nat64_ip4_add_del_interface_address_cb (ip4_main_t * im, uword opaque,
71                                         u32 sw_if_index,
72                                         ip4_address_t * address,
73                                         u32 address_length,
74                                         u32 if_address_index, u32 is_delete)
75 {
76   nat64_main_t *nm = &nat64_main;
77   int i, j;
78
79   for (i = 0; i < vec_len (nm->auto_add_sw_if_indices); i++)
80     {
81       if (sw_if_index == nm->auto_add_sw_if_indices[i])
82         {
83           if (!is_delete)
84             {
85               /* Don't trip over lease renewal, static config */
86               for (j = 0; j < vec_len (nm->addr_pool); j++)
87                 if (nm->addr_pool[j].addr.as_u32 == address->as_u32)
88                   return;
89
90               (void) nat64_add_del_pool_addr (vlib_get_thread_index (),
91                                               address, ~0, 1);
92               return;
93             }
94           else
95             {
96               (void) nat64_add_del_pool_addr (vlib_get_thread_index (),
97                                               address, ~0, 0);
98               return;
99             }
100         }
101     }
102 }
103
104 u32
105 nat64_get_worker_in2out (ip6_address_t * addr)
106 {
107   nat64_main_t *nm = &nat64_main;
108   snat_main_t *sm = nm->sm;
109   u32 next_worker_index = nm->sm->first_worker_index;
110   u32 hash;
111
112 #ifdef clib_crc32c_uses_intrinsics
113   hash = clib_crc32c ((u8 *) addr->as_u32, 16);
114 #else
115   u64 tmp = addr->as_u64[0] ^ addr->as_u64[1];
116   hash = clib_xxhash (tmp);
117 #endif
118
119   if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
120     next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
121   else
122     next_worker_index += sm->workers[hash % _vec_len (sm->workers)];
123
124   return next_worker_index;
125 }
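
/*
 * Illustrative sketch (not compiled): the in2out worker is picked by hashing
 * the inner IPv6 source address, so all sessions of one client stay on one
 * thread.  When the worker vector length is a power of two, the modulo above
 * collapses to a mask; the worker count used here is hypothetical.
 */
#if 0
  u32 n_workers = 4;		/* hypothetical, must be a power of two */
  u32 hash = 0x2f6e1c3a;	/* arbitrary example hash */

  ASSERT ((hash % n_workers) == (hash & (n_workers - 1)));
#endif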
126
127 u32
128 nat64_get_worker_out2in (vlib_buffer_t * b, ip4_header_t * ip)
129 {
130   nat64_main_t *nm = &nat64_main;
131   snat_main_t *sm = nm->sm;
132   udp_header_t *udp;
133   u16 port;
134   u32 proto;
135
136   proto = ip_proto_to_snat_proto (ip->protocol);
137   udp = ip4_next_header (ip);
138   port = udp->dst_port;
139
140   /* unknown protocol */
141   if (PREDICT_FALSE (proto == ~0))
142     {
143       nat64_db_t *db;
144       ip46_address_t daddr;
145       nat64_db_bib_entry_t *bibe;
146
147       clib_memset (&daddr, 0, sizeof (daddr));
148       daddr.ip4.as_u32 = ip->dst_address.as_u32;
149
150       /* *INDENT-OFF* */
151       vec_foreach (db, nm->db)
152         {
153           bibe = nat64_db_bib_entry_find (db, &daddr, 0, ip->protocol, 0, 0);
154           if (bibe)
155             return (u32) (db - nm->db);
156         }
157       /* *INDENT-ON* */
158       return vlib_get_thread_index ();
159     }
160
161   /* ICMP */
162   if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
163     {
164       icmp46_header_t *icmp = (icmp46_header_t *) udp;
165       icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
166       if (!icmp_type_is_error_message
167           (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
168         port = vnet_buffer (b)->ip.reass.l4_src_port;
169       else
170         {
171           /* an ICMP error message is not fragmented, so the inner headers can be accessed directly */
172           ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
173           proto = ip_proto_to_snat_proto (inner_ip->protocol);
174           void *l4_header = ip4_next_header (inner_ip);
175           switch (proto)
176             {
177             case SNAT_PROTOCOL_ICMP:
178               icmp = (icmp46_header_t *) l4_header;
179               echo = (icmp_echo_header_t *) (icmp + 1);
180               port = echo->identifier;
181               break;
182             case SNAT_PROTOCOL_UDP:
183             case SNAT_PROTOCOL_TCP:
184               port = ((tcp_udp_header_t *) l4_header)->src_port;
185               break;
186             default:
187               return vlib_get_thread_index ();
188             }
189         }
190     }
191
192   /* select worker by outside port (TCP/UDP) */
193   port = clib_net_to_host_u16 (port);
194   if (port > 1024)
195     return nm->sm->first_worker_index + ((port - 1024) / sm->port_per_thread);
196
197   return vlib_get_thread_index ();
198 }
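
/*
 * Illustrative sketch (not compiled): dynamic out2in traffic is steered by
 * the outside port, which is carved into per-thread ranges above 1024.  The
 * first_worker_index and port_per_thread values below are hypothetical; the
 * real values come from snat_main.
 */
#if 0
  u32 first_worker_index = 1;	/* hypothetical */
  u32 port_per_thread = 16128;	/* hypothetical */
  u16 port = 20000;

  /* (20000 - 1024) / 16128 = 1, so this maps to worker index 1 + 1 = 2 */
  u32 worker = first_worker_index + ((port - 1024) / port_per_thread);
#endif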
199
200 clib_error_t *
201 nat64_init (vlib_main_t * vm)
202 {
203   nat64_main_t *nm = &nat64_main;
204   vlib_thread_main_t *tm = vlib_get_thread_main ();
205   ip4_add_del_interface_address_callback_t cb4;
206   ip4_main_t *im = &ip4_main;
207   nm->sm = &snat_main;
208   vlib_node_t *node;
209
210   vec_validate (nm->db, tm->n_vlib_mains - 1);
211
212   nm->fq_in2out_index = ~0;
213   nm->fq_out2in_index = ~0;
214
215   node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
216   nm->error_node_index = node->index;
217
218   node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out");
219   nm->in2out_node_index = node->index;
220
221   node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out-slowpath");
222   nm->in2out_slowpath_node_index = node->index;
223
224   node = vlib_get_node_by_name (vm, (u8 *) "nat64-out2in");
225   nm->out2in_node_index = node->index;
226
227   /* set session timeouts to default values */
228   nm->udp_timeout = SNAT_UDP_TIMEOUT;
229   nm->icmp_timeout = SNAT_ICMP_TIMEOUT;
230   nm->tcp_trans_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
231   nm->tcp_est_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;
232
233   nm->total_enabled_count = 0;
234
235   /* Set up the interface address add/del callback */
236   cb4.function = nat64_ip4_add_del_interface_address_cb;
237   cb4.function_opaque = 0;
238   vec_add1 (im->add_del_interface_address_callbacks, cb4);
239   nm->ip4_main = im;
240
241   /* Init counters */
242   nm->total_bibs.name = "total-bibs";
243   nm->total_bibs.stat_segment_name = "/nat64/total-bibs";
244   vlib_validate_simple_counter (&nm->total_bibs, 0);
245   vlib_zero_simple_counter (&nm->total_bibs, 0);
246   nm->total_sessions.name = "total-sessions";
247   nm->total_sessions.stat_segment_name = "/nat64/total-sessions";
248   vlib_validate_simple_counter (&nm->total_sessions, 0);
249   vlib_zero_simple_counter (&nm->total_sessions, 0);
250
251   return 0;
252 }
253
254 static void nat64_free_out_addr_and_port (struct nat64_db_s *db,
255                                           ip4_address_t * addr, u16 port,
256                                           u8 protocol);
257
258 void
259 nat64_set_hash (u32 bib_buckets, u32 bib_memory_size, u32 st_buckets,
260                 u32 st_memory_size)
261 {
262   nat64_main_t *nm = &nat64_main;
263   nat64_db_t *db;
264
265   nm->bib_buckets = bib_buckets;
266   nm->bib_memory_size = bib_memory_size;
267   nm->st_buckets = st_buckets;
268   nm->st_memory_size = st_memory_size;
269
270   /* *INDENT-OFF* */
271   vec_foreach (db, nm->db)
272     {
273       if (nat64_db_init (db, bib_buckets, bib_memory_size, st_buckets,
274                          st_memory_size, nat64_free_out_addr_and_port))
275         nat_elog_err ("NAT64 DB init failed");
276     }
277   /* *INDENT-ON* */
278 }
279
280 int
281 nat64_add_del_pool_addr (u32 thread_index,
282                          ip4_address_t * addr, u32 vrf_id, u8 is_add)
283 {
284   nat64_main_t *nm = &nat64_main;
285   snat_address_t *a = 0;
286   snat_interface_t *interface;
287   int i;
288   nat64_db_t *db;
289   vlib_thread_main_t *tm = vlib_get_thread_main ();
290
291   /* Check if address already exists */
292   for (i = 0; i < vec_len (nm->addr_pool); i++)
293     {
294       if (nm->addr_pool[i].addr.as_u32 == addr->as_u32)
295         {
296           a = nm->addr_pool + i;
297           break;
298         }
299     }
300
301   if (is_add)
302     {
303       if (a)
304         return VNET_API_ERROR_VALUE_EXIST;
305
306       vec_add2 (nm->addr_pool, a, 1);
307       a->addr = *addr;
308       a->fib_index = ~0;
309       if (vrf_id != ~0)
310         a->fib_index =
311           fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
312                                              nat_fib_src_hi);
313 #define _(N, id, n, s) \
314       clib_bitmap_alloc (a->busy_##n##_port_bitmap, 65535); \
315       a->busy_##n##_ports = 0; \
316       vec_validate_init_empty (a->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
317       foreach_snat_protocol
318 #undef _
319     }
320   else
321     {
322       if (!a)
323         return VNET_API_ERROR_NO_SUCH_ENTRY;
324
325       if (a->fib_index != ~0)
326         fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6, nat_fib_src_hi);
327       /* Delete sessions using address */
328         /* *INDENT-OFF* */
329         vec_foreach (db, nm->db)
330           {
331             nat64_db_free_out_addr (thread_index, db, &a->addr);
332             vlib_set_simple_counter (&nm->total_bibs, db - nm->db, 0,
333                                      db->bib.bib_entries_num);
334             vlib_set_simple_counter (&nm->total_sessions, db - nm->db, 0,
335                                      db->st.st_entries_num);
336           }
337 #define _(N, id, n, s) \
338       clib_bitmap_free (a->busy_##n##_port_bitmap);
339       foreach_snat_protocol
340 #undef _
341         /* *INDENT-ON* */
342       vec_del1 (nm->addr_pool, i);
343     }
344
345   /* Add/del external address to FIB */
346   /* *INDENT-OFF* */
347   pool_foreach (interface, nm->interfaces,
348   ({
349     if (nat_interface_is_inside(interface))
350       continue;
351
352     snat_add_del_addr_to_fib (addr, 32, interface->sw_if_index, is_add);
353     break;
354   }));
355   /* *INDENT-ON* */
356
357   return 0;
358 }
359
360 void
361 nat64_pool_addr_walk (nat64_pool_addr_walk_fn_t fn, void *ctx)
362 {
363   nat64_main_t *nm = &nat64_main;
364   snat_address_t *a = 0;
365
366   /* *INDENT-OFF* */
367   vec_foreach (a, nm->addr_pool)
368     {
369       if (fn (a, ctx))
370         break;
371     };
372   /* *INDENT-ON* */
373 }
374
375 int
376 nat64_add_interface_address (u32 sw_if_index, int is_add)
377 {
378   nat64_main_t *nm = &nat64_main;
379   ip4_main_t *ip4_main = nm->ip4_main;
380   ip4_address_t *first_int_addr;
381   int i;
382
383   first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);
384
385   for (i = 0; i < vec_len (nm->auto_add_sw_if_indices); i++)
386     {
387       if (nm->auto_add_sw_if_indices[i] == sw_if_index)
388         {
389           if (is_add)
390             return VNET_API_ERROR_VALUE_EXIST;
391           else
392             {
393               /* if the interface has an address, remove it from the pool */
394               if (first_int_addr)
395                 (void) nat64_add_del_pool_addr (vlib_get_thread_index (),
396                                                 first_int_addr, ~0, 0);
397               vec_del1 (nm->auto_add_sw_if_indices, i);
398               return 0;
399             }
400         }
401     }
402
403   if (!is_add)
404     return VNET_API_ERROR_NO_SUCH_ENTRY;
405
406   /* add to the auto-address list */
407   vec_add1 (nm->auto_add_sw_if_indices, sw_if_index);
408
409   /* If the address is already bound - or static - add it now */
410   if (first_int_addr)
411     (void) nat64_add_del_pool_addr (vlib_get_thread_index (),
412                                     first_int_addr, ~0, 1);
413
414   return 0;
415 }
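
/*
 * Illustrative usage sketch (not compiled): register an interface so that its
 * first IPv4 address - whether already configured or assigned later, e.g. by
 * DHCP - is added to the NAT64 pool through the address callback above.  The
 * sw_if_index value is hypothetical.
 */
#if 0
  u32 sw_if_index = 1;		/* hypothetical interface */
  int rv = nat64_add_interface_address (sw_if_index, 1 /* is_add */);
#endif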
416
417 int
418 nat64_add_del_interface (u32 sw_if_index, u8 is_inside, u8 is_add)
419 {
420   nat64_main_t *nm = &nat64_main;
421   snat_interface_t *interface = 0, *i;
422   snat_address_t *ap;
423   const char *feature_name, *arc_name;
424
425   /* Check if interface already exists */
426   /* *INDENT-OFF* */
427   pool_foreach (i, nm->interfaces,
428   ({
429     if (i->sw_if_index == sw_if_index)
430       {
431         interface = i;
432         break;
433       }
434   }));
435   /* *INDENT-ON* */
436
437   if (is_add)
438     {
439       if (interface)
440         goto set_flags;
441
442       pool_get (nm->interfaces, interface);
443       interface->sw_if_index = sw_if_index;
444       interface->flags = 0;
445     set_flags:
446       if (is_inside)
447         interface->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
448       else
449         interface->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;
450
451       nm->total_enabled_count++;
452       vlib_process_signal_event (nm->sm->vlib_main,
453                                  nm->nat64_expire_walk_node_index,
454                                  NAT64_CLEANER_RESCHEDULE, 0);
455
456     }
457   else
458     {
459       if (!interface)
460         return VNET_API_ERROR_NO_SUCH_ENTRY;
461
462       if ((nat_interface_is_inside (interface)
463            && nat_interface_is_outside (interface)))
464         interface->flags &=
465           is_inside ? ~NAT_INTERFACE_FLAG_IS_INSIDE :
466           ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
467       else
468         pool_put (nm->interfaces, interface);
469
470       nm->total_enabled_count--;
471     }
472
473   if (!is_inside)
474     {
475       /* *INDENT-OFF* */
476       vec_foreach (ap, nm->addr_pool)
477         snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, is_add);
478       /* *INDENT-ON* */
479     }
480
481   if (nm->sm->num_workers > 1)
482     {
483       feature_name =
484         is_inside ? "nat64-in2out-handoff" : "nat64-out2in-handoff";
485       if (nm->fq_in2out_index == ~0)
486         nm->fq_in2out_index =
487           vlib_frame_queue_main_init (nat64_in2out_node.index, 0);
488       if (nm->fq_out2in_index == ~0)
489         nm->fq_out2in_index =
490           vlib_frame_queue_main_init (nat64_out2in_node.index, 0);
491     }
492   else
493     feature_name = is_inside ? "nat64-in2out" : "nat64-out2in";
494
495   arc_name = is_inside ? "ip6-unicast" : "ip4-unicast";
496
497   if (is_inside)
498     {
499       int rv = ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add);
500       if (rv)
501         return rv;
502     }
503   else
504     {
505       int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add);
506       if (rv)
507         return rv;
508     }
509
510   return vnet_feature_enable_disable (arc_name, feature_name, sw_if_index,
511                                       is_add, 0, 0);
512 }
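
/*
 * Illustrative usage sketch (not compiled): NAT64 needs at least one inside
 * (IPv6) and one outside (IPv4) interface.  Enabling a side also enables the
 * corresponding shallow virtual reassembly feature, as done above.  The
 * sw_if_index values are hypothetical.
 */
#if 0
  int rv;

  rv = nat64_add_del_interface (1 /* sw_if_index */, 1 /* is_inside */,
				1 /* is_add */);
  rv = nat64_add_del_interface (2 /* sw_if_index */, 0 /* is_inside */,
				1 /* is_add */);
#endif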
513
514 void
515 nat64_interfaces_walk (nat64_interface_walk_fn_t fn, void *ctx)
516 {
517   nat64_main_t *nm = &nat64_main;
518   snat_interface_t *i = 0;
519
520   /* *INDENT-OFF* */
521   pool_foreach (i, nm->interfaces,
522   ({
523     if (fn (i, ctx))
524       break;
525   }));
526   /* *INDENT-ON* */
527 }
528
529 int
530 nat64_alloc_out_addr_and_port (u32 fib_index, snat_protocol_t proto,
531                                ip4_address_t * addr, u16 * port,
532                                u32 thread_index)
533 {
534   nat64_main_t *nm = &nat64_main;
535   snat_main_t *sm = nm->sm;
536   snat_session_key_t k;
537   u32 worker_index = 0;
538   int rv;
539
540   k.protocol = proto;
541
542   if (sm->num_workers > 1)
543     worker_index = thread_index - sm->first_worker_index;
544
545   rv =
546     sm->alloc_addr_and_port (nm->addr_pool, fib_index, thread_index, &k,
547                              sm->port_per_thread, worker_index);
548
549   if (!rv)
550     {
551       *port = k.port;
552       addr->as_u32 = k.addr.as_u32;
553     }
554
555   return rv;
556 }
557
558 static void
559 nat64_free_out_addr_and_port (struct nat64_db_s *db, ip4_address_t * addr,
560                               u16 port, u8 protocol)
561 {
562   nat64_main_t *nm = &nat64_main;
563   int i;
564   snat_address_t *a;
565   u32 thread_index = db - nm->db;
566   snat_protocol_t proto = ip_proto_to_snat_proto (protocol);
567   u16 port_host_byte_order = clib_net_to_host_u16 (port);
568
569   for (i = 0; i < vec_len (nm->addr_pool); i++)
570     {
571       a = nm->addr_pool + i;
572       if (addr->as_u32 != a->addr.as_u32)
573         continue;
574       switch (proto)
575         {
576 #define _(N, j, n, s) \
577         case SNAT_PROTOCOL_##N: \
578           ASSERT (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \
579                   port_host_byte_order) == 1); \
580           clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, port_host_byte_order, 0); \
581           a->busy_##n##_ports--; \
582           a->busy_##n##_ports_per_thread[thread_index]--; \
583           break;
584           foreach_snat_protocol
585 #undef _
586         default:
587           nat_elog_notice ("unknown protocol");
588           return;
589         }
590       break;
591     }
592 }
593
594 /**
595  * @brief Add/delete a static BIB entry in a worker thread.
596  */
597 static uword
598 nat64_static_bib_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
599                             vlib_frame_t * f)
600 {
601   nat64_main_t *nm = &nat64_main;
602   u32 thread_index = vm->thread_index;
603   nat64_db_t *db = &nm->db[thread_index];
604   nat64_static_bib_to_update_t *static_bib;
605   nat64_db_bib_entry_t *bibe;
606   ip46_address_t addr;
607
608   /* *INDENT-OFF* */
609   pool_foreach (static_bib, nm->static_bibs,
610   ({
611     if ((static_bib->thread_index != thread_index) || (static_bib->done))
612       continue;
613
614     if (static_bib->is_add)
615       {
616           (void) nat64_db_bib_entry_create (thread_index, db,
617                                             &static_bib->in_addr,
618                                             &static_bib->out_addr,
619                                             static_bib->in_port,
620                                             static_bib->out_port,
621                                             static_bib->fib_index,
622                                             static_bib->proto, 1);
623           vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
624                                    db->bib.bib_entries_num);
625       }
626     else
627       {
628         addr.as_u64[0] = static_bib->in_addr.as_u64[0];
629         addr.as_u64[1] = static_bib->in_addr.as_u64[1];
630         bibe = nat64_db_bib_entry_find (db, &addr, static_bib->in_port,
631                                         static_bib->proto,
632                                         static_bib->fib_index, 1);
633         if (bibe)
634           {
635             nat64_db_bib_entry_free (thread_index, db, bibe);
636             vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
637                                      db->bib.bib_entries_num);
638             vlib_set_simple_counter (&nm->total_sessions, thread_index, 0,
639                                      db->st.st_entries_num);
640           }
641       }
642
643       static_bib->done = 1;
644   }));
645   /* *INDENT-ON* */
646
647   return 0;
648 }
649
650 static vlib_node_registration_t nat64_static_bib_worker_node;
651
652 /* *INDENT-OFF* */
653 VLIB_REGISTER_NODE (nat64_static_bib_worker_node, static) = {
654     .function = nat64_static_bib_worker_fn,
655     .type = VLIB_NODE_TYPE_INPUT,
656     .state = VLIB_NODE_STATE_INTERRUPT,
657     .name = "nat64-static-bib-worker",
658 };
659 /* *INDENT-ON* */
660
661 int
662 nat64_add_del_static_bib_entry (ip6_address_t * in_addr,
663                                 ip4_address_t * out_addr, u16 in_port,
664                                 u16 out_port, u8 proto, u32 vrf_id, u8 is_add)
665 {
666   nat64_main_t *nm = &nat64_main;
667   nat64_db_bib_entry_t *bibe;
668   u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
669                                                      nat_fib_src_hi);
670   snat_protocol_t p = ip_proto_to_snat_proto (proto);
671   ip46_address_t addr;
672   int i;
673   snat_address_t *a;
674   u32 thread_index = 0;
675   nat64_db_t *db;
676   nat64_static_bib_to_update_t *static_bib;
677   vlib_main_t *worker_vm;
678   u32 *to_be_free = 0, *index;
679
680   if (nm->sm->num_workers > 1)
681     {
682       thread_index = nat64_get_worker_in2out (in_addr);
683       db = &nm->db[thread_index];
684     }
685   else
686     db = &nm->db[nm->sm->num_workers];
687
688   addr.as_u64[0] = in_addr->as_u64[0];
689   addr.as_u64[1] = in_addr->as_u64[1];
690   bibe =
691     nat64_db_bib_entry_find (db, &addr, clib_host_to_net_u16 (in_port),
692                              proto, fib_index, 1);
693
694   if (is_add)
695     {
696       if (bibe)
697         return VNET_API_ERROR_VALUE_EXIST;
698
699       /* outside port must be assigned to the same thread as the internal address */
700       if ((out_port > 1024) && (nm->sm->num_workers > 1))
701         {
702           if (thread_index != ((out_port - 1024) / nm->sm->port_per_thread))
703             return VNET_API_ERROR_INVALID_VALUE_2;
704         }
705
706       for (i = 0; i < vec_len (nm->addr_pool); i++)
707         {
708           a = nm->addr_pool + i;
709           if (out_addr->as_u32 != a->addr.as_u32)
710             continue;
711           switch (p)
712             {
713 #define _(N, j, n, s) \
714             case SNAT_PROTOCOL_##N: \
715               if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \
716                                             out_port)) \
717                 return VNET_API_ERROR_INVALID_VALUE; \
718               clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \
719                                         out_port, 1); \
720               if (out_port > 1024) \
721                 { \
722                   a->busy_##n##_ports++; \
723                   a->busy_##n##_ports_per_thread[thread_index]++; \
724                 } \
725               break;
726               foreach_snat_protocol
727 #undef _
728             default:
729               clib_memset (&addr, 0, sizeof (addr));
730               addr.ip4.as_u32 = out_addr->as_u32;
731               if (nat64_db_bib_entry_find (db, &addr, 0, proto, fib_index, 0))
732                 return VNET_API_ERROR_INVALID_VALUE;
733             }
734           break;
735         }
736       if (!nm->sm->num_workers)
737         {
738           bibe =
739             nat64_db_bib_entry_create (thread_index, db, in_addr, out_addr,
740                                        clib_host_to_net_u16 (in_port),
741                                        clib_host_to_net_u16 (out_port),
742                                        fib_index, proto, 1);
743           if (!bibe)
744             return VNET_API_ERROR_UNSPECIFIED;
745
746           vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
747                                    db->bib.bib_entries_num);
748         }
749     }
750   else
751     {
752       if (!bibe)
753         return VNET_API_ERROR_NO_SUCH_ENTRY;
754
755       if (!nm->sm->num_workers)
756         {
757           nat64_db_bib_entry_free (thread_index, db, bibe);
758           vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
759                                    db->bib.bib_entries_num);
760         }
761     }
762
763   if (nm->sm->num_workers)
764     {
765       /* *INDENT-OFF* */
766       pool_foreach (static_bib, nm->static_bibs,
767       ({
768         if (static_bib->done)
769           vec_add1 (to_be_free, static_bib - nm->static_bibs);
770       }));
771       vec_foreach (index, to_be_free)
772         pool_put_index (nm->static_bibs, index[0]);
773       /* *INDENT-ON* */
774       vec_free (to_be_free);
775       pool_get (nm->static_bibs, static_bib);
776       static_bib->in_addr.as_u64[0] = in_addr->as_u64[0];
777       static_bib->in_addr.as_u64[1] = in_addr->as_u64[1];
778       static_bib->in_port = clib_host_to_net_u16 (in_port);
779       static_bib->out_addr.as_u32 = out_addr->as_u32;
780       static_bib->out_port = clib_host_to_net_u16 (out_port);
781       static_bib->fib_index = fib_index;
782       static_bib->proto = proto;
783       static_bib->is_add = is_add;
784       static_bib->thread_index = thread_index;
785       static_bib->done = 0;
786       worker_vm = vlib_mains[thread_index];
787       if (worker_vm)
788         vlib_node_set_interrupt_pending (worker_vm,
789                                          nat64_static_bib_worker_node.index);
790       else
791         return VNET_API_ERROR_UNSPECIFIED;
792     }
793
794   return 0;
795 }
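
/*
 * Illustrative usage sketch (not compiled): add a static TCP BIB entry
 * mapping [2001:db8::10]:6000 to 192.0.2.1:6000 in the default VRF.  Ports
 * are passed in host byte order; the addresses and ports are documentation
 * examples only.
 */
#if 0
  ip6_address_t in6;
  ip4_address_t out4 = { .as_u8 = { 192, 0, 2, 1 } };
  int rv;

  in6.as_u64[0] = clib_host_to_net_u64 (0x20010db800000000ULL);
  in6.as_u64[1] = clib_host_to_net_u64 (0x0000000000000010ULL);
  rv = nat64_add_del_static_bib_entry (&in6, &out4, 6000, 6000,
				       IP_PROTOCOL_TCP, 0 /* vrf_id */,
				       1 /* is_add */);
#endif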
796
797 int
798 nat64_set_udp_timeout (u32 timeout)
799 {
800   nat64_main_t *nm = &nat64_main;
801
802   if (timeout == 0)
803     nm->udp_timeout = SNAT_UDP_TIMEOUT;
804   else
805     nm->udp_timeout = timeout;
806
807   return 0;
808 }
809
810 u32
811 nat64_get_udp_timeout (void)
812 {
813   nat64_main_t *nm = &nat64_main;
814
815   return nm->udp_timeout;
816 }
817
818 int
819 nat64_set_icmp_timeout (u32 timeout)
820 {
821   nat64_main_t *nm = &nat64_main;
822
823   if (timeout == 0)
824     nm->icmp_timeout = SNAT_ICMP_TIMEOUT;
825   else
826     nm->icmp_timeout = timeout;
827
828   return 0;
829 }
830
831 u32
832 nat64_get_icmp_timeout (void)
833 {
834   nat64_main_t *nm = &nat64_main;
835
836   return nm->icmp_timeout;
837 }
838
839 int
840 nat64_set_tcp_timeouts (u32 trans, u32 est)
841 {
842   nat64_main_t *nm = &nat64_main;
843
844   if (trans == 0)
845     nm->tcp_trans_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
846   else
847     nm->tcp_trans_timeout = trans;
848
849   if (est == 0)
850     nm->tcp_est_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;
851   else
852     nm->tcp_est_timeout = est;
853
854   return 0;
855 }
856
857 u32
858 nat64_get_tcp_trans_timeout (void)
859 {
860   nat64_main_t *nm = &nat64_main;
861
862   return nm->tcp_trans_timeout;
863 }
864
865 u32
866 nat64_get_tcp_est_timeout (void)
867 {
868   nat64_main_t *nm = &nat64_main;
869
870   return nm->tcp_est_timeout;
871 }
872
873 void
874 nat64_session_reset_timeout (nat64_db_st_entry_t * ste, vlib_main_t * vm)
875 {
876   nat64_main_t *nm = &nat64_main;
877   u32 now = (u32) vlib_time_now (vm);
878
879   switch (ip_proto_to_snat_proto (ste->proto))
880     {
881     case SNAT_PROTOCOL_ICMP:
882       ste->expire = now + nm->icmp_timeout;
883       return;
884     case SNAT_PROTOCOL_TCP:
885       {
886         switch (ste->tcp_state)
887           {
888           case NAT64_TCP_STATE_V4_INIT:
889           case NAT64_TCP_STATE_V6_INIT:
890           case NAT64_TCP_STATE_V4_FIN_RCV:
891           case NAT64_TCP_STATE_V6_FIN_RCV:
892           case NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV:
893           case NAT64_TCP_STATE_TRANS:
894             ste->expire = now + nm->tcp_trans_timeout;
895             return;
896           case NAT64_TCP_STATE_ESTABLISHED:
897             ste->expire = now + nm->tcp_est_timeout;
898             return;
899           default:
900             return;
901           }
902       }
903     case SNAT_PROTOCOL_UDP:
904       ste->expire = now + nm->udp_timeout;
905       return;
906     default:
907       ste->expire = now + nm->udp_timeout;
908       return;
909     }
910 }
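
/*
 * Note (comment only): the timeout selection above mirrors RFC 6146, which
 * requires at least 2 h 4 min of idle time for established TCP sessions and
 * at least 4 min for transitory TCP states, with commonly used defaults of
 * 5 min for UDP and 60 s for ICMP queries; the SNAT_*_TIMEOUT defaults
 * applied in nat64_init () are meant to follow the same guidance.
 */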
911
912 void
913 nat64_tcp_session_set_state (nat64_db_st_entry_t * ste, tcp_header_t * tcp,
914                              u8 is_ip6)
915 {
916   switch (ste->tcp_state)
917     {
918     case NAT64_TCP_STATE_CLOSED:
919       {
920         if (tcp->flags & TCP_FLAG_SYN)
921           {
922             if (is_ip6)
923               ste->tcp_state = NAT64_TCP_STATE_V6_INIT;
924             else
925               ste->tcp_state = NAT64_TCP_STATE_V4_INIT;
926           }
927         return;
928       }
929     case NAT64_TCP_STATE_V4_INIT:
930       {
931         if (is_ip6 && (tcp->flags & TCP_FLAG_SYN))
932           ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED;
933         return;
934       }
935     case NAT64_TCP_STATE_V6_INIT:
936       {
937         if (!is_ip6 && (tcp->flags & TCP_FLAG_SYN))
938           ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED;
939         return;
940       }
941     case NAT64_TCP_STATE_ESTABLISHED:
942       {
943         if (tcp->flags & TCP_FLAG_FIN)
944           {
945             if (is_ip6)
946               ste->tcp_state = NAT64_TCP_STATE_V6_FIN_RCV;
947             else
948               ste->tcp_state = NAT64_TCP_STATE_V4_FIN_RCV;
949           }
950         else if (tcp->flags & TCP_FLAG_RST)
951           {
952             ste->tcp_state = NAT64_TCP_STATE_TRANS;
953           }
954         return;
955       }
956     case NAT64_TCP_STATE_V4_FIN_RCV:
957       {
958         if (is_ip6 && (tcp->flags & TCP_FLAG_FIN))
959           ste->tcp_state = NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV;
960         return;
961       }
962     case NAT64_TCP_STATE_V6_FIN_RCV:
963       {
964         if (!is_ip6 && (tcp->flags & TCP_FLAG_FIN))
965           ste->tcp_state = NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV;
966         return;
967       }
968     case NAT64_TCP_STATE_TRANS:
969       {
970         if (!(tcp->flags & TCP_FLAG_RST))
971           ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED;
972         return;
973       }
974     default:
975       return;
976     }
977 }
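
/*
 * Illustrative sketch (not compiled): the state machine above follows the
 * RFC 6146 session lifecycle.  A v6-initiated handshake walks CLOSED ->
 * V6_INIT -> ESTABLISHED, which the snippet below exercises with a
 * hand-built header; the values are examples only.
 */
#if 0
  nat64_db_st_entry_t ste = { .tcp_state = NAT64_TCP_STATE_CLOSED };
  tcp_header_t syn = { .flags = TCP_FLAG_SYN };

  nat64_tcp_session_set_state (&ste, &syn, 1 /* is_ip6 */);	/* V6_INIT */
  nat64_tcp_session_set_state (&ste, &syn, 0 /* is_ip6 */);	/* ESTABLISHED */
#endif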
978
979 int
980 nat64_add_del_prefix (ip6_address_t * prefix, u8 plen, u32 vrf_id, u8 is_add)
981 {
982   nat64_main_t *nm = &nat64_main;
983   nat64_prefix_t *p = 0;
984   int i;
985
986   /* Verify prefix length */
987   if (plen != 32 && plen != 40 && plen != 48 && plen != 56 && plen != 64
988       && plen != 96)
989     return VNET_API_ERROR_INVALID_VALUE;
990
991   /* Check if the tenant already has a prefix */
992   for (i = 0; i < vec_len (nm->pref64); i++)
993     {
994       if (nm->pref64[i].vrf_id == vrf_id)
995         {
996           p = nm->pref64 + i;
997           break;
998         }
999     }
1000
1001   if (is_add)
1002     {
1003       if (!p)
1004         {
1005           vec_add2 (nm->pref64, p, 1);
1006           p->fib_index =
1007             fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
1008                                                nat_fib_src_hi);
1009           p->vrf_id = vrf_id;
1010         }
1011
1012       p->prefix.as_u64[0] = prefix->as_u64[0];
1013       p->prefix.as_u64[1] = prefix->as_u64[1];
1014       p->plen = plen;
1015     }
1016   else
1017     {
1018       if (!p)
1019         return VNET_API_ERROR_NO_SUCH_ENTRY;
1020
1021       vec_del1 (nm->pref64, i);
1022     }
1023
1024   return 0;
1025 }
1026
1027 void
1028 nat64_prefix_walk (nat64_prefix_walk_fn_t fn, void *ctx)
1029 {
1030   nat64_main_t *nm = &nat64_main;
1031   nat64_prefix_t *p = 0;
1032
1033   /* *INDENT-OFF* */
1034   vec_foreach (p, nm->pref64)
1035     {
1036       if (fn (p, ctx))
1037         break;
1038     };
1039   /* *INDENT-ON* */
1040 }
1041
1042 void
1043 nat64_compose_ip6 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
1044 {
1045   nat64_main_t *nm = &nat64_main;
1046   nat64_prefix_t *p, *gp = 0, *prefix = 0;
1047
1048   /* *INDENT-OFF* */
1049   vec_foreach (p, nm->pref64)
1050     {
1051       if (p->fib_index == fib_index)
1052         {
1053           prefix = p;
1054           break;
1055         }
1056
1057       if (p->fib_index == 0)
1058         gp = p;
1059     };
1060   /* *INDENT-ON* */
1061
1062   if (!prefix)
1063     prefix = gp;
1064
1065   if (prefix)
1066     {
1067       clib_memcpy_fast (ip6, &prefix->prefix, sizeof (ip6_address_t));
1068       switch (prefix->plen)
1069         {
1070         case 32:
1071           ip6->as_u32[1] = ip4->as_u32;
1072           break;
1073         case 40:
1074           ip6->as_u8[5] = ip4->as_u8[0];
1075           ip6->as_u8[6] = ip4->as_u8[1];
1076           ip6->as_u8[7] = ip4->as_u8[2];
1077           ip6->as_u8[9] = ip4->as_u8[3];
1078           break;
1079         case 48:
1080           ip6->as_u8[6] = ip4->as_u8[0];
1081           ip6->as_u8[7] = ip4->as_u8[1];
1082           ip6->as_u8[9] = ip4->as_u8[2];
1083           ip6->as_u8[10] = ip4->as_u8[3];
1084           break;
1085         case 56:
1086           ip6->as_u8[7] = ip4->as_u8[0];
1087           ip6->as_u8[9] = ip4->as_u8[1];
1088           ip6->as_u8[10] = ip4->as_u8[2];
1089           ip6->as_u8[11] = ip4->as_u8[3];
1090           break;
1091         case 64:
1092           ip6->as_u8[9] = ip4->as_u8[0];
1093           ip6->as_u8[10] = ip4->as_u8[1];
1094           ip6->as_u8[11] = ip4->as_u8[2];
1095           ip6->as_u8[12] = ip4->as_u8[3];
1096           break;
1097         case 96:
1098           ip6->as_u32[3] = ip4->as_u32;
1099           break;
1100         default:
1101           nat_elog_notice ("invalid prefix length");
1102           break;
1103         }
1104     }
1105   else
1106     {
1107       clib_memcpy_fast (ip6, well_known_prefix, sizeof (ip6_address_t));
1108       ip6->as_u32[3] = ip4->as_u32;
1109     }
1110 }
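
/*
 * Worked example (comment only): with the RFC 6052 layout used above, a /40
 * prefix of 2001:db8:100::/40 and the IPv4 address 192.0.2.33 compose to
 * 2001:db8:1c0:2:21:: - the IPv4 octets land in bytes 5..7 and 9, skipping
 * byte 8, which RFC 6052 reserves as the all-zero "u" octet.
 */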
1111
1112 void
1113 nat64_extract_ip4 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
1114 {
1115   nat64_main_t *nm = &nat64_main;
1116   nat64_prefix_t *p, *gp = 0;
1117   u8 plen = 0;
1118
1119   /* *INDENT-OFF* */
1120   vec_foreach (p, nm->pref64)
1121     {
1122       if (p->fib_index == fib_index)
1123         {
1124           plen = p->plen;
1125           break;
1126         }
1127
1128       if (p->vrf_id == 0)
1129         gp = p;
1130     };
1131   /* *INDENT-ON* */
1132
1133   if (!plen)
1134     {
1135       if (gp)
1136         plen = gp->plen;
1137       else
1138         plen = 96;
1139     }
1140
1141   switch (plen)
1142     {
1143     case 32:
1144       ip4->as_u32 = ip6->as_u32[1];
1145       break;
1146     case 40:
1147       ip4->as_u8[0] = ip6->as_u8[5];
1148       ip4->as_u8[1] = ip6->as_u8[6];
1149       ip4->as_u8[2] = ip6->as_u8[7];
1150       ip4->as_u8[3] = ip6->as_u8[9];
1151       break;
1152     case 48:
1153       ip4->as_u8[0] = ip6->as_u8[6];
1154       ip4->as_u8[1] = ip6->as_u8[7];
1155       ip4->as_u8[2] = ip6->as_u8[9];
1156       ip4->as_u8[3] = ip6->as_u8[10];
1157       break;
1158     case 56:
1159       ip4->as_u8[0] = ip6->as_u8[7];
1160       ip4->as_u8[1] = ip6->as_u8[9];
1161       ip4->as_u8[2] = ip6->as_u8[10];
1162       ip4->as_u8[3] = ip6->as_u8[11];
1163       break;
1164     case 64:
1165       ip4->as_u8[0] = ip6->as_u8[9];
1166       ip4->as_u8[1] = ip6->as_u8[10];
1167       ip4->as_u8[2] = ip6->as_u8[11];
1168       ip4->as_u8[3] = ip6->as_u8[12];
1169       break;
1170     case 96:
1171       ip4->as_u32 = ip6->as_u32[3];
1172       break;
1173     default:
1174       nat_elog_notice ("invalid prefix length");
1175       break;
1176     }
1177 }
1178
1179 /**
1180  * @brief Per-worker process that frees expired NAT64 sessions.
1181  */
1182 static uword
1183 nat64_expire_worker_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
1184                              vlib_frame_t * f)
1185 {
1186   nat64_main_t *nm = &nat64_main;
1187   u32 thread_index = vm->thread_index;
1188   nat64_db_t *db = &nm->db[thread_index];
1189   u32 now = (u32) vlib_time_now (vm);
1190
1191   nad64_db_st_free_expired (thread_index, db, now);
1192   vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
1193                            db->bib.bib_entries_num);
1194   vlib_set_simple_counter (&nm->total_sessions, thread_index, 0,
1195                            db->st.st_entries_num);
1196
1197   return 0;
1198 }
1199
1200 static vlib_node_registration_t nat64_expire_worker_walk_node;
1201
1202 /* *INDENT-OFF* */
1203 VLIB_REGISTER_NODE (nat64_expire_worker_walk_node, static) = {
1204     .function = nat64_expire_worker_walk_fn,
1205     .type = VLIB_NODE_TYPE_INPUT,
1206     .state = VLIB_NODE_STATE_INTERRUPT,
1207     .name = "nat64-expire-worker-walk",
1208 };
1209 /* *INDENT-ON* */
1210
1211 static vlib_node_registration_t nat64_expire_walk_node;
1212
1213 /**
1214  * @brief Centralized process that drives the per-worker expire walks.
1215  */
1216 static uword
1217 nat64_expire_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
1218                       vlib_frame_t * f)
1219 {
1220   nat64_main_t *nm = &nat64_main;
1221   vlib_main_t **worker_vms = 0, *worker_vm;
1222   int i;
1223   uword event_type, *event_data = 0;
1224
1225   nm->nat64_expire_walk_node_index = nat64_expire_walk_node.index;
1226
1227   if (vec_len (vlib_mains) == 0)
1228     vec_add1 (worker_vms, vm);
1229   else
1230     {
1231       for (i = 0; i < vec_len (vlib_mains); i++)
1232         {
1233           worker_vm = vlib_mains[i];
1234           if (worker_vm)
1235             vec_add1 (worker_vms, worker_vm);
1236         }
1237     }
1238
1239   while (1)
1240     {
1241       if (nm->total_enabled_count)
1242         {
1243           vlib_process_wait_for_event_or_clock (vm, 10.0);
1244           event_type = vlib_process_get_events (vm, &event_data);
1245         }
1246       else
1247         {
1248           vlib_process_wait_for_event (vm);
1249           event_type = vlib_process_get_events (vm, &event_data);
1250         }
1251
1252       switch (event_type)
1253         {
1254         case ~0:
1255           break;
1256         case NAT64_CLEANER_RESCHEDULE:
1257           break;
1258         default:
1259           nat_elog_notice_X1 ("unknown event %d", "i4", event_type);
1260           break;
1261         }
1262
1263       for (i = 0; i < vec_len (worker_vms); i++)
1264         {
1265           worker_vm = worker_vms[i];
1266           vlib_node_set_interrupt_pending (worker_vm,
1267                                            nat64_expire_worker_walk_node.index);
1268         }
1269     }
1270
1271   return 0;
1272 }
1273
1274 /* *INDENT-OFF* */
1275 VLIB_REGISTER_NODE (nat64_expire_walk_node, static) = {
1276     .function = nat64_expire_walk_fn,
1277     .type = VLIB_NODE_TYPE_PROCESS,
1278     .name = "nat64-expire-walk",
1279 };
1280 /* *INDENT-ON* */
1281
1282 /*
1283  * fd.io coding-style-patch-verification: ON
1284  *
1285  * Local Variables:
1286  * eval: (c-set-style "gnu")
1287  * End:
1288  */