X-Git-Url: https://gerrit.fd.io/r/gitweb?p=vpp.git;a=blobdiff_plain;f=src%2Fplugins%2Fnat%2Fnat64.c;h=3aff99ddf859eb882e7da6449e7136554dd93a26;hp=b04901fade420da8b8147e22aed614fdcc64b1ee;hb=61717cc38;hpb=2ba92e32e0197f676dd905e5edcb4ff3e1bec241 diff --git a/src/plugins/nat/nat64.c b/src/plugins/nat/nat64.c index b04901fade4..3aff99ddf85 100644 --- a/src/plugins/nat/nat64.c +++ b/src/plugins/nat/nat64.c @@ -19,7 +19,11 @@ #include #include +#include #include +#include +#include +#include nat64_main_t nat64_main; @@ -31,13 +35,28 @@ VNET_FEATURE_INIT (nat64_in2out, static) = { .arc_name = "ip6-unicast", .node_name = "nat64-in2out", .runs_before = VNET_FEATURES ("ip6-lookup"), + .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"), }; VNET_FEATURE_INIT (nat64_out2in, static) = { .arc_name = "ip4-unicast", .node_name = "nat64-out2in", .runs_before = VNET_FEATURES ("ip4-lookup"), + .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"), +}; +VNET_FEATURE_INIT (nat64_in2out_handoff, static) = { + .arc_name = "ip6-unicast", + .node_name = "nat64-in2out-handoff", + .runs_before = VNET_FEATURES ("ip6-lookup"), + .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"), +}; +VNET_FEATURE_INIT (nat64_out2in_handoff, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat64-out2in-handoff", + .runs_before = VNET_FEATURES ("ip4-lookup"), + .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"), }; + static u8 well_known_prefix[] = { 0x00, 0x64, 0xff, 0x9b, 0x00, 0x00, 0x00, 0x00, @@ -47,45 +66,227 @@ static u8 well_known_prefix[] = { /* *INDENT-ON* */ -clib_error_t * -nat64_init (vlib_main_t * vm) +static void +nat64_ip4_add_del_interface_address_cb (ip4_main_t * im, uword opaque, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, + u32 if_address_index, u32 is_delete) { nat64_main_t *nm = &nat64_main; - clib_error_t *error = 0; - vlib_thread_main_t *tm = vlib_get_thread_main (); + int i, j; + + for (i = 0; i < vec_len (nm->auto_add_sw_if_indices); i++) + { + if (sw_if_index == nm->auto_add_sw_if_indices[i]) + { + if (!is_delete) + { + /* Don't trip over lease renewal, static config */ + for (j = 0; j < vec_len (nm->addr_pool); j++) + if (nm->addr_pool[j].addr.as_u32 == address->as_u32) + return; + + (void) nat64_add_del_pool_addr (vlib_get_thread_index (), + address, ~0, 1); + return; + } + else + { + (void) nat64_add_del_pool_addr (vlib_get_thread_index (), + address, ~0, 0); + return; + } + } + } +} + +u32 +nat64_get_worker_in2out (ip6_address_t * addr) +{ + nat64_main_t *nm = &nat64_main; + snat_main_t *sm = nm->sm; + u32 next_worker_index = nm->sm->first_worker_index; + u32 hash; + +#ifdef clib_crc32c_uses_intrinsics + hash = clib_crc32c ((u8 *) addr->as_u32, 16); +#else + u64 tmp = addr->as_u64[0] ^ addr->as_u64[1]; + hash = clib_xxhash (tmp); +#endif + + if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers)))) + next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)]; + else + next_worker_index += sm->workers[hash % _vec_len (sm->workers)]; + + return next_worker_index; +} + +u32 +nat64_get_worker_out2in (vlib_buffer_t * b, ip4_header_t * ip) +{ + nat64_main_t *nm = &nat64_main; + snat_main_t *sm = nm->sm; + udp_header_t *udp; + u16 port; + u32 proto; - nm->is_disabled = 0; + proto = ip_proto_to_snat_proto (ip->protocol); + udp = ip4_next_header (ip); + port = udp->dst_port; - if (tm->n_vlib_mains > 1) + /* unknown protocol */ + if (PREDICT_FALSE (proto == ~0)) { - nm->is_disabled = 1; - goto error; + nat64_db_t *db; + ip46_address_t daddr; + nat64_db_bib_entry_t *bibe; + + clib_memset (&daddr, 0, sizeof (daddr)); + daddr.ip4.as_u32 = ip->dst_address.as_u32; + + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + bibe = nat64_db_bib_entry_find (db, &daddr, 0, ip->protocol, 0, 0); + if (bibe) + return (u32) (db - nm->db); + } + /* *INDENT-ON* */ + return vlib_get_thread_index (); } - if (nat64_db_init (&nm->db)) + /* ICMP */ + if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP)) { - error = clib_error_return (0, "NAT64 DB init failed"); - goto error; + icmp46_header_t *icmp = (icmp46_header_t *) udp; + icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1); + if (!icmp_type_is_error_message + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) + port = vnet_buffer (b)->ip.reass.l4_src_port; + else + { + /* if error message, then it's not fragmented and we can access it */ + ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1); + proto = ip_proto_to_snat_proto (inner_ip->protocol); + void *l4_header = ip4_next_header (inner_ip); + switch (proto) + { + case SNAT_PROTOCOL_ICMP: + icmp = (icmp46_header_t *) l4_header; + echo = (icmp_echo_header_t *) (icmp + 1); + port = echo->identifier; + break; + case SNAT_PROTOCOL_UDP: + case SNAT_PROTOCOL_TCP: + port = ((tcp_udp_header_t *) l4_header)->src_port; + break; + default: + return vlib_get_thread_index (); + } + } } + /* worker by outside port (TCP/UDP) */ + port = clib_net_to_host_u16 (port); + if (port > 1024) + return nm->sm->first_worker_index + ((port - 1024) / sm->port_per_thread); + + return vlib_get_thread_index (); +} + +clib_error_t * +nat64_init (vlib_main_t * vm) +{ + nat64_main_t *nm = &nat64_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + ip4_add_del_interface_address_callback_t cb4; + ip4_main_t *im = &ip4_main; + nm->sm = &snat_main; + vlib_node_t *node; + + vec_validate (nm->db, tm->n_vlib_mains - 1); + + nm->fq_in2out_index = ~0; + nm->fq_out2in_index = ~0; + + node = vlib_get_node_by_name (vm, (u8 *) "error-drop"); + nm->error_node_index = node->index; + + node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out"); + nm->in2out_node_index = node->index; + + node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out-slowpath"); + nm->in2out_slowpath_node_index = node->index; + + node = vlib_get_node_by_name (vm, (u8 *) "nat64-out2in"); + nm->out2in_node_index = node->index; + /* set session timeouts to default values */ nm->udp_timeout = SNAT_UDP_TIMEOUT; nm->icmp_timeout = SNAT_ICMP_TIMEOUT; nm->tcp_trans_timeout = SNAT_TCP_TRANSITORY_TIMEOUT; nm->tcp_est_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT; - nm->tcp_incoming_syn_timeout = SNAT_TCP_INCOMING_SYN; -error: - return error; + nm->total_enabled_count = 0; + + /* Set up the interface address add/del callback */ + cb4.function = nat64_ip4_add_del_interface_address_cb; + cb4.function_opaque = 0; + vec_add1 (im->add_del_interface_address_callbacks, cb4); + nm->ip4_main = im; + + /* Init counters */ + nm->total_bibs.name = "total-bibs"; + nm->total_bibs.stat_segment_name = "/nat64/total-bibs"; + vlib_validate_simple_counter (&nm->total_bibs, 0); + vlib_zero_simple_counter (&nm->total_bibs, 0); + nm->total_sessions.name = "total-sessions"; + nm->total_sessions.stat_segment_name = "/nat64/total-sessions"; + vlib_validate_simple_counter (&nm->total_sessions, 0); + vlib_zero_simple_counter (&nm->total_sessions, 0); + + return 0; +} + +static void nat64_free_out_addr_and_port (struct nat64_db_s *db, + ip4_address_t * addr, u16 port, + u8 protocol); + +void +nat64_set_hash (u32 bib_buckets, uword bib_memory_size, u32 st_buckets, + uword st_memory_size) +{ + nat64_main_t *nm = &nat64_main; + nat64_db_t *db; + + nm->bib_buckets = bib_buckets; + nm->bib_memory_size = bib_memory_size; + nm->st_buckets = st_buckets; + nm->st_memory_size = st_memory_size; + + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + if (nat64_db_init (db, bib_buckets, bib_memory_size, st_buckets, + st_memory_size, nat64_free_out_addr_and_port)) + nat_elog_err ("NAT64 DB init failed"); + } + /* *INDENT-ON* */ } int -nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add) +nat64_add_del_pool_addr (u32 thread_index, + ip4_address_t * addr, u32 vrf_id, u8 is_add) { nat64_main_t *nm = &nat64_main; snat_address_t *a = 0; snat_interface_t *interface; int i; + nat64_db_t *db; + vlib_thread_main_t *tm = vlib_get_thread_main (); /* Check if address already exists */ for (i = 0; i < vec_len (nm->addr_pool); i++) @@ -104,12 +305,15 @@ nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add) vec_add2 (nm->addr_pool, a, 1); a->addr = *addr; - a->fib_index = 0; + a->fib_index = ~0; if (vrf_id != ~0) a->fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id); -#define _(N, i, n, s) \ - clib_bitmap_alloc (a->busy_##n##_port_bitmap, 65535); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + nat_fib_src_hi); +#define _(N, id, n, s) \ + clib_memset (a->busy_##n##_port_refcounts, 0, sizeof(a->busy_##n##_port_refcounts)); \ + a->busy_##n##_ports = 0; \ + vec_validate_init_empty (a->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0); foreach_snat_protocol #undef _ } @@ -118,15 +322,19 @@ nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add) if (!a) return VNET_API_ERROR_NO_SUCH_ENTRY; - if (a->fib_index) - fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6); - -#define _(N, id, n, s) \ - clib_bitmap_free (a->busy_##n##_port_bitmap); - foreach_snat_protocol -#undef _ - /* Delete sessions using address */ - nat64_db_free_out_addr (&nm->db, &a->addr); + if (a->fib_index != ~0) + fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6, nat_fib_src_hi); + /* Delete sessions using address */ + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + nat64_db_free_out_addr (thread_index, db, &a->addr); + vlib_set_simple_counter (&nm->total_bibs, db - nm->db, 0, + db->bib.bib_entries_num); + vlib_set_simple_counter (&nm->total_sessions, db - nm->db, 0, + db->st.st_entries_num); + } + /* *INDENT-ON* */ vec_del1 (nm->addr_pool, i); } @@ -134,7 +342,7 @@ nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add) /* *INDENT-OFF* */ pool_foreach (interface, nm->interfaces, ({ - if (interface->is_inside) + if (nat_interface_is_inside(interface)) continue; snat_add_del_addr_to_fib (addr, 32, interface->sw_if_index, is_add); @@ -160,6 +368,48 @@ nat64_pool_addr_walk (nat64_pool_addr_walk_fn_t fn, void *ctx) /* *INDENT-ON* */ } +int +nat64_add_interface_address (u32 sw_if_index, int is_add) +{ + nat64_main_t *nm = &nat64_main; + ip4_main_t *ip4_main = nm->ip4_main; + ip4_address_t *first_int_addr; + int i; + + first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0); + + for (i = 0; i < vec_len (nm->auto_add_sw_if_indices); i++) + { + if (nm->auto_add_sw_if_indices[i] == sw_if_index) + { + if (is_add) + return VNET_API_ERROR_VALUE_EXIST; + else + { + /* if have address remove it */ + if (first_int_addr) + (void) nat64_add_del_pool_addr (vlib_get_thread_index (), + first_int_addr, ~0, 0); + vec_del1 (nm->auto_add_sw_if_indices, i); + return 0; + } + } + } + + if (!is_add) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + /* add to the auto-address list */ + vec_add1 (nm->auto_add_sw_if_indices, sw_if_index); + + /* If the address is already bound - or static - add it now */ + if (first_int_addr) + (void) nat64_add_del_pool_addr (vlib_get_thread_index (), + first_int_addr, ~0, 1); + + return 0; +} + int nat64_add_del_interface (u32 sw_if_index, u8 is_inside, u8 is_add) { @@ -168,7 +418,7 @@ nat64_add_del_interface (u32 sw_if_index, u8 is_inside, u8 is_add) snat_address_t *ap; const char *feature_name, *arc_name; - /* Check if address already exists */ + /* Check if interface already exists */ /* *INDENT-OFF* */ pool_foreach (i, nm->interfaces, ({ @@ -183,11 +433,21 @@ nat64_add_del_interface (u32 sw_if_index, u8 is_inside, u8 is_add) if (is_add) { if (interface) - return VNET_API_ERROR_VALUE_EXIST; + goto set_flags; pool_get (nm->interfaces, interface); interface->sw_if_index = sw_if_index; - interface->is_inside = is_inside; + interface->flags = 0; + set_flags: + if (is_inside) + interface->flags |= NAT_INTERFACE_FLAG_IS_INSIDE; + else + interface->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE; + + nm->total_enabled_count++; + vlib_process_signal_event (nm->sm->vlib_main, + nm->nat64_expire_walk_node_index, + NAT64_CLEANER_RESCHEDULE, 0); } else @@ -195,7 +455,15 @@ nat64_add_del_interface (u32 sw_if_index, u8 is_inside, u8 is_add) if (!interface) return VNET_API_ERROR_NO_SUCH_ENTRY; - pool_put (nm->interfaces, interface); + if ((nat_interface_is_inside (interface) + && nat_interface_is_outside (interface))) + interface->flags &= + is_inside ? ~NAT_INTERFACE_FLAG_IS_INSIDE : + ~NAT_INTERFACE_FLAG_IS_OUTSIDE; + else + pool_put (nm->interfaces, interface); + + nm->total_enabled_count--; } if (!is_inside) @@ -206,8 +474,34 @@ nat64_add_del_interface (u32 sw_if_index, u8 is_inside, u8 is_add) /* *INDENT-ON* */ } + if (nm->sm->num_workers > 1) + { + feature_name = + is_inside ? "nat64-in2out-handoff" : "nat64-out2in-handoff"; + if (nm->fq_in2out_index == ~0) + nm->fq_in2out_index = + vlib_frame_queue_main_init (nat64_in2out_node.index, 0); + if (nm->fq_out2in_index == ~0) + nm->fq_out2in_index = + vlib_frame_queue_main_init (nat64_out2in_node.index, 0); + } + else + feature_name = is_inside ? "nat64-in2out" : "nat64-out2in"; + arc_name = is_inside ? "ip6-unicast" : "ip4-unicast"; - feature_name = is_inside ? "nat64-in2out" : "nat64-out2in"; + + if (is_inside) + { + int rv = ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add); + if (rv) + return rv; + } + else + { + int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add); + if (rv) + return rv; + } return vnet_feature_enable_disable (arc_name, feature_name, sw_if_index, is_add, 0, 0); @@ -230,97 +524,43 @@ nat64_interfaces_walk (nat64_interface_walk_fn_t fn, void *ctx) int nat64_alloc_out_addr_and_port (u32 fib_index, snat_protocol_t proto, - ip4_address_t * addr, u16 * port) + ip4_address_t * addr, u16 * port, + u32 thread_index) { nat64_main_t *nm = &nat64_main; - snat_main_t *sm = &snat_main; - int i; - snat_address_t *a, *ga = 0; - u32 portnum; + snat_main_t *sm = nm->sm; + snat_session_key_t k; + u32 worker_index = 0; + int rv; - for (i = 0; i < vec_len (nm->addr_pool); i++) - { - a = nm->addr_pool + i; - switch (proto) - { -#define _(N, j, n, s) \ - case SNAT_PROTOCOL_##N: \ - if (a->busy_##n##_ports < (65535-1024)) \ - { \ - if (a->fib_index == fib_index) \ - { \ - while (1) \ - { \ - portnum = random_u32 (&sm->random_seed); \ - portnum &= 0xFFFF; \ - if (portnum < 1024) \ - continue; \ - if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \ - portnum)) \ - continue; \ - clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \ - portnum, 1); \ - a->busy_##n##_ports++; \ - *port = portnum; \ - addr->as_u32 = a->addr.as_u32; \ - return 0; \ - } \ - } \ - else if (a->fib_index == 0) \ - ga = a; \ - } \ - break; - foreach_snat_protocol -#undef _ - default: - clib_warning ("unknown protocol"); - return 1; - } - } + k.protocol = proto; - if (ga) + if (sm->num_workers > 1) + worker_index = thread_index - sm->first_worker_index; + + rv = + sm->alloc_addr_and_port (nm->addr_pool, fib_index, thread_index, &k, + sm->port_per_thread, worker_index); + + if (!rv) { - switch (proto) - { -#define _(N, j, n, s) \ - case SNAT_PROTOCOL_##N: \ - while (1) \ - { \ - portnum = random_u32 (&sm->random_seed); \ - portnum &= 0xFFFF; \ - if (portnum < 1024) \ - continue; \ - if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \ - portnum)) \ - continue; \ - clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \ - portnum, 1); \ - a->busy_##n##_ports++; \ - *port = portnum; \ - addr->as_u32 = a->addr.as_u32; \ - return 0; \ - } - break; - foreach_snat_protocol -#undef _ - default: - clib_warning ("unknown protocol"); - return 1; - } + *port = k.port; + addr->as_u32 = k.addr.as_u32; } - /* Totally out of translations to use... */ - //TODO: IPFix - return 1; + return rv; } -void -nat64_free_out_addr_and_port (ip4_address_t * addr, u16 port, - snat_protocol_t proto) +static void +nat64_free_out_addr_and_port (struct nat64_db_s *db, ip4_address_t * addr, + u16 port, u8 protocol) { nat64_main_t *nm = &nat64_main; int i; snat_address_t *a; + u32 thread_index = db - nm->db; + snat_protocol_t proto = ip_proto_to_snat_proto (protocol); + u16 port_host_byte_order = clib_net_to_host_u16 (port); for (i = 0; i < vec_len (nm->addr_pool); i++) { @@ -331,21 +571,88 @@ nat64_free_out_addr_and_port (ip4_address_t * addr, u16 port, { #define _(N, j, n, s) \ case SNAT_PROTOCOL_##N: \ - ASSERT (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \ - port) == 1); \ - clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, port, 0); \ + ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \ + --a->busy_##n##_port_refcounts[port_host_byte_order]; \ a->busy_##n##_ports--; \ + a->busy_##n##_ports_per_thread[thread_index]--; \ break; foreach_snat_protocol #undef _ default: - clib_warning ("unknown protocol"); + nat_elog_notice ("unknown protocol"); return; } break; } } +/** + * @brief Add/delete static BIB entry in worker thread. + */ +static uword +nat64_static_bib_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + nat64_main_t *nm = &nat64_main; + u32 thread_index = vm->thread_index; + nat64_db_t *db = &nm->db[thread_index]; + nat64_static_bib_to_update_t *static_bib; + nat64_db_bib_entry_t *bibe; + ip46_address_t addr; + + /* *INDENT-OFF* */ + pool_foreach (static_bib, nm->static_bibs, + ({ + if ((static_bib->thread_index != thread_index) || (static_bib->done)) + continue; + + if (static_bib->is_add) + { + (void) nat64_db_bib_entry_create (thread_index, db, + &static_bib->in_addr, + &static_bib->out_addr, + static_bib->in_port, + static_bib->out_port, + static_bib->fib_index, + static_bib->proto, 1); + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); + } + else + { + addr.as_u64[0] = static_bib->in_addr.as_u64[0]; + addr.as_u64[1] = static_bib->in_addr.as_u64[1]; + bibe = nat64_db_bib_entry_find (db, &addr, static_bib->in_port, + static_bib->proto, + static_bib->fib_index, 1); + if (bibe) + { + nat64_db_bib_entry_free (thread_index, db, bibe); + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); + vlib_set_simple_counter (&nm->total_sessions, thread_index, 0, + db->st.st_entries_num); + } + } + + static_bib->done = 1; + })); + /* *INDENT-ON* */ + + return 0; +} + +static vlib_node_registration_t nat64_static_bib_worker_node; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat64_static_bib_worker_node, static) = { + .function = nat64_static_bib_worker_fn, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .name = "nat64-static-bib-worker", +}; +/* *INDENT-ON* */ + int nat64_add_del_static_bib_entry (ip6_address_t * in_addr, ip4_address_t * out_addr, u16 in_port, @@ -353,17 +660,30 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr, { nat64_main_t *nm = &nat64_main; nat64_db_bib_entry_t *bibe; - u32 fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id); + u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + nat_fib_src_hi); snat_protocol_t p = ip_proto_to_snat_proto (proto); ip46_address_t addr; int i; snat_address_t *a; + u32 thread_index = 0; + nat64_db_t *db; + nat64_static_bib_to_update_t *static_bib; + vlib_main_t *worker_vm; + u32 *to_be_free = 0, *index; + + if (nm->sm->num_workers > 1) + { + thread_index = nat64_get_worker_in2out (in_addr); + db = &nm->db[thread_index]; + } + else + db = &nm->db[nm->sm->num_workers]; addr.as_u64[0] = in_addr->as_u64[0]; addr.as_u64[1] = in_addr->as_u64[1]; bibe = - nat64_db_bib_entry_find (&nm->db, &addr, clib_host_to_net_u16 (in_port), + nat64_db_bib_entry_find (db, &addr, clib_host_to_net_u16 (in_port), proto, fib_index, 1); if (is_add) @@ -371,6 +691,13 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr, if (bibe) return VNET_API_ERROR_VALUE_EXIST; + /* outside port must be assigned to same thread as internall address */ + if ((out_port > 1024) && (nm->sm->num_workers > 1)) + { + if (thread_index != ((out_port - 1024) / nm->sm->port_per_thread)) + return VNET_API_ERROR_INVALID_VALUE_2; + } + for (i = 0; i < vec_len (nm->addr_pool); i++) { a = nm->addr_pool + i; @@ -380,40 +707,81 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr, { #define _(N, j, n, s) \ case SNAT_PROTOCOL_##N: \ - if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \ - out_port)) \ + if (a->busy_##n##_port_refcounts[out_port]) \ return VNET_API_ERROR_INVALID_VALUE; \ - clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \ - out_port, 1); \ + ++a->busy_##n##_port_refcounts[out_port]; \ if (out_port > 1024) \ - a->busy_##n##_ports++; \ + { \ + a->busy_##n##_ports++; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + } \ break; foreach_snat_protocol #undef _ default: - memset (&addr, 0, sizeof (addr)); + clib_memset (&addr, 0, sizeof (addr)); addr.ip4.as_u32 = out_addr->as_u32; - if (nat64_db_bib_entry_find - (&nm->db, &addr, 0, proto, fib_index, 0)) + if (nat64_db_bib_entry_find (db, &addr, 0, proto, fib_index, 0)) return VNET_API_ERROR_INVALID_VALUE; } break; } - bibe = - nat64_db_bib_entry_create (&nm->db, in_addr, out_addr, - clib_host_to_net_u16 (in_port), - clib_host_to_net_u16 (out_port), fib_index, - proto, 1); - if (!bibe) - return VNET_API_ERROR_UNSPECIFIED; + if (!nm->sm->num_workers) + { + bibe = + nat64_db_bib_entry_create (thread_index, db, in_addr, out_addr, + clib_host_to_net_u16 (in_port), + clib_host_to_net_u16 (out_port), + fib_index, proto, 1); + if (!bibe) + return VNET_API_ERROR_UNSPECIFIED; + + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); + } } else { if (!bibe) return VNET_API_ERROR_NO_SUCH_ENTRY; - nat64_free_out_addr_and_port (out_addr, out_port, p); - nat64_db_bib_entry_free (&nm->db, bibe); + if (!nm->sm->num_workers) + { + nat64_db_bib_entry_free (thread_index, db, bibe); + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); + } + } + + if (nm->sm->num_workers) + { + /* *INDENT-OFF* */ + pool_foreach (static_bib, nm->static_bibs, + ({ + if (static_bib->done) + vec_add1 (to_be_free, static_bib - nm->static_bibs); + })); + vec_foreach (index, to_be_free) + pool_put_index (nm->static_bibs, index[0]); + /* *INDENT-ON* */ + vec_free (to_be_free); + pool_get (nm->static_bibs, static_bib); + static_bib->in_addr.as_u64[0] = in_addr->as_u64[0]; + static_bib->in_addr.as_u64[1] = in_addr->as_u64[1]; + static_bib->in_port = clib_host_to_net_u16 (in_port); + static_bib->out_addr.as_u32 = out_addr->as_u32; + static_bib->out_port = clib_host_to_net_u16 (out_port); + static_bib->fib_index = fib_index; + static_bib->proto = proto; + static_bib->is_add = is_add; + static_bib->thread_index = thread_index; + static_bib->done = 0; + worker_vm = vlib_mains[thread_index]; + if (worker_vm) + vlib_node_set_interrupt_pending (worker_vm, + nat64_static_bib_worker_node.index); + else + return VNET_API_ERROR_UNSPECIFIED; } return 0; @@ -426,8 +794,6 @@ nat64_set_udp_timeout (u32 timeout) if (timeout == 0) nm->udp_timeout = SNAT_UDP_TIMEOUT; - else if (timeout < SNAT_UDP_TIMEOUT_MIN) - return VNET_API_ERROR_INVALID_VALUE; else nm->udp_timeout = timeout; @@ -464,7 +830,7 @@ nat64_get_icmp_timeout (void) } int -nat64_set_tcp_timeouts (u32 trans, u32 est, u32 incoming_syn) +nat64_set_tcp_timeouts (u32 trans, u32 est) { nat64_main_t *nm = &nat64_main; @@ -478,11 +844,6 @@ nat64_set_tcp_timeouts (u32 trans, u32 est, u32 incoming_syn) else nm->tcp_est_timeout = est; - if (incoming_syn == 0) - nm->tcp_incoming_syn_timeout = SNAT_TCP_INCOMING_SYN; - else - nm->tcp_incoming_syn_timeout = incoming_syn; - return 0; } @@ -502,14 +863,6 @@ nat64_get_tcp_est_timeout (void) return nm->tcp_est_timeout; } -u32 -nat64_get_tcp_incoming_syn_timeout (void) -{ - nat64_main_t *nm = &nat64_main; - - return nm->tcp_incoming_syn_timeout; -} - void nat64_session_reset_timeout (nat64_db_st_entry_t * ste, vlib_main_t * vm) { @@ -644,7 +997,8 @@ nat64_add_del_prefix (ip6_address_t * prefix, u8 plen, u32 vrf_id, u8 is_add) { vec_add2 (nm->pref64, p, 1); p->fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + nat_fib_src_hi); p->vrf_id = vrf_id; } @@ -703,8 +1057,7 @@ nat64_compose_ip6 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index) if (prefix) { - memset (ip6, 0, 16); - memcpy (ip6, &p->prefix, p->plen); + clib_memcpy_fast (ip6, &p->prefix, sizeof (ip6_address_t)); switch (p->plen) { case 32: @@ -738,13 +1091,13 @@ nat64_compose_ip6 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index) ip6->as_u32[3] = ip4->as_u32; break; default: - clib_warning ("invalid prefix length"); + nat_elog_notice ("invalid prefix length"); break; } } else { - memcpy (ip6, well_known_prefix, 16); + clib_memcpy_fast (ip6, well_known_prefix, sizeof (ip6_address_t)); ip6->as_u32[3] = ip4->as_u32; } } @@ -811,36 +1164,106 @@ nat64_extract_ip4 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index) ip4->as_u32 = ip6->as_u32[3]; break; default: - clib_warning ("invalid prefix length"); + nat_elog_notice ("invalid prefix length"); break; } } /** - * @brief The 'nat64-expire-walk' process's main loop. - * - * Check expire time for NAT64 sessions. + * @brief Per worker process checking expire time for NAT64 sessions. + */ +static uword +nat64_expire_worker_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + nat64_main_t *nm = &nat64_main; + u32 thread_index = vm->thread_index; + nat64_db_t *db = &nm->db[thread_index]; + u32 now = (u32) vlib_time_now (vm); + + nad64_db_st_free_expired (thread_index, db, now); + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); + vlib_set_simple_counter (&nm->total_sessions, thread_index, 0, + db->st.st_entries_num); + + return 0; +} + +static vlib_node_registration_t nat64_expire_worker_walk_node; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat64_expire_worker_walk_node, static) = { + .function = nat64_expire_worker_walk_fn, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .name = "nat64-expire-worker-walk", +}; +/* *INDENT-ON* */ + +static vlib_node_registration_t nat64_expire_walk_node; + +/** + * @brief Centralized process to drive per worker expire walk. */ static uword nat64_expire_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) { nat64_main_t *nm = &nat64_main; + vlib_main_t **worker_vms = 0, *worker_vm; + int i; + uword event_type, *event_data = 0; + + nm->nat64_expire_walk_node_index = nat64_expire_walk_node.index; - while (!nm->is_disabled) + if (vec_len (vlib_mains) == 0) + vec_add1 (worker_vms, vm); + else { - vlib_process_wait_for_event_or_clock (vm, 10.0); - vlib_process_get_events (vm, NULL); - u32 now = (u32) vlib_time_now (vm); + for (i = 0; i < vec_len (vlib_mains); i++) + { + worker_vm = vlib_mains[i]; + if (worker_vm) + vec_add1 (worker_vms, worker_vm); + } + } + + while (1) + { + if (nm->total_enabled_count) + { + vlib_process_wait_for_event_or_clock (vm, 10.0); + event_type = vlib_process_get_events (vm, &event_data); + } + else + { + vlib_process_wait_for_event (vm); + event_type = vlib_process_get_events (vm, &event_data); + } - nad64_db_st_free_expired (&nm->db, now); + switch (event_type) + { + case ~0: + break; + case NAT64_CLEANER_RESCHEDULE: + break; + default: + nat_elog_notice_X1 ("unknown event %d", "i4", event_type); + break; + } + + for (i = 0; i < vec_len (worker_vms); i++) + { + worker_vm = worker_vms[i]; + vlib_node_set_interrupt_pending (worker_vm, + nat64_expire_worker_walk_node.index); + } } return 0; } -static vlib_node_registration_t nat64_expire_walk_node; - /* *INDENT-OFF* */ VLIB_REGISTER_NODE (nat64_expire_walk_node, static) = { .function = nat64_expire_walk_fn,