From 51e759fd0655b6089360e1ccf2f5341704549fd4 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Thu, 7 Dec 2017 23:22:51 -0800 Subject: [PATCH] NAT64: multi-thread support (VPP-891) Change-Id: Iebf859b6d86482e4465423bad598eecf87e53ec4 Signed-off-by: Matus Fabian --- src/plugins/nat/dslite_in2out.c | 4 +- src/plugins/nat/in2out.c | 4 +- src/plugins/nat/nat.c | 122 +++++++--- src/plugins/nat/nat.h | 5 - src/plugins/nat/nat64.c | 498 ++++++++++++++++++++++++++++++---------- src/plugins/nat/nat64.h | 86 +++++-- src/plugins/nat/nat64_cli.c | 123 ++++------ src/plugins/nat/nat64_db.c | 8 +- src/plugins/nat/nat64_db.h | 7 +- src/plugins/nat/nat64_in2out.c | 485 ++++++++++++++++++++++++++------------ src/plugins/nat/nat64_out2in.c | 299 +++++++++++++++++++++--- src/plugins/nat/nat_api.c | 88 ++----- src/plugins/nat/nat_reass.c | 22 ++ src/plugins/nat/nat_reass.h | 16 ++ src/plugins/nat/out2in.c | 1 + src/scripts/vnet/nat64 | 42 ++++ src/scripts/vnet/nat64_static | 44 ++++ test/test_nat.py | 6 +- 18 files changed, 1352 insertions(+), 508 deletions(-) create mode 100644 src/scripts/vnet/nat64 create mode 100644 src/scripts/vnet/nat64_static diff --git a/src/plugins/nat/dslite_in2out.c b/src/plugins/nat/dslite_in2out.c index 54568914ce0..98b3a1618f7 100644 --- a/src/plugins/nat/dslite_in2out.c +++ b/src/plugins/nat/dslite_in2out.c @@ -108,13 +108,13 @@ slow_path (dslite_main_t * dm, dslite_session_key_t * in2out_key, if (snat_alloc_outside_address_and_port (dm->addr_pool, 0, thread_index, &out2in_key, - &s->outside_address_index, 0, dm->port_per_thread, thread_index)) + &s->outside_address_index, dm->port_per_thread, thread_index)) ASSERT (0); } else { if (snat_alloc_outside_address_and_port - (dm->addr_pool, 0, thread_index, &out2in_key, &address_index, 0, + (dm->addr_pool, 0, thread_index, &out2in_key, &address_index, dm->port_per_thread, thread_index)) { *error = DSLITE_ERROR_OUT_OF_PORTS; diff --git a/src/plugins/nat/in2out.c b/src/plugins/nat/in2out.c index 50d8f84f2dd..1052451ed0a 100755 --- a/src/plugins/nat/in2out.c +++ b/src/plugins/nat/in2out.c @@ -398,7 +398,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0, thread_index, &key1, - &address_index, sm->vrf_mode, + &address_index, sm->port_per_thread, sm->per_thread_data[thread_index].snat_thread_index)) { @@ -420,7 +420,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, /* Try to create dynamic translation */ if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0, thread_index, &key1, - &address_index, sm->vrf_mode, + &address_index, sm->port_per_thread, sm->per_thread_data[thread_index].snat_thread_index)) { diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index 3c891f274b5..ef26d2243fd 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -332,9 +332,6 @@ void snat_add_address (snat_main_t *sm, ip4_address_t *addr, u32 vrf_id) snat_interface_t *i; vlib_thread_main_t *tm = vlib_get_thread_main (); - if (vrf_id != ~0) - sm->vrf_mode = 1; - /* Check if address already exists */ vec_foreach (ap, sm->addresses) { @@ -1384,7 +1381,6 @@ nat_alloc_addr_and_port_default (snat_address_t * addresses, u32 thread_index, snat_session_key_t * k, u32 * address_indexp, - u8 vrf_mode, u16 port_per_thread, u32 snat_thread_index); @@ -1604,14 +1600,13 @@ snat_alloc_outside_address_and_port (snat_address_t * addresses, u32 thread_index, snat_session_key_t * k, u32 * address_indexp, - u8 vrf_mode, u16 port_per_thread, u32 snat_thread_index) { snat_main_t *sm = &snat_main; return sm->alloc_addr_and_port(addresses, fib_index, thread_index, k, - address_indexp, vrf_mode, port_per_thread, + address_indexp, port_per_thread, snat_thread_index); } @@ -1621,39 +1616,44 @@ nat_alloc_addr_and_port_default (snat_address_t * addresses, u32 thread_index, snat_session_key_t * k, u32 * address_indexp, - u8 vrf_mode, u16 port_per_thread, u32 snat_thread_index) { - int i; - snat_address_t *a; + int i, gi = 0; + snat_address_t *a, *ga = 0; u32 portnum; for (i = 0; i < vec_len (addresses); i++) { a = addresses + i; - if (vrf_mode && a->fib_index != ~0 && a->fib_index != fib_index) - continue; switch (k->protocol) { #define _(N, j, n, s) \ case SNAT_PROTOCOL_##N: \ if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \ { \ - while (1) \ + if (a->fib_index == fib_index) \ { \ - portnum = (port_per_thread * \ - snat_thread_index) + \ - snat_random_port(1, port_per_thread) + 1024; \ - if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, portnum)) \ - continue; \ - clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, portnum, 1); \ - a->busy_##n##_ports_per_thread[thread_index]++; \ - a->busy_##n##_ports++; \ - k->addr = a->addr; \ - k->port = clib_host_to_net_u16(portnum); \ - *address_indexp = i; \ - return 0; \ + while (1) \ + { \ + portnum = (port_per_thread * \ + snat_thread_index) + \ + snat_random_port(1, port_per_thread) + 1024; \ + if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, portnum)) \ + continue; \ + clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, portnum, 1); \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + a->busy_##n##_ports++; \ + k->addr = a->addr; \ + k->port = clib_host_to_net_u16(portnum); \ + *address_indexp = i; \ + return 0; \ + } \ + } \ + else if (a->fib_index == ~0) \ + { \ + ga = a; \ + gi = i; \ } \ } \ break; @@ -1665,6 +1665,38 @@ nat_alloc_addr_and_port_default (snat_address_t * addresses, } } + + if (ga) + { + a = ga; + switch (k->protocol) + { +#define _(N, j, n, s) \ + case SNAT_PROTOCOL_##N: \ + while (1) \ + { \ + portnum = (port_per_thread * \ + snat_thread_index) + \ + snat_random_port(1, port_per_thread) + 1024; \ + if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, portnum)) \ + continue; \ + clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, portnum, 1); \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + a->busy_##n##_ports++; \ + k->addr = a->addr; \ + k->port = clib_host_to_net_u16(portnum); \ + *address_indexp = gi; \ + return 0; \ + } + break; + foreach_snat_protocol +#undef _ + default: + clib_warning ("unknown protocol"); + return 1; + } + } + /* Totally out of translations to use... */ snat_ipfix_logging_addresses_exhausted(0); return 1; @@ -1676,7 +1708,6 @@ nat_alloc_addr_and_port_mape (snat_address_t * addresses, u32 thread_index, snat_session_key_t * k, u32 * address_indexp, - u8 vrf_mode, u16 port_per_thread, u32 snat_thread_index) { @@ -2462,6 +2493,25 @@ snat_get_worker_out2in_cb (ip4_header_t * ip0, u32 rx_fib_index0) udp = ip4_next_header (ip0); port = udp->dst_port; + if (PREDICT_FALSE (ip4_is_fragment (ip0))) + { + if (PREDICT_FALSE (nat_reass_is_drop_frag (0))) + return vlib_get_thread_index (); + + if (PREDICT_TRUE (!ip4_is_first_fragment (ip0))) + { + nat_reass_ip4_t *reass; + + reass = nat_ip4_reass_find (ip0->src_address, ip0->dst_address, + ip0->fragment_id, ip0->protocol); + + if (reass && (reass->thread_index != (u32) ~ 0)) + return reass->thread_index; + else + return vlib_get_thread_index (); + } + } + /* unknown protocol */ if (PREDICT_FALSE (proto == ~0)) { @@ -2554,6 +2604,10 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) u32 inside_vrf_id = 0; u32 static_mapping_buckets = 1024; u32 static_mapping_memory_size = 64<<20; + u32 nat64_bib_buckets = 1024; + u32 nat64_bib_memory_size = 128 << 20; + u32 nat64_st_buckets = 2048; + u32 nat64_st_memory_size = 256 << 20; u8 static_mapping_only = 0; u8 static_mapping_connection_tracking = 0; snat_main_per_thread_data_t *tsm; @@ -2588,6 +2642,17 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) } else if (unformat (input, "deterministic")) sm->deterministic = 1; + else if (unformat (input, "nat64 bib hash buckets %d", + &nat64_bib_buckets)) + ; + else if (unformat (input, "nat64 bib hash memory %d", + &nat64_bib_memory_size)) + ; + else if (unformat (input, "nat64 st hash buckets %d", &nat64_st_buckets)) + ; + else if (unformat (input, "nat64 st hash memory %d", + &nat64_st_memory_size)) + ; else return clib_error_return (0, "unknown input '%U'", format_unformat_error, input); @@ -2612,6 +2677,9 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) sm->static_mapping_only = static_mapping_only; sm->static_mapping_connection_tracking = static_mapping_connection_tracking; + nat64_set_hash(nat64_bib_buckets, nat64_bib_memory_size, nat64_st_buckets, + nat64_st_memory_size); + if (sm->deterministic) { sm->in2out_node_index = snat_det_in2out_node.index; @@ -3424,8 +3492,8 @@ done: }; VLIB_CLI_COMMAND (nat44_set_alloc_addr_and_port_alg_command, static) = { - .path = "nat44 addr-port-assignment-alg", - .short_help = "nat44 addr-port-assignment-alg []", + .path = "nat addr-port-assignment-alg", + .short_help = "nat addr-port-assignment-alg []", .function = nat44_set_alloc_addr_and_port_alg_command_fn, }; diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index 8178d76a4ac..e82c23026cb 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -274,7 +274,6 @@ typedef int nat_alloc_out_addr_and_port_function_t (snat_address_t * addresses, u32 thread_index, snat_session_key_t * k, u32 * address_indexp, - u8 vrf_mode, u16 port_per_thread, u32 snat_thread_index); @@ -355,9 +354,6 @@ typedef struct snat_main_s { u32 inside_vrf_id; u32 inside_fib_index; - /* tenant VRF aware address pool activation flag */ - u8 vrf_mode; - /* values of various timeouts */ u32 udp_timeout; u32 tcp_established_timeout; @@ -399,7 +395,6 @@ int snat_alloc_outside_address_and_port (snat_address_t * addresses, u32 thread_index, snat_session_key_t * k, u32 * address_indexp, - u8 vrf_mode, u16 port_per_thread, u32 snat_thread_index); diff --git a/src/plugins/nat/nat64.c b/src/plugins/nat/nat64.c index 0054310d18b..deeb0717b7e 100644 --- a/src/plugins/nat/nat64.c +++ b/src/plugins/nat/nat64.c @@ -19,7 +19,9 @@ #include #include +#include #include +#include nat64_main_t nat64_main; @@ -37,6 +39,17 @@ VNET_FEATURE_INIT (nat64_out2in, static) = { .node_name = "nat64-out2in", .runs_before = VNET_FEATURES ("ip4-lookup"), }; +VNET_FEATURE_INIT (nat64_in2out_handoff, static) = { + .arc_name = "ip6-unicast", + .node_name = "nat64-in2out-handoff", + .runs_before = VNET_FEATURES ("ip6-lookup"), +}; +VNET_FEATURE_INIT (nat64_out2in_handoff, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat64-out2in-handoff", + .runs_before = VNET_FEATURES ("ip4-lookup"), +}; + static u8 well_known_prefix[] = { 0x00, 0x64, 0xff, 0x9b, @@ -80,28 +93,137 @@ nat64_ip4_add_del_interface_address_cb (ip4_main_t * im, uword opaque, } } +u32 +nat64_get_worker_in2out (ip6_address_t * addr) +{ + nat64_main_t *nm = &nat64_main; + snat_main_t *sm = nm->sm; + u32 next_worker_index = nm->sm->first_worker_index; + u32 hash; + +#ifdef clib_crc32c_uses_intrinsics + hash = clib_crc32c ((u8 *) addr->as_u32, 16); +#else + u64 tmp = addr->as_u64[0] ^ addr->as_u64[1]; + hash = clib_xxhash (tmp); +#endif + + if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers)))) + next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)]; + else + next_worker_index += sm->workers[hash % _vec_len (sm->workers)]; + + return next_worker_index; +} + +u32 +nat64_get_worker_out2in (ip4_header_t * ip) +{ + nat64_main_t *nm = &nat64_main; + snat_main_t *sm = nm->sm; + udp_header_t *udp; + u16 port; + u32 proto; + + proto = ip_proto_to_snat_proto (ip->protocol); + udp = ip4_next_header (ip); + port = udp->dst_port; + + /* fragments */ + if (PREDICT_FALSE (ip4_is_fragment (ip))) + { + if (PREDICT_FALSE (nat_reass_is_drop_frag (0))) + return vlib_get_thread_index (); + + if (PREDICT_TRUE (!ip4_is_first_fragment (ip))) + { + nat_reass_ip4_t *reass; + + reass = nat_ip4_reass_find (ip->src_address, ip->dst_address, + ip->fragment_id, ip->protocol); + + if (reass && (reass->thread_index != (u32) ~ 0)) + return reass->thread_index; + else + return vlib_get_thread_index (); + } + } + + /* unknown protocol */ + if (PREDICT_FALSE (proto == ~0)) + { + nat64_db_t *db; + ip46_address_t daddr; + nat64_db_bib_entry_t *bibe; + + memset (&daddr, 0, sizeof (daddr)); + daddr.ip4.as_u32 = ip->dst_address.as_u32; + + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + bibe = nat64_db_bib_entry_find (db, &daddr, 0, ip->protocol, 0, 0); + if (bibe) + return (u32) (db - nm->db); + } + /* *INDENT-ON* */ + return vlib_get_thread_index (); + } + + /* ICMP */ + if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP)) + { + icmp46_header_t *icmp = (icmp46_header_t *) udp; + icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1); + if (!icmp_is_error_message (icmp)) + port = echo->identifier; + else + { + ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1); + proto = ip_proto_to_snat_proto (inner_ip->protocol); + void *l4_header = ip4_next_header (inner_ip); + switch (proto) + { + case SNAT_PROTOCOL_ICMP: + icmp = (icmp46_header_t *) l4_header; + echo = (icmp_echo_header_t *) (icmp + 1); + port = echo->identifier; + break; + case SNAT_PROTOCOL_UDP: + case SNAT_PROTOCOL_TCP: + port = ((tcp_udp_header_t *) l4_header)->src_port; + break; + default: + return vlib_get_thread_index (); + } + } + } + + /* worker by outside port (TCP/UDP) */ + port = clib_net_to_host_u16 (port); + if (port > 1024) + return (u32) ((port - 1024) / sm->port_per_thread); + + return vlib_get_thread_index (); +} + clib_error_t * nat64_init (vlib_main_t * vm) { nat64_main_t *nm = &nat64_main; - clib_error_t *error = 0; vlib_thread_main_t *tm = vlib_get_thread_main (); ip4_add_del_interface_address_callback_t cb4; ip4_main_t *im = &ip4_main; + vlib_node_t *error_drop_node = + vlib_get_node_by_name (vm, (u8 *) "error-drop"); - nm->is_disabled = 0; + vec_validate (nm->db, tm->n_vlib_mains - 1); - if (tm->n_vlib_mains > 1) - { - nm->is_disabled = 1; - goto error; - } + nm->sm = &snat_main; - if (nat64_db_init (&nm->db)) - { - error = clib_error_return (0, "NAT64 DB init failed"); - goto error; - } + nm->fq_in2out_index = ~0; + nm->fq_out2in_index = ~0; + nm->error_node_index = error_drop_node->index; /* set session timeouts to default values */ nm->udp_timeout = SNAT_UDP_TIMEOUT; @@ -116,8 +238,29 @@ nat64_init (vlib_main_t * vm) vec_add1 (im->add_del_interface_address_callbacks, cb4); nm->ip4_main = im; -error: - return error; + return 0; +} + +void +nat64_set_hash (u32 bib_buckets, u32 bib_memory_size, u32 st_buckets, + u32 st_memory_size) +{ + nat64_main_t *nm = &nat64_main; + nat64_db_t *db; + + nm->bib_buckets = bib_buckets; + nm->bib_memory_size = bib_memory_size; + nm->st_buckets = st_buckets; + nm->st_memory_size = st_memory_size; + + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + if (nat64_db_init (db, bib_buckets, bib_memory_size, st_buckets, + st_memory_size)) + clib_warning ("NAT64 DB init failed"); + } + /* *INDENT-ON* */ } int @@ -127,6 +270,8 @@ nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add) snat_address_t *a = 0; snat_interface_t *interface; int i; + nat64_db_t *db; + vlib_thread_main_t *tm = vlib_get_thread_main (); /* Check if address already exists */ for (i = 0; i < vec_len (nm->addr_pool); i++) @@ -145,13 +290,15 @@ nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add) vec_add2 (nm->addr_pool, a, 1); a->addr = *addr; - a->fib_index = 0; + a->fib_index = ~0; if (vrf_id != ~0) a->fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, FIB_SOURCE_PLUGIN_HI); -#define _(N, i, n, s) \ - clib_bitmap_alloc (a->busy_##n##_port_bitmap, 65535); +#define _(N, id, n, s) \ + clib_bitmap_alloc (a->busy_##n##_port_bitmap, 65535); \ + a->busy_##n##_ports = 0; \ + vec_validate_init_empty (a->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0); foreach_snat_protocol #undef _ } @@ -160,17 +307,19 @@ nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add) if (!a) return VNET_API_ERROR_NO_SUCH_ENTRY; - if (a->fib_index) + if (a->fib_index != ~0) fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_PLUGIN_HI); - #define _(N, id, n, s) \ clib_bitmap_free (a->busy_##n##_port_bitmap); foreach_snat_protocol #undef _ /* Delete sessions using address */ - nat64_db_free_out_addr (&nm->db, &a->addr); - vec_del1 (nm->addr_pool, i); + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + nat64_db_free_out_addr (db, &a->addr); + /* *INDENT-ON* */ + vec_del1 (nm->addr_pool, i); } /* Add/del external address to FIB */ @@ -300,8 +449,21 @@ nat64_add_del_interface (u32 sw_if_index, u8 is_inside, u8 is_add) /* *INDENT-ON* */ } + if (nm->sm->num_workers > 1) + { + feature_name = + is_inside ? "nat64-in2out-handoff" : "nat64-out2in-handoff"; + if (nm->fq_in2out_index == ~0) + nm->fq_in2out_index = + vlib_frame_queue_main_init (nat64_in2out_node.index, 0); + if (nm->fq_out2in_index == ~0) + nm->fq_out2in_index = + vlib_frame_queue_main_init (nat64_out2in_node.index, 0); + } + else + feature_name = is_inside ? "nat64-in2out" : "nat64-out2in"; + arc_name = is_inside ? "ip6-unicast" : "ip4-unicast"; - feature_name = is_inside ? "nat64-in2out" : "nat64-out2in"; return vnet_feature_enable_disable (arc_name, feature_name, sw_if_index, is_add, 0, 0); @@ -324,93 +486,33 @@ nat64_interfaces_walk (nat64_interface_walk_fn_t fn, void *ctx) int nat64_alloc_out_addr_and_port (u32 fib_index, snat_protocol_t proto, - ip4_address_t * addr, u16 * port) + ip4_address_t * addr, u16 * port, + u32 thread_index) { nat64_main_t *nm = &nat64_main; - snat_main_t *sm = &snat_main; - int i; - snat_address_t *a, *ga = 0; - u32 portnum; + snat_main_t *sm = nm->sm; + snat_session_key_t k; + u32 ai; + int rv; - for (i = 0; i < vec_len (nm->addr_pool); i++) - { - a = nm->addr_pool + i; - switch (proto) - { -#define _(N, j, n, s) \ - case SNAT_PROTOCOL_##N: \ - if (a->busy_##n##_ports < (65535-1024)) \ - { \ - if (a->fib_index == fib_index) \ - { \ - while (1) \ - { \ - portnum = random_u32 (&sm->random_seed); \ - portnum &= 0xFFFF; \ - if (portnum < 1024) \ - continue; \ - if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \ - portnum)) \ - continue; \ - clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \ - portnum, 1); \ - a->busy_##n##_ports++; \ - *port = portnum; \ - addr->as_u32 = a->addr.as_u32; \ - return 0; \ - } \ - } \ - else if (a->fib_index == 0) \ - ga = a; \ - } \ - break; - foreach_snat_protocol -#undef _ - default: - clib_warning ("unknown protocol"); - return 1; - } - } + k.protocol = proto; - if (ga) + rv = + sm->alloc_addr_and_port (nm->addr_pool, fib_index, thread_index, &k, &ai, + sm->port_per_thread, thread_index); + + if (!rv) { - switch (proto) - { -#define _(N, j, n, s) \ - case SNAT_PROTOCOL_##N: \ - while (1) \ - { \ - portnum = random_u32 (&sm->random_seed); \ - portnum &= 0xFFFF; \ - if (portnum < 1024) \ - continue; \ - if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \ - portnum)) \ - continue; \ - clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \ - portnum, 1); \ - a->busy_##n##_ports++; \ - *port = portnum; \ - addr->as_u32 = a->addr.as_u32; \ - return 0; \ - } - break; - foreach_snat_protocol -#undef _ - default: - clib_warning ("unknown protocol"); - return 1; - } + *port = k.port; + addr->as_u32 = k.addr.as_u32; } - /* Totally out of translations to use... */ - //TODO: IPFix - return 1; + return rv; } void nat64_free_out_addr_and_port (ip4_address_t * addr, u16 port, - snat_protocol_t proto) + snat_protocol_t proto, u32 thread_index) { nat64_main_t *nm = &nat64_main; int i; @@ -429,6 +531,7 @@ nat64_free_out_addr_and_port (ip4_address_t * addr, u16 port, port) == 1); \ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, port, 0); \ a->busy_##n##_ports--; \ + a->busy_##n##_ports_per_thread[thread_index]--; \ break; foreach_snat_protocol #undef _ @@ -440,6 +543,62 @@ nat64_free_out_addr_and_port (ip4_address_t * addr, u16 port, } } +/** + * @brief Add/delete static BIB entry in worker thread. + */ +static uword +nat64_static_bib_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + nat64_main_t *nm = &nat64_main; + u32 thread_index = vlib_get_thread_index (); + nat64_db_t *db = &nm->db[thread_index]; + nat64_static_bib_to_update_t *static_bib; + nat64_db_bib_entry_t *bibe; + ip46_address_t addr; + + /* *INDENT-OFF* */ + pool_foreach (static_bib, nm->static_bibs, + ({ + if ((static_bib->thread_index != thread_index) || (static_bib->done)) + continue; + + if (static_bib->is_add) + (void) nat64_db_bib_entry_create (db, &static_bib->in_addr, + &static_bib->out_addr, + static_bib->in_port, + static_bib->out_port, + static_bib->fib_index, + static_bib->proto, 1); + else + { + addr.as_u64[0] = static_bib->in_addr.as_u64[0]; + addr.as_u64[1] = static_bib->in_addr.as_u64[1]; + bibe = nat64_db_bib_entry_find (db, &addr, static_bib->in_port, + static_bib->proto, + static_bib->fib_index, 1); + if (bibe) + nat64_db_bib_entry_free (db, bibe); + } + + static_bib->done = 1; + })); + /* *INDENT-ON* */ + + return 0; +} + +static vlib_node_registration_t nat64_static_bib_worker_node; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat64_static_bib_worker_node, static) = { + .function = nat64_static_bib_worker_fn, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .name = "nat64-static-bib-worker", +}; +/* *INDENT-ON* */ + int nat64_add_del_static_bib_entry (ip6_address_t * in_addr, ip4_address_t * out_addr, u16 in_port, @@ -453,11 +612,24 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr, ip46_address_t addr; int i; snat_address_t *a; + u32 thread_index = 0; + nat64_db_t *db; + nat64_static_bib_to_update_t *static_bib; + vlib_main_t *worker_vm; + u32 *to_be_free = 0, *index; + + if (nm->sm->num_workers > 1) + { + thread_index = nat64_get_worker_in2out (in_addr); + db = &nm->db[thread_index]; + } + else + db = &nm->db[nm->sm->num_workers]; addr.as_u64[0] = in_addr->as_u64[0]; addr.as_u64[1] = in_addr->as_u64[1]; bibe = - nat64_db_bib_entry_find (&nm->db, &addr, clib_host_to_net_u16 (in_port), + nat64_db_bib_entry_find (db, &addr, clib_host_to_net_u16 (in_port), proto, fib_index, 1); if (is_add) @@ -465,6 +637,13 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr, if (bibe) return VNET_API_ERROR_VALUE_EXIST; + /* outside port must be assigned to same thread as internall address */ + if ((out_port > 1024) && (nm->sm->num_workers > 1)) + { + if (thread_index != ((out_port - 1024) / nm->sm->port_per_thread)) + return VNET_API_ERROR_INVALID_VALUE_2; + } + for (i = 0; i < vec_len (nm->addr_pool); i++) { a = nm->addr_pool + i; @@ -480,34 +659,73 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr, clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \ out_port, 1); \ if (out_port > 1024) \ - a->busy_##n##_ports++; \ + { \ + a->busy_##n##_ports++; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + } \ break; foreach_snat_protocol #undef _ default: memset (&addr, 0, sizeof (addr)); addr.ip4.as_u32 = out_addr->as_u32; - if (nat64_db_bib_entry_find - (&nm->db, &addr, 0, proto, fib_index, 0)) + if (nat64_db_bib_entry_find (db, &addr, 0, proto, fib_index, 0)) return VNET_API_ERROR_INVALID_VALUE; } break; } - bibe = - nat64_db_bib_entry_create (&nm->db, in_addr, out_addr, - clib_host_to_net_u16 (in_port), - clib_host_to_net_u16 (out_port), fib_index, - proto, 1); - if (!bibe) - return VNET_API_ERROR_UNSPECIFIED; + if (!nm->sm->num_workers) + { + bibe = + nat64_db_bib_entry_create (db, in_addr, out_addr, + clib_host_to_net_u16 (in_port), + clib_host_to_net_u16 (out_port), + fib_index, proto, 1); + if (!bibe) + return VNET_API_ERROR_UNSPECIFIED; + } } else { if (!bibe) return VNET_API_ERROR_NO_SUCH_ENTRY; - nat64_free_out_addr_and_port (out_addr, out_port, p); - nat64_db_bib_entry_free (&nm->db, bibe); + if (!nm->sm->num_workers) + { + nat64_free_out_addr_and_port (out_addr, out_port, p, thread_index); + nat64_db_bib_entry_free (db, bibe); + } + } + + if (nm->sm->num_workers) + { + /* *INDENT-OFF* */ + pool_foreach (static_bib, nm->static_bibs, + ({ + if (static_bib->done) + vec_add1 (to_be_free, static_bib - nm->static_bibs); + })); + vec_foreach (index, to_be_free) + pool_put_index (nm->static_bibs, index[0]); + /* *INDENT-ON* */ + vec_free (to_be_free); + pool_get (nm->static_bibs, static_bib); + static_bib->in_addr.as_u64[0] = in_addr->as_u64[0]; + static_bib->in_addr.as_u64[1] = in_addr->as_u64[1]; + static_bib->in_port = clib_host_to_net_u16 (in_port); + static_bib->out_addr.as_u32 = out_addr->as_u32; + static_bib->out_port = clib_host_to_net_u16 (out_port); + static_bib->fib_index = fib_index; + static_bib->proto = proto; + static_bib->is_add = is_add; + static_bib->thread_index = thread_index; + static_bib->done = 0; + worker_vm = vlib_mains[thread_index]; + if (worker_vm) + vlib_node_set_interrupt_pending (worker_vm, + nat64_static_bib_worker_node.index); + else + return VNET_API_ERROR_UNSPECIFIED; } return 0; @@ -911,23 +1129,65 @@ nat64_extract_ip4 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index) } /** - * @brief The 'nat64-expire-walk' process's main loop. - * - * Check expire time for NAT64 sessions. + * @brief Per worker process checking expire time for NAT64 sessions. + */ +static uword +nat64_expire_worker_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + nat64_main_t *nm = &nat64_main; + u32 thread_index = vlib_get_thread_index (); + nat64_db_t *db = &nm->db[thread_index]; + u32 now = (u32) vlib_time_now (vm); + + nad64_db_st_free_expired (db, now); + + return 0; +} + +static vlib_node_registration_t nat64_expire_worker_walk_node; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat64_expire_worker_walk_node, static) = { + .function = nat64_expire_worker_walk_fn, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .name = "nat64-expire-worker-walk", +}; +/* *INDENT-ON* */ + +/** + * @brief Centralized process to drive per worker expire walk. */ static uword nat64_expire_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) { - nat64_main_t *nm = &nat64_main; + vlib_main_t **worker_vms = 0, *worker_vm; + int i; - while (!nm->is_disabled) + if (vec_len (vlib_mains) == 0) + vec_add1 (worker_vms, vm); + else + { + for (i = 0; i < vec_len (vlib_mains); i++) + { + worker_vm = vlib_mains[i]; + if (worker_vm) + vec_add1 (worker_vms, worker_vm); + } + } + + while (1) { vlib_process_wait_for_event_or_clock (vm, 10.0); vlib_process_get_events (vm, NULL); - u32 now = (u32) vlib_time_now (vm); - - nad64_db_st_free_expired (&nm->db, now); + for (i = 0; i < vec_len (worker_vms); i++) + { + worker_vm = worker_vms[i]; + vlib_node_set_interrupt_pending (worker_vm, + nat64_expire_worker_walk_node.index); + } } return 0; diff --git a/src/plugins/nat/nat64.h b/src/plugins/nat/nat64.h index 118076705cc..0c8fd82a730 100644 --- a/src/plugins/nat/nat64.h +++ b/src/plugins/nat/nat64.h @@ -47,6 +47,19 @@ typedef struct u32 fib_index; } nat64_prefix_t; +typedef struct +{ + ip6_address_t in_addr; + u16 in_port; + ip4_address_t out_addr; + u16 out_port; + u32 fib_index; + u32 thread_index; + u8 proto; + u8 is_add; + u8 done; +} nat64_static_bib_to_update_t; + typedef struct { /** Interface pool */ @@ -61,18 +74,31 @@ typedef struct /** Pref64 vector */ nat64_prefix_t *pref64; - /** BIB and session DB */ - nat64_db_t db; + /** BIB and session DB per thread */ + nat64_db_t *db; + + /** Worker handoff */ + u32 fq_in2out_index; + u32 fq_out2in_index; + + /** Pool of static BIB entries to be added/deleted in worker threads */ + nat64_static_bib_to_update_t *static_bibs; - /* values of various timeouts */ + u32 error_node_index; + + /** config parameters */ + u32 bib_buckets; + u32 bib_memory_size; + u32 st_buckets; + u32 st_memory_size; + + /** values of various timeouts */ u32 udp_timeout; u32 icmp_timeout; u32 tcp_trans_timeout; u32 tcp_est_timeout; u32 tcp_incoming_syn_timeout; - u8 is_disabled; - ip4_main_t *ip4_main; snat_main_t *sm; } nat64_main_t; @@ -171,27 +197,30 @@ int nat64_add_del_static_bib_entry (ip6_address_t * in_addr, /** * @brief Alloce IPv4 address and port pair from NAT64 pool. * - * @param fib_index FIB index of tenant. - * @param proto L4 protocol. - * @param addr Allocated IPv4 address. - * @param port Allocated port number. + * @param fib_index FIB index of tenant. + * @param proto L4 protocol. + * @param addr Allocated IPv4 address. + * @param port Allocated port number. + * @param thread_index Thread index. * * @returns 0 on success, non-zero value otherwise. */ int nat64_alloc_out_addr_and_port (u32 fib_index, snat_protocol_t proto, - ip4_address_t * addr, u16 * port); + ip4_address_t * addr, u16 * port, + u32 thread_index); /** * @brief Free IPv4 address and port pair from NAT64 pool. * - * @param addr IPv4 address to free. - * @param port Port number to free. - * @param proto L4 protocol. + * @param addr IPv4 address to free. + * @param port Port number to free. + * @param proto L4 protocol. + * @param thread_index Thread index. * * @returns 0 on success, non-zero value otherwise. */ void nat64_free_out_addr_and_port (ip4_address_t * addr, u16 port, - snat_protocol_t proto); + snat_protocol_t proto, u32 thread_index); /** * @brief Set UDP session timeout. @@ -322,6 +351,35 @@ void nat64_compose_ip6 (ip6_address_t * ip6, ip4_address_t * ip4, void nat64_extract_ip4 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index); +/** + * @brief Set NAT64 hash tables configuration. + * + * @param bib_buckets Number of BIB hash buckets. + * @param bib_memory_size Memory size of BIB hash. + * @param st_buckets Number of session table hash buckets. + * @param st_memory_size Memory size of session table hash. + */ +void nat64_set_hash (u32 bib_buckets, u32 bib_memory_size, u32 st_buckets, + u32 st_memory_size); + +/** + * @brief Get worker thread index for NAT64 in2out. + * + * @param addr IPv6 src address. + * + * @returns worker thread index. + */ +u32 nat64_get_worker_in2out (ip6_address_t * addr); + +/** + * @brief Get worker thread index for NAT64 out2in. + * + * @param ip IPv4 header. + * + * @returns worker thread index. + */ +u32 nat64_get_worker_out2in (ip4_header_t * ip); + #define u8_ptr_add(ptr, index) (((u8 *)ptr) + index) #define u16_net_add(u, val) clib_host_to_net_u16(clib_net_to_host_u16(u) + (val)) diff --git a/src/plugins/nat/nat64_cli.c b/src/plugins/nat/nat64_cli.c index 3e15beeee3b..22bfa41eb48 100644 --- a/src/plugins/nat/nat64_cli.c +++ b/src/plugins/nat/nat64_cli.c @@ -26,7 +26,6 @@ nat64_add_del_pool_addr_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - nat64_main_t *nm = &nat64_main; unformat_input_t _line_input, *line_input = &_line_input; ip4_address_t start_addr, end_addr, this_addr; u32 start_host_order, end_host_order; @@ -35,10 +34,6 @@ nat64_add_del_pool_addr_command_fn (vlib_main_t * vm, u8 is_add = 1; clib_error_t *error = 0; - if (nm->is_disabled) - return clib_error_return (0, - "NAT64 disabled, multi thread not supported"); - /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -129,12 +124,6 @@ nat64_show_pool_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - nat64_main_t *nm = &nat64_main; - - if (nm->is_disabled) - return clib_error_return (0, - "NAT64 disabled, multi thread not supported"); - vlib_cli_output (vm, "NAT64 pool:"); nat64_pool_addr_walk (nat64_cli_pool_walk, vm); @@ -146,7 +135,6 @@ nat64_interface_feature_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - nat64_main_t *nm = &nat64_main; unformat_input_t _line_input, *line_input = &_line_input; vnet_main_t *vnm = vnet_get_main (); clib_error_t *error = 0; @@ -156,10 +144,6 @@ nat64_interface_feature_command_fn (vlib_main_t * vm, u8 is_add = 1; int i, rv; - if (nm->is_disabled) - return clib_error_return (0, - "NAT64 disabled, multi thread not supported"); - /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -279,12 +263,6 @@ nat64_show_interfaces_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - nat64_main_t *nm = &nat64_main; - - if (nm->is_disabled) - return clib_error_return (0, - "NAT64 disabled, multi thread not supported"); - vlib_cli_output (vm, "NAT64 interfaces:"); nat64_interfaces_walk (nat64_cli_interface_walk, vm); @@ -297,7 +275,6 @@ nat64_add_del_static_bib_command_fn (vlib_main_t * unformat_input_t * input, vlib_cli_command_t * cmd) { - nat64_main_t *nm = &nat64_main; unformat_input_t _line_input, *line_input = &_line_input; clib_error_t *error = 0; u8 is_add = 1; @@ -310,10 +287,6 @@ nat64_add_del_static_bib_command_fn (vlib_main_t * u8 p = 0; int rv; - if (nm->is_disabled) - return clib_error_return (0, - "NAT64 disabled, multi thread not supported"); - if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -383,6 +356,8 @@ nat64_add_del_static_bib_command_fn (vlib_main_t * clib_error_return (0, "Outside addres %U and port %u already in use.", format_ip4_address, &out_addr, out_port); goto done; + case VNET_API_ERROR_INVALID_VALUE_2: + error = clib_error_return (0, "Invalid outside port."); default: break; } @@ -436,10 +411,7 @@ nat64_show_bib_command_fn (vlib_main_t * vm, clib_error_t *error = 0; u32 proto = ~0; u8 p = 255; - - if (nm->is_disabled) - return clib_error_return (0, - "NAT64 disabled, multi thread not supported"); + nat64_db_t *db; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -462,7 +434,11 @@ nat64_show_bib_command_fn (vlib_main_t * vm, else vlib_cli_output (vm, "NAT64 %U BIB entries:", format_snat_protocol, proto); - nat64_db_bib_walk (&nm->db, p, nat64_cli_bib_walk, vm); + + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + nat64_db_bib_walk (db, p, nat64_cli_bib_walk, vm); + /* *INDENT-ON* */ done: unformat_free (line_input); @@ -474,7 +450,6 @@ static clib_error_t * nat64_set_timeouts_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - nat64_main_t *nm = &nat64_main; unformat_input_t _line_input, *line_input = &_line_input; clib_error_t *error = 0; u32 timeout, tcp_trans, tcp_est, tcp_incoming_syn; @@ -483,10 +458,6 @@ nat64_set_timeouts_command_fn (vlib_main_t * vm, unformat_input_t * input, tcp_est = nat64_get_tcp_est_timeout (); tcp_incoming_syn = nat64_get_tcp_incoming_syn_timeout (); - if (nm->is_disabled) - return clib_error_return (0, - "NAT64 disabled, multi thread not supported"); - if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -563,12 +534,6 @@ static clib_error_t * nat64_show_timeouts_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - nat64_main_t *nm = &nat64_main; - - if (nm->is_disabled) - return clib_error_return (0, - "NAT64 disabled, multi thread not supported"); - vlib_cli_output (vm, "NAT64 session timeouts:"); vlib_cli_output (vm, " UDP %usec", nat64_get_udp_timeout ()); vlib_cli_output (vm, " ICMP %usec", nat64_get_icmp_timeout ()); @@ -582,15 +547,21 @@ nat64_show_timeouts_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } +typedef struct nat64_cli_st_walk_ctx_t_ +{ + vlib_main_t *vm; + nat64_db_t *db; +} nat64_cli_st_walk_ctx_t; + static int -nat64_cli_st_walk (nat64_db_st_entry_t * ste, void *ctx) +nat64_cli_st_walk (nat64_db_st_entry_t * ste, void *arg) { - vlib_main_t *vm = ctx; - nat64_main_t *nm = &nat64_main; + nat64_cli_st_walk_ctx_t *ctx = arg; + vlib_main_t *vm = ctx->vm; nat64_db_bib_entry_t *bibe; fib_table_t *fib; - bibe = nat64_db_bib_entry_by_index (&nm->db, ste->proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (ctx->db, ste->proto, ste->bibe_index); if (!bibe) return -1; @@ -642,10 +613,10 @@ nat64_show_st_command_fn (vlib_main_t * vm, clib_error_t *error = 0; u32 proto = ~0; u8 p = 255; - - if (nm->is_disabled) - return clib_error_return (0, - "NAT64 disabled, multi thread not supported"); + nat64_db_t *db; + nat64_cli_st_walk_ctx_t ctx = { + .vm = vm, + }; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -667,7 +638,13 @@ nat64_show_st_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "NAT64 sessions:"); else vlib_cli_output (vm, "NAT64 %U sessions:", format_snat_protocol, proto); - nat64_db_st_walk (&nm->db, p, nat64_cli_st_walk, vm); + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + ctx.db = db; + nat64_db_st_walk (db, p, nat64_cli_st_walk, &ctx); + } + /* *INDENT-ON* */ done: unformat_free (line_input); @@ -679,7 +656,6 @@ static clib_error_t * nat64_add_del_prefix_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - nat64_main_t *nm = &nat64_main; vnet_main_t *vnm = vnet_get_main (); clib_error_t *error = 0; unformat_input_t _line_input, *line_input = &_line_input; @@ -689,10 +665,6 @@ nat64_add_del_prefix_command_fn (vlib_main_t * vm, unformat_input_t * input, u32 plen = 0; int rv; - if (nm->is_disabled) - return clib_error_return (0, - "NAT64 disabled, multi thread not supported"); - if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -748,26 +720,29 @@ nat64_add_del_prefix_command_fn (vlib_main_t * vm, unformat_input_t * input, fib_prefix_t fibpfx = { .fp_len = plen, .fp_proto = FIB_PROTOCOL_IP6, - .fp_addr = {.ip6 = prefix} + .fp_addr = { + .ip6 = prefix} }; if (is_add) { - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, - vrf_id, - FIB_SOURCE_PLUGIN_HI); + fib_index = + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, + vrf_id, FIB_SOURCE_PLUGIN_HI); fib_table_entry_update_one_path (fib_index, &fibpfx, FIB_SOURCE_PLUGIN_HI, - FIB_ENTRY_FLAG_NONE, DPO_PROTO_IP6, - NULL, sw_if_index, ~0, 0, NULL, - FIB_ROUTE_PATH_INTF_RX); + FIB_ENTRY_FLAG_NONE, + DPO_PROTO_IP6, NULL, + sw_if_index, ~0, 0, + NULL, FIB_ROUTE_PATH_INTF_RX); } else { fib_index = fib_table_find (FIB_PROTOCOL_IP6, vrf_id); fib_table_entry_path_remove (fib_index, &fibpfx, - FIB_SOURCE_PLUGIN_HI, DPO_PROTO_IP6, - NULL, sw_if_index, ~0, 1, + FIB_SOURCE_PLUGIN_HI, + DPO_PROTO_IP6, NULL, + sw_if_index, ~0, 1, FIB_ROUTE_PATH_INTF_RX); fib_table_unlock (fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_PLUGIN_HI); @@ -792,15 +767,10 @@ nat64_cli_prefix_walk (nat64_prefix_t * p, void *ctx) } static clib_error_t * -nat64_show_prefix_command_fn (vlib_main_t * vm, unformat_input_t * input, +nat64_show_prefix_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) { - nat64_main_t *nm = &nat64_main; - - if (nm->is_disabled) - return clib_error_return (0, - "NAT64 disabled, multi thread not supported"); - vlib_cli_output (vm, "NAT64 prefix:"); nat64_prefix_walk (nat64_cli_prefix_walk, vm); @@ -812,7 +782,6 @@ nat64_add_interface_address_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - nat64_main_t *nm = &nat64_main; vnet_main_t *vnm = vnet_get_main (); unformat_input_t _line_input, *line_input = &_line_input; u32 sw_if_index; @@ -820,9 +789,6 @@ nat64_add_interface_address_command_fn (vlib_main_t * vm, int is_add = 1; clib_error_t *error = 0; - if (nm->is_disabled) - return clib_error_return (0, - "NAT64 disabled, multi thread not supported"); /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -830,8 +796,7 @@ nat64_add_interface_address_command_fn (vlib_main_t * vm, while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { if (unformat - (line_input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index)) - ; + (line_input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index)); else if (unformat (line_input, "del")) is_add = 0; else diff --git a/src/plugins/nat/nat64_db.c b/src/plugins/nat/nat64_db.c index 008a137da10..97da672a928 100644 --- a/src/plugins/nat/nat64_db.c +++ b/src/plugins/nat/nat64_db.c @@ -19,13 +19,9 @@ #include int -nat64_db_init (nat64_db_t * db) +nat64_db_init (nat64_db_t * db, u32 bib_buckets, u32 bib_memory_size, + u32 st_buckets, u32 st_memory_size) { - u32 bib_buckets = 1024; - u32 bib_memory_size = 128 << 20; - u32 st_buckets = 2048; - u32 st_memory_size = 256 << 20; - clib_bihash_init_24_8 (&db->bib.in2out, "bib-in2out", bib_buckets, bib_memory_size); diff --git a/src/plugins/nat/nat64_db.h b/src/plugins/nat/nat64_db.h index 94d9a8bdebf..7cda8b09a72 100644 --- a/src/plugins/nat/nat64_db.h +++ b/src/plugins/nat/nat64_db.h @@ -127,10 +127,15 @@ typedef struct * @brief Initialize NAT64 DB. * * @param db NAT64 DB. + * @param bib_buckets Number of BIB hash buckets. + * @param bib_memory_size Memory size of BIB hash. + * @param st_buckets Number of session table hash buckets. + * @param st_memory_size Memory size of session table hash. * * @returns 0 on success, non-zero value otherwise. */ -int nat64_db_init (nat64_db_t * db); +int nat64_db_init (nat64_db_t * db, u32 bib_buckets, u32 bib_memory_size, + u32 st_buckets, u32 st_memory_size); /** * @brief Create new NAT64 BIB entry. diff --git a/src/plugins/nat/nat64_in2out.c b/src/plugins/nat/nat64_in2out.c index 4f94575ebcc..9f77ca33fa4 100644 --- a/src/plugins/nat/nat64_in2out.c +++ b/src/plugins/nat/nat64_in2out.c @@ -72,6 +72,7 @@ format_nat64_in2out_reass_trace (u8 * s, va_list * args) vlib_node_registration_t nat64_in2out_node; vlib_node_registration_t nat64_in2out_slowpath_node; vlib_node_registration_t nat64_in2out_reass_node; +vlib_node_registration_t nat64_in2out_handoff_node; #define foreach_nat64_in2out_error \ _(UNSUPPORTED_PROTOCOL, "unsupported protocol") \ @@ -111,6 +112,7 @@ typedef struct nat64_in2out_set_ctx_t_ { vlib_buffer_t *b; vlib_main_t *vm; + u32 thread_index; } nat64_in2out_set_ctx_t; /** @@ -152,6 +154,7 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, u8 proto = ip6->protocol; u16 sport = udp->src_port; u16 dport = udp->dst_port; + nat64_db_t *db = &nm->db[ctx->thread_index]; sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; fib_index = @@ -163,19 +166,18 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, daddr.as_u64[1] = ip6->dst_address.as_u64[1]; ste = - nat64_db_st_entry_find (&nm->db, &saddr, &daddr, sport, dport, proto, + nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto, fib_index, 1); if (ste) { - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; } else { - bibe = - nat64_db_bib_entry_find (&nm->db, &saddr, sport, proto, fib_index, 1); + bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1); if (!bibe) { @@ -183,11 +185,11 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, ip4_address_t out_addr; if (nat64_alloc_out_addr_and_port (fib_index, ip_proto_to_snat_proto (proto), &out_addr, - &out_port)) + &out_port, ctx->thread_index)) return -1; bibe = - nat64_db_bib_entry_create (&nm->db, &ip6->src_address, &out_addr, + nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr, sport, clib_host_to_net_u16 (out_port), fib_index, proto, 0); if (!bibe) @@ -196,7 +198,7 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address, + nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, dport); if (!ste) return -1; @@ -234,6 +236,7 @@ nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) ip46_address_t saddr, daddr; u32 sw_if_index, fib_index; icmp46_header_t *icmp = ip6_next_header (ip6); + nat64_db_t *db = &nm->db[ctx->thread_index]; sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; fib_index = @@ -248,13 +251,13 @@ nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) { u16 in_id = ((u16 *) (icmp))[2]; ste = - nat64_db_st_entry_find (&nm->db, &saddr, &daddr, in_id, 0, + nat64_db_st_entry_find (db, &saddr, &daddr, in_id, 0, IP_PROTOCOL_ICMP, fib_index, 1); if (ste) { bibe = - nat64_db_bib_entry_by_index (&nm->db, IP_PROTOCOL_ICMP, + nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP, ste->bibe_index); if (!bibe) return -1; @@ -262,7 +265,7 @@ nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) else { bibe = - nat64_db_bib_entry_find (&nm->db, &saddr, in_id, + nat64_db_bib_entry_find (db, &saddr, in_id, IP_PROTOCOL_ICMP, fib_index, 1); if (!bibe) @@ -270,11 +273,12 @@ nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) u16 out_id; ip4_address_t out_addr; if (nat64_alloc_out_addr_and_port - (fib_index, SNAT_PROTOCOL_ICMP, &out_addr, &out_id)) + (fib_index, SNAT_PROTOCOL_ICMP, &out_addr, &out_id, + ctx->thread_index)) return -1; bibe = - nat64_db_bib_entry_create (&nm->db, &ip6->src_address, + nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr, in_id, clib_host_to_net_u16 (out_id), fib_index, IP_PROTOCOL_ICMP, 0); @@ -284,7 +288,7 @@ nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address, + nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0); if (!ste) return -1; @@ -320,6 +324,7 @@ nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, ip46_address_t saddr, daddr; u32 sw_if_index, fib_index; u8 proto = ip6->protocol; + nat64_db_t *db = &nm->db[ctx->thread_index]; sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; fib_index = @@ -342,12 +347,12 @@ nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, return -1; ste = - nat64_db_st_entry_find (&nm->db, &daddr, &saddr, in_id, 0, proto, + nat64_db_st_entry_find (db, &daddr, &saddr, in_id, 0, proto, fib_index, 1); if (!ste) return -1; - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; @@ -366,12 +371,12 @@ nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, u16 dport = udp->dst_port; ste = - nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, proto, + nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto, fib_index, 1); if (!ste) return -1; - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; @@ -397,6 +402,7 @@ typedef struct unk_proto_st_walk_ctx_t_ ip6_address_t dst_addr; ip4_address_t out_addr; u32 fib_index; + u32 thread_index; u8 proto; } unk_proto_st_walk_ctx_t; @@ -407,11 +413,11 @@ unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg) unk_proto_st_walk_ctx_t *ctx = arg; nat64_db_bib_entry_t *bibe; ip46_address_t saddr, daddr; + nat64_db_t *db = &nm->db[ctx->thread_index]; if (ip46_address_is_equal (&ste->in_r_addr, &ctx->dst_addr)) { - bibe = - nat64_db_bib_entry_by_index (&nm->db, ste->proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, ste->proto, ste->bibe_index); if (!bibe) return -1; @@ -424,7 +430,7 @@ unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg) nat64_extract_ip4 (&ctx->dst_addr, &daddr.ip4, ctx->fib_index); if (nat64_db_st_entry_find - (&nm->db, &daddr, &saddr, 0, 0, ctx->proto, ctx->fib_index, 0)) + (db, &daddr, &saddr, 0, 0, ctx->proto, ctx->fib_index, 0)) return -1; ctx->out_addr.as_u32 = bibe->out_addr.as_u32; @@ -440,15 +446,16 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) { nat64_main_t *nm = &nat64_main; - nat64_in2out_set_ctx_t *ctx = arg; + nat64_in2out_set_ctx_t *s_ctx = arg; nat64_db_bib_entry_t *bibe; nat64_db_st_entry_t *ste; ip46_address_t saddr, daddr, addr; u32 sw_if_index, fib_index; u8 proto = ip6->protocol; int i; + nat64_db_t *db = &nm->db[s_ctx->thread_index]; - sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; + sw_if_index = vnet_buffer (s_ctx->b)->sw_if_index[VLIB_RX]; fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index); @@ -458,19 +465,17 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, daddr.as_u64[1] = ip6->dst_address.as_u64[1]; ste = - nat64_db_st_entry_find (&nm->db, &saddr, &daddr, 0, 0, proto, fib_index, - 1); + nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1); if (ste) { - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; } else { - bibe = - nat64_db_bib_entry_find (&nm->db, &saddr, 0, proto, fib_index, 1); + bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1); if (!bibe) { @@ -483,19 +488,18 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, .out_addr.as_u32 = 0, .fib_index = fib_index, .proto = proto, + .thread_index = s_ctx->thread_index, }; - nat64_db_st_walk (&nm->db, IP_PROTOCOL_TCP, unk_proto_st_walk, - &ctx); + nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx); if (!ctx.out_addr.as_u32) - nat64_db_st_walk (&nm->db, IP_PROTOCOL_UDP, unk_proto_st_walk, - &ctx); + nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx); /* Verify if out address is not already in use for protocol */ memset (&addr, 0, sizeof (addr)); addr.ip4.as_u32 = ctx.out_addr.as_u32; - if (nat64_db_bib_entry_find (&nm->db, &addr, 0, proto, 0, 0)) + if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0)) ctx.out_addr.as_u32 = 0; if (!ctx.out_addr.as_u32) @@ -503,8 +507,7 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, for (i = 0; i < vec_len (nm->addr_pool); i++) { addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32; - if (!nat64_db_bib_entry_find - (&nm->db, &addr, 0, proto, 0, 0)) + if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0)) break; } } @@ -513,7 +516,7 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, return -1; bibe = - nat64_db_bib_entry_create (&nm->db, &ip6->src_address, + nat64_db_bib_entry_create (db, &ip6->src_address, &ctx.out_addr, 0, 0, fib_index, proto, 0); if (!bibe) @@ -522,13 +525,12 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address, - &daddr.ip4, 0); + nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0); if (!ste) return -1; } - nat64_session_reset_timeout (ste, ctx->vm); + nat64_session_reset_timeout (ste, s_ctx->vm); ip4->src_address.as_u32 = bibe->out_addr.as_u32; ip4->dst_address.as_u32 = ste->out_r_addr.as_u32; @@ -540,7 +542,7 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, static int nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, - ip6_header_t * ip6) + ip6_header_t * ip6, u32 thread_index) { nat64_main_t *nm = &nat64_main; nat64_db_bib_entry_t *bibe; @@ -554,6 +556,7 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, u16 dport = udp->dst_port; u16 *checksum; ip_csum_t csum; + nat64_db_t *db = &nm->db[thread_index]; sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; fib_index = @@ -577,19 +580,18 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, csum = ip_csum_sub_even (csum, dport); ste = - nat64_db_st_entry_find (&nm->db, &saddr, &daddr, sport, dport, proto, + nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto, fib_index, 1); if (ste) { - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; } else { - bibe = - nat64_db_bib_entry_find (&nm->db, &saddr, sport, proto, fib_index, 1); + bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1); if (!bibe) { @@ -597,11 +599,11 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, ip4_address_t out_addr; if (nat64_alloc_out_addr_and_port (fib_index, ip_proto_to_snat_proto (proto), &out_addr, - &out_port)) + &out_port, thread_index)) return -1; bibe = - nat64_db_bib_entry_create (&nm->db, &ip6->src_address, &out_addr, + nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr, sport, clib_host_to_net_u16 (out_port), fib_index, proto, 0); if (!bibe) @@ -610,7 +612,7 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address, + nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, dport); if (!ste) return -1; @@ -621,32 +623,22 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, sport = udp->src_port = bibe->out_port; nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index); - memset (&saddr, 0, sizeof (saddr)); memset (&daddr, 0, sizeof (daddr)); - saddr.ip4.as_u32 = bibe->out_addr.as_u32; daddr.ip4.as_u32 = ste->out_r_addr.as_u32; - ste = - nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, proto, 0, - 0); - - if (ste) + bibe = 0; + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) { - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); - if (!bibe) - return -1; - } - else - { - bibe = nat64_db_bib_entry_find (&nm->db, &daddr, dport, proto, 0, 0); - - if (!bibe) - return -1; + bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, 0, 0); - ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6->src_address, - &saddr.ip4, sport); + if (bibe) + break; } + /* *INDENT-ON* */ + + if (!bibe) + return -1; ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0]; ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1]; @@ -665,7 +657,7 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, static int nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, - ip6_header_t * ip6) + ip6_header_t * ip6, u32 thread_index) { nat64_main_t *nm = &nat64_main; nat64_db_bib_entry_t *bibe; @@ -679,6 +671,7 @@ nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, tcp_header_t *tcp; u16 *checksum, sport, dport; ip_csum_t csum; + nat64_db_t *db = &nm->db[thread_index]; if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) return -1; @@ -718,12 +711,12 @@ nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, csum = ip_csum_sub_even (csum, dport); ste = - nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, proto, + nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto, fib_index, 1); if (!ste) return -1; - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; @@ -735,13 +728,22 @@ nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, saddr.ip4.as_u32 = ste->out_r_addr.as_u32; daddr.ip4.as_u32 = bibe->out_addr.as_u32; - ste = - nat64_db_st_entry_find (&nm->db, &saddr, &daddr, sport, dport, proto, 0, - 0); + ste = 0; + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + ste = nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto, + 0, 0); + + if (ste) + break; + } + /* *INDENT-ON* */ + if (!ste) return -1; - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; @@ -781,7 +783,7 @@ nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, static int nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, - ip6_header_t * ip6) + ip6_header_t * ip6, u32 thread_index) { nat64_main_t *nm = &nat64_main; nat64_db_bib_entry_t *bibe; @@ -790,6 +792,7 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, u32 sw_if_index, fib_index; u8 proto = ip6->protocol; int i; + nat64_db_t *db = &nm->db[thread_index]; sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; fib_index = @@ -801,19 +804,17 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, daddr.as_u64[1] = ip6->dst_address.as_u64[1]; ste = - nat64_db_st_entry_find (&nm->db, &saddr, &daddr, 0, 0, proto, fib_index, - 1); + nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1); if (ste) { - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; } else { - bibe = - nat64_db_bib_entry_find (&nm->db, &saddr, 0, proto, fib_index, 1); + bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1); if (!bibe) { @@ -826,19 +827,18 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, .out_addr.as_u32 = 0, .fib_index = fib_index, .proto = proto, + .thread_index = thread_index, }; - nat64_db_st_walk (&nm->db, IP_PROTOCOL_TCP, unk_proto_st_walk, - &ctx); + nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx); if (!ctx.out_addr.as_u32) - nat64_db_st_walk (&nm->db, IP_PROTOCOL_UDP, unk_proto_st_walk, - &ctx); + nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx); /* Verify if out address is not already in use for protocol */ memset (&addr, 0, sizeof (addr)); addr.ip4.as_u32 = ctx.out_addr.as_u32; - if (nat64_db_bib_entry_find (&nm->db, &addr, 0, proto, 0, 0)) + if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0)) ctx.out_addr.as_u32 = 0; if (!ctx.out_addr.as_u32) @@ -846,8 +846,7 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, for (i = 0; i < vec_len (nm->addr_pool); i++) { addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32; - if (!nat64_db_bib_entry_find - (&nm->db, &addr, 0, proto, 0, 0)) + if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0)) break; } } @@ -856,7 +855,7 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, return -1; bibe = - nat64_db_bib_entry_create (&nm->db, &ip6->src_address, + nat64_db_bib_entry_create (db, &ip6->src_address, &ctx.out_addr, 0, 0, fib_index, proto, 0); if (!bibe) @@ -865,8 +864,7 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address, - &daddr.ip4, 0); + nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0); if (!ste) return -1; } @@ -875,30 +873,22 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index); - memset (&saddr, 0, sizeof (saddr)); memset (&daddr, 0, sizeof (daddr)); - saddr.ip4.as_u32 = bibe->out_addr.as_u32; daddr.ip4.as_u32 = ste->out_r_addr.as_u32; - ste = nat64_db_st_entry_find (&nm->db, &daddr, &saddr, 0, 0, proto, 0, 0); - - if (ste) + bibe = 0; + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) { - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); - if (!bibe) - return -1; - } - else - { - bibe = nat64_db_bib_entry_find (&nm->db, &daddr, 0, proto, 0, 0); + bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, 0, 0); - if (!bibe) - return -1; - - ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6->src_address, - &saddr.ip4, 0); + if (bibe) + break; } + /* *INDENT-ON* */ + + if (!bibe) + return -1; ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0]; ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1]; @@ -914,6 +904,7 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, nat64_in2out_next_t next_index; u32 pkts_processed = 0; u32 stats_node_index; + u32 thread_index = vlib_get_thread_index (); stats_node_index = is_slow_path ? nat64_in2out_slowpath_node.index : nat64_in2out_node.index; @@ -952,6 +943,7 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, ctx0.b = b0; ctx0.vm = vm; + ctx0.thread_index = thread_index; next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP; @@ -974,7 +966,8 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (is_hairpinning (&ip60->dst_address)) { next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP; - if (nat64_in2out_unk_proto_hairpinning (vm, b0, ip60)) + if (nat64_in2out_unk_proto_hairpinning + (vm, b0, ip60, thread_index)) { next0 = NAT64_IN2OUT_NEXT_DROP; b0->error = @@ -1014,7 +1007,8 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (is_hairpinning (&ip60->dst_address)) { next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP; - if (nat64_in2out_icmp_hairpinning (vm, b0, ip60)) + if (nat64_in2out_icmp_hairpinning + (vm, b0, ip60, thread_index)) { next0 = NAT64_IN2OUT_NEXT_DROP; b0->error = @@ -1037,7 +1031,8 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (is_hairpinning (&ip60->dst_address)) { next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP; - if (nat64_in2out_tcp_udp_hairpinning (vm, b0, ip60)) + if (nat64_in2out_tcp_udp_hairpinning + (vm, b0, ip60, thread_index)) { next0 = NAT64_IN2OUT_NEXT_DROP; b0->error = @@ -1145,6 +1140,7 @@ typedef struct nat64_in2out_frag_set_ctx_t_ { vlib_main_t *vm; u32 sess_index; + u32 thread_index; u16 l4_offset; u8 proto; u8 first_frag; @@ -1158,12 +1154,13 @@ nat64_in2out_frag_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) nat64_db_st_entry_t *ste; nat64_db_bib_entry_t *bibe; udp_header_t *udp; + nat64_db_t *db = &nm->db[ctx->thread_index]; - ste = nat64_db_st_entry_by_index (&nm->db, ctx->proto, ctx->sess_index); + ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index); if (!ste) return -1; - bibe = nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index); if (!bibe) return -1; @@ -1213,7 +1210,8 @@ nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6, u16 dport = udp->dst_port; u16 *checksum; ip_csum_t csum; - ip46_address_t saddr, daddr; + ip46_address_t daddr; + nat64_db_t *db = &nm->db[ctx->thread_index]; if (ctx->first_frag) { @@ -1230,11 +1228,11 @@ nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6, csum = ip_csum_sub_even (csum, dport); } - ste = nat64_db_st_entry_by_index (&nm->db, ctx->proto, ctx->sess_index); + ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index); if (!ste) return -1; - bibe = nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index); if (!bibe) return -1; @@ -1245,34 +1243,22 @@ nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6, nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, bibe->fib_index); - memset (&saddr, 0, sizeof (saddr)); memset (&daddr, 0, sizeof (daddr)); - saddr.ip4.as_u32 = bibe->out_addr.as_u32; daddr.ip4.as_u32 = ste->out_r_addr.as_u32; - ste = - nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, ctx->proto, - 0, 0); - - if (ste) - { - bibe = - nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index); - if (!bibe) - return -1; - } - else + bibe = 0; + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) { - bibe = - nat64_db_bib_entry_find (&nm->db, &daddr, dport, ctx->proto, 0, 0); + bibe = nat64_db_bib_entry_find (db, &daddr, dport, ctx->proto, 0, 0); - if (!bibe) - return -1; - - ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6->src_address, - &saddr.ip4, sport); + if (bibe) + break; } + /* *INDENT-ON* */ + + if (!bibe) + return -1; ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0]; ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1]; @@ -1303,6 +1289,7 @@ nat64_in2out_reass_node_fn (vlib_main_t * vm, u32 *fragments_to_drop = 0; u32 *fragments_to_loopback = 0; nat64_main_t *nm = &nat64_main; + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -1332,6 +1319,7 @@ nat64_in2out_reass_node_fn (vlib_main_t * vm, u32 sw_if_index0, fib_index0; ip46_address_t saddr0, daddr0; nat64_in2out_frag_set_ctx_t ctx0; + nat64_db_t *db = &nm->db[thread_index]; /* speculatively enqueue b0 to the current next frame */ bi0 = from[0]; @@ -1349,6 +1337,8 @@ nat64_in2out_reass_node_fn (vlib_main_t * vm, fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index0); + ctx0.thread_index = thread_index; + if (PREDICT_FALSE (nat_reass_is_drop_frag (1))) { next0 = NAT64_IN2OUT_NEXT_DROP; @@ -1419,20 +1409,21 @@ nat64_in2out_reass_node_fn (vlib_main_t * vm, daddr0.as_u64[1] = ip60->dst_address.as_u64[1]; ste0 = - nat64_db_st_entry_find (&nm->db, &saddr0, &daddr0, + nat64_db_st_entry_find (db, &saddr0, &daddr0, udp0->src_port, udp0->dst_port, l4_protocol0, fib_index0, 1); if (!ste0) { bibe0 = - nat64_db_bib_entry_find (&nm->db, &saddr0, udp0->src_port, + nat64_db_bib_entry_find (db, &saddr0, udp0->src_port, l4_protocol0, fib_index0, 1); if (!bibe0) { u16 out_port0; ip4_address_t out_addr0; if (nat64_alloc_out_addr_and_port - (fib_index0, proto0, &out_addr0, &out_port0)) + (fib_index0, proto0, &out_addr0, &out_port0, + thread_index)) { next0 = NAT64_IN2OUT_NEXT_DROP; b0->error = @@ -1441,7 +1432,7 @@ nat64_in2out_reass_node_fn (vlib_main_t * vm, } bibe0 = - nat64_db_bib_entry_create (&nm->db, + nat64_db_bib_entry_create (db, &ip60->src_address, &out_addr0, udp0->src_port, clib_host_to_net_u16 @@ -1458,7 +1449,7 @@ nat64_in2out_reass_node_fn (vlib_main_t * vm, nat64_extract_ip4 (&ip60->dst_address, &daddr0.ip4, fib_index0); ste0 = - nat64_db_st_entry_create (&nm->db, bibe0, + nat64_db_st_entry_create (db, bibe0, &ip60->dst_address, &daddr0.ip4, udp0->dst_port); if (!ste0) @@ -1469,8 +1460,7 @@ nat64_in2out_reass_node_fn (vlib_main_t * vm, goto trace0; } } - reass0->sess_index = - nat64_db_st_entry_get_index (&nm->db, ste0); + reass0->sess_index = nat64_db_st_entry_get_index (db, ste0); nat_ip6_reass_get_frags (reass0, &fragments_to_loopback); } @@ -1590,6 +1580,213 @@ VLIB_REGISTER_NODE (nat64_in2out_reass_node) = { VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_reass_node, nat64_in2out_reass_node_fn); +typedef struct +{ + u32 next_worker_index; + u8 do_handoff; +} nat64_in2out_handoff_trace_t; + +static u8 * +format_nat64_in2out_handoff_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nat64_in2out_handoff_trace_t *t = + va_arg (*args, nat64_in2out_handoff_trace_t *); + char *m; + + m = t->do_handoff ? "next worker" : "same worker"; + s = format (s, "NAT64-IN2OUT-HANDOFF: %s %d", m, t->next_worker_index); + + return s; +} + +static inline uword +nat64_in2out_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + nat64_main_t *nm = &nat64_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0; + static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index; + static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index + = 0; + vlib_frame_queue_elt_t *hf = 0; + vlib_frame_queue_t *fq; + vlib_frame_t *f = 0, *d = 0; + int i; + u32 n_left_to_next_worker = 0, *to_next_worker = 0; + u32 next_worker_index = 0; + u32 current_worker_index = ~0; + u32 thread_index = vlib_get_thread_index (); + u32 fq_index; + u32 to_node_index; + + fq_index = nm->fq_in2out_index; + to_node_index = nat64_in2out_node.index; + + if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0)) + { + vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1); + + vec_validate_init_empty (congested_handoff_queue_by_worker_index, + tm->n_vlib_mains - 1, + (vlib_frame_queue_t *) (~0)); + } + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0; + u8 do_handoff; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + ip0 = vlib_buffer_get_current (b0); + + next_worker_index = nat64_get_worker_in2out (&ip0->src_address); + + if (PREDICT_FALSE (next_worker_index != thread_index)) + { + do_handoff = 1; + + if (next_worker_index != current_worker_index) + { + fq = + is_vlib_frame_queue_congested (fq_index, next_worker_index, + 30, + congested_handoff_queue_by_worker_index); + + if (fq) + { + /* if this is 1st frame */ + if (!d) + { + d = vlib_get_frame_to_node (vm, nm->error_node_index); + to_next_drop = vlib_frame_vector_args (d); + } + + to_next_drop[0] = bi0; + to_next_drop += 1; + d->n_vectors++; + goto trace0; + } + + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + hf = + vlib_get_worker_handoff_queue_elt (fq_index, + next_worker_index, + handoff_queue_elt_by_worker_index); + n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors; + to_next_worker = &hf->buffer_index[hf->n_vectors]; + current_worker_index = next_worker_index; + } + + /* enqueue to correct worker thread */ + to_next_worker[0] = bi0; + to_next_worker++; + n_left_to_next_worker--; + + if (n_left_to_next_worker == 0) + { + hf->n_vectors = VLIB_FRAME_SIZE; + vlib_put_frame_queue_elt (hf); + current_worker_index = ~0; + handoff_queue_elt_by_worker_index[next_worker_index] = 0; + hf = 0; + } + } + else + { + do_handoff = 0; + /* if this is 1st frame */ + if (!f) + { + f = vlib_get_frame_to_node (vm, to_node_index); + to_next = vlib_frame_vector_args (f); + } + + to_next[0] = bi0; + to_next += 1; + f->n_vectors++; + } + + trace0: + if (PREDICT_FALSE + ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + nat64_in2out_handoff_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_worker_index = next_worker_index; + t->do_handoff = do_handoff; + } + } + + if (f) + vlib_put_frame_to_node (vm, to_node_index, f); + + if (d) + vlib_put_frame_to_node (vm, nm->error_node_index, d); + + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + /* Ship frames to the worker nodes */ + for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++) + { + if (handoff_queue_elt_by_worker_index[i]) + { + hf = handoff_queue_elt_by_worker_index[i]; + /* + * It works better to let the handoff node + * rate-adapt, always ship the handoff queue element. + */ + if (1 || hf->n_vectors == hf->last_n_vectors) + { + vlib_put_frame_queue_elt (hf); + handoff_queue_elt_by_worker_index[i] = 0; + } + else + hf->last_n_vectors = hf->n_vectors; + } + congested_handoff_queue_by_worker_index[i] = + (vlib_frame_queue_t *) (~0); + } + hf = 0; + current_worker_index = ~0; + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = { + .function = nat64_in2out_handoff_node_fn, + .name = "nat64-in2out-handoff", + .vector_size = sizeof (u32), + .format_trace = format_nat64_in2out_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_handoff_node, + nat64_in2out_handoff_node_fn); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/nat/nat64_out2in.c b/src/plugins/nat/nat64_out2in.c index eb5ecb4588d..1ba73a4f84a 100644 --- a/src/plugins/nat/nat64_out2in.c +++ b/src/plugins/nat/nat64_out2in.c @@ -21,6 +21,7 @@ #include #include #include +#include typedef struct { @@ -67,6 +68,7 @@ format_nat64_out2in_reass_trace (u8 * s, va_list * args) vlib_node_registration_t nat64_out2in_node; vlib_node_registration_t nat64_out2in_reass_node; +vlib_node_registration_t nat64_out2in_handoff_node; #define foreach_nat64_out2in_error \ _(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \ @@ -94,7 +96,8 @@ static char *nat64_out2in_error_strings[] = { typedef enum { - NAT64_OUT2IN_NEXT_LOOKUP, + NAT64_OUT2IN_NEXT_IP6_LOOKUP, + NAT64_OUT2IN_NEXT_IP4_LOOKUP, NAT64_OUT2IN_NEXT_DROP, NAT64_OUT2IN_NEXT_REASS, NAT64_OUT2IN_N_NEXT, @@ -104,6 +107,7 @@ typedef struct nat64_out2in_set_ctx_t_ { vlib_buffer_t *b; vlib_main_t *vm; + u32 thread_index; } nat64_out2in_set_ctx_t; static int @@ -124,6 +128,7 @@ nat64_out2in_tcp_udp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, u32 sw_if_index, fib_index; u16 *checksum; ip_csum_t csum; + nat64_db_t *db = &nm->db[ctx->thread_index]; sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); @@ -134,26 +139,24 @@ nat64_out2in_tcp_udp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, daddr.ip4.as_u32 = ip4->dst_address.as_u32; ste = - nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, proto, + nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto, fib_index, 0); if (ste) { - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; } else { - bibe = - nat64_db_bib_entry_find (&nm->db, &daddr, dport, proto, fib_index, 0); + bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, fib_index, 0); if (!bibe) return -1; nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index); ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6_saddr, &saddr.ip4, - sport); + nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, sport); } nat64_session_reset_timeout (ste, ctx->vm); @@ -189,6 +192,7 @@ nat64_out2in_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg) ip6_address_t ip6_saddr; u32 sw_if_index, fib_index; icmp46_header_t *icmp = ip4_next_header (ip4); + nat64_db_t *db = &nm->db[ctx->thread_index]; sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); @@ -202,13 +206,13 @@ nat64_out2in_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg) { u16 out_id = ((u16 *) (icmp))[2]; ste = - nat64_db_st_entry_find (&nm->db, &daddr, &saddr, out_id, 0, + nat64_db_st_entry_find (db, &daddr, &saddr, out_id, 0, IP_PROTOCOL_ICMP, fib_index, 0); if (ste) { bibe = - nat64_db_bib_entry_by_index (&nm->db, IP_PROTOCOL_ICMP, + nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP, ste->bibe_index); if (!bibe) return -1; @@ -216,15 +220,14 @@ nat64_out2in_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg) else { bibe = - nat64_db_bib_entry_find (&nm->db, &daddr, out_id, + nat64_db_bib_entry_find (db, &daddr, out_id, IP_PROTOCOL_ICMP, fib_index, 0); if (!bibe) return -1; nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index); ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6_saddr, &saddr.ip4, - 0); + nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, 0); } nat64_session_reset_timeout (ste, ctx->vm); @@ -262,6 +265,7 @@ nat64_out2in_inner_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, ip46_address_t saddr, daddr; u32 sw_if_index, fib_index; u8 proto = ip4->protocol; + nat64_db_t *db = &nm->db[ctx->thread_index]; sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; fib_index = @@ -284,12 +288,12 @@ nat64_out2in_inner_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, return -1; ste = - nat64_db_st_entry_find (&nm->db, &saddr, &daddr, out_id, 0, proto, + nat64_db_st_entry_find (db, &saddr, &daddr, out_id, 0, proto, fib_index, 0); if (!ste) return -1; - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; @@ -311,12 +315,12 @@ nat64_out2in_inner_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, ip_csum_t csum; ste = - nat64_db_st_entry_find (&nm->db, &saddr, &daddr, sport, dport, proto, + nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto, fib_index, 0); if (!ste) return -1; - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; @@ -354,6 +358,7 @@ nat64_out2in_unk_proto_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, ip6_address_t ip6_saddr; u32 sw_if_index, fib_index; u8 proto = ip4->protocol; + nat64_db_t *db = &nm->db[ctx->thread_index]; sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); @@ -364,25 +369,22 @@ nat64_out2in_unk_proto_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, daddr.ip4.as_u32 = ip4->dst_address.as_u32; ste = - nat64_db_st_entry_find (&nm->db, &daddr, &saddr, 0, 0, proto, fib_index, - 0); + nat64_db_st_entry_find (db, &daddr, &saddr, 0, 0, proto, fib_index, 0); if (ste) { - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; } else { - bibe = - nat64_db_bib_entry_find (&nm->db, &daddr, 0, proto, fib_index, 0); + bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, fib_index, 0); if (!bibe) return -1; nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index); - ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6_saddr, &saddr.ip4, 0); + ste = nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, 0); } nat64_session_reset_timeout (ste, ctx->vm); @@ -405,6 +407,7 @@ nat64_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, u32 n_left_from, *from, *to_next; nat64_out2in_next_t next_index; u32 pkts_processed = 0; + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -423,6 +426,7 @@ nat64_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, ip4_header_t *ip40; u32 proto0; nat64_out2in_set_ctx_t ctx0; + udp_header_t *udp0; /* speculatively enqueue b0 to the current next frame */ bi0 = from[0]; @@ -437,8 +441,9 @@ nat64_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, ctx0.b = b0; ctx0.vm = vm; + ctx0.thread_index = thread_index; - next0 = NAT64_OUT2IN_NEXT_LOOKUP; + next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP; proto0 = ip_proto_to_snat_proto (ip40->protocol); @@ -473,6 +478,18 @@ nat64_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, { if (ip4_to_ip6_tcp_udp (b0, nat64_out2in_tcp_udp_set_cb, &ctx0)) { + udp0 = ip4_next_header (ip40); + /* + * Send DHCP packets to the ipv4 stack, or we won't + * be able to use dhcp client on the outside interface + */ + if ((proto0 == SNAT_PROTOCOL_UDP) + && (udp0->dst_port == + clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client))) + { + next0 = NAT64_OUT2IN_NEXT_IP4_LOOKUP; + goto trace0; + } next0 = NAT64_OUT2IN_NEXT_DROP; b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION]; goto trace0; @@ -516,7 +533,8 @@ VLIB_REGISTER_NODE (nat64_out2in_node) = { /* edit / add dispositions here */ .next_nodes = { [NAT64_OUT2IN_NEXT_DROP] = "error-drop", - [NAT64_OUT2IN_NEXT_LOOKUP] = "ip6-lookup", + [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup", + [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup", [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass", }, }; @@ -529,6 +547,7 @@ typedef struct nat64_out2in_frag_set_ctx_t_ vlib_main_t *vm; vlib_buffer_t *b; u32 sess_index; + u32 thread_index; u8 proto; u8 first_frag; } nat64_out2in_frag_set_ctx_t; @@ -543,12 +562,13 @@ nat64_out2in_frag_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg) udp_header_t *udp = ip4_next_header (ip4); ip_csum_t csum; u16 *checksum; + nat64_db_t *db = &nm->db[ctx->thread_index]; - ste = nat64_db_st_entry_by_index (&nm->db, ctx->proto, ctx->sess_index); + ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index); if (!ste) return -1; - bibe = nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index); if (!bibe) return -1; @@ -629,6 +649,7 @@ nat64_out2in_reass_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, u32 *fragments_to_drop = 0; u32 *fragments_to_loopback = 0; nat64_main_t *nm = &nat64_main; + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -655,6 +676,7 @@ nat64_out2in_reass_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, nat64_db_bib_entry_t *bibe0; ip6_address_t ip6_saddr0; nat64_out2in_frag_set_ctx_t ctx0; + nat64_db_t *db = &nm->db[thread_index]; /* speculatively enqueue b0 to the current next frame */ bi0 = from[0]; @@ -665,13 +687,15 @@ nat64_out2in_reass_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, n_left_to_next -= 1; b0 = vlib_get_buffer (vm, bi0); - next0 = NAT64_OUT2IN_NEXT_LOOKUP; + next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP; sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0); + ctx0.thread_index = thread_index; + if (PREDICT_FALSE (nat_reass_is_drop_frag (1))) { next0 = NAT64_OUT2IN_NEXT_DROP; @@ -714,13 +738,13 @@ nat64_out2in_reass_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, daddr0.ip4.as_u32 = ip40->dst_address.as_u32; ste0 = - nat64_db_st_entry_find (&nm->db, &daddr0, &saddr0, + nat64_db_st_entry_find (db, &daddr0, &saddr0, udp0->dst_port, udp0->src_port, ip40->protocol, fib_index0, 0); if (!ste0) { bibe0 = - nat64_db_bib_entry_find (&nm->db, &daddr0, udp0->dst_port, + nat64_db_bib_entry_find (db, &daddr0, udp0->dst_port, ip40->protocol, fib_index0, 0); if (!bibe0) { @@ -733,7 +757,7 @@ nat64_out2in_reass_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, nat64_compose_ip6 (&ip6_saddr0, &ip40->src_address, bibe0->fib_index); ste0 = - nat64_db_st_entry_create (&nm->db, bibe0, &ip6_saddr0, + nat64_db_st_entry_create (db, bibe0, &ip6_saddr0, &saddr0.ip4, udp0->src_port); if (!ste0) @@ -744,8 +768,8 @@ nat64_out2in_reass_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, goto trace0; } } - reass0->sess_index = - nat64_db_st_entry_get_index (&nm->db, ste0); + reass0->sess_index = nat64_db_st_entry_get_index (db, ste0); + reass0->thread_index = thread_index; nat_ip4_reass_get_frags (reass0, &fragments_to_loopback); } @@ -857,7 +881,8 @@ VLIB_REGISTER_NODE (nat64_out2in_reass_node) = { /* edit / add dispositions here */ .next_nodes = { [NAT64_OUT2IN_NEXT_DROP] = "error-drop", - [NAT64_OUT2IN_NEXT_LOOKUP] = "ip6-lookup", + [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup", + [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup", [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass", }, }; @@ -866,6 +891,212 @@ VLIB_REGISTER_NODE (nat64_out2in_reass_node) = { VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_reass_node, nat64_out2in_reass_node_fn); +typedef struct +{ + u32 next_worker_index; + u8 do_handoff; +} nat64_out2in_handoff_trace_t; + +static u8 * +format_nat64_out2in_handoff_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nat64_out2in_handoff_trace_t *t = + va_arg (*args, nat64_out2in_handoff_trace_t *); + char *m; + + m = t->do_handoff ? "next worker" : "same worker"; + s = format (s, "NAT64-OUT2IN-HANDOFF: %s %d", m, t->next_worker_index); + + return s; +} + +static inline uword +nat64_out2in_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + nat64_main_t *nm = &nat64_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0; + static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index; + static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index + = 0; + vlib_frame_queue_elt_t *hf = 0; + vlib_frame_queue_t *fq; + vlib_frame_t *f = 0, *d = 0; + int i; + u32 n_left_to_next_worker = 0, *to_next_worker = 0; + u32 next_worker_index = 0; + u32 current_worker_index = ~0; + u32 thread_index = vlib_get_thread_index (); + u32 fq_index; + u32 to_node_index; + + fq_index = nm->fq_out2in_index; + to_node_index = nat64_out2in_node.index; + + if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0)) + { + vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1); + + vec_validate_init_empty (congested_handoff_queue_by_worker_index, + tm->n_vlib_mains - 1, + (vlib_frame_queue_t *) (~0)); + } + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip4_header_t *ip0; + u8 do_handoff; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + ip0 = vlib_buffer_get_current (b0); + + next_worker_index = nat64_get_worker_out2in (ip0); + + if (PREDICT_FALSE (next_worker_index != thread_index)) + { + do_handoff = 1; + + if (next_worker_index != current_worker_index) + { + fq = + is_vlib_frame_queue_congested (fq_index, next_worker_index, + 30, + congested_handoff_queue_by_worker_index); + + if (fq) + { + /* if this is 1st frame */ + if (!d) + { + d = vlib_get_frame_to_node (vm, nm->error_node_index); + to_next_drop = vlib_frame_vector_args (d); + } + + to_next_drop[0] = bi0; + to_next_drop += 1; + d->n_vectors++; + goto trace0; + } + + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + hf = + vlib_get_worker_handoff_queue_elt (fq_index, + next_worker_index, + handoff_queue_elt_by_worker_index); + n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors; + to_next_worker = &hf->buffer_index[hf->n_vectors]; + current_worker_index = next_worker_index; + } + + /* enqueue to correct worker thread */ + to_next_worker[0] = bi0; + to_next_worker++; + n_left_to_next_worker--; + + if (n_left_to_next_worker == 0) + { + hf->n_vectors = VLIB_FRAME_SIZE; + vlib_put_frame_queue_elt (hf); + current_worker_index = ~0; + handoff_queue_elt_by_worker_index[next_worker_index] = 0; + hf = 0; + } + } + else + { + do_handoff = 0; + /* if this is 1st frame */ + if (!f) + { + f = vlib_get_frame_to_node (vm, to_node_index); + to_next = vlib_frame_vector_args (f); + } + + to_next[0] = bi0; + to_next += 1; + f->n_vectors++; + } + + trace0: + if (PREDICT_FALSE + ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + nat64_out2in_handoff_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_worker_index = next_worker_index; + t->do_handoff = do_handoff; + } + } + + if (f) + vlib_put_frame_to_node (vm, to_node_index, f); + + if (d) + vlib_put_frame_to_node (vm, nm->error_node_index, d); + + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + /* Ship frames to the worker nodes */ + for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++) + { + if (handoff_queue_elt_by_worker_index[i]) + { + hf = handoff_queue_elt_by_worker_index[i]; + /* + * It works better to let the handoff node + * rate-adapt, always ship the handoff queue element. + */ + if (1 || hf->n_vectors == hf->last_n_vectors) + { + vlib_put_frame_queue_elt (hf); + handoff_queue_elt_by_worker_index[i] = 0; + } + else + hf->last_n_vectors = hf->n_vectors; + } + congested_handoff_queue_by_worker_index[i] = + (vlib_frame_queue_t *) (~0); + } + hf = 0; + current_worker_index = ~0; + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat64_out2in_handoff_node) = { + .function = nat64_out2in_handoff_node_fn, + .name = "nat64-out2in-handoff", + .vector_size = sizeof (u32), + .format_trace = format_nat64_out2in_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_handoff_node, + nat64_out2in_handoff_node_fn); /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/nat/nat_api.c b/src/plugins/nat/nat_api.c index 3c58097e332..f1d9ec83af3 100644 --- a/src/plugins/nat/nat_api.c +++ b/src/plugins/nat/nat_api.c @@ -1776,7 +1776,6 @@ static void { vl_api_nat64_add_del_pool_addr_range_reply_t *rmp; snat_main_t *sm = &snat_main; - nat64_main_t *nm = &nat64_main; int rv = 0; ip4_address_t this_addr; u32 start_host_order, end_host_order; @@ -1784,12 +1783,6 @@ static void int i, count; u32 *tmp; - if (nm->is_disabled) - { - rv = VNET_API_ERROR_FEATURE_DISABLED; - goto send_reply; - } - tmp = (u32 *) mp->start_addr; start_host_order = clib_host_to_net_u32 (tmp[0]); tmp = (u32 *) mp->end_addr; @@ -1831,6 +1824,7 @@ typedef struct nat64_api_walk_ctx_t_ { unix_shared_memory_queue_t *q; u32 context; + nat64_db_t *db; } nat64_api_walk_ctx_t; static int @@ -1864,10 +1858,6 @@ static void vl_api_nat64_pool_addr_dump_t_handler (vl_api_nat64_pool_addr_dump_t * mp) { unix_shared_memory_queue_t *q; - nat64_main_t *nm = &nat64_main; - - if (nm->is_disabled) - return; q = vl_api_client_index_to_input_queue (mp->client_index); if (q == 0) @@ -1897,16 +1887,9 @@ vl_api_nat64_add_del_interface_t_handler (vl_api_nat64_add_del_interface_t * mp) { snat_main_t *sm = &snat_main; - nat64_main_t *nm = &nat64_main; vl_api_nat64_add_del_interface_reply_t *rmp; int rv = 0; - if (nm->is_disabled) - { - rv = VNET_API_ERROR_FEATURE_DISABLED; - goto send_reply; - } - VALIDATE_SW_IF_INDEX (mp); rv = @@ -1915,7 +1898,6 @@ vl_api_nat64_add_del_interface_t_handler (vl_api_nat64_add_del_interface_t * BAD_SW_IF_INDEX_LABEL; -send_reply: REPLY_MACRO (VL_API_NAT64_ADD_DEL_INTERFACE_REPLY); } @@ -1958,10 +1940,6 @@ static void vl_api_nat64_interface_dump_t_handler (vl_api_nat64_interface_dump_t * mp) { unix_shared_memory_queue_t *q; - nat64_main_t *nm = &nat64_main; - - if (nm->is_disabled) - return; q = vl_api_client_index_to_input_queue (mp->client_index); if (q == 0) @@ -1991,18 +1969,11 @@ static void (vl_api_nat64_add_del_static_bib_t * mp) { snat_main_t *sm = &snat_main; - nat64_main_t *nm = &nat64_main; vl_api_nat64_add_del_static_bib_reply_t *rmp; ip6_address_t in_addr; ip4_address_t out_addr; int rv = 0; - if (nm->is_disabled) - { - rv = VNET_API_ERROR_FEATURE_DISABLED; - goto send_reply; - } - memcpy (&in_addr.as_u8, mp->i_addr, 16); memcpy (&out_addr.as_u8, mp->o_addr, 4); @@ -2014,7 +1985,6 @@ static void clib_net_to_host_u32 (mp->vrf_id), mp->is_add); -send_reply: REPLY_MACRO (VL_API_NAT64_ADD_DEL_STATIC_BIB_REPLY); } @@ -2069,9 +2039,7 @@ vl_api_nat64_bib_dump_t_handler (vl_api_nat64_bib_dump_t * mp) { unix_shared_memory_queue_t *q; nat64_main_t *nm = &nat64_main; - - if (nm->is_disabled) - return; + nat64_db_t *db; q = vl_api_client_index_to_input_queue (mp->client_index); if (q == 0) @@ -2082,7 +2050,10 @@ vl_api_nat64_bib_dump_t_handler (vl_api_nat64_bib_dump_t * mp) .context = mp->context, }; - nat64_db_bib_walk (&nm->db, mp->proto, nat64_api_bib_walk, &ctx); + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + nat64_db_bib_walk (db, mp->proto, nat64_api_bib_walk, &ctx); + /* *INDENT-ON* */ } static void * @@ -2099,16 +2070,9 @@ static void vl_api_nat64_set_timeouts_t_handler (vl_api_nat64_set_timeouts_t * mp) { snat_main_t *sm = &snat_main; - nat64_main_t *nm = &nat64_main; vl_api_nat64_set_timeouts_reply_t *rmp; int rv = 0; - if (nm->is_disabled) - { - rv = VNET_API_ERROR_FEATURE_DISABLED; - goto send_reply; - } - rv = nat64_set_icmp_timeout (ntohl (mp->icmp)); if (rv) goto send_reply; @@ -2142,13 +2106,9 @@ static void vl_api_nat64_get_timeouts_t_handler (vl_api_nat64_get_timeouts_t * mp) { snat_main_t *sm = &snat_main; - nat64_main_t *nm = &nat64_main; vl_api_nat64_get_timeouts_reply_t *rmp; int rv = 0; - if (nm->is_disabled) - return; - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_NAT64_GET_TIMEOUTS_REPLY, ({ @@ -2177,11 +2137,10 @@ nat64_api_st_walk (nat64_db_st_entry_t * ste, void *arg) vl_api_nat64_st_details_t *rmp; snat_main_t *sm = &snat_main; nat64_api_walk_ctx_t *ctx = arg; - nat64_main_t *nm = &nat64_main; nat64_db_bib_entry_t *bibe; fib_table_t *fib; - bibe = nat64_db_bib_entry_by_index (&nm->db, ste->proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (ctx->db, ste->proto, ste->bibe_index); if (!bibe) return -1; @@ -2213,9 +2172,7 @@ vl_api_nat64_st_dump_t_handler (vl_api_nat64_st_dump_t * mp) { unix_shared_memory_queue_t *q; nat64_main_t *nm = &nat64_main; - - if (nm->is_disabled) - return; + nat64_db_t *db; q = vl_api_client_index_to_input_queue (mp->client_index); if (q == 0) @@ -2226,7 +2183,13 @@ vl_api_nat64_st_dump_t_handler (vl_api_nat64_st_dump_t * mp) .context = mp->context, }; - nat64_db_st_walk (&nm->db, mp->proto, nat64_api_st_walk, &ctx); + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + ctx.db = db; + nat64_db_st_walk (db, mp->proto, nat64_api_st_walk, &ctx); + } + /* *INDENT-ON* */ } static void * @@ -2244,22 +2207,14 @@ vl_api_nat64_add_del_prefix_t_handler (vl_api_nat64_add_del_prefix_t * mp) { vl_api_nat64_add_del_prefix_reply_t *rmp; snat_main_t *sm = &snat_main; - nat64_main_t *nm = &nat64_main; ip6_address_t prefix; int rv = 0; - if (nm->is_disabled) - { - rv = VNET_API_ERROR_FEATURE_DISABLED; - goto send_reply; - } - memcpy (&prefix.as_u8, mp->prefix, 16); rv = nat64_add_del_prefix (&prefix, mp->prefix_len, clib_net_to_host_u32 (mp->vrf_id), mp->is_add); -send_reply: REPLY_MACRO (VL_API_NAT64_ADD_DEL_PREFIX_REPLY); } @@ -2300,10 +2255,6 @@ static void vl_api_nat64_prefix_dump_t_handler (vl_api_nat64_prefix_dump_t * mp) { unix_shared_memory_queue_t *q; - nat64_main_t *nm = &nat64_main; - - if (nm->is_disabled) - return; q = vl_api_client_index_to_input_queue (mp->client_index); if (q == 0) @@ -2332,24 +2283,17 @@ static void vl_api_nat64_add_del_interface_addr_t_handler (vl_api_nat64_add_del_interface_addr_t * mp) { - nat64_main_t *nm = &nat64_main; snat_main_t *sm = &snat_main; vl_api_nat64_add_del_interface_addr_reply_t *rmp; u32 sw_if_index = ntohl (mp->sw_if_index); int rv = 0; - if (nm->is_disabled) - { - rv = VNET_API_ERROR_FEATURE_DISABLED; - goto send_reply; - } - VALIDATE_SW_IF_INDEX (mp); rv = nat64_add_interface_address (sw_if_index, mp->is_add); BAD_SW_IF_INDEX_LABEL; -send_reply: + REPLY_MACRO (VL_API_NAT64_ADD_DEL_INTERFACE_ADDR_REPLY); } diff --git a/src/plugins/nat/nat_reass.c b/src/plugins/nat/nat_reass.c index 239bc70d836..a97d8f017e9 100644 --- a/src/plugins/nat/nat_reass.c +++ b/src/plugins/nat/nat_reass.c @@ -194,6 +194,27 @@ nat_ip4_reass_lookup (nat_reass_ip4_key_t * k, f64 now) return 0; } +nat_reass_ip4_t * +nat_ip4_reass_find (ip4_address_t src, ip4_address_t dst, u16 frag_id, + u8 proto) +{ + nat_reass_main_t *srm = &nat_reass_main; + nat_reass_ip4_t *reass = 0; + nat_reass_ip4_key_t k; + f64 now = vlib_time_now (srm->vlib_main); + + k.src.as_u32 = src.as_u32; + k.dst.as_u32 = dst.as_u32; + k.frag_id = frag_id; + k.proto = proto; + + clib_spinlock_lock_if_init (&srm->ip4_reass_lock); + reass = nat_ip4_reass_lookup (&k, now); + clib_spinlock_unlock_if_init (&srm->ip4_reass_lock); + + return reass; +} + nat_reass_ip4_t * nat_ip4_reass_find_or_create (ip4_address_t src, ip4_address_t dst, u16 frag_id, u8 proto, u8 reset_timeout, @@ -282,6 +303,7 @@ nat_ip4_reass_find_or_create (ip4_address_t src, ip4_address_t dst, reass->key.as_u64[1] = kv.key[1] = k.as_u64[1]; kv.value = reass - srm->ip4_reass_pool; reass->sess_index = (u32) ~ 0; + reass->thread_index = (u32) ~ 0; reass->last_heard = now; if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 1)) diff --git a/src/plugins/nat/nat_reass.h b/src/plugins/nat/nat_reass.h index ae14a9604aa..4a9137e3255 100644 --- a/src/plugins/nat/nat_reass.h +++ b/src/plugins/nat/nat_reass.h @@ -53,6 +53,7 @@ typedef CLIB_PACKED(struct nat_reass_ip4_key_t key; u32 lru_list_index; u32 sess_index; + u32 thread_index; f64 last_heard; u32 frags_per_reass_list_head_index; u8 frag_n; @@ -184,6 +185,20 @@ u8 nat_reass_is_drop_frag (u8 is_ip6); */ clib_error_t *nat_reass_init (vlib_main_t * vm); +/** + * @brief Find reassembly. + * + * @param src Source IPv4 address. + * @param dst Destination IPv4 address. + * @param frag_id Fragment ID. + * @param proto L4 protocol. + * + * @returns Reassembly data or 0 if not found. + */ +nat_reass_ip4_t *nat_ip4_reass_find (ip4_address_t src, + ip4_address_t dst, + u16 frag_id, u8 proto); + /** * @brief Find or create reassembly. * @@ -201,6 +216,7 @@ nat_reass_ip4_t *nat_ip4_reass_find_or_create (ip4_address_t src, u16 frag_id, u8 proto, u8 reset_timeout, u32 ** bi_to_drop); + /** * @brief Cache fragment. * diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c index 489afadb4a2..15f6f556d73 100755 --- a/src/plugins/nat/out2in.c +++ b/src/plugins/nat/out2in.c @@ -1701,6 +1701,7 @@ nat44_out2in_reass_node_fn (vlib_main_t * vm, goto trace0; } reass0->sess_index = s0 - per_thread_data->sessions; + reass0->thread_index = thread_index; } else { diff --git a/src/scripts/vnet/nat64 b/src/scripts/vnet/nat64 new file mode 100644 index 00000000000..f0d98ea8c69 --- /dev/null +++ b/src/scripts/vnet/nat64 @@ -0,0 +1,42 @@ +create packet-generator interface pg0 +create packet-generator interface pg1 + +packet-generator new { + name f1 + limit 10 + node ip6-input + size 64-64 + no-recycle + worker 0 + interface pg0 + data { + UDP: 2000::3 -> 64:ff9b::ac10:102 + UDP: 3000 -> 3001 + length 128 checksum 0 incrementing 1 + } +} + + +packet-generator new { + name f2 + limit 10 + node ip6-input + size 64-64 + no-recycle + interface pg0 + worker 1 + data { + UDP: 2000::3 -> 64:ff9b::ac10:102 + UDP: 3005 -> 3006 + length 128 checksum 0 incrementing 1 + } +} + +nat64 add pool address 172.16.1.3 +set int ip address pg0 2000::1/64 +set int ip address pg1 172.16.1.1/24 +set int state pg0 up +set int state pg1 up +set ip6 neighbor pg0 2000::3 abcd.abcd.abcd static +set ip arp static pg1 172.16.1.2 cdef.abcd.abcd +set interface nat64 in pg0 out pg1 diff --git a/src/scripts/vnet/nat64_static b/src/scripts/vnet/nat64_static new file mode 100644 index 00000000000..ff5e8dbb2c7 --- /dev/null +++ b/src/scripts/vnet/nat64_static @@ -0,0 +1,44 @@ +create packet-generator interface pg0 +create packet-generator interface pg1 + +packet-generator new { + name f1 + limit 10 + node ip4-input + size 64-64 + no-recycle + worker 0 + interface pg1 + data { + UDP: 172.16.1.2 -> 172.16.1.3 + UDP: 3000 -> 37678 + length 128 checksum 0 incrementing 1 + } +} + + +packet-generator new { + name f2 + limit 10 + node ip4-input + size 64-64 + no-recycle + interface pg1 + worker 1 + data { + UDP: 172.16.1.2 -> 172.16.1.3 + UDP: 3005 -> 38678 + length 128 checksum 0 incrementing 1 + } +} + +nat64 add pool address 172.16.1.3 +set int ip address pg0 2000::1/64 +set int ip address pg1 172.16.1.1/24 +set int state pg0 up +set int state pg1 up +set ip6 neighbor pg0 2000::3 abcd.abcd.abcd static +set ip arp static pg1 172.16.1.2 cdef.abcd.abcd +set interface nat64 in pg0 out pg1 +nat64 add static bib 2000::3 3001 172.16.1.3 37678 udp +nat64 add static bib 2000::3 3006 172.16.1.3 38678 udp diff --git a/test/test_nat.py b/test/test_nat.py index e7723b1df0e..36163297961 100644 --- a/test/test_nat.py +++ b/test/test_nat.py @@ -2056,7 +2056,7 @@ class TestNAT44(MethodHolder): nat_ip2 = "10.0.0.11" self.nat44_add_address(nat_ip1) - self.nat44_add_address(nat_ip2) + self.nat44_add_address(nat_ip2, vrf_id=99) self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index) self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index) self.vapi.nat44_interface_add_del_feature(self.pg2.sw_if_index, @@ -2832,7 +2832,7 @@ class TestNAT44(MethodHolder): self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index) self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index, is_inside=0) - self.vapi.cli("nat44 addr-port-assignment-alg map-e psid 10 " + self.vapi.cli("nat addr-port-assignment-alg map-e psid 10 " "psid-offset 6 psid-len 6") p = (Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) / @@ -2862,7 +2862,7 @@ class TestNAT44(MethodHolder): if not self.vpp_dead: self.logger.info(self.vapi.cli("show nat44 verbose")) self.logger.info(self.vapi.cli("show nat virtual-reassembly")) - self.vapi.cli("nat44 addr-port-assignment-alg default") + self.vapi.cli("nat addr-port-assignment-alg default") self.clear_nat44() -- 2.16.6