From 8ebe62536223e5a8d827b2b870cbd57aa34fd7ef Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Mon, 6 Nov 2017 05:04:53 -0800 Subject: [PATCH] NAT: DS-Lite (VPP-1040) Dual-Stack Lite enables a broadband service provider to share IPv4 addresses among customers by combining two well-known technologies: IPv4-in-IPv6 and NAT. Change-Id: I039740f8548c623cd1ac89b8ecda1a6cc4aafb9c Signed-off-by: Matus Fabian --- src/plugins/nat.am | 7 +- src/plugins/nat/dslite.c | 164 ++++++++++++++ src/plugins/nat/dslite.h | 133 +++++++++++ src/plugins/nat/dslite_cli.c | 280 +++++++++++++++++++++++ src/plugins/nat/dslite_dpo.c | 80 +++++++ src/plugins/nat/dslite_dpo.h | 36 +++ src/plugins/nat/dslite_in2out.c | 490 ++++++++++++++++++++++++++++++++++++++++ src/plugins/nat/dslite_out2in.c | 297 ++++++++++++++++++++++++ src/plugins/nat/in2out.c | 18 +- src/plugins/nat/nat.api | 27 +++ src/plugins/nat/nat.c | 33 +-- src/plugins/nat/nat.h | 9 +- src/plugins/nat/nat_api.c | 86 ++++++- test/test_nat.py | 148 ++++++++++++ test/vpp_papi_provider.py | 28 +++ 15 files changed, 1812 insertions(+), 24 deletions(-) create mode 100644 src/plugins/nat/dslite.c create mode 100644 src/plugins/nat/dslite.h create mode 100644 src/plugins/nat/dslite_cli.c create mode 100644 src/plugins/nat/dslite_dpo.c create mode 100644 src/plugins/nat/dslite_dpo.h create mode 100644 src/plugins/nat/dslite_in2out.c create mode 100644 src/plugins/nat/dslite_out2in.c diff --git a/src/plugins/nat.am b/src/plugins/nat.am index b967a716c8b..add82f081c9 100644 --- a/src/plugins/nat.am +++ b/src/plugins/nat.am @@ -26,7 +26,12 @@ nat_plugin_la_SOURCES = nat/nat.c \ nat/nat64_cli.c \ nat/nat64_in2out.c \ nat/nat64_out2in.c \ - nat/nat64_db.c + nat/nat64_db.c \ + nat/dslite_dpo.c \ + nat/dslite.c \ + nat/dslite_in2out.c \ + nat/dslite_out2in.c \ + nat/dslite_cli.c API_FILES += nat/nat.api diff --git a/src/plugins/nat/dslite.c b/src/plugins/nat/dslite.c new file mode 100644 index 00000000000..c86cc8d7e1d --- /dev/null +++ 
b/src/plugins/nat/dslite.c @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include + +dslite_main_t dslite_main; + +void +dslite_init (vlib_main_t * vm) +{ + dslite_main_t *dm = &dslite_main; + vlib_thread_registration_t *tr; + vlib_thread_main_t *tm = vlib_get_thread_main (); + uword *p; + dslite_per_thread_data_t *td; + u32 translation_buckets = 1024; + u32 translation_memory_size = 128 << 20; + u32 b4_buckets = 128; + u32 b4_memory_size = 64 << 20; + + dm->first_worker_index = 0; + dm->num_workers = 0; + + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + if (p) + { + tr = (vlib_thread_registration_t *) p[0]; + if (tr) + { + dm->num_workers = tr->count; + dm->first_worker_index = tr->first_index; + } + } + + if (dm->num_workers) + dm->port_per_thread = (0xffff - 1024) / dm->num_workers; + else + dm->port_per_thread = 0xffff - 1024; + + vec_validate (dm->per_thread_data, tm->n_vlib_mains - 1); + + /* *INDENT-OFF* */ + vec_foreach (td, dm->per_thread_data) + { + clib_bihash_init_24_8 (&td->in2out, "in2out", translation_buckets, + translation_memory_size); + + clib_bihash_init_8_8 (&td->out2in, "out2in", translation_buckets, + translation_memory_size); + + clib_bihash_init_16_8 (&td->b4_hash, "b4s", b4_buckets, b4_memory_size); + } + /* *INDENT-ON* */ + + dslite_dpo_module_init (); +} + +int +dslite_set_aftr_ip6_addr (dslite_main_t * dm, 
ip6_address_t * addr) +{ + dpo_id_t dpo_v6 = DPO_INVALID; + + dslite_dpo_create (DPO_PROTO_IP6, 0, &dpo_v6); + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr.ip6.as_u64[0] = addr->as_u64[0], + .fp_addr.ip6.as_u64[1] = addr->as_u64[1], + }; + fib_table_entry_special_dpo_add (0, &pfx, FIB_SOURCE_PLUGIN_HI, + FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v6); + dpo_reset (&dpo_v6); + + dm->aftr_ip6_addr.as_u64[0] = addr->as_u64[0]; + dm->aftr_ip6_addr.as_u64[1] = addr->as_u64[1]; + return 0; +} + +int +dslite_add_del_pool_addr (dslite_main_t * dm, ip4_address_t * addr, u8 is_add) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + snat_address_t *a = 0; + int i = 0; + dpo_id_t dpo_v4 = DPO_INVALID; + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = 32, + .fp_addr.ip4.as_u32 = addr->as_u32, + }; + + for (i = 0; i < vec_len (dm->addr_pool); i++) + { + if (dm->addr_pool[i].addr.as_u32 == addr->as_u32) + { + a = dm->addr_pool + i; + break; + } + } + if (is_add) + { + if (a) + return VNET_API_ERROR_VALUE_EXIST; + vec_add2 (dm->addr_pool, a, 1); + a->addr = *addr; +#define _(N, i, n, s) \ + clib_bitmap_alloc (a->busy_##n##_port_bitmap, 65535); \ + a->busy_##n##_ports = 0; \ + vec_validate_init_empty (a->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0); + foreach_snat_protocol +#undef _ + dslite_dpo_create (DPO_PROTO_IP4, 0, &dpo_v4); + fib_table_entry_special_dpo_add (0, &pfx, FIB_SOURCE_PLUGIN_HI, + FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v4); + dpo_reset (&dpo_v4); + } + else + { + if (!a) + return VNET_API_ERROR_NO_SUCH_ENTRY; +#define _(N, id, n, s) \ + clib_bitmap_free (a->busy_##n##_port_bitmap); \ + vec_free (a->busy_##n##_ports_per_thread); + foreach_snat_protocol +#undef _ + fib_table_entry_special_remove (0, &pfx, FIB_SOURCE_PLUGIN_HI); + vec_del1 (dm->addr_pool, i); + } + return 0; +} + +u8 * +format_dslite_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED 
(vlib_node_t * node) = va_arg (*args, vlib_node_t *); + dslite_trace_t *t = va_arg (*args, dslite_trace_t *); + + s = + format (s, "next index %d, session %d", t->next_index, t->session_index); + + return s; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/dslite.h b/src/plugins/nat/dslite.h new file mode 100644 index 00000000000..4b7089b3190 --- /dev/null +++ b/src/plugins/nat/dslite.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_dslite_h__ +#define __included_dslite_h__ + +#include +#include +#include +#include + +typedef struct +{ + union + { + struct + { + ip6_address_t softwire_id; + ip4_address_t addr; + u16 port; + u8 proto; + u8 pad; + }; + u64 as_u64[3]; + }; +} dslite_session_key_t; + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct +{ + snat_session_key_t out2in; + dslite_session_key_t in2out; + u32 per_b4_index; + u32 per_b4_list_head_index; + f64 last_heard; + u64 total_bytes; + u32 total_pkts; + u32 outside_address_index; +}) dslite_session_t; +/* *INDENT-ON* */ + +typedef struct +{ + ip6_address_t addr; + u32 sessions_per_b4_list_head_index; + u32 nsessions; +} dslite_b4_t; + +typedef struct +{ + /* Main lookup tables */ + clib_bihash_8_8_t out2in; + clib_bihash_24_8_t in2out; + + /* Find a B4 */ + clib_bihash_16_8_t b4_hash; + + /* B4 pool */ + dslite_b4_t *b4s; + + /* Session pool */ + dslite_session_t *sessions; + + /* Pool of doubly-linked list elements */ + dlist_elt_t *list_pool; +} dslite_per_thread_data_t; + +typedef struct +{ + ip6_address_t aftr_ip6_addr; + ip4_address_t aftr_ip4_addr; + dslite_per_thread_data_t *per_thread_data; + snat_address_t *addr_pool; + u32 num_workers; + u32 first_worker_index; + u16 port_per_thread; +} dslite_main_t; + +typedef struct +{ + u32 next_index; + u32 session_index; +} dslite_trace_t; + +#define foreach_dslite_error \ +_(IN2OUT, "valid in2out DS-Lite packets") \ +_(OUT2IN, "valid out2in DS-Lite packets") \ +_(NO_TRANSLATION, "no translation") \ +_(BAD_IP6_PROTOCOL, "bad ip6 protocol") \ +_(OUT_OF_PORTS, "out of ports") \ +_(UNSUPPORTED_PROTOCOL, "unsupported protocol") \ +_(BAD_ICMP_TYPE, "unsupported icmp type") \ +_(UNKNOWN, "unknown") + +typedef enum +{ +#define _(sym,str) DSLITE_ERROR_##sym, + foreach_dslite_error +#undef _ + DSLITE_N_ERROR, +} dslite_error_t; + +extern dslite_main_t dslite_main; +extern vlib_node_registration_t dslite_in2out_node; +extern vlib_node_registration_t 
dslite_in2out_slowpath_node; +extern vlib_node_registration_t dslite_out2in_node; + +void dslite_init (vlib_main_t * vm); +int dslite_set_aftr_ip6_addr (dslite_main_t * dm, ip6_address_t * addr); +int dslite_add_del_pool_addr (dslite_main_t * dm, ip4_address_t * addr, + u8 is_add); +u8 *format_dslite_trace (u8 * s, va_list * args); + +#endif /* __included_dslite_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/dslite_cli.c b/src/plugins/nat/dslite_cli.c new file mode 100644 index 00000000000..7282762ebfc --- /dev/null +++ b/src/plugins/nat/dslite_cli.c @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +static clib_error_t * +dslite_add_del_pool_addr_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + dslite_main_t *dm = &dslite_main; + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t start_addr, end_addr, this_addr; + u32 start_host_order, end_host_order; + int i, count, rv; + u8 is_add = 1; + clib_error_t *error = 0; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U - %U", + unformat_ip4_address, &start_addr, + unformat_ip4_address, &end_addr)) + ; + else if (unformat (line_input, "%U", unformat_ip4_address, &start_addr)) + end_addr = start_addr; + else if (unformat (line_input, "del")) + is_add = 0; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + start_host_order = clib_host_to_net_u32 (start_addr.as_u32); + end_host_order = clib_host_to_net_u32 (end_addr.as_u32); + + if (end_host_order < start_host_order) + { + error = clib_error_return (0, "end address less than start address"); + goto done; + } + + count = (end_host_order - start_host_order) + 1; + this_addr = start_addr; + + for (i = 0; i < count; i++) + { + rv = dslite_add_del_pool_addr (dm, &this_addr, is_add); + + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_ENTRY: + error = + clib_error_return (0, "DS-Lite pool address %U not exist.", + format_ip4_address, &this_addr); + goto done; + case VNET_API_ERROR_VALUE_EXIST: + error = + clib_error_return (0, "DS-Lite pool address %U exist.", + format_ip4_address, &this_addr); + goto done; + default: + break; + + } + increment_v4_address (&this_addr); + } + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +dslite_show_pool_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + dslite_main_t *dm = &dslite_main; + snat_address_t *ap; + + vlib_cli_output (vm, "DS-Lite pool:"); + + /* *INDENT-OFF* */ + vec_foreach (ap, dm->addr_pool) + { + vlib_cli_output (vm, "%U", format_ip4_address, &ap->addr); + } + /* *INDENT-ON* */ + return 0; +} + +static clib_error_t * +dslite_set_aftr_tunnel_addr_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + dslite_main_t *dm = 
&dslite_main; + unformat_input_t _line_input, *line_input = &_line_input; + ip6_address_t ip6_addr; + int rv; + clib_error_t *error = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U", unformat_ip6_address, &ip6_addr)) + ; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + rv = dslite_set_aftr_ip6_addr (dm, &ip6_addr); + + if (rv) + error = + clib_error_return (0, + "Set DS-Lite AFTR tunnel endpoint address failed."); + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +dslite_show_aftr_ip6_addr_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + dslite_main_t *dm = &dslite_main; + + vlib_cli_output (vm, "%U", format_ip6_address, &dm->aftr_ip6_addr); + return 0; +} + +static u8 * +format_dslite_session (u8 * s, va_list * args) +{ + dslite_session_t *session = va_arg (*args, dslite_session_t *); + u32 indent = format_get_indent (s); + + s = format (s, "%Uin %U:%u out %U:%u protocol %U\n", + format_white_space, indent + 2, + format_ip4_address, &session->in2out.addr, + clib_net_to_host_u16 (session->in2out.port), + format_ip4_address, &session->out2in.addr, + clib_net_to_host_u16 (session->out2in.port), + format_snat_protocol, session->in2out.proto); + s = format (s, "%Utotal pkts %d, total bytes %lld\n", + format_white_space, indent + 4, + session->total_pkts, session->total_bytes); + return s; +} + +static u8 * +format_dslite_b4 (u8 * s, va_list * args) +{ + dslite_per_thread_data_t *td = va_arg (*args, dslite_per_thread_data_t *); + dslite_b4_t *b4 = va_arg (*args, dslite_b4_t *); + dlist_elt_t *head, *elt; + u32 elt_index, head_index; + u32 session_index; + dslite_session_t *session; + + s = + format (s, "B4 %U %d sessions\n", format_ip6_address, 
&b4->addr, + b4->nsessions); + + if (b4->nsessions == 0) + return s; + + head_index = b4->sessions_per_b4_list_head_index; + head = pool_elt_at_index (td->list_pool, head_index); + elt_index = head->next; + elt = pool_elt_at_index (td->list_pool, elt_index); + session_index = elt->value; + while (session_index != ~0) + { + session = pool_elt_at_index (td->sessions, session_index); + s = format (s, "%U", format_dslite_session, session); + elt_index = elt->next; + elt = pool_elt_at_index (td->list_pool, elt_index); + session_index = elt->value; + } + + return s; +} + +static clib_error_t * +dslite_show_sessions_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + dslite_main_t *dm = &dslite_main; + dslite_per_thread_data_t *td; + dslite_b4_t *b4; + + /* *INDENT-OFF* */ + vec_foreach (td, dm->per_thread_data) + { + pool_foreach (b4, td->b4s, + ({ + vlib_cli_output (vm, "%U", format_dslite_b4, td, b4); + })); + } + /* *INDENT-ON* */ + + return 0; +} + +/* *INDENT-OFF* */ + +VLIB_CLI_COMMAND (dslite_add_pool_address_command, static) = { + .path = "dslite add pool address", + .short_help = "dslite add pool address [- ] " + " [del]", + .function = dslite_add_del_pool_addr_command_fn, +}; + +VLIB_CLI_COMMAND (show_dslite_pool_command, static) = { + .path = "show dslite pool", + .short_help = "show dslite pool", + .function = dslite_show_pool_command_fn, +}; + +VLIB_CLI_COMMAND (dslite_set_aftr_tunnel_addr, static) = { + .path = "dslite set aftr-tunnel-endpoint-address", + .short_help = "dslite set aftr-tunnel-endpoint-address ", + .function = dslite_set_aftr_tunnel_addr_command_fn, +}; + +VLIB_CLI_COMMAND (dslite_show_aftr_ip6_addr, static) = { + .path = "show dslite aftr-tunnel-endpoint-address", + .short_help = "show dslite aftr-tunnel-endpoint-address", + .function = dslite_show_aftr_ip6_addr_command_fn, +}; + +VLIB_CLI_COMMAND (dslite_show_sessions, static) = { + .path = "show dslite sessions", + .short_help = "show dslite 
sessions", + .function = dslite_show_sessions_command_fn, +}; + +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/dslite_dpo.c b/src/plugins/nat/dslite_dpo.c new file mode 100644 index 00000000000..376d7174858 --- /dev/null +++ b/src/plugins/nat/dslite_dpo.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +dpo_type_t dslite_dpo_type; + +void +dslite_dpo_create (dpo_proto_t dproto, u32 aftr_index, dpo_id_t * dpo) +{ + dpo_set (dpo, dslite_dpo_type, dproto, aftr_index); +} + +u8 * +format_dslite_dpo (u8 * s, va_list * args) +{ + index_t index = va_arg (*args, index_t); + CLIB_UNUSED (u32 indent) = va_arg (*args, u32); + + return (format (s, "DS-Lite: AFTR:%d", index)); +} + +static void +dslite_dpo_lock (dpo_id_t * dpo) +{ +} + +static void +dslite_dpo_unlock (dpo_id_t * dpo) +{ +} + +const static dpo_vft_t dslite_dpo_vft = { + .dv_lock = dslite_dpo_lock, + .dv_unlock = dslite_dpo_unlock, + .dv_format = format_dslite_dpo, +}; + +const static char *const dslite_ip4_nodes[] = { + "dslite-out2in", + NULL, +}; + +const static char *const dslite_ip6_nodes[] = { + "dslite-in2out", + NULL, +}; + +const static char *const *const dslite_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP4] = dslite_ip4_nodes, + [DPO_PROTO_IP6] = dslite_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +void +dslite_dpo_module_init (void) +{ + dslite_dpo_type = dpo_register_new_type (&dslite_dpo_vft, dslite_nodes); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/dslite_dpo.h b/src/plugins/nat/dslite_dpo.h new file mode 100644 index 00000000000..ead0e7de610 --- /dev/null +++ b/src/plugins/nat/dslite_dpo.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_dslite_dpo_h__ +#define __included_dslite_dpo_h__ + +#include +#include + +void dslite_dpo_create (dpo_proto_t dproto, u32 aftr_index, dpo_id_t * dpo); + +u8 *format_dslite_dpo (u8 * s, va_list * args); + +void dslite_dpo_module_init (void); + +#endif /* __included_dslite_dpo_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/dslite_in2out.c b/src/plugins/nat/dslite_in2out.c new file mode 100644 index 00000000000..9a7751ce11c --- /dev/null +++ b/src/plugins/nat/dslite_in2out.c @@ -0,0 +1,490 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include + +vlib_node_registration_t dslite_in2out_node; +vlib_node_registration_t dslite_in2out_slowpath_node; + +typedef enum +{ + DSLITE_IN2OUT_NEXT_IP4_LOOKUP, + DSLITE_IN2OUT_NEXT_IP6_LOOKUP, + DSLITE_IN2OUT_NEXT_DROP, + DSLITE_IN2OUT_NEXT_SLOWPATH, + DSLITE_IN2OUT_N_NEXT, +} dslite_in2out_next_t; + +static char *dslite_in2out_error_strings[] = { +#define _(sym,string) string, + foreach_dslite_error +#undef _ +}; + +static u32 +slow_path (dslite_main_t * dm, dslite_session_key_t * in2out_key, + dslite_session_t ** sp, u32 next, u8 * error, u32 thread_index) +{ + dslite_b4_t *b4; + clib_bihash_kv_16_8_t b4_kv, b4_value; + clib_bihash_kv_24_8_t in2out_kv; + clib_bihash_kv_8_8_t out2in_kv; + dlist_elt_t *head_elt, *oldest_elt, *elt; + u32 oldest_index; + dslite_session_t *s; + snat_session_key_t out2in_key; + u32 address_index; + + out2in_key.protocol = in2out_key->proto; + out2in_key.fib_index = 0; + + b4_kv.key[0] = in2out_key->softwire_id.as_u64[0]; + b4_kv.key[1] = in2out_key->softwire_id.as_u64[1]; + + if (clib_bihash_search_16_8 + (&dm->per_thread_data[thread_index].b4_hash, &b4_kv, &b4_value)) + { + pool_get (dm->per_thread_data[thread_index].b4s, b4); + memset (b4, 0, sizeof (*b4)); + b4->addr.as_u64[0] = in2out_key->softwire_id.as_u64[0]; + b4->addr.as_u64[1] = in2out_key->softwire_id.as_u64[1]; + + pool_get (dm->per_thread_data[thread_index].list_pool, head_elt); + b4->sessions_per_b4_list_head_index = + head_elt - dm->per_thread_data[thread_index].list_pool; + clib_dlist_init (dm->per_thread_data[thread_index].list_pool, + b4->sessions_per_b4_list_head_index); + + b4_kv.value = b4 - dm->per_thread_data[thread_index].b4s; + clib_bihash_add_del_16_8 (&dm->per_thread_data[thread_index].b4_hash, + &b4_kv, 1); + } + else + { + b4 = + pool_elt_at_index (dm->per_thread_data[thread_index].b4s, + b4_value.value); + } + + //TODO configurable quota + if (b4->nsessions >= 1000) + { + oldest_index = + clib_dlist_remove_head 
(dm->per_thread_data[thread_index].list_pool, + b4->sessions_per_b4_list_head_index); + ASSERT (oldest_index != ~0); + clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool, + b4->sessions_per_b4_list_head_index, oldest_index); + oldest_elt = + pool_elt_at_index (dm->per_thread_data[thread_index].list_pool, + oldest_index); + s = + pool_elt_at_index (dm->per_thread_data[thread_index].sessions, + oldest_elt->value); + + in2out_kv.key[0] = s->in2out.as_u64[0]; + in2out_kv.key[1] = s->in2out.as_u64[1]; + in2out_kv.key[2] = s->in2out.as_u64[2]; + clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out, + &in2out_kv, 0); + out2in_kv.key = s->out2in.as_u64; + clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in, + &out2in_kv, 0); + snat_free_outside_address_and_port (dm->addr_pool, thread_index, + &s->out2in, + s->outside_address_index); + s->outside_address_index = ~0; + + if (snat_alloc_outside_address_and_port + (dm->addr_pool, 0, thread_index, &out2in_key, + &s->outside_address_index, 0, dm->port_per_thread, thread_index)) + ASSERT (0); + } + else + { + if (snat_alloc_outside_address_and_port + (dm->addr_pool, 0, thread_index, &out2in_key, &address_index, 0, + dm->port_per_thread, thread_index)) + { + *error = DSLITE_ERROR_OUT_OF_PORTS; + return DSLITE_IN2OUT_NEXT_DROP; + } + pool_get (dm->per_thread_data[thread_index].sessions, s); + memset (s, 0, sizeof (*s)); + s->outside_address_index = address_index; + b4->nsessions++; + + pool_get (dm->per_thread_data[thread_index].list_pool, elt); + clib_dlist_init (dm->per_thread_data[thread_index].list_pool, + elt - dm->per_thread_data[thread_index].list_pool); + elt->value = s - dm->per_thread_data[thread_index].sessions; + s->per_b4_index = elt - dm->per_thread_data[thread_index].list_pool; + s->per_b4_list_head_index = b4->sessions_per_b4_list_head_index; + clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool, + s->per_b4_list_head_index, + elt - 
dm->per_thread_data[thread_index].list_pool); + } + + s->in2out = *in2out_key; + s->out2in = out2in_key; + *sp = s; + in2out_kv.key[0] = s->in2out.as_u64[0]; + in2out_kv.key[1] = s->in2out.as_u64[1]; + in2out_kv.key[2] = s->in2out.as_u64[2]; + in2out_kv.value = s - dm->per_thread_data[thread_index].sessions; + clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out, + &in2out_kv, 1); + out2in_kv.key = s->out2in.as_u64; + out2in_kv.value = s - dm->per_thread_data[thread_index].sessions; + clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in, + &out2in_kv, 1); + + return next; +} + +static inline u32 +dslite_icmp_in2out (dslite_main_t * dm, ip6_header_t * ip6, + ip4_header_t * ip4, dslite_session_t ** sp, u32 next, + u8 * error, u32 thread_index) +{ + dslite_session_t *s = 0; + icmp46_header_t *icmp = ip4_next_header (ip4); + clib_bihash_kv_24_8_t kv, value; + dslite_session_key_t key; + u32 n = next; + icmp_echo_header_t *echo; + u32 new_addr, old_addr; + u16 old_id, new_id; + ip_csum_t sum; + + if (icmp_is_error_message (icmp)) + { + n = DSLITE_IN2OUT_NEXT_DROP; + *error = DSLITE_ERROR_BAD_ICMP_TYPE; + goto done; + } + + echo = (icmp_echo_header_t *) (icmp + 1); + + key.addr = ip4->src_address; + key.port = echo->identifier; + key.proto = SNAT_PROTOCOL_ICMP; + key.softwire_id.as_u64[0] = ip6->src_address.as_u64[0]; + key.softwire_id.as_u64[1] = ip6->src_address.as_u64[1]; + key.pad = 0; + kv.key[0] = key.as_u64[0]; + kv.key[1] = key.as_u64[1]; + kv.key[2] = key.as_u64[2]; + + if (clib_bihash_search_24_8 + (&dm->per_thread_data[thread_index].in2out, &kv, &value)) + { + n = slow_path (dm, &key, &s, next, error, thread_index); + if (PREDICT_FALSE (next == DSLITE_IN2OUT_NEXT_DROP)) + goto done; + } + else + { + s = + pool_elt_at_index (dm->per_thread_data[thread_index].sessions, + value.value); + } + + old_addr = ip4->src_address.as_u32; + ip4->src_address = s->out2in.addr; + new_addr = ip4->src_address.as_u32; + sum = ip4->checksum; + sum = 
ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address); + ip4->checksum = ip_csum_fold (sum); + + old_id = echo->identifier; + echo->identifier = new_id = s->out2in.port; + sum = icmp->checksum; + sum = ip_csum_update (sum, old_id, new_id, icmp_echo_header_t, identifier); + icmp->checksum = ip_csum_fold (sum); + +done: + *sp = s; + return n; +} + +static inline uword +dslite_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, u8 is_slow_path) +{ + u32 n_left_from, *from, *to_next; + dslite_in2out_next_t next_index; + u32 node_index; + vlib_node_runtime_t *error_node; + u32 thread_index = vlib_get_thread_index (); + f64 now = vlib_time_now (vm); + dslite_main_t *dm = &dslite_main; + + node_index = + is_slow_path ? dslite_in2out_slowpath_node. + index : dslite_in2out_node.index; + + error_node = vlib_node_get_runtime (vm, node_index); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = DSLITE_IN2OUT_NEXT_IP4_LOOKUP; + ip4_header_t *ip40; + ip6_header_t *ip60; + u8 error0 = DSLITE_ERROR_IN2OUT; + u32 proto0; + dslite_session_t *s0 = 0; + clib_bihash_kv_24_8_t kv0, value0; + dslite_session_key_t key0; + udp_header_t *udp0; + tcp_header_t *tcp0; + ip_csum_t sum0; + u32 new_addr0, old_addr0; + u16 old_port0, new_port0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip60 = vlib_buffer_get_current (b0); + + if (PREDICT_FALSE (ip60->protocol != IP_PROTOCOL_IP_IN_IP)) + { + error0 = DSLITE_ERROR_BAD_IP6_PROTOCOL; + next0 = DSLITE_IN2OUT_NEXT_DROP; + goto trace0; + } + + ip40 = 
vlib_buffer_get_current (b0) + sizeof (ip6_header_t); + proto0 = ip_proto_to_snat_proto (ip40->protocol); + + if (PREDICT_FALSE (proto0 == ~0)) + { + error0 = DSLITE_ERROR_UNSUPPORTED_PROTOCOL; + next0 = DSLITE_IN2OUT_NEXT_DROP; + goto trace0; + } + + udp0 = ip4_next_header (ip40); + tcp0 = (tcp_header_t *) udp0; + + if (is_slow_path) + { + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = + dslite_icmp_in2out (dm, ip60, ip40, &s0, next0, &error0, + thread_index); + if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP)) + goto trace0; + + goto accounting0; + } + } + else + { + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = DSLITE_IN2OUT_NEXT_SLOWPATH; + goto trace0; + } + } + + key0.addr = ip40->src_address; + key0.port = udp0->src_port; + key0.proto = proto0; + key0.softwire_id.as_u64[0] = ip60->src_address.as_u64[0]; + key0.softwire_id.as_u64[1] = ip60->src_address.as_u64[1]; + key0.pad = 0; + kv0.key[0] = key0.as_u64[0]; + kv0.key[1] = key0.as_u64[1]; + kv0.key[2] = key0.as_u64[2]; + + if (clib_bihash_search_24_8 + (&dm->per_thread_data[thread_index].in2out, &kv0, &value0)) + { + if (is_slow_path) + { + next0 = + slow_path (dm, &key0, &s0, next0, &error0, thread_index); + if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP)) + goto trace0; + } + else + { + next0 = DSLITE_IN2OUT_NEXT_SLOWPATH; + goto trace0; + } + } + else + { + s0 = + pool_elt_at_index (dm->per_thread_data[thread_index].sessions, + value0.value); + } + + old_addr0 = ip40->src_address.as_u32; + ip40->src_address = s0->out2in.addr; + new_addr0 = ip40->src_address.as_u32; + sum0 = ip40->checksum; + sum0 = + ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + src_address); + ip40->checksum = ip_csum_fold (sum0); + if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->src_port; + tcp0->src_port = s0->out2in.port; + new_port0 = tcp0->src_port; + + sum0 = tcp0->checksum; + sum0 = + ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + 
dst_address); + sum0 = + ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, + length); + tcp0->checksum = ip_csum_fold (sum0); + } + else + { + old_port0 = udp0->src_port; + udp0->src_port = s0->out2in.port; + udp0->checksum = 0; + } + + accounting0: + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (vm, b0); + /* Per-B4 LRU list maintenance */ + clib_dlist_remove (dm->per_thread_data[thread_index].list_pool, + s0->per_b4_index); + clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool, + s0->per_b4_list_head_index, s0->per_b4_index); + + ip40->tos = + (clib_net_to_host_u32 + (ip60->ip_version_traffic_class_and_flow_label) & 0x0ff00000) >> + 20; + vlib_buffer_advance (b0, sizeof (ip6_header_t)); + + trace0: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + dslite_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->session_index = ~0; + if (s0) + t->session_index = + s0 - dm->per_thread_data[thread_index].sessions; + } + + b0->error = error_node->errors[error0]; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +static uword +dslite_in2out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return dslite_in2out_node_fn_inline (vm, node, frame, 0); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dslite_in2out_node) = { + .function = dslite_in2out_node_fn, + .name = "dslite-in2out", + .vector_size = sizeof (u32), + .format_trace = format_dslite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (dslite_in2out_error_strings), + .error_strings = dslite_in2out_error_strings, + .n_next_nodes = DSLITE_IN2OUT_N_NEXT, + /* edit / add dispositions 
here */ + .next_nodes = { + [DSLITE_IN2OUT_NEXT_DROP] = "error-drop", + [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup", + [DSLITE_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup", + [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (dslite_in2out_node, dslite_in2out_node_fn); + +static uword +dslite_in2out_slowpath_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return dslite_in2out_node_fn_inline (vm, node, frame, 1); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dslite_in2out_slowpath_node) = { + .function = dslite_in2out_slowpath_node_fn, + .name = "dslite-in2out-slowpath", + .vector_size = sizeof (u32), + .format_trace = format_dslite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (dslite_in2out_error_strings), + .error_strings = dslite_in2out_error_strings, + .n_next_nodes = DSLITE_IN2OUT_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = { + [DSLITE_IN2OUT_NEXT_DROP] = "error-drop", + [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup", + [DSLITE_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup", + [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (dslite_in2out_slowpath_node, + dslite_in2out_slowpath_node_fn); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/dslite_out2in.c b/src/plugins/nat/dslite_out2in.c new file mode 100644 index 00000000000..802b2a91544 --- /dev/null +++ b/src/plugins/nat/dslite_out2in.c @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +vlib_node_registration_t dslite_out2in_node; + +typedef enum +{ + DSLITE_OUT2IN_NEXT_IP4_LOOKUP, + DSLITE_OUT2IN_NEXT_IP6_LOOKUP, + DSLITE_OUT2IN_NEXT_DROP, + DSLITE_OUT2IN_N_NEXT, +} dslite_out2in_next_t; + +static char *dslite_out2in_error_strings[] = { +#define _(sym,string) string, + foreach_dslite_error +#undef _ +}; + +static inline u32 +dslite_icmp_out2in (dslite_main_t * dm, ip4_header_t * ip4, + dslite_session_t ** sp, u32 next, u8 * error, + u32 thread_index) +{ + dslite_session_t *s = 0; + icmp46_header_t *icmp = ip4_next_header (ip4); + clib_bihash_kv_8_8_t kv, value; + snat_session_key_t key; + u32 n = next; + icmp_echo_header_t *echo; + u32 new_addr, old_addr; + u16 old_id, new_id; + ip_csum_t sum; + + echo = (icmp_echo_header_t *) (icmp + 1); + + if (icmp_is_error_message (icmp) || (icmp->type != ICMP4_echo_reply)) + { + n = DSLITE_OUT2IN_NEXT_DROP; + *error = DSLITE_ERROR_BAD_ICMP_TYPE; + goto done; + } + + key.addr = ip4->dst_address; + key.port = echo->identifier; + key.protocol = SNAT_PROTOCOL_ICMP; + key.fib_index = 0; + kv.key = key.as_u64; + + if (clib_bihash_search_8_8 + (&dm->per_thread_data[thread_index].out2in, &kv, &value)) + { + next = DSLITE_OUT2IN_NEXT_DROP; + *error = DSLITE_ERROR_NO_TRANSLATION; + goto done; + } + else + { + s = + pool_elt_at_index (dm->per_thread_data[thread_index].sessions, + value.value); + } + + old_id = echo->identifier; + echo->identifier = new_id = s->in2out.port; + sum = icmp->checksum; + sum = ip_csum_update (sum, old_id, new_id, icmp_echo_header_t, identifier); + 
icmp->checksum = ip_csum_fold (sum); + + old_addr = ip4->dst_address.as_u32; + ip4->dst_address = s->in2out.addr; + new_addr = ip4->dst_address.as_u32; + + sum = ip4->checksum; + sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address); + ip4->checksum = ip_csum_fold (sum); + +done: + *sp = s; + return n; +} + +static uword +dslite_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + dslite_out2in_next_t next_index; + vlib_node_runtime_t *error_node; + u32 thread_index = vlib_get_thread_index (); + f64 now = vlib_time_now (vm); + dslite_main_t *dm = &dslite_main; + + error_node = vlib_node_get_runtime (vm, dslite_out2in_node.index); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = DSLITE_OUT2IN_NEXT_IP6_LOOKUP; + u8 error0 = DSLITE_ERROR_OUT2IN; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 proto0; + udp_header_t *udp0; + tcp_header_t *tcp0; + clib_bihash_kv_8_8_t kv0, value0; + snat_session_key_t key0; + dslite_session_t *s0 = 0; + ip_csum_t sum0; + u32 new_addr0, old_addr0; + u16 new_port0, old_port0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip40 = vlib_buffer_get_current (b0); + proto0 = ip_proto_to_snat_proto (ip40->protocol); + + if (PREDICT_FALSE (proto0 == ~0)) + { + error0 = DSLITE_ERROR_UNSUPPORTED_PROTOCOL; + next0 = DSLITE_OUT2IN_NEXT_DROP; + goto trace0; + } + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = + dslite_icmp_out2in (dm, ip40, &s0, next0, &error0, + thread_index); + if 
(PREDICT_FALSE (next0 == DSLITE_OUT2IN_NEXT_DROP)) + goto trace0; + + goto encap0; + } + + udp0 = ip4_next_header (ip40); + tcp0 = (tcp_header_t *) udp0; + + key0.addr = ip40->dst_address; + key0.port = udp0->dst_port; + key0.protocol = proto0; + key0.fib_index = 0; + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 + (&dm->per_thread_data[thread_index].out2in, &kv0, &value0)) + { + next0 = DSLITE_OUT2IN_NEXT_DROP; + error0 = DSLITE_ERROR_NO_TRANSLATION; + goto trace0; + } + else + { + s0 = + pool_elt_at_index (dm->per_thread_data[thread_index].sessions, + value0.value); + } + + old_addr0 = ip40->dst_address.as_u32; + ip40->dst_address = s0->in2out.addr; + new_addr0 = ip40->dst_address.as_u32; + + sum0 = ip40->checksum; + sum0 = + ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + dst_address); + ip40->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->dst_port; + tcp0->dst_port = s0->in2out.port; + new_port0 = tcp0->dst_port; + + sum0 = tcp0->checksum; + sum0 = + ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + dst_address); + sum0 = + ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, + length); + tcp0->checksum = ip_csum_fold (sum0); + } + else + { + old_port0 = udp0->dst_port; + udp0->dst_port = s0->in2out.port; + udp0->checksum = 0; + } + + encap0: + /* Construct IPv6 header */ + vlib_buffer_advance (b0, -(sizeof (ip6_header_t))); + ip60 = vlib_buffer_get_current (b0); + ip60->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (ip40->tos << 20)); + ip60->payload_length = ip40->length; + ip60->protocol = IP_PROTOCOL_IP_IN_IP; + ip60->hop_limit = ip40->ttl; + ip60->src_address.as_u64[0] = dm->aftr_ip6_addr.as_u64[0]; + ip60->src_address.as_u64[1] = dm->aftr_ip6_addr.as_u64[1]; + ip60->dst_address.as_u64[0] = s0->in2out.softwire_id.as_u64[0]; + ip60->dst_address.as_u64[1] = s0->in2out.softwire_id.as_u64[1]; + + /* Accounting */ + s0->last_heard = 
now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (vm, b0); + /* Per-B4 LRU list maintenance */ + clib_dlist_remove (dm->per_thread_data[thread_index].list_pool, + s0->per_b4_index); + clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool, + s0->per_b4_list_head_index, s0->per_b4_index); + trace0: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + dslite_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->session_index = ~0; + if (s0) + t->session_index = + s0 - dm->per_thread_data[thread_index].sessions; + } + + b0->error = error_node->errors[error0]; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dslite_out2in_node) = { + .function = dslite_out2in_node_fn, + .name = "dslite-out2in", + .vector_size = sizeof (u32), + .format_trace = format_dslite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (dslite_out2in_error_strings), + .error_strings = dslite_out2in_error_strings, + .n_next_nodes = DSLITE_OUT2IN_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = { + [DSLITE_OUT2IN_NEXT_DROP] = "error-drop", + [DSLITE_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup", + [DSLITE_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (dslite_out2in_node, dslite_out2in_node_fn); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/in2out.c b/src/plugins/nat/in2out.c index 9f668d89f97..b0593900a93 100755 --- a/src/plugins/nat/in2out.c +++ b/src/plugins/nat/in2out.c @@ -366,12 +366,15 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t 
*b0, s->in2out.fib_index); snat_free_outside_address_and_port - (sm, thread_index, &s->out2in, s->outside_address_index); + (sm->addresses, thread_index, &s->out2in, s->outside_address_index); } s->outside_address_index = ~0; - if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, thread_index, - &key1, &address_index)) + if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0, + thread_index, &key1, + &address_index, sm->vrf_mode, + sm->port_per_thread, + sm->per_thread_data[thread_index].snat_thread_index)) { ASSERT(0); @@ -389,9 +392,11 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, { static_mapping = 0; /* Try to create dynamic translation */ - if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, + if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0, thread_index, &key1, - &address_index)) + &address_index, sm->vrf_mode, + sm->port_per_thread, + sm->per_thread_data[thread_index].snat_thread_index)) { b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS]; return SNAT_IN2OUT_NEXT_DROP; @@ -1265,7 +1270,8 @@ create_ses: s->out2in.port, s->in2out.fib_index); - snat_free_outside_address_and_port (sm, thread_index, &s->out2in, + snat_free_outside_address_and_port (sm->addresses, thread_index, + &s->out2in, s->outside_address_index); /* Remove in2out, out2in keys */ diff --git a/src/plugins/nat/nat.api b/src/plugins/nat/nat.api index 98a6f0673e4..187de25c8c1 100644 --- a/src/plugins/nat/nat.api +++ b/src/plugins/nat/nat.api @@ -1567,3 +1567,30 @@ define nat64_prefix_details { u8 prefix_len; u32 vrf_id; }; + + +/* + * DS-Lite APIs + */ + +/** \brief Add/delete address range to DS-Lite pool + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param start_addr - start address of the range + @param end_addr - end address of the range + @param is_add - 1 if add, 0 if delete +*/ +autoreply define dslite_add_del_pool_addr_range { + u32 
client_index; + u32 context; + u8 start_addr[4]; + u8 end_addr[4]; + u8 is_add; +}; + +autoreply define dslite_set_aftr_addr { + u32 client_index; + u32 context; + u8 ip4_addr[4]; + u8 ip6_addr[16]; +}; diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index 189c5940d9c..cd5a6eb8fa8 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -1448,12 +1449,14 @@ static clib_error_t * snat_init (vlib_main_t * vm) error = nat64_init(vm); + dslite_init(vm); + return error; } VLIB_INIT_FUNCTION (snat_init); -void snat_free_outside_address_and_port (snat_main_t * sm, +void snat_free_outside_address_and_port (snat_address_t * addresses, u32 thread_index, snat_session_key_t * k, u32 address_index) @@ -1461,9 +1464,9 @@ void snat_free_outside_address_and_port (snat_main_t * sm, snat_address_t *a; u16 port_host_byte_order = clib_net_to_host_u16 (k->port); - ASSERT (address_index < vec_len (sm->addresses)); + ASSERT (address_index < vec_len (addresses)); - a = sm->addresses + address_index; + a = addresses + address_index; switch (k->protocol) { @@ -1572,38 +1575,42 @@ int snat_static_mapping_match (snat_main_t * sm, } static_always_inline u16 -snat_random_port (snat_main_t * sm, u16 min, u16 max) +snat_random_port (u16 min, u16 max) { + snat_main_t *sm = &snat_main; return min + random_u32 (&sm->random_seed) / (random_u32_max() / (max - min + 1) + 1); } -int snat_alloc_outside_address_and_port (snat_main_t * sm, +int snat_alloc_outside_address_and_port (snat_address_t * addresses, u32 fib_index, u32 thread_index, snat_session_key_t * k, - u32 * address_indexp) + u32 * address_indexp, + u8 vrf_mode, + u16 port_per_thread, + u32 snat_thread_index) { int i; snat_address_t *a; u32 portnum; - for (i = 0; i < vec_len (sm->addresses); i++) + for (i = 0; i < vec_len (addresses); i++) { - a = sm->addresses + i; - if (sm->vrf_mode && a->fib_index != ~0 && a->fib_index != fib_index) + a = addresses 
+ i; + if (vrf_mode && a->fib_index != ~0 && a->fib_index != fib_index) continue; switch (k->protocol) { #define _(N, j, n, s) \ case SNAT_PROTOCOL_##N: \ - if (a->busy_##n##_ports_per_thread[thread_index] < sm->port_per_thread) \ + if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \ { \ while (1) \ { \ - portnum = (sm->port_per_thread * \ - sm->per_thread_data[thread_index].snat_thread_index) + \ - snat_random_port(sm, 1, sm->port_per_thread) + 1024; \ + portnum = (port_per_thread * \ + snat_thread_index) + \ + snat_random_port(1, port_per_thread) + 1024; \ if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, portnum)) \ continue; \ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, portnum, 1); \ diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index aac46bfc4f8..b72e075df35 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -376,16 +376,19 @@ extern vlib_node_registration_t snat_det_out2in_node; extern vlib_node_registration_t snat_hairpin_dst_node; extern vlib_node_registration_t snat_hairpin_src_node; -void snat_free_outside_address_and_port (snat_main_t * sm, +void snat_free_outside_address_and_port (snat_address_t * addresses, u32 thread_index, snat_session_key_t * k, u32 address_index); -int snat_alloc_outside_address_and_port (snat_main_t * sm, +int snat_alloc_outside_address_and_port (snat_address_t * addresses, u32 fib_index, u32 thread_index, snat_session_key_t * k, - u32 * address_indexp); + u32 * address_indexp, + u8 vrf_mode, + u16 port_per_thread, + u32 snat_thread_index); int snat_static_mapping_match (snat_main_t * sm, snat_session_key_t match, diff --git a/src/plugins/nat/nat_api.c b/src/plugins/nat/nat_api.c index f80a5067a43..0ffa2f0e772 100644 --- a/src/plugins/nat/nat_api.c +++ b/src/plugins/nat/nat_api.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -3289,6 +3290,87 @@ vl_api_nat64_prefix_dump_t_print (vl_api_nat64_prefix_dump_t * mp, FINISH; } +/***************/ 
+/*** DS-Lite ***/ +/***************/ + +static void +vl_api_dslite_set_aftr_addr_t_handler (vl_api_dslite_set_aftr_addr_t * mp) +{ + vl_api_dslite_set_aftr_addr_reply_t *rmp; + snat_main_t *sm = &snat_main; + dslite_main_t *dm = &dslite_main; + int rv = 0; + ip6_address_t ip6_addr; + + memcpy (&ip6_addr.as_u8, mp->ip6_addr, 16); + + rv = dslite_set_aftr_ip6_addr (dm, &ip6_addr); + + REPLY_MACRO (VL_API_DSLITE_SET_AFTR_ADDR_REPLY); +} + +static void * +vl_api_dslite_set_aftr_addr_t_print (vl_api_dslite_set_aftr_addr_t * mp, + void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: dslite_set_aftr_addr "); + s = format (s, "ip6_addr %U ip4_addr %U\n", + format_ip6_address, mp->ip6_addr, + format_ip4_address, mp->ip4_addr); + + FINISH; +} + +static void + vl_api_dslite_add_del_pool_addr_range_t_handler + (vl_api_dslite_add_del_pool_addr_range_t * mp) +{ + vl_api_dslite_add_del_pool_addr_range_reply_t *rmp; + snat_main_t *sm = &snat_main; + dslite_main_t *dm = &dslite_main; + int rv = 0; + ip4_address_t this_addr; + u32 start_host_order, end_host_order; + int i, count; + u32 *tmp; + + tmp = (u32 *) mp->start_addr; + start_host_order = clib_host_to_net_u32 (tmp[0]); + tmp = (u32 *) mp->end_addr; + end_host_order = clib_host_to_net_u32 (tmp[0]); + + count = (end_host_order - start_host_order) + 1; + memcpy (&this_addr.as_u8, mp->start_addr, 4); + + for (i = 0; i < count; i++) + { + if ((rv = dslite_add_del_pool_addr (dm, &this_addr, mp->is_add))) + goto send_reply; + + increment_v4_address (&this_addr); + } + +send_reply: + REPLY_MACRO (VL_API_DSLITE_ADD_DEL_POOL_ADDR_RANGE_REPLY); +} + +static void *vl_api_dslite_add_del_pool_addr_range_t_print + (vl_api_dslite_add_del_pool_addr_range_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: dslite_add_del_pool_addr_range "); + s = format (s, "%U - %U\n", + format_ip4_address, mp->start_addr, + format_ip4_address, mp->end_addr); + + FINISH; +} + + /* List of message types that this plugin understands */ #define 
foreach_snat_plugin_api_msg \ _(SNAT_ADD_ADDRESS_RANGE, snat_add_address_range) \ @@ -3360,7 +3442,9 @@ _(NAT64_SET_TIMEOUTS, nat64_set_timeouts) \ _(NAT64_GET_TIMEOUTS, nat64_get_timeouts) \ _(NAT64_ST_DUMP, nat64_st_dump) \ _(NAT64_ADD_DEL_PREFIX, nat64_add_del_prefix) \ -_(NAT64_PREFIX_DUMP, nat64_prefix_dump) +_(NAT64_PREFIX_DUMP, nat64_prefix_dump) \ +_(DSLITE_ADD_DEL_POOL_ADDR_RANGE, dslite_add_del_pool_addr_range) \ +_(DSLITE_SET_AFTR_ADDR, dslite_set_aftr_addr) /* Set up the API message handling tables */ static clib_error_t * diff --git a/test/test_nat.py b/test/test_nat.py index 37e1b1e7b75..e420baffd1d 100644 --- a/test/test_nat.py +++ b/test/test_nat.py @@ -5,6 +5,7 @@ import unittest import struct from framework import VppTestCase, VppTestRunner, running_extended_tests +from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto from scapy.layers.inet import IP, TCP, UDP, ICMP from scapy.layers.inet import IPerror, TCPerror, UDPerror, ICMPerror from scapy.layers.inet6 import IPv6, ICMPv6EchoRequest, ICMPv6EchoReply @@ -4007,5 +4008,152 @@ class TestNAT64(MethodHolder): self.logger.info(self.vapi.cli("show nat64 session table all")) self.clear_nat64() + +class TestDSlite(MethodHolder): + """ DS-Lite Test Cases """ + + @classmethod + def setUpClass(cls): + super(TestDSlite, cls).setUpClass() + + try: + cls.nat_addr = '10.0.0.3' + cls.nat_addr_n = socket.inet_pton(socket.AF_INET, cls.nat_addr) + + cls.create_pg_interfaces(range(2)) + cls.pg0.admin_up() + cls.pg0.config_ip4() + cls.pg0.resolve_arp() + cls.pg1.admin_up() + cls.pg1.config_ip6() + cls.pg1.generate_remote_hosts(2) + cls.pg1.configure_ipv6_neighbors() + + except Exception: + super(TestDSlite, cls).tearDownClass() + raise + + def test_dslite(self): + """ Test DS-Lite """ + self.vapi.dslite_add_del_pool_addr_range(self.nat_addr_n, + self.nat_addr_n) + aftr_ip4 = '192.0.0.1' + aftr_ip4_n = socket.inet_pton(socket.AF_INET, aftr_ip4) + aftr_ip6 = '2001:db8:85a3::8a2e:370:1' + aftr_ip6_n = 
socket.inet_pton(socket.AF_INET6, aftr_ip6) + self.vapi.dslite_set_aftr_addr(aftr_ip6_n, aftr_ip4_n) + + # UDP + p = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / + IPv6(dst=aftr_ip6, src=self.pg1.remote_hosts[0].ip6) / + IP(dst=self.pg0.remote_ip4, src='192.168.1.1') / + UDP(sport=20000, dport=10000)) + self.pg1.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + capture = self.pg0.get_capture(1) + capture = capture[0] + self.assertFalse(capture.haslayer(IPv6)) + self.assertEqual(capture[IP].src, self.nat_addr) + self.assertEqual(capture[IP].dst, self.pg0.remote_ip4) + self.assertNotEqual(capture[UDP].sport, 20000) + self.assertEqual(capture[UDP].dport, 10000) + self.check_ip_checksum(capture) + out_port = capture[UDP].sport + + p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IP(dst=self.nat_addr, src=self.pg0.remote_ip4) / + UDP(sport=10000, dport=out_port)) + self.pg0.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + capture = self.pg1.get_capture(1) + capture = capture[0] + self.assertEqual(capture[IPv6].src, aftr_ip6) + self.assertEqual(capture[IPv6].dst, self.pg1.remote_hosts[0].ip6) + self.assertEqual(capture[IP].src, self.pg0.remote_ip4) + self.assertEqual(capture[IP].dst, '192.168.1.1') + self.assertEqual(capture[UDP].sport, 10000) + self.assertEqual(capture[UDP].dport, 20000) + self.check_ip_checksum(capture) + + # TCP + p = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / + IPv6(dst=aftr_ip6, src=self.pg1.remote_hosts[1].ip6) / + IP(dst=self.pg0.remote_ip4, src='192.168.1.1') / + TCP(sport=20001, dport=10001)) + self.pg1.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + capture = self.pg0.get_capture(1) + capture = capture[0] + self.assertFalse(capture.haslayer(IPv6)) + self.assertEqual(capture[IP].src, self.nat_addr) + self.assertEqual(capture[IP].dst, self.pg0.remote_ip4) + self.assertNotEqual(capture[TCP].sport, 20001) + 
self.assertEqual(capture[TCP].dport, 10001) + self.check_ip_checksum(capture) + self.check_tcp_checksum(capture) + out_port = capture[TCP].sport + + p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IP(dst=self.nat_addr, src=self.pg0.remote_ip4) / + TCP(sport=10001, dport=out_port)) + self.pg0.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + capture = self.pg1.get_capture(1) + capture = capture[0] + self.assertEqual(capture[IPv6].src, aftr_ip6) + self.assertEqual(capture[IPv6].dst, self.pg1.remote_hosts[1].ip6) + self.assertEqual(capture[IP].src, self.pg0.remote_ip4) + self.assertEqual(capture[IP].dst, '192.168.1.1') + self.assertEqual(capture[TCP].sport, 10001) + self.assertEqual(capture[TCP].dport, 20001) + self.check_ip_checksum(capture) + self.check_tcp_checksum(capture) + + # ICMP + p = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / + IPv6(dst=aftr_ip6, src=self.pg1.remote_hosts[1].ip6) / + IP(dst=self.pg0.remote_ip4, src='192.168.1.1') / + ICMP(id=4000, type='echo-request')) + self.pg1.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + capture = self.pg0.get_capture(1) + capture = capture[0] + self.assertFalse(capture.haslayer(IPv6)) + self.assertEqual(capture[IP].src, self.nat_addr) + self.assertEqual(capture[IP].dst, self.pg0.remote_ip4) + self.assertNotEqual(capture[ICMP].id, 4000) + self.check_ip_checksum(capture) + self.check_icmp_checksum(capture) + out_id = capture[ICMP].id + + p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IP(dst=self.nat_addr, src=self.pg0.remote_ip4) / + ICMP(id=out_id, type='echo-reply')) + self.pg0.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + capture = self.pg1.get_capture(1) + capture = capture[0] + self.assertEqual(capture[IPv6].src, aftr_ip6) + self.assertEqual(capture[IPv6].dst, self.pg1.remote_hosts[1].ip6) + self.assertEqual(capture[IP].src, self.pg0.remote_ip4) + 
self.assertEqual(capture[IP].dst, '192.168.1.1')
+        self.assertEqual(capture[ICMP].id, 4000)
+        self.check_ip_checksum(capture)
+        self.check_icmp_checksum(capture)
+
+    def tearDown(self):
+        super(TestDSlite, self).tearDown()
+        if not self.vpp_dead:
+            self.logger.info(self.vapi.cli("show dslite pool"))
+            self.logger.info(
+                self.vapi.cli("show dslite aftr-tunnel-endpoint-address"))
+            self.logger.info(self.vapi.cli("show dslite sessions"))
+
 if __name__ == '__main__':
     unittest.main(testRunner=VppTestRunner)
diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py
index d84012b2d2b..468cf83d57b 100644
--- a/test/vpp_papi_provider.py
+++ b/test/vpp_papi_provider.py
@@ -1647,6 +1647,34 @@ class VppPapiProvider(object):
         """
         return self.api(self.papi.nat64_prefix_dump, {})
 
+    def dslite_set_aftr_addr(self, ip6, ip4):
+        """Set DS-Lite AFTR addresses
+
+        :param ip6: AFTR IPv6 address (packed, e.g. socket.inet_pton)
+        :param ip4: AFTR IPv4 address (packed, e.g. socket.inet_pton)
+        """
+        return self.api(
+            self.papi.dslite_set_aftr_addr,
+            {'ip4_addr': ip4,
+             'ip6_addr': ip6})
+
+    def dslite_add_del_pool_addr_range(
+            self,
+            start_addr,
+            end_addr,
+            is_add=1):
+        """Add/del address range to DS-Lite pool
+
+        :param start_addr: First IPv4 address of the range (packed)
+        :param end_addr: Last IPv4 address of the range (packed)
+        :param is_add: 1 if add, 0 if delete (Default value = 1)
+        """
+        return self.api(
+            self.papi.dslite_add_del_pool_addr_range,
+            {'start_addr': start_addr,
+             'end_addr': end_addr,
+             'is_add': is_add})
+
     def control_ping(self):
         self.api(self.papi.control_ping)
-- 
2.16.6