From f126e746fc01c75bc99329d10ce9127b26b23814 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Thu, 10 Oct 2019 09:46:06 +0000 Subject: [PATCH] nat: use SVR Remove NAT's implementation of shallow virtual reassembly with corresponding CLIs, APIs & tests. Replace with standalone shallow virtual reassembly provided by ipX-sv-reass* nodes. Type: refactor Change-Id: I7e6c7487a5a500d591f6871474a359e0993e59b6 Signed-off-by: Klement Sekera --- src/plugins/map/ip4_map_t.c | 7 +- src/plugins/map/ip6_map_t.c | 30 +- src/plugins/nat/CMakeLists.txt | 1 - src/plugins/nat/dslite_in2out.c | 2 +- src/plugins/nat/dslite_out2in.c | 3 +- src/plugins/nat/in2out.c | 703 ++++++++-------------------- src/plugins/nat/in2out_ed.c | 811 +++++++++----------------------- src/plugins/nat/nat.api | 79 ---- src/plugins/nat/nat.c | 184 ++++---- src/plugins/nat/nat.h | 28 +- src/plugins/nat/nat44_classify.c | 299 ++---------- src/plugins/nat/nat44_hairpinning.c | 6 +- src/plugins/nat/nat44_handoff.c | 30 +- src/plugins/nat/nat64.c | 69 +-- src/plugins/nat/nat64.h | 4 +- src/plugins/nat/nat64_in2out.c | 748 ++++++++---------------------- src/plugins/nat/nat64_out2in.c | 569 +++++++---------------- src/plugins/nat/nat66.c | 6 + src/plugins/nat/nat66_in2out.c | 2 +- src/plugins/nat/nat66_out2in.c | 2 +- src/plugins/nat/nat_api.c | 154 ------- src/plugins/nat/nat_det_in2out.c | 12 +- src/plugins/nat/nat_det_out2in.c | 12 +- src/plugins/nat/nat_format.c | 14 - src/plugins/nat/nat_inlines.h | 60 +-- src/plugins/nat/nat_reass.c | 893 ------------------------------------ src/plugins/nat/nat_reass.h | 340 -------------- src/plugins/nat/out2in.c | 691 ++++++++-------------------- src/plugins/nat/out2in_ed.c | 767 +++++++++---------------------- src/plugins/nat/test/test_nat.py | 188 +------- src/vnet/buffer.h | 45 +- src/vnet/ip/ip4_packet.h | 4 +- src/vnet/ip/ip4_to_ip6.h | 225 +-------- src/vnet/ip/ip6_packet.h | 61 +++ src/vnet/ip/ip6_to_ip4.h | 325 ++++--------- src/vnet/ip/reass/ip4_sv_reass.c | 208 
++++++++- src/vnet/ip/reass/ip4_sv_reass.h | 2 + src/vnet/ip/reass/ip6_sv_reass.c | 100 +++- src/vnet/vxlan-gbp/vxlan_gbp.h | 1 + 39 files changed, 1884 insertions(+), 5801 deletions(-) delete mode 100755 src/plugins/nat/nat_reass.c delete mode 100644 src/plugins/nat/nat_reass.h diff --git a/src/plugins/map/ip4_map_t.c b/src/plugins/map/ip4_map_t.c index c254efc78b9..a02b55478c2 100644 --- a/src/plugins/map/ip4_map_t.c +++ b/src/plugins/map/ip4_map_t.c @@ -69,7 +69,8 @@ typedef struct } icmp_to_icmp6_ctx_t; static int -ip4_to_ip6_set_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg) +ip4_to_ip6_set_icmp_cb (vlib_buffer_t * b, ip4_header_t * ip4, + ip6_header_t * ip6, void *arg) { icmp_to_icmp6_ctx_t *ctx = arg; @@ -83,8 +84,8 @@ ip4_to_ip6_set_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg) } static int -ip4_to_ip6_set_inner_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6, - void *arg) +ip4_to_ip6_set_inner_icmp_cb (vlib_buffer_t * b, ip4_header_t * ip4, + ip6_header_t * ip6, void *arg) { icmp_to_icmp6_ctx_t *ctx = arg; diff --git a/src/plugins/map/ip6_map_t.c b/src/plugins/map/ip6_map_t.c index 7999507618a..95104dc78f4 100644 --- a/src/plugins/map/ip6_map_t.c +++ b/src/plugins/map/ip6_map_t.c @@ -145,9 +145,11 @@ ip6_map_t_icmp (vlib_main_t * vm, d0 = pool_elt_at_index (map_main.domains, vnet_buffer (p0)->map_t.map_domain_index); - ctx0.sender_port = ip6_get_port (ip60, 0, p0->current_length); ctx0.d = d0; - if (ctx0.sender_port == 0) + ctx0.sender_port = 0; + if (!ip6_get_port + (vm, p0, ip60, p0->current_length, NULL, &ctx0.sender_port, + NULL, NULL, NULL, NULL)) { // In case of 1:1 mapping, we don't care about the port if (!(d0->ea_bits_len == 0 && d0->rules)) @@ -157,9 +159,8 @@ ip6_map_t_icmp (vlib_main_t * vm, } } - if (icmp6_to_icmp - (p0, ip6_to_ip4_set_icmp_cb, &ctx0, - ip6_to_ip4_set_inner_icmp_cb, &ctx0)) + if (icmp6_to_icmp (vm, p0, ip6_to_ip4_set_icmp_cb, &ctx0, + ip6_to_ip4_set_inner_icmp_cb, &ctx0)) { error0 = MAP_ERROR_ICMP; 
goto err0; @@ -200,7 +201,7 @@ ip6_map_t_icmp (vlib_main_t * vm, * Translate IPv6 fragmented packet to IPv4. */ always_inline int -map_ip6_to_ip4_fragmented (vlib_buffer_t * p) +map_ip6_to_ip4_fragmented (vlib_main_t * vm, vlib_buffer_t * p) { ip6_header_t *ip6; ip6_frag_hdr_t *frag; @@ -214,7 +215,7 @@ map_ip6_to_ip4_fragmented (vlib_buffer_t * p) ip6 = vlib_buffer_get_current (p); if (ip6_parse - (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset)) + (vm, p, ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset)) return -1; frag = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset); @@ -230,7 +231,7 @@ map_ip6_to_ip4_fragmented (vlib_buffer_t * p) ip4->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - ip4->tos = ip6_translate_tos (ip6); + ip4->tos = ip6_translate_tos (ip6->ip_version_traffic_class_and_flow_label); ip4->length = u16_net_add (ip6->payload_length, sizeof (*ip4) - l4_offset + sizeof (*ip6)); @@ -276,7 +277,7 @@ ip6_map_t_fragmented (vlib_main_t * vm, next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; p0 = vlib_get_buffer (vm, pi0); - if (map_ip6_to_ip4_fragmented (p0)) + if (map_ip6_to_ip4_fragmented (vm, p0)) { p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED]; next0 = IP6_MAPT_FRAGMENTED_NEXT_DROP; @@ -306,7 +307,8 @@ ip6_map_t_fragmented (vlib_main_t * vm, * Translate IPv6 UDP/TCP packet to IPv4. 
*/ always_inline int -map_ip6_to_ip4_tcp_udp (vlib_buffer_t * p, bool udp_checksum) +map_ip6_to_ip4_tcp_udp (vlib_main_t * vm, vlib_buffer_t * p, + bool udp_checksum) { map_main_t *mm = &map_main; ip6_header_t *ip6; @@ -323,7 +325,7 @@ map_ip6_to_ip4_tcp_udp (vlib_buffer_t * p, bool udp_checksum) ip6 = vlib_buffer_get_current (p); if (ip6_parse - (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset)) + (vm, p, ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset)) return -1; if (l4_protocol == IP_PROTOCOL_TCP) @@ -370,7 +372,7 @@ map_ip6_to_ip4_tcp_udp (vlib_buffer_t * p, bool udp_checksum) ip4->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - ip4->tos = ip6_translate_tos (ip6); + ip4->tos = ip6_translate_tos (ip6->ip_version_traffic_class_and_flow_label); ip4->length = u16_net_add (ip6->payload_length, sizeof (*ip4) + sizeof (*ip6) - l4_offset); @@ -429,7 +431,7 @@ ip6_map_t_tcp_udp (vlib_main_t * vm, p0 = vlib_get_buffer (vm, pi0); - if (map_ip6_to_ip4_tcp_udp (p0, true)) + if (map_ip6_to_ip4_tcp_udp (vm, p0, true)) { p0->error = error_node->errors[MAP_ERROR_UNKNOWN]; next0 = IP6_MAPT_TCP_UDP_NEXT_DROP; @@ -512,7 +514,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_buffer (p0)->map_t.mtu = d0->mtu ? 
d0->mtu : ~0; if (PREDICT_FALSE - (ip6_parse (ip60, p0->current_length, + (ip6_parse (vm, p0, ip60, p0->current_length, &(vnet_buffer (p0)->map_t.v6.l4_protocol), &(vnet_buffer (p0)->map_t.v6.l4_offset), &(vnet_buffer (p0)->map_t.v6.frag_offset)))) diff --git a/src/plugins/nat/CMakeLists.txt b/src/plugins/nat/CMakeLists.txt index 4f6ed67a3da..372bbd61bb3 100644 --- a/src/plugins/nat/CMakeLists.txt +++ b/src/plugins/nat/CMakeLists.txt @@ -23,7 +23,6 @@ add_vpp_plugin(nat nat_det.c nat_det_in2out.c nat_det_out2in.c - nat_reass.c nat_dpo.c nat44_cli.c nat44_handoff.c diff --git a/src/plugins/nat/dslite_in2out.c b/src/plugins/nat/dslite_in2out.c index 2a8b548c15f..4494a77701e 100644 --- a/src/plugins/nat/dslite_in2out.c +++ b/src/plugins/nat/dslite_in2out.c @@ -182,7 +182,7 @@ dslite_icmp_in2out (dslite_main_t * dm, ip6_header_t * ip6, u16 old_id, new_id; ip_csum_t sum; - if (icmp_is_error_message (icmp)) + if (icmp_type_is_error_message (icmp->type)) { n = DSLITE_IN2OUT_NEXT_DROP; *error = DSLITE_ERROR_BAD_ICMP_TYPE; diff --git a/src/plugins/nat/dslite_out2in.c b/src/plugins/nat/dslite_out2in.c index 18f9a577923..265d79fc53e 100644 --- a/src/plugins/nat/dslite_out2in.c +++ b/src/plugins/nat/dslite_out2in.c @@ -46,7 +46,8 @@ dslite_icmp_out2in (dslite_main_t * dm, ip4_header_t * ip4, echo = (icmp_echo_header_t *) (icmp + 1); - if (icmp_is_error_message (icmp) || (icmp->type != ICMP4_echo_reply)) + if (icmp_type_is_error_message (icmp->type) + || (icmp->type != ICMP4_echo_reply)) { n = DSLITE_OUT2IN_NEXT_DROP; *error = DSLITE_ERROR_BAD_ICMP_TYPE; diff --git a/src/plugins/nat/in2out.c b/src/plugins/nat/in2out.c index 6cb111c9b3e..7eaaab29544 100755 --- a/src/plugins/nat/in2out.c +++ b/src/plugins/nat/in2out.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -84,8 +83,6 @@ _(BAD_ICMP_TYPE, "unsupported ICMP type") \ _(NO_TRANSLATION, "no translation") \ _(MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded") \ _(DROP_FRAGMENT, "drop 
fragment") \ -_(MAX_REASS, "maximum reassemblies exceeded") \ -_(MAX_FRAG, "maximum fragments per reassembly exceeded")\ _(TCP_PACKETS, "TCP packets") \ _(UDP_PACKETS, "UDP packets") \ _(ICMP_PACKETS, "ICMP packets") \ @@ -114,7 +111,6 @@ typedef enum SNAT_IN2OUT_NEXT_DROP, SNAT_IN2OUT_NEXT_ICMP_ERROR, SNAT_IN2OUT_NEXT_SLOW_PATH, - SNAT_IN2OUT_NEXT_REASS, SNAT_IN2OUT_N_NEXT, } snat_in2out_next_t; @@ -255,7 +251,6 @@ slow_path (snat_main_t * sm, vlib_buffer_t * b0, snat_session_t *s = 0; clib_bihash_kv_8_8_t kv0; snat_session_key_t key1; - udp_header_t *udp0 = ip4_next_header (ip0); u8 is_sm = 0; nat_outside_fib_t *outside_fib; fib_node_index_t fei = FIB_NODE_INDEX_INVALID; @@ -357,7 +352,7 @@ slow_path (snat_main_t * sm, vlib_buffer_t * b0, break; } s->ext_host_addr.as_u32 = ip0->dst_address.as_u32; - s->ext_host_port = udp0->dst_port; + s->ext_host_port = vnet_buffer (b0)->ip.reass.l4_dst_port; *sessionp = s; /* Add to translation hashes */ @@ -401,7 +396,7 @@ slow_path (snat_main_t * sm, vlib_buffer_t * b0, #ifndef CLIB_MARCH_VARIANT static_always_inline - snat_in2out_error_t icmp_get_key (ip4_header_t * ip0, + snat_in2out_error_t icmp_get_key (vlib_buffer_t * b, ip4_header_t * ip0, snat_session_key_t * p_key0) { icmp46_header_t *icmp0; @@ -414,11 +409,12 @@ static_always_inline icmp0 = (icmp46_header_t *) ip4_next_header (ip0); echo0 = (icmp_echo_header_t *) (icmp0 + 1); - if (!icmp_is_error_message (icmp0)) + if (!icmp_type_is_error_message + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) { key0.protocol = SNAT_PROTOCOL_ICMP; key0.addr = ip0->src_address; - key0.port = echo0->identifier; + key0.port = vnet_buffer (b)->ip.reass.l4_src_port; // TODO fixme should this be dst port? 
} else { @@ -466,7 +462,6 @@ icmp_match_in2out_slow (snat_main_t * sm, vlib_node_runtime_t * node, snat_session_key_t * p_value, u8 * p_dont_translate, void *d, void *e) { - icmp46_header_t *icmp0; u32 sw_if_index0; u32 rx_fib_index0; snat_session_key_t key0; @@ -476,11 +471,10 @@ icmp_match_in2out_slow (snat_main_t * sm, vlib_node_runtime_t * node, u32 next0 = ~0; int err; - icmp0 = (icmp46_header_t *) ip4_next_header (ip0); sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); - err = icmp_get_key (ip0, &key0); + err = icmp_get_key (b0, ip0, &key0); if (err != -1) { b0->error = node->errors[err]; @@ -519,7 +513,9 @@ icmp_match_in2out_slow (snat_main_t * sm, vlib_node_runtime_t * node, } } - if (PREDICT_FALSE (icmp_is_error_message (icmp0))) + if (PREDICT_FALSE + (icmp_type_is_error_message + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))) { b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE]; next0 = SNAT_IN2OUT_NEXT_DROP; @@ -540,9 +536,13 @@ icmp_match_in2out_slow (snat_main_t * sm, vlib_node_runtime_t * node, } else { - if (PREDICT_FALSE (icmp0->type != ICMP4_echo_request && - icmp0->type != ICMP4_echo_reply && - !icmp_is_error_message (icmp0))) + if (PREDICT_FALSE + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request + && vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply + && !icmp_type_is_error_message (vnet_buffer (b0)->ip. 
+ reass.icmp_type_or_tcp_flags))) { b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE]; next0 = SNAT_IN2OUT_NEXT_DROP; @@ -585,7 +585,6 @@ icmp_match_in2out_fast (snat_main_t * sm, vlib_node_runtime_t * node, snat_session_key_t * p_value, u8 * p_dont_translate, void *d, void *e) { - icmp46_header_t *icmp0; u32 sw_if_index0; u32 rx_fib_index0; snat_session_key_t key0; @@ -595,11 +594,10 @@ icmp_match_in2out_fast (snat_main_t * sm, vlib_node_runtime_t * node, u32 next0 = ~0; int err; - icmp0 = (icmp46_header_t *) ip4_next_header (ip0); sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); - err = icmp_get_key (ip0, &key0); + err = icmp_get_key (b0, ip0, &key0); if (err != -1) { b0->error = node->errors[err]; @@ -619,7 +617,8 @@ icmp_match_in2out_fast (snat_main_t * sm, vlib_node_runtime_t * node, goto out; } - if (icmp_is_error_message (icmp0)) + if (icmp_type_is_error_message + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags)) { next0 = SNAT_IN2OUT_NEXT_DROP; goto out; @@ -630,9 +629,12 @@ icmp_match_in2out_fast (snat_main_t * sm, vlib_node_runtime_t * node, goto out; } - if (PREDICT_FALSE (icmp0->type != ICMP4_echo_request && - (icmp0->type != ICMP4_echo_reply || !is_addr_only) && - !icmp_is_error_message (icmp0))) + if (PREDICT_FALSE + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != ICMP4_echo_request + && (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply || !is_addr_only) + && !icmp_type_is_error_message (vnet_buffer (b0)->ip. 
+ reass.icmp_type_or_tcp_flags))) { b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE]; next0 = SNAT_IN2OUT_NEXT_DROP; @@ -706,84 +708,90 @@ icmp_in2out (snat_main_t * sm, src_address /* changed member */ ); ip0->checksum = ip_csum_fold (sum0); - if (icmp0->checksum == 0) - icmp0->checksum = 0xffff; - - if (!icmp_is_error_message (icmp0)) - { - new_id0 = sm0.port; - if (PREDICT_FALSE (new_id0 != echo0->identifier)) - { - old_id0 = echo0->identifier; - new_id0 = sm0.port; - echo0->identifier = new_id0; - - sum0 = icmp0->checksum; - sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, - identifier); - icmp0->checksum = ip_csum_fold (sum0); - } - } - else + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { - inner_ip0 = (ip4_header_t *) (echo0 + 1); - l4_header = ip4_next_header (inner_ip0); + if (icmp0->checksum == 0) + icmp0->checksum = 0xffff; - if (!ip4_header_checksum_is_valid (inner_ip0)) + if (!icmp_type_is_error_message (icmp0->type)) { - next0 = SNAT_IN2OUT_NEXT_DROP; - goto out; + new_id0 = sm0.port; + if (PREDICT_FALSE (new_id0 != echo0->identifier)) + { + old_id0 = echo0->identifier; + new_id0 = sm0.port; + echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = + ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + } } - - /* update inner destination IP address */ - old_addr0 = inner_ip0->dst_address.as_u32; - inner_ip0->dst_address = sm0.addr; - new_addr0 = inner_ip0->dst_address.as_u32; - sum0 = icmp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address /* changed member */ ); - icmp0->checksum = ip_csum_fold (sum0); - - /* update inner IP header checksum */ - old_checksum0 = inner_ip0->checksum; - sum0 = inner_ip0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address /* changed member */ ); - inner_ip0->checksum = ip_csum_fold (sum0); - new_checksum0 = inner_ip0->checksum; - 
sum0 = icmp0->checksum; - sum0 = ip_csum_update (sum0, old_checksum0, new_checksum0, ip4_header_t, - checksum); - icmp0->checksum = ip_csum_fold (sum0); - - switch (protocol) + else { - case SNAT_PROTOCOL_ICMP: - inner_icmp0 = (icmp46_header_t *) l4_header; - inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1); + inner_ip0 = (ip4_header_t *) (echo0 + 1); + l4_header = ip4_next_header (inner_ip0); - old_id0 = inner_echo0->identifier; - new_id0 = sm0.port; - inner_echo0->identifier = new_id0; + if (!ip4_header_checksum_is_valid (inner_ip0)) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + /* update inner destination IP address */ + old_addr0 = inner_ip0->dst_address.as_u32; + inner_ip0->dst_address = sm0.addr; + new_addr0 = inner_ip0->dst_address.as_u32; sum0 = icmp0->checksum; - sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, - identifier); + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + dst_address /* changed member */ ); icmp0->checksum = ip_csum_fold (sum0); - break; - case SNAT_PROTOCOL_UDP: - case SNAT_PROTOCOL_TCP: - old_id0 = ((tcp_udp_header_t *) l4_header)->dst_port; - new_id0 = sm0.port; - ((tcp_udp_header_t *) l4_header)->dst_port = new_id0; + /* update inner IP header checksum */ + old_checksum0 = inner_ip0->checksum; + sum0 = inner_ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + dst_address /* changed member */ ); + inner_ip0->checksum = ip_csum_fold (sum0); + new_checksum0 = inner_ip0->checksum; sum0 = icmp0->checksum; - sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t, - dst_port); + sum0 = + ip_csum_update (sum0, old_checksum0, new_checksum0, ip4_header_t, + checksum); icmp0->checksum = ip_csum_fold (sum0); - break; - default: - ASSERT (0); + + switch (protocol) + { + case SNAT_PROTOCOL_ICMP: + inner_icmp0 = (icmp46_header_t *) l4_header; + inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1); + + old_id0 = inner_echo0->identifier; + new_id0 = 
sm0.port; + inner_echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = + ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + break; + case SNAT_PROTOCOL_UDP: + case SNAT_PROTOCOL_TCP: + old_id0 = ((tcp_udp_header_t *) l4_header)->dst_port; + new_id0 = sm0.port; + ((tcp_udp_header_t *) l4_header)->dst_port = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t, + dst_port); + icmp0->checksum = ip_csum_fold (sum0); + break; + default: + ASSERT (0); + } } } @@ -939,7 +947,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, b1 = vlib_get_buffer (vm, bi1); if (is_output_feature) - iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length; + iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length; ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0); @@ -999,13 +1007,6 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, goto trace00; } - if (ip4_is_fragment (ip0)) - { - next0 = SNAT_IN2OUT_NEXT_REASS; - fragments++; - goto trace00; - } - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) { next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; @@ -1014,7 +1015,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } key0.addr = ip0->src_address; - key0.port = udp0->src_port; + key0.port = vnet_buffer (b0)->ip.reass.l4_src_port; key0.protocol = proto0; key0.fib_index = rx_fib_index0; @@ -1029,13 +1030,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (is_output_feature) { - if (PREDICT_FALSE (nat_not_translate_output_feature (sm, - ip0, - proto0, - udp0->src_port, - udp0->dst_port, - thread_index, - sw_if_index0))) + if (PREDICT_FALSE + (nat_not_translate_output_feature + (sm, ip0, proto0, + vnet_buffer (b0)->ip.reass.l4_src_port, + vnet_buffer (b0)->ip.reass.l4_dst_port, + thread_index, sw_if_index0))) goto trace00; /* @@ -1045,7 +1045,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE ((b0->flags & 
VNET_BUFFER_F_LOCALLY_ORIGINATED) && proto0 == SNAT_PROTOCOL_UDP - && (udp0->dst_port == + && (vnet_buffer (b0)->ip.reass.l4_dst_port == clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)))) goto trace00; @@ -1092,34 +1092,42 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, src_address /* changed member */ ); ip0->checksum = ip_csum_fold (sum0); - old_port0 = udp0->src_port; - new_port0 = udp0->src_port = s0->out2in.port; if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP)) { - sum0 = tcp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, - dst_address /* changed member */ ); - sum0 = ip_csum_update (sum0, old_port0, new_port0, - ip4_header_t /* cheat */ , - length /* changed member */ ); - mss_clamping (sm, tcp0, &sum0); - tcp0->checksum = ip_csum_fold (sum0); - tcp_packets++; - } - else - { - if (PREDICT_FALSE (udp0->checksum)) + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { - sum0 = udp0->checksum; + old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port; + new_port0 = udp0->src_port = s0->out2in.port; + sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ ); sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t /* cheat */ , length /* changed member */ ); - udp0->checksum = ip_csum_fold (sum0); + mss_clamping (sm, tcp0, &sum0); + tcp0->checksum = ip_csum_fold (sum0); + } + tcp_packets++; + } + else + { + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) + { + old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port; + new_port0 = udp0->src_port = s0->out2in.port; /* translate the port even when the UDP checksum is 0 (not computed) */ + if (PREDICT_FALSE (udp0->checksum)) + { + sum0 = udp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ + ); + sum0 = + ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */ , + length /* changed member */ ); + udp0->checksum = ip_csum_fold (sum0); + } } udp_packets++; } @@ -1149,7 +1157,7 @@ 
snat_in2out_node_fn_inline (vlib_main_t * vm, pkts_processed += next0 == SNAT_IN2OUT_NEXT_LOOKUP; if (is_output_feature) - iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length; + iph_offset1 = vnet_buffer (b1)->ip.reass.save_rewrite_length; ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) + iph_offset1); @@ -1207,13 +1215,6 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, goto trace01; } - if (ip4_is_fragment (ip1)) - { - next1 = SNAT_IN2OUT_NEXT_REASS; - fragments++; - goto trace01; - } - if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP)) { next1 = SNAT_IN2OUT_NEXT_SLOW_PATH; @@ -1222,7 +1223,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } key1.addr = ip1->src_address; - key1.port = udp1->src_port; + key1.port = vnet_buffer (b1)->ip.reass.l4_src_port; key1.protocol = proto1; key1.fib_index = rx_fib_index1; @@ -1237,13 +1238,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (is_output_feature) { - if (PREDICT_FALSE (nat_not_translate_output_feature (sm, - ip1, - proto1, - udp1->src_port, - udp1->dst_port, - thread_index, - sw_if_index1))) + if (PREDICT_FALSE + (nat_not_translate_output_feature + (sm, ip1, proto1, + vnet_buffer (b1)->ip.reass.l4_src_port, + vnet_buffer (b1)->ip.reass.l4_dst_port, + thread_index, sw_if_index1))) goto trace01; /* @@ -1253,7 +1253,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE ((b1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED) && proto1 == SNAT_PROTOCOL_UDP - && (udp1->dst_port == + && (vnet_buffer (b1)->ip.reass.l4_dst_port == clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)))) goto trace01; @@ -1300,34 +1300,41 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, src_address /* changed member */ ); ip1->checksum = ip_csum_fold (sum1); - old_port1 = udp1->src_port; - new_port1 = udp1->src_port = s1->out2in.port; - if (PREDICT_TRUE (proto1 == SNAT_PROTOCOL_TCP)) { - sum1 = tcp1->checksum; - sum1 = ip_csum_update (sum1, old_addr1, new_addr1, - ip4_header_t, - dst_address /* changed member */ ); - 
sum1 = ip_csum_update (sum1, old_port1, new_port1, - ip4_header_t /* cheat */ , - length /* changed member */ ); - mss_clamping (sm, tcp1, &sum1); - tcp1->checksum = ip_csum_fold (sum1); - tcp_packets++; - } - else - { - if (PREDICT_FALSE (udp1->checksum)) + if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment) { - sum1 = udp1->checksum; + old_port1 = vnet_buffer (b1)->ip.reass.l4_src_port; + new_port1 = udp1->src_port = s1->out2in.port; + sum1 = tcp1->checksum; sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t, dst_address /* changed member */ ); sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t /* cheat */ , length /* changed member */ ); - udp1->checksum = ip_csum_fold (sum1); + mss_clamping (sm, tcp1, &sum1); + tcp1->checksum = ip_csum_fold (sum1); + } + tcp_packets++; + } + else + { + if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment) + { + old_port1 = vnet_buffer (b1)->ip.reass.l4_src_port; + new_port1 = udp1->src_port = s1->out2in.port; /* translate the port even when the UDP checksum is 0 (not computed) */ + if (PREDICT_FALSE (udp1->checksum)) + { + sum1 = udp1->checksum; + sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t, dst_address /* changed member */ + ); + sum1 = + ip_csum_update (sum1, old_port1, new_port1, + ip4_header_t /* cheat */ , + length /* changed member */ ); + udp1->checksum = ip_csum_fold (sum1); + } } udp_packets++; } @@ -1393,7 +1400,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, next0 = SNAT_IN2OUT_NEXT_LOOKUP; if (is_output_feature) - iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length; + iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length; ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0); @@ -1451,13 +1458,6 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, goto trace0; } - if (ip4_is_fragment (ip0)) - { - next0 = SNAT_IN2OUT_NEXT_REASS; - fragments++; - goto trace0; - } - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) { next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; @@ -1466,7 +1466,7 @@ snat_in2out_node_fn_inline 
(vlib_main_t * vm, } key0.addr = ip0->src_address; - key0.port = udp0->src_port; + key0.port = vnet_buffer (b0)->ip.reass.l4_src_port; key0.protocol = proto0; key0.fib_index = rx_fib_index0; @@ -1479,13 +1479,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (is_output_feature) { - if (PREDICT_FALSE (nat_not_translate_output_feature (sm, - ip0, - proto0, - udp0->src_port, - udp0->dst_port, - thread_index, - sw_if_index0))) + if (PREDICT_FALSE + (nat_not_translate_output_feature + (sm, ip0, proto0, + vnet_buffer (b0)->ip.reass.l4_src_port, + vnet_buffer (b0)->ip.reass.l4_dst_port, + thread_index, sw_if_index0))) goto trace0; /* @@ -1495,7 +1494,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE ((b0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED) && proto0 == SNAT_PROTOCOL_UDP - && (udp0->dst_port == + && (vnet_buffer (b0)->ip.reass.l4_dst_port == clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)))) goto trace0; @@ -1543,34 +1542,41 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, src_address /* changed member */ ); ip0->checksum = ip_csum_fold (sum0); - old_port0 = udp0->src_port; - new_port0 = udp0->src_port = s0->out2in.port; - if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP)) { - sum0 = tcp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, - dst_address /* changed member */ ); - sum0 = ip_csum_update (sum0, old_port0, new_port0, - ip4_header_t /* cheat */ , - length /* changed member */ ); - mss_clamping (sm, tcp0, &sum0); - tcp0->checksum = ip_csum_fold (sum0); - tcp_packets++; - } - else - { - if (PREDICT_FALSE (udp0->checksum)) + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { - sum0 = udp0->checksum; + old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port; + new_port0 = udp0->src_port = s0->out2in.port; + sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ ); sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t /* cheat */ , 
length /* changed member */ ); - udp0->checksum = ip_csum_fold (sum0); + mss_clamping (sm, tcp0, &sum0); + tcp0->checksum = ip_csum_fold (sum0); + } + tcp_packets++; + } + else + { + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) + { + old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port; + new_port0 = udp0->src_port = s0->out2in.port; /* translate the port even when the UDP checksum is 0 (not computed) */ + if (PREDICT_FALSE (udp0->checksum)) + { + sum0 = udp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ + ); + sum0 = + ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */ , + length /* changed member */ ); + udp0->checksum = ip_csum_fold (sum0); + } } udp_packets++; } @@ -1654,7 +1660,6 @@ VLIB_REGISTER_NODE (snat_in2out_node) = { [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath", [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", - [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass", }, }; /* *INDENT-ON* */ @@ -1687,7 +1692,6 @@ VLIB_REGISTER_NODE (snat_in2out_output_node) = { [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath", [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", - [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass", }, }; /* *INDENT-ON* */ @@ -1720,7 +1724,6 @@ VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = { [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath", [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", - [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass", }, }; /* *INDENT-ON* */ @@ -1753,307 +1756,6 @@ VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = { [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath", [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", - [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass", - }, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FN (nat44_in2out_reass_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, 
- vlib_frame_t * frame) -{ - u32 n_left_from, *from, *to_next; - snat_in2out_next_t next_index; - u32 pkts_processed = 0, cached_fragments = 0; - snat_main_t *sm = &snat_main; - f64 now = vlib_time_now (vm); - u32 thread_index = vm->thread_index; - snat_main_per_thread_data_t *per_thread_data = - &sm->per_thread_data[thread_index]; - u32 *fragments_to_drop = 0; - u32 *fragments_to_loopback = 0; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0; - vlib_buffer_t *b0; - u32 next0; - u8 cached0 = 0; - ip4_header_t *ip0; - nat_reass_ip4_t *reass0; - udp_header_t *udp0; - tcp_header_t *tcp0; - icmp46_header_t *icmp0; - snat_session_key_t key0; - clib_bihash_kv_8_8_t kv0, value0; - snat_session_t *s0 = 0; - u16 old_port0, new_port0; - ip_csum_t sum0; - - /* speculatively enqueue b0 to the current next frame */ - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - next0 = SNAT_IN2OUT_NEXT_LOOKUP; - - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - rx_fib_index0 = - fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, - sw_if_index0); - - if (PREDICT_FALSE (nat_reass_is_drop_frag (0))) - { - next0 = SNAT_IN2OUT_NEXT_DROP; - b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT]; - goto trace0; - } - - ip0 = (ip4_header_t *) vlib_buffer_get_current (b0); - udp0 = ip4_next_header (ip0); - tcp0 = (tcp_header_t *) udp0; - icmp0 = (icmp46_header_t *) udp0; - proto0 = ip_proto_to_snat_proto (ip0->protocol); - - reass0 = nat_ip4_reass_find_or_create (ip0->src_address, - ip0->dst_address, - ip0->fragment_id, - ip0->protocol, - 1, &fragments_to_drop); - - if 
(PREDICT_FALSE (!reass0)) - { - next0 = SNAT_IN2OUT_NEXT_DROP; - b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_REASS]; - nat_elog_notice ("maximum reassemblies exceeded"); - goto trace0; - } - - if (PREDICT_FALSE (ip4_is_first_fragment (ip0))) - { - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) - { - next0 = icmp_in2out_slow_path - (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, now, thread_index, &s0); - - if (PREDICT_TRUE (next0 != SNAT_IN2OUT_NEXT_DROP)) - { - if (s0) - reass0->sess_index = s0 - per_thread_data->sessions; - else - reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE; - nat_ip4_reass_get_frags (reass0, - &fragments_to_loopback); - } - - goto trace0; - } - - key0.addr = ip0->src_address; - key0.port = udp0->src_port; - key0.protocol = proto0; - key0.fib_index = rx_fib_index0; - kv0.key = key0.as_u64; - - if (clib_bihash_search_8_8 - (&per_thread_data->in2out, &kv0, &value0)) - { - if (PREDICT_FALSE - (snat_not_translate - (sm, node, sw_if_index0, ip0, proto0, rx_fib_index0, - thread_index))) - goto trace0; - - next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, - &s0, node, next0, thread_index, now); - - if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) - goto trace0; - - if (PREDICT_FALSE (!s0)) - goto trace0; - - reass0->sess_index = s0 - per_thread_data->sessions; - } - else - { - s0 = pool_elt_at_index (per_thread_data->sessions, - value0.value); - reass0->sess_index = value0.value; - } - nat_ip4_reass_get_frags (reass0, &fragments_to_loopback); - } - else - { - if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0)) - { - if (nat_ip4_reass_add_fragment - (thread_index, reass0, bi0, &fragments_to_drop)) - { - b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_FRAG]; - nat_elog_notice - ("maximum fragments per reassembly exceeded"); - next0 = SNAT_IN2OUT_NEXT_DROP; - goto trace0; - } - cached0 = 1; - goto trace0; - } - s0 = pool_elt_at_index (per_thread_data->sessions, - reass0->sess_index); - } - - old_addr0 = 
ip0->src_address.as_u32; - ip0->src_address = s0->out2in.addr; - new_addr0 = ip0->src_address.as_u32; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; - - sum0 = ip0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, - src_address /* changed member */ ); - ip0->checksum = ip_csum_fold (sum0); - - if (PREDICT_FALSE (ip4_is_first_fragment (ip0))) - { - old_port0 = udp0->src_port; - new_port0 = udp0->src_port = s0->out2in.port; - - if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP)) - { - sum0 = tcp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, - dst_address /* changed member */ ); - sum0 = ip_csum_update (sum0, old_port0, new_port0, - ip4_header_t /* cheat */ , - length /* changed member */ ); - tcp0->checksum = ip_csum_fold (sum0); - } - else if (PREDICT_FALSE (udp0->checksum)) - { - sum0 = udp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, - dst_address /* changed member */ ); - sum0 = ip_csum_update (sum0, old_port0, new_port0, - ip4_header_t /* cheat */ , - length /* changed member */ ); - udp0->checksum = ip_csum_fold (sum0); - } - } - - /* Hairpinning */ - nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port, - s0->ext_host_port, proto0, 0); - - /* Accounting */ - nat44_session_update_counters (s0, now, - vlib_buffer_length_in_chain (vm, b0), - thread_index); - /* Per-user LRU list maintenance */ - nat44_session_update_lru (sm, s0, thread_index); - - trace0: - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) - { - nat44_reass_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - t->cached = cached0; - t->sw_if_index = sw_if_index0; - t->next_index = next0; - } - - if (cached0) - { - n_left_to_next++; - to_next--; - cached_fragments++; - } - else - { - pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; - - /* verify speculative enqueue, maybe switch current next frame */ - 
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - if (n_left_from == 0 && vec_len (fragments_to_loopback)) - { - from = vlib_frame_vector_args (frame); - u32 len = vec_len (fragments_to_loopback); - if (len <= VLIB_FRAME_SIZE) - { - clib_memcpy_fast (from, fragments_to_loopback, - sizeof (u32) * len); - n_left_from = len; - vec_reset_length (fragments_to_loopback); - } - else - { - clib_memcpy_fast (from, fragments_to_loopback + - (len - VLIB_FRAME_SIZE), - sizeof (u32) * VLIB_FRAME_SIZE); - n_left_from = VLIB_FRAME_SIZE; - _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE; - } - } - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, sm->in2out_reass_node_index, - SNAT_IN2OUT_ERROR_PROCESSED_FRAGMENTS, - pkts_processed); - vlib_node_increment_counter (vm, sm->in2out_reass_node_index, - SNAT_IN2OUT_ERROR_CACHED_FRAGMENTS, - cached_fragments); - - nat_send_all_to_node (vm, fragments_to_drop, node, - &node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT], - SNAT_IN2OUT_NEXT_DROP); - - vec_free (fragments_to_drop); - vec_free (fragments_to_loopback); - return frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (nat44_in2out_reass_node) = { - .name = "nat44-in2out-reass", - .vector_size = sizeof (u32), - .format_trace = format_nat44_reass_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(snat_in2out_error_strings), - .error_strings = snat_in2out_error_strings, - - .n_next_nodes = SNAT_IN2OUT_N_NEXT, - .next_nodes = { - [SNAT_IN2OUT_NEXT_DROP] = "error-drop", - [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", - [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath", - [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", - [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass", }, }; /* *INDENT-ON* */ @@ -2264,7 +1966,6 @@ VLIB_REGISTER_NODE (snat_in2out_fast_node) = { [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", [SNAT_IN2OUT_NEXT_SLOW_PATH] = 
"nat44-in2out-slowpath", [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", - [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass", }, }; /* *INDENT-ON* */ diff --git a/src/plugins/nat/in2out_ed.c b/src/plugins/nat/in2out_ed.c index f8cd89fb8de..0209a4059db 100644 --- a/src/plugins/nat/in2out_ed.c +++ b/src/plugins/nat/in2out_ed.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -189,8 +188,7 @@ slow_path_ed (snat_main_t * sm, u32 rx_fib_index, clib_bihash_kv_16_8_t * kv, snat_session_t ** sessionp, - vlib_node_runtime_t * node, u32 next, u32 thread_index, f64 now, - tcp_header_t * tcp) + vlib_node_runtime_t * node, u32 next, u32 thread_index, f64 now) { snat_session_t *s = 0; snat_user_t *u; @@ -254,7 +252,8 @@ slow_path_ed (snat_main_t * sm, if (proto == SNAT_PROTOCOL_TCP) { - if (!tcp_is_init (tcp)) + if (!tcp_flags_is_init + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) { b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN]; return NAT_NEXT_DROP; @@ -405,7 +404,6 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip, { nat_ed_ses_key_t key; clib_bihash_kv_16_8_t kv, value; - udp_header_t *udp; snat_session_t *s = 0; snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; @@ -415,7 +413,7 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip, if (ip->protocol == IP_PROTOCOL_ICMP) { key.as_u64[0] = key.as_u64[1] = 0; - if (get_icmp_i2o_ed_key (ip, &key)) + if (get_icmp_i2o_ed_key (b, ip, &key)) return 0; key.fib_index = 0; kv.key[0] = key.as_u64[0]; @@ -423,9 +421,9 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip, } else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP) { - udp = ip4_next_header (ip); make_ed_kv (&kv, &ip->src_address, &ip->dst_address, ip->protocol, 0, - udp->src_port, udp->dst_port); + vnet_buffer (b)->ip.reass.l4_src_port, + vnet_buffer (b)->ip.reass.l4_dst_port); } else { @@ -440,8 +438,7 @@ 
nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip, { if (ip->protocol == IP_PROTOCOL_TCP) { - tcp_header_t *tcp = ip4_next_header (ip); - if (nat44_set_tcp_session_state_i2o (sm, s, tcp, thread_index)) + if (nat44_set_tcp_session_state_i2o (sm, s, b, thread_index)) return 1; } /* Accounting */ @@ -518,7 +515,6 @@ icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node, u8 * p_proto, snat_session_key_t * p_value, u8 * p_dont_translate, void *d, void *e) { - icmp46_header_t *icmp; u32 sw_if_index; u32 rx_fib_index; nat_ed_ses_key_t key; @@ -529,12 +525,11 @@ icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node, int err; snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - icmp = (icmp46_header_t *) ip4_next_header (ip); sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); key.as_u64[0] = key.as_u64[1] = 0; - err = get_icmp_i2o_ed_key (ip, &key); + err = get_icmp_i2o_ed_key (b, ip, &key); if (err != 0) { b->error = node->errors[err]; @@ -550,18 +545,10 @@ icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node, { if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0) { - if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (sm, ip, - key.proto, - key. - l_port, - key. 
- r_port, - thread_index, - sw_if_index, - vnet_buffer - (b)-> - sw_if_index - [VLIB_TX]))) + if (PREDICT_FALSE + (nat44_ed_not_translate_output_feature + (sm, ip, key.proto, key.l_port, key.r_port, thread_index, + sw_if_index, vnet_buffer (b)->sw_if_index[VLIB_TX]))) { dont_translate = 1; goto out; @@ -579,7 +566,9 @@ icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node, } } - if (PREDICT_FALSE (icmp_is_error_message (icmp))) + if (PREDICT_FALSE + (icmp_type_is_error_message + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))) { b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE]; next = NAT_NEXT_DROP; @@ -587,7 +576,7 @@ icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node, } next = slow_path_ed (sm, b, rx_fib_index, &kv, &s, node, next, - thread_index, vlib_time_now (sm->vlib_main), 0); + thread_index, vlib_time_now (sm->vlib_main)); if (PREDICT_FALSE (next == NAT_NEXT_DROP)) goto out; @@ -600,9 +589,13 @@ icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node, } else { - if (PREDICT_FALSE (icmp->type != ICMP4_echo_request && - icmp->type != ICMP4_echo_reply && - !icmp_is_error_message (icmp))) + if (PREDICT_FALSE + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request + && vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply + && !icmp_type_is_error_message (vnet_buffer (b)->ip. + reass.icmp_type_or_tcp_flags))) { b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE]; next = NAT_NEXT_DROP; @@ -837,14 +830,11 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, u32 thread_index = vm->thread_index; snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets = - 0, fragments = 0, def_slow, def_reass; + 0, def_slow; def_slow = is_output_feature ? NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH : NAT_NEXT_IN2OUT_ED_SLOW_PATH; - def_reass = is_output_feature ? 
NAT_NEXT_IN2OUT_ED_OUTPUT_REASS : - NAT_NEXT_IN2OUT_ED_REASS; - stats_node_index = is_slow_path ? sm->ed_in2out_slowpath_node_index : sm->ed_in2out_node_index; @@ -910,8 +900,8 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, vnet_feature_next (&nat_buffer_opaque (b1)->arc_next, b1); } - iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length; - iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length; + iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length; + iph_offset1 = vnet_buffer (b1)->ip.reass.save_rewrite_length; } next0 = nat_buffer_opaque (b0)->arc_next; @@ -971,13 +961,6 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, goto trace00; } - if (ip4_is_fragment (ip0)) - { - next0 = def_reass; - fragments++; - goto trace00; - } - if (is_output_feature) { if (PREDICT_FALSE @@ -994,8 +977,9 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, } make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address, - ip0->protocol, rx_fib_index0, udp0->src_port, - udp0->dst_port); + ip0->protocol, rx_fib_index0, + vnet_buffer (b0)->ip.reass.l4_src_port, + vnet_buffer (b0)->ip.reass.l4_dst_port); if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0)) { @@ -1005,8 +989,10 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, { if (PREDICT_FALSE (nat44_ed_not_translate_output_feature - (sm, ip0, ip0->protocol, udp0->src_port, - udp0->dst_port, thread_index, sw_if_index0, + (sm, ip0, ip0->protocol, + vnet_buffer (b0)->ip.reass.l4_src_port, + vnet_buffer (b0)->ip.reass.l4_dst_port, + thread_index, sw_if_index0, vnet_buffer (b0)->sw_if_index[VLIB_TX]))) goto trace00; @@ -1017,7 +1003,7 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE ((b0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED) && proto0 == SNAT_PROTOCOL_UDP - && (udp0->dst_port == + && (vnet_buffer (b0)->ip.reass.l4_dst_port == clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)))) goto trace00; @@ -1034,7 +1020,7 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, next0 = slow_path_ed (sm, 
b0, rx_fib_index0, &kv0, &s0, node, - next0, thread_index, now, tcp0); + next0, thread_index, now); if (PREDICT_FALSE (next0 == NAT_NEXT_DROP)) goto trace00; @@ -1069,36 +1055,44 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, dst_address); ip0->checksum = ip_csum_fold (sum0); - old_port0 = udp0->src_port; - new_port0 = udp0->src_port = s0->out2in.port; + old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port; if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP)) { - sum0 = tcp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, - length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { - sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, - ip4_header_t, dst_address); - sum0 = ip_csum_update (sum0, tcp0->dst_port, - s0->ext_host_port, ip4_header_t, - length); - tcp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; + new_port0 = udp0->src_port = s0->out2in.port; + sum0 = tcp0->checksum; + sum0 = + ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + dst_address); + sum0 = + ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, + length); + if (PREDICT_FALSE (is_twice_nat_session (s0))) + { + sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32, + s0->ext_host_addr.as_u32, + ip4_header_t, dst_address); + sum0 = + ip_csum_update (sum0, + vnet_buffer (b0)->ip. 
+ reass.l4_dst_port, s0->ext_host_port, + ip4_header_t, length); + tcp0->dst_port = s0->ext_host_port; + ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; + } + mss_clamping (sm, tcp0, &sum0); + tcp0->checksum = ip_csum_fold (sum0); } - mss_clamping (sm, tcp0, &sum0); - tcp0->checksum = ip_csum_fold (sum0); tcp_packets++; - if (nat44_set_tcp_session_state_i2o - (sm, s0, tcp0, thread_index)) + if (nat44_set_tcp_session_state_i2o (sm, s0, b0, thread_index)) goto trace00; } - else if (udp0->checksum) + else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment + && udp0->checksum) { + new_port0 = udp0->src_port = s0->out2in.port; sum0 = udp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address); @@ -1109,9 +1103,10 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32, s0->ext_host_addr.as_u32, ip4_header_t, dst_address); - sum0 = ip_csum_update (sum0, tcp0->dst_port, - s0->ext_host_port, ip4_header_t, - length); + sum0 = + ip_csum_update (sum0, + vnet_buffer (b0)->ip.reass.l4_dst_port, + s0->ext_host_port, ip4_header_t, length); udp0->dst_port = s0->ext_host_port; ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; } @@ -1120,12 +1115,16 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, } else { - if (PREDICT_FALSE (is_twice_nat_session (s0))) + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { - udp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; + new_port0 = udp0->src_port = s0->out2in.port; + if (PREDICT_FALSE (is_twice_nat_session (s0))) + { + udp0->dst_port = s0->ext_host_port; + ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; + } + udp_packets++; } - udp_packets++; } /* Accounting */ @@ -1181,8 +1180,8 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, { s1 = nat44_ed_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1, - thread_index, now, vm, - node); + thread_index, now, + vm, node); if (!s1) next1 = NAT_NEXT_DROP; 
other_packets++; @@ -1192,8 +1191,8 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP)) { next1 = icmp_in2out_ed_slow_path - (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, - next1, now, thread_index, &s1); + (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, + node, next1, now, thread_index, &s1); icmp_packets++; goto trace01; } @@ -1206,13 +1205,6 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, goto trace01; } - if (ip4_is_fragment (ip1)) - { - next1 = def_reass; - fragments++; - goto trace01; - } - if (is_output_feature) { if (PREDICT_FALSE @@ -1229,8 +1221,9 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, } make_ed_kv (&kv1, &ip1->src_address, &ip1->dst_address, - ip1->protocol, rx_fib_index1, udp1->src_port, - udp1->dst_port); + ip1->protocol, rx_fib_index1, + vnet_buffer (b1)->ip.reass.l4_src_port, + vnet_buffer (b1)->ip.reass.l4_dst_port); if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv1, &value1)) { @@ -1240,8 +1233,10 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, { if (PREDICT_FALSE (nat44_ed_not_translate_output_feature - (sm, ip1, ip1->protocol, udp1->src_port, - udp1->dst_port, thread_index, sw_if_index1, + (sm, ip1, ip1->protocol, + vnet_buffer (b1)->ip.reass.l4_src_port, + vnet_buffer (b1)->ip.reass.l4_dst_port, + thread_index, sw_if_index1, vnet_buffer (b1)->sw_if_index[VLIB_TX]))) goto trace01; @@ -1252,7 +1247,7 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE ((b1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED) && proto1 == SNAT_PROTOCOL_UDP - && (udp1->dst_port == + && (vnet_buffer (b1)->ip.reass.l4_dst_port == clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)))) goto trace01; @@ -1261,7 +1256,8 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, { if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index1, - ip1, proto1, + ip1, + proto1, rx_fib_index1, thread_index))) goto trace01; @@ -1269,7 +1265,7 @@ nat44_ed_in2out_node_fn_inline 
(vlib_main_t * vm, next1 = slow_path_ed (sm, b1, rx_fib_index1, &kv1, &s1, node, - next1, thread_index, now, tcp1); + next1, thread_index, now); if (PREDICT_FALSE (next1 == NAT_NEXT_DROP)) goto trace01; @@ -1304,50 +1300,62 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, dst_address); ip1->checksum = ip_csum_fold (sum1); - old_port1 = udp1->src_port; - new_port1 = udp1->src_port = s1->out2in.port; + old_port1 = vnet_buffer (b1)->ip.reass.l4_src_port; if (PREDICT_TRUE (proto1 == SNAT_PROTOCOL_TCP)) { - sum1 = tcp1->checksum; - sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t, - dst_address); - sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t, - length); - if (PREDICT_FALSE (is_twice_nat_session (s1))) + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { - sum1 = ip_csum_update (sum1, ip1->dst_address.as_u32, - s1->ext_host_addr.as_u32, - ip4_header_t, dst_address); - sum1 = ip_csum_update (sum1, tcp1->dst_port, - s1->ext_host_port, ip4_header_t, - length); - tcp1->dst_port = s1->ext_host_port; - ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32; + new_port1 = udp1->src_port = s1->out2in.port; + sum1 = tcp1->checksum; + sum1 = + ip_csum_update (sum1, old_addr1, new_addr1, + ip4_header_t, dst_address); + sum1 = + ip_csum_update (sum1, old_port1, new_port1, + ip4_header_t, length); + if (PREDICT_FALSE (is_twice_nat_session (s1))) + { + sum1 = + ip_csum_update (sum1, ip1->dst_address.as_u32, + s1->ext_host_addr.as_u32, + ip4_header_t, dst_address); + sum1 = + ip_csum_update (sum1, + vnet_buffer (b1)->ip. 
+ reass.l4_dst_port, s1->ext_host_port, + ip4_header_t, length); + tcp1->dst_port = s1->ext_host_port; + ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32; + } + tcp1->checksum = ip_csum_fold (sum1); + mss_clamping (sm, tcp1, &sum1); } - tcp1->checksum = ip_csum_fold (sum1); - mss_clamping (sm, tcp1, &sum1); tcp_packets++; - if (nat44_set_tcp_session_state_i2o - (sm, s1, tcp1, thread_index)) + if (nat44_set_tcp_session_state_i2o (sm, s1, b1, thread_index)) goto trace01; } - else if (udp1->checksum) + else if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment + && udp1->checksum) { + new_port1 = udp1->src_port = s1->out2in.port; sum1 = udp1->checksum; - sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t, - dst_address); - sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t, - length); + sum1 = + ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t, + dst_address); + sum1 = + ip_csum_update (sum1, old_port1, new_port1, ip4_header_t, + length); if (PREDICT_FALSE (is_twice_nat_session (s1))) { sum1 = ip_csum_update (sum1, ip1->dst_address.as_u32, s1->ext_host_addr.as_u32, ip4_header_t, dst_address); - sum1 = ip_csum_update (sum1, tcp1->dst_port, - s1->ext_host_port, ip4_header_t, - length); + sum1 = + ip_csum_update (sum1, + vnet_buffer (b1)->ip.reass.l4_dst_port, + s1->ext_host_port, ip4_header_t, length); udp1->dst_port = s1->ext_host_port; ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32; } @@ -1356,17 +1364,22 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, } else { - if (PREDICT_FALSE (is_twice_nat_session (s1))) + if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment) { - udp1->dst_port = s1->ext_host_port; - ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32; + new_port1 = udp1->src_port = s1->out2in.port; + if (PREDICT_FALSE (is_twice_nat_session (s1))) + { + udp1->dst_port = s1->ext_host_port; + ip1->dst_address.as_u32 = s1->ext_host_addr.as_u32; + } } udp_packets++; } /* Accounting */ nat44_session_update_counters (s1, 
now, - vlib_buffer_length_in_chain (vm, b1), + vlib_buffer_length_in_chain (vm, + b1), thread_index); /* Per-user LRU list maintenance */ nat44_session_update_lru (sm, s1, thread_index); @@ -1426,7 +1439,7 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE (!is_slow_path)) vnet_feature_next (&nat_buffer_opaque (b0)->arc_next, b0); - iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length; + iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length; } next0 = nat_buffer_opaque (b0)->arc_next; @@ -1460,8 +1473,8 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, { s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0, - thread_index, now, vm, - node); + thread_index, now, + vm, node); if (!s0) next0 = NAT_NEXT_DROP; other_packets++; @@ -1471,8 +1484,8 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) { next0 = icmp_in2out_ed_slow_path - (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, now, thread_index, &s0); + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, + node, next0, now, thread_index, &s0); icmp_packets++; goto trace0; } @@ -1485,13 +1498,6 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, goto trace0; } - if (ip4_is_fragment (ip0)) - { - next0 = def_reass; - fragments++; - goto trace0; - } - if (is_output_feature) { if (PREDICT_FALSE @@ -1508,8 +1514,9 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, } make_ed_kv (&kv0, &ip0->src_address, &ip0->dst_address, - ip0->protocol, rx_fib_index0, udp0->src_port, - udp0->dst_port); + ip0->protocol, rx_fib_index0, + vnet_buffer (b0)->ip.reass.l4_src_port, + vnet_buffer (b0)->ip.reass.l4_dst_port); if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0)) { @@ -1519,8 +1526,10 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, { if (PREDICT_FALSE (nat44_ed_not_translate_output_feature - (sm, ip0, ip0->protocol, udp0->src_port, - udp0->dst_port, thread_index, sw_if_index0, + (sm, ip0, ip0->protocol, + 
vnet_buffer (b0)->ip.reass.l4_src_port, + vnet_buffer (b0)->ip.reass.l4_dst_port, + thread_index, sw_if_index0, vnet_buffer (b0)->sw_if_index[VLIB_TX]))) goto trace0; @@ -1531,7 +1540,7 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE ((b0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED) && proto0 == SNAT_PROTOCOL_UDP - && (udp0->dst_port == + && (vnet_buffer (b0)->ip.reass.l4_dst_port == clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)))) goto trace0; @@ -1540,7 +1549,8 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, { if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index0, - ip0, proto0, + ip0, + proto0, rx_fib_index0, thread_index))) goto trace0; @@ -1548,7 +1558,7 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0, &s0, node, - next0, thread_index, now, tcp0); + next0, thread_index, now); if (PREDICT_FALSE (next0 == NAT_NEXT_DROP)) goto trace0; @@ -1583,49 +1593,61 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, dst_address); ip0->checksum = ip_csum_fold (sum0); - old_port0 = udp0->src_port; - new_port0 = udp0->src_port = s0->out2in.port; + old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port; if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP)) { - sum0 = tcp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, - length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { - sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, - ip4_header_t, dst_address); - sum0 = ip_csum_update (sum0, tcp0->dst_port, - s0->ext_host_port, ip4_header_t, - length); - tcp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; + new_port0 = udp0->src_port = s0->out2in.port; + sum0 = tcp0->checksum; + sum0 = + ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, 
dst_address); + sum0 = + ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t, length); + if (PREDICT_FALSE (is_twice_nat_session (s0))) + { + sum0 = + ip_csum_update (sum0, ip0->dst_address.as_u32, + s0->ext_host_addr.as_u32, + ip4_header_t, dst_address); + sum0 = + ip_csum_update (sum0, + vnet_buffer (b0)->ip. + reass.l4_dst_port, s0->ext_host_port, + ip4_header_t, length); + tcp0->dst_port = s0->ext_host_port; + ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; + } + mss_clamping (sm, tcp0, &sum0); + tcp0->checksum = ip_csum_fold (sum0); } - mss_clamping (sm, tcp0, &sum0); - tcp0->checksum = ip_csum_fold (sum0); tcp_packets++; - if (nat44_set_tcp_session_state_i2o - (sm, s0, tcp0, thread_index)) + if (nat44_set_tcp_session_state_i2o (sm, s0, b0, thread_index)) goto trace0; } - else if (udp0->checksum) + else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment + && udp0->checksum) { + new_port0 = udp0->src_port = s0->out2in.port; sum0 = udp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, - length); + sum0 = + ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + dst_address); + sum0 = + ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, + length); if (PREDICT_FALSE (is_twice_nat_session (s0))) { sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32, s0->ext_host_addr.as_u32, ip4_header_t, dst_address); - sum0 = ip_csum_update (sum0, tcp0->dst_port, - s0->ext_host_port, ip4_header_t, - length); + sum0 = + ip_csum_update (sum0, + vnet_buffer (b0)->ip.reass.l4_dst_port, + s0->ext_host_port, ip4_header_t, length); udp0->dst_port = s0->ext_host_port; ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; } @@ -1634,18 +1656,22 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, } else { - if (PREDICT_FALSE (is_twice_nat_session (s0))) + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { - udp0->dst_port = s0->ext_host_port; - 
ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; + new_port0 = udp0->src_port = s0->out2in.port; + if (PREDICT_FALSE (is_twice_nat_session (s0))) + { + udp0->dst_port = s0->ext_host_port; + ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; + } + udp_packets++; } - udp_packets++; } /* Accounting */ nat44_session_update_counters (s0, now, - vlib_buffer_length_in_chain (vm, b0), - thread_index); + vlib_buffer_length_in_chain + (vm, b0), thread_index); /* Per-user LRU list maintenance */ nat44_session_update_lru (sm, s0, thread_index); @@ -1687,379 +1713,6 @@ nat44_ed_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_increment_counter (vm, stats_node_index, NAT_IN2OUT_ED_ERROR_OTHER_PACKETS, other_packets); - vlib_node_increment_counter (vm, stats_node_index, - NAT_IN2OUT_ED_ERROR_FRAGMENTS, fragments); - - return frame->n_vectors; -} - -static inline uword -nat44_ed_in2out_reass_node_fn_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, - int is_output_feature) -{ - u32 n_left_from, *from, *to_next; - nat_next_t next_index; - u32 pkts_processed = 0, cached_fragments = 0; - snat_main_t *sm = &snat_main; - f64 now = vlib_time_now (vm); - u32 thread_index = vm->thread_index; - snat_main_per_thread_data_t *per_thread_data = - &sm->per_thread_data[thread_index]; - u32 *fragments_to_drop = 0; - u32 *fragments_to_loopback = 0; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0; - u32 iph_offset0 = 0; - vlib_buffer_t *b0; - u32 next0; - u8 cached0 = 0; - ip4_header_t *ip0 = 0; - nat_reass_ip4_t *reass0; - udp_header_t *udp0; - tcp_header_t *tcp0; - icmp46_header_t *icmp0; - clib_bihash_kv_16_8_t kv0, value0; - snat_session_t *s0 = 
0; - u16 old_port0, new_port0; - ip_csum_t sum0; - - /* speculatively enqueue b0 to the current next frame */ - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - next0 = nat_buffer_opaque (b0)->arc_next; - - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - rx_fib_index0 = - fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, - sw_if_index0); - - if (PREDICT_FALSE (nat_reass_is_drop_frag (0))) - { - next0 = NAT_NEXT_DROP; - b0->error = node->errors[NAT_IN2OUT_ED_ERROR_DROP_FRAGMENT]; - goto trace0; - } - - if (is_output_feature) - iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length; - - ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + - iph_offset0); - - udp0 = ip4_next_header (ip0); - tcp0 = (tcp_header_t *) udp0; - icmp0 = (icmp46_header_t *) udp0; - proto0 = ip_proto_to_snat_proto (ip0->protocol); - - reass0 = nat_ip4_reass_find_or_create (ip0->src_address, - ip0->dst_address, - ip0->fragment_id, - ip0->protocol, - 1, &fragments_to_drop); - - if (PREDICT_FALSE (!reass0)) - { - next0 = NAT_NEXT_DROP; - b0->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_REASS]; - nat_elog_notice ("maximum reassemblies exceeded"); - goto trace0; - } - - if (PREDICT_FALSE (ip4_is_first_fragment (ip0))) - { - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) - { - if (is_output_feature) - { - if (PREDICT_FALSE - (nat_not_translate_output_feature_fwd - (sm, ip0, thread_index, now, vm, b0))) - reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE; - goto trace0; - } - - next0 = icmp_in2out_ed_slow_path - (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, now, thread_index, &s0); - - if (PREDICT_TRUE (next0 != NAT_NEXT_DROP)) - { - if (s0) - reass0->sess_index = s0 - per_thread_data->sessions; - else - reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE; - nat_ip4_reass_get_frags (reass0, - &fragments_to_loopback); - } - - goto trace0; - } - - make_ed_kv 
(&kv0, &ip0->src_address, &ip0->dst_address, - ip0->protocol, rx_fib_index0, udp0->src_port, - udp0->dst_port); - - if (clib_bihash_search_16_8 - (&per_thread_data->in2out_ed, &kv0, &value0)) - { - if (is_output_feature) - { - if (PREDICT_FALSE - (nat44_ed_not_translate_output_feature - (sm, ip0, ip0->protocol, udp0->src_port, - udp0->dst_port, thread_index, sw_if_index0, - vnet_buffer (b0)->sw_if_index[VLIB_TX]))) - { - reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE; - nat_ip4_reass_get_frags (reass0, - &fragments_to_loopback); - goto trace0; - } - - /* - * Send DHCP packets to the ipv4 stack, or we won't - * be able to use dhcp client on the outside interface - */ - if (PREDICT_FALSE - ((b0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED) - && proto0 == SNAT_PROTOCOL_UDP - && (udp0->dst_port == - clib_host_to_net_u16 - (UDP_DST_PORT_dhcp_to_server)))) - goto trace0; - } - else - { - if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, - sw_if_index0, - ip0, proto0, - rx_fib_index0, - thread_index))) - { - reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE; - nat_ip4_reass_get_frags (reass0, - &fragments_to_loopback); - goto trace0; - } - } - - next0 = slow_path_ed (sm, b0, rx_fib_index0, &kv0, - &s0, node, next0, thread_index, now, - tcp0); - - if (PREDICT_FALSE (next0 == NAT_NEXT_DROP)) - goto trace0; - - if (PREDICT_FALSE (!s0)) - { - reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE; - goto trace0; - } - - reass0->sess_index = s0 - per_thread_data->sessions; - } - else - { - s0 = pool_elt_at_index (per_thread_data->sessions, - value0.value); - reass0->sess_index = value0.value; - } - nat_ip4_reass_get_frags (reass0, &fragments_to_loopback); - } - else - { - if (reass0->flags & NAT_REASS_FLAG_ED_DONT_TRANSLATE) - goto trace0; - if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0)) - { - if (nat_ip4_reass_add_fragment - (thread_index, reass0, bi0, &fragments_to_drop)) - { - b0->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_FRAG]; - nat_elog_notice - ("maximum 
fragments per reassembly exceeded"); - next0 = NAT_NEXT_DROP; - goto trace0; - } - cached0 = 1; - goto trace0; - } - s0 = pool_elt_at_index (per_thread_data->sessions, - reass0->sess_index); - } - - old_addr0 = ip0->src_address.as_u32; - ip0->src_address = s0->out2in.addr; - new_addr0 = ip0->src_address.as_u32; - if (!is_output_feature) - vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; - - sum0 = ip0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, - src_address /* changed member */ ); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, ip4_header_t, - dst_address); - ip0->checksum = ip_csum_fold (sum0); - - if (PREDICT_FALSE (ip4_is_first_fragment (ip0))) - { - old_port0 = udp0->src_port; - new_port0 = udp0->src_port = s0->out2in.port; - - if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP)) - { - sum0 = tcp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, - dst_address /* changed member */ ); - sum0 = ip_csum_update (sum0, old_port0, new_port0, - ip4_header_t /* cheat */ , - length /* changed member */ ); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, - ip4_header_t, dst_address); - sum0 = ip_csum_update (sum0, tcp0->dst_port, - s0->ext_host_port, ip4_header_t, - length); - tcp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - tcp0->checksum = ip_csum_fold (sum0); - } - else if (udp0->checksum) - { - sum0 = udp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, - length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, - ip4_header_t, dst_address); - sum0 = ip_csum_update (sum0, 
tcp0->dst_port, - s0->ext_host_port, ip4_header_t, - length); - udp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - udp0->checksum = ip_csum_fold (sum0); - } - else - { - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - udp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - } - } - - /* Hairpinning */ - nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port, - s0->ext_host_port, proto0, 1); - - /* Accounting */ - nat44_session_update_counters (s0, now, - vlib_buffer_length_in_chain (vm, b0), - thread_index); - /* Per-user LRU list maintenance */ - nat44_session_update_lru (sm, s0, thread_index); - - trace0: - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) - { - nat44_reass_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - t->cached = cached0; - t->sw_if_index = sw_if_index0; - t->next_index = next0; - } - - if (cached0) - { - n_left_to_next++; - to_next--; - cached_fragments++; - } - else - { - pkts_processed += next0 == nat_buffer_opaque (b0)->arc_next; - - /* verify speculative enqueue, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - if (n_left_from == 0 && vec_len (fragments_to_loopback)) - { - from = vlib_frame_vector_args (frame); - u32 len = vec_len (fragments_to_loopback); - if (len <= VLIB_FRAME_SIZE) - { - clib_memcpy_fast (from, fragments_to_loopback, - sizeof (u32) * len); - n_left_from = len; - vec_reset_length (fragments_to_loopback); - } - else - { - clib_memcpy_fast (from, fragments_to_loopback + - (len - VLIB_FRAME_SIZE), - sizeof (u32) * VLIB_FRAME_SIZE); - n_left_from = VLIB_FRAME_SIZE; - _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE; - } - } - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, sm->ed_in2out_reass_node_index, - 
NAT_IN2OUT_ED_ERROR_PROCESSED_FRAGMENTS, - pkts_processed); - vlib_node_increment_counter (vm, sm->ed_in2out_reass_node_index, - NAT_IN2OUT_ED_ERROR_CACHED_FRAGMENTS, - cached_fragments); - - nat_send_all_to_node (vm, fragments_to_drop, node, - &node->errors[NAT_IN2OUT_ED_ERROR_DROP_FRAGMENT], - NAT_NEXT_DROP); - - vec_free (fragments_to_drop); - vec_free (fragments_to_loopback); return frame->n_vectors; } @@ -2104,8 +1757,8 @@ VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = { /* *INDENT-ON* */ VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) + vlib_node_runtime_t * + node, vlib_frame_t * frame) { return nat44_ed_in2out_node_fn_inline (vm, node, frame, 1, 0); } @@ -2124,8 +1777,8 @@ VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = { /* *INDENT-ON* */ VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm, - vlib_node_runtime_t * - node, + vlib_node_runtime_t + * node, vlib_frame_t * frame) { return nat44_ed_in2out_node_fn_inline (vm, node, frame, 1, 1); @@ -2144,45 +1797,6 @@ VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = { }; /* *INDENT-ON* */ - -VLIB_NODE_FN (nat44_ed_in2out_reass_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return nat44_ed_in2out_reass_node_fn_inline (vm, node, frame, 0); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (nat44_ed_in2out_reass_node) = { - .name = "nat44-ed-in2out-reass", - .vector_size = sizeof (u32), - .sibling_of = "nat-default", - .format_trace = format_nat44_reass_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings), - .error_strings = nat_in2out_ed_error_strings, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FN (nat44_ed_in2out_reass_output_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return nat44_ed_in2out_reass_node_fn_inline (vm, node, frame, 1); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE 
(nat44_ed_in2out_reass_output_node) = { - .name = "nat44-ed-in2out-reass-output", - .vector_size = sizeof (u32), - .sibling_of = "nat-default", - .format_trace = format_nat44_reass_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings), - .error_strings = nat_in2out_ed_error_strings, -}; -/* *INDENT-ON* */ - static u8 * format_nat_pre_trace (u8 * s, va_list * args) { @@ -2192,9 +1806,8 @@ format_nat_pre_trace (u8 * s, va_list * args) return format (s, "in2out next_index %d", t->next_index); } -VLIB_NODE_FN (nat_pre_in2out_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (nat_pre_in2out_node) + (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { return nat_pre_node_fn_inline (vm, node, frame, NAT_NEXT_IN2OUT_ED_FAST_PATH); diff --git a/src/plugins/nat/nat.api b/src/plugins/nat/nat.api index 8cf26d4900d..356dd0ce8a9 100644 --- a/src/plugins/nat/nat.api +++ b/src/plugins/nat/nat.api @@ -189,85 +189,6 @@ autoreply define nat_ipfix_enable_disable { bool enable; }; -/** \brief Set NAT virtual fragmentation reassembly - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param timeout - reassembly timeout - @param max_reass - maximum number of concurrent reassemblies - @param max_frag - maximum number of fragmets per reassembly - @param drop_frag - if 0 translate fragments, otherwise drop fragments - @param is_ip6 - true if IPv6, false if IPv4 -*/ -autoreply define nat_set_reass { - u32 client_index; - u32 context; - u32 timeout; - u16 max_reass; - u8 max_frag; - u8 drop_frag; - bool is_ip6; -}; - -/** \brief Get NAT virtual fragmentation reassembly configuration - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request -*/ -define nat_get_reass { - u32 client_index; - u32 context; -}; - -/** \brief Get NAT virtual fragmentation 
reassembly configuration reply - @param context - sender context, to match reply w/ request - @param retval - return code - @param ip4_timeout - reassembly timeout - @param ip4_max_reass - maximum number of concurrent reassemblies - @param ip4_max_frag - maximum number of fragmets per reassembly - @param ip4_drop_frag - if 0 translate fragments, otherwise drop fragments - @param ip6_timeout - reassembly timeout - @param ip6_max_reass - maximum number of concurrent reassemblies - @param ip6_max_frag - maximum number of fragmets per reassembly - @param ip6_drop_frag - if 0 translate fragments, otherwise drop fragments -*/ -define nat_get_reass_reply { - u32 context; - i32 retval; - u32 ip4_timeout; - u16 ip4_max_reass; - u8 ip4_max_frag; - u8 ip4_drop_frag; - u32 ip6_timeout; - u16 ip6_max_reass; - u8 ip6_max_frag; - u8 ip6_drop_frag; -}; - -/** \brief Dump NAT virtual fragmentation reassemblies - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request -*/ -define nat_reass_dump { - u32 client_index; - u32 context; -}; - -/** \brief NAT virtual fragmentation reassemblies response - @param context - sender context, to match reply w/ request - @param src_addr - source IPv4 address - @param dst_addr - destination IPv4 address - @param frag_id - fragment ID - @param proto - protocol - @param frag_n - number of cached fragments -*/ -define nat_reass_details { - u32 context; - vl_api_address_t src_addr; - vl_api_address_t dst_addr; - u32 frag_id; - u8 proto; - u8 frag_n; -}; - /** \brief Set values of timeouts for NAT sessions (seconds) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index d85fb267bed..c1a18394aff 100755 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -26,13 +26,13 @@ #include #include #include -#include #include #include #include #include #include 
#include +#include #include @@ -46,13 +46,15 @@ fib_source_t nat_fib_src_low; VNET_FEATURE_INIT (nat_pre_in2out, static) = { .arc_name = "ip4-unicast", .node_name = "nat-pre-in2out", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa", + "ip4-sv-reassembly-feature"), }; VNET_FEATURE_INIT (nat_pre_out2in, static) = { .arc_name = "ip4-unicast", .node_name = "nat-pre-out2in", .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa", - "ip4-dhcp-client-detect"), + "ip4-dhcp-client-detect", + "ip4-sv-reassembly-feature"), }; VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = { .arc_name = "ip4-unicast", @@ -68,103 +70,103 @@ VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = { VNET_FEATURE_INIT (ip4_snat_in2out, static) = { .arc_name = "ip4-unicast", .node_name = "nat44-in2out", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), }; VNET_FEATURE_INIT (ip4_snat_out2in, static) = { .arc_name = "ip4-unicast", .node_name = "nat44-out2in", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa", + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature", "ip4-dhcp-client-detect"), }; VNET_FEATURE_INIT (ip4_nat_classify, static) = { .arc_name = "ip4-unicast", .node_name = "nat44-classify", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), }; VNET_FEATURE_INIT (ip4_snat_det_in2out, static) = { .arc_name = "ip4-unicast", .node_name = "nat44-det-in2out", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), }; VNET_FEATURE_INIT (ip4_snat_det_out2in, static) = { .arc_name = "ip4-unicast", .node_name = "nat44-det-out2in", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa", + .runs_after = VNET_FEATURES 
("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature", "ip4-dhcp-client-detect"), }; VNET_FEATURE_INIT (ip4_nat_det_classify, static) = { .arc_name = "ip4-unicast", .node_name = "nat44-det-classify", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), }; VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = { .arc_name = "ip4-unicast", .node_name = "nat44-ed-in2out", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), }; VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = { .arc_name = "ip4-unicast", .node_name = "nat44-ed-out2in", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa", + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature", "ip4-dhcp-client-detect"), }; VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = { .arc_name = "ip4-unicast", .node_name = "nat44-ed-classify", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), }; VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = { .arc_name = "ip4-unicast", .node_name = "nat44-handoff-classify", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), }; VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = { .arc_name = "ip4-unicast", .node_name = "nat44-in2out-fast", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), }; VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = { .arc_name = "ip4-unicast", .node_name = "nat44-out2in-fast", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa", + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature", "ip4-dhcp-client-detect"), }; VNET_FEATURE_INIT (ip4_snat_hairpin_dst, static) = { .arc_name 
= "ip4-unicast", .node_name = "nat44-hairpin-dst", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), }; VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_dst, static) = { .arc_name = "ip4-unicast", .node_name = "nat44-ed-hairpin-dst", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), }; /* Hook up output features */ VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = { .arc_name = "ip4-output", .node_name = "nat44-in2out-output", - .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"), }; VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = { .arc_name = "ip4-output", .node_name = "nat44-in2out-output-worker-handoff", - .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"), }; VNET_FEATURE_INIT (ip4_snat_hairpin_src, static) = { .arc_name = "ip4-output", .node_name = "nat44-hairpin-src", - .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"), }; VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = { .arc_name = "ip4-output", .node_name = "nat44-ed-in2out-output", - .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"), }; VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_src, static) = { .arc_name = "ip4-output", .node_name = "nat44-ed-hairpin-src", - .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"), }; /* Hook up ip4-local features */ @@ -1869,6 +1871,9 @@ feature_set: feature_name = !is_inside ? 
"nat44-in2out" : "nat44-out2in"; } + int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0); + if (rv) + return rv; vnet_feature_enable_disable ("ip4-unicast", del_feature_name, sw_if_index, 0, 0, 0); vnet_feature_enable_disable ("ip4-unicast", feature_name, @@ -1887,6 +1892,9 @@ feature_set: } else { + int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0); + if (rv) + return rv; vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 0, 0, 0); pool_put (sm->interfaces, i); @@ -1934,6 +1942,9 @@ feature_set: feature_name = "nat44-classify"; } + int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1); + if (rv) + return rv; vnet_feature_enable_disable ("ip4-unicast", del_feature_name, sw_if_index, 0, 0, 0); vnet_feature_enable_disable ("ip4-unicast", feature_name, @@ -1964,6 +1975,10 @@ feature_set: vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0, 0); + int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1); + if (rv) + return rv; + if (is_inside && !sm->out2in_dpo) { if (sm->endpoint_dependent) @@ -2063,6 +2078,15 @@ feature_set: { if (sm->endpoint_dependent) { + int rv = + ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del); + if (rv) + return rv; + rv = + ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, + !is_del); + if (rv) + return rv; vnet_feature_enable_disable ("ip4-unicast", "nat44-ed-hairpin-dst", sw_if_index, !is_del, 0, 0); vnet_feature_enable_disable ("ip4-output", "nat44-ed-hairpin-src", @@ -2070,6 +2094,15 @@ feature_set: } else { + int rv = + ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del); + if (rv) + return rv; + rv = + ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, + !is_del); + if (rv) + return rv; vnet_feature_enable_disable ("ip4-unicast", "nat44-hairpin-dst", sw_if_index, !is_del, 0, 0); vnet_feature_enable_disable ("ip4-output", "nat44-hairpin-src", @@ -2080,6 +2113,13 @@ feature_set: if (sm->num_workers > 
1) { + int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del); + if (rv) + return rv; + rv = + ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del); + if (rv) + return rv; vnet_feature_enable_disable ("ip4-unicast", "nat44-out2in-worker-handoff", sw_if_index, !is_del, 0, 0); @@ -2091,6 +2131,15 @@ feature_set: { if (sm->endpoint_dependent) { + int rv = + ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del); + if (rv) + return rv; + rv = + ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, + !is_del); + if (rv) + return rv; vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in", sw_if_index, !is_del, 0, 0); vnet_feature_enable_disable ("ip4-output", "nat44-ed-in2out-output", @@ -2098,6 +2147,15 @@ feature_set: } else { + int rv = + ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del); + if (rv) + return rv; + rv = + ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, + !is_del); + if (rv) + return rv; vnet_feature_enable_disable ("ip4-unicast", "nat44-out2in", sw_if_index, !is_del, 0, 0); vnet_feature_enable_disable ("ip4-output", "nat44-in2out-output", @@ -2352,29 +2410,21 @@ snat_init (vlib_main_t * vm) sm->in2out_slowpath_node_index = node->index; node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-output-slowpath"); sm->in2out_slowpath_output_node_index = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "nat44-in2out-reass"); - sm->in2out_reass_node_index = node->index; node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out"); sm->ed_in2out_node_index = node->index; node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-slowpath"); sm->ed_in2out_slowpath_node_index = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-reass"); - sm->ed_in2out_reass_node_index = node->index; node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in"); sm->out2in_node_index = node->index; node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in-fast"); 
sm->out2in_fast_node_index = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "nat44-out2in-reass"); - sm->out2in_reass_node_index = node->index; node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in"); sm->ed_out2in_node_index = node->index; node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in-slowpath"); sm->ed_out2in_slowpath_node_index = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in-reass"); - sm->ed_out2in_reass_node_index = node->index; node = vlib_get_node_by_name (vm, (u8 *) "nat44-det-in2out"); sm->det_in2out_node_index = node->index; @@ -2471,8 +2521,7 @@ snat_init (vlib_main_t * vm) FIB_SOURCE_PRIORITY_LOW, FIB_SOURCE_BH_SIMPLE); - /* Init virtual fragmenentation reassembly */ - return nat_reass_init (vm); + return error; } VLIB_INIT_FUNCTION (snat_init); @@ -3012,8 +3061,8 @@ snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0, } static u32 -snat_get_worker_out2in_cb (ip4_header_t * ip0, u32 rx_fib_index0, - u8 is_output) +snat_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip0, + u32 rx_fib_index0, u8 is_output) { snat_main_t *sm = &snat_main; udp_header_t *udp; @@ -3044,52 +3093,6 @@ snat_get_worker_out2in_cb (ip4_header_t * ip0, u32 rx_fib_index0, udp = ip4_next_header (ip0); port = udp->dst_port; - if (PREDICT_FALSE (ip4_is_fragment (ip0))) - { - if (PREDICT_FALSE (nat_reass_is_drop_frag (0))) - return vlib_get_thread_index (); - - nat_reass_ip4_t *reass; - reass = nat_ip4_reass_find (ip0->src_address, ip0->dst_address, - ip0->fragment_id, ip0->protocol); - - if (reass && (reass->thread_index != (u32) ~ 0)) - return reass->thread_index; - - if (ip4_is_first_fragment (ip0)) - { - reass = - nat_ip4_reass_create (ip0->src_address, ip0->dst_address, - ip0->fragment_id, ip0->protocol); - if (!reass) - goto no_reass; - - if (PREDICT_FALSE (pool_elts (sm->static_mappings))) - { - m_key.addr = ip0->dst_address; - m_key.port = clib_net_to_host_u16 (port); - m_key.protocol = proto; - 
m_key.fib_index = rx_fib_index0; - kv.key = m_key.as_u64; - if (!clib_bihash_search_8_8 - (&sm->static_mapping_by_external, &kv, &value)) - { - m = pool_elt_at_index (sm->static_mappings, value.value); - reass->thread_index = m->workers[0]; - return reass->thread_index; - } - } - reass->thread_index = sm->first_worker_index; - reass->thread_index += - sm->workers[(clib_net_to_host_u16 (port) - 1024) / - sm->port_per_thread]; - return reass->thread_index; - } - else - return vlib_get_thread_index (); - } - -no_reass: /* unknown protocol */ if (PREDICT_FALSE (proto == ~0)) { @@ -3101,10 +3104,12 @@ no_reass: { icmp46_header_t *icmp = (icmp46_header_t *) udp; icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1); - if (!icmp_is_error_message (icmp)) - port = echo->identifier; + if (!icmp_type_is_error_message + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) + port = vnet_buffer (b)->ip.reass.l4_src_port; else { + /* if error message, then it's not fragmented and we can access it */ ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1); proto = ip_proto_to_snat_proto (inner_ip->protocol); void *l4_header = ip4_next_header (inner_ip); @@ -3252,8 +3257,8 @@ nat44_ed_get_worker_in2out_cb (ip4_header_t * ip, u32 rx_fib_index, } static u32 -nat44_ed_get_worker_out2in_cb (ip4_header_t * ip, u32 rx_fib_index, - u8 is_output) +nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip, + u32 rx_fib_index, u8 is_output) { snat_main_t *sm = &snat_main; clib_bihash_kv_8_8_t kv, value; @@ -3295,7 +3300,7 @@ nat44_ed_get_worker_out2in_cb (ip4_header_t * ip, u32 rx_fib_index, { nat_ed_ses_key_t key; - if (!get_icmp_o2i_ed_key (ip, &key)) + if (!get_icmp_o2i_ed_key (b, ip, &key)) { key.fib_index = rx_fib_index; @@ -3349,10 +3354,12 @@ nat44_ed_get_worker_out2in_cb (ip4_header_t * ip, u32 rx_fib_index, { icmp46_header_t *icmp = (icmp46_header_t *) udp; icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1); - if (!icmp_is_error_message (icmp)) - port = 
echo->identifier; + if (!icmp_type_is_error_message + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) + port = vnet_buffer (b)->ip.reass.l4_src_port; else { + /* if error message, then it's not fragmented and we can access it */ ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1); proto = ip_proto_to_snat_proto (inner_ip->protocol); void *l4_header = ip4_next_header (inner_ip); @@ -4359,11 +4366,8 @@ VLIB_REGISTER_NODE (nat_default_node) = { [NAT_NEXT_IN2OUT_ED_FAST_PATH] = "nat44-ed-in2out", [NAT_NEXT_IN2OUT_ED_SLOW_PATH] = "nat44-ed-in2out-slowpath", [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath", - [NAT_NEXT_IN2OUT_ED_REASS] = "nat44-ed-in2out-reass", - [NAT_NEXT_IN2OUT_ED_OUTPUT_REASS] = "nat44-ed-in2out-reass-output", [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in", [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath", - [NAT_NEXT_OUT2IN_ED_REASS] = "nat44-ed-out2in-reass", }, }; /* *INDENT-ON* */ diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index 38f5a99bfbb..d3fa3eea38a 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -58,11 +58,8 @@ typedef enum NAT_NEXT_IN2OUT_ED_FAST_PATH, NAT_NEXT_IN2OUT_ED_SLOW_PATH, NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH, - NAT_NEXT_IN2OUT_ED_REASS, - NAT_NEXT_IN2OUT_ED_OUTPUT_REASS, NAT_NEXT_OUT2IN_ED_FAST_PATH, NAT_NEXT_OUT2IN_ED_SLOW_PATH, - NAT_NEXT_OUT2IN_ED_REASS, NAT_N_NEXT, } nat_next_t; @@ -534,8 +531,14 @@ typedef u32 (snat_icmp_match_function_t) (struct snat_main_s * sm, void *e); /* Return worker thread index for given packet */ -typedef u32 (snat_get_worker_function_t) (ip4_header_t * ip, - u32 rx_fib_index, u8 is_output); +typedef u32 (snat_get_worker_in2out_function_t) (ip4_header_t * ip, + u32 rx_fib_index, + u8 is_output); + +typedef u32 (snat_get_worker_out2in_function_t) (vlib_buffer_t * b, + ip4_header_t * ip, + u32 rx_fib_index, + u8 is_output); /* NAT address and port allacotaion function */ typedef int (nat_alloc_out_addr_and_port_function_t) 
(snat_address_t * @@ -556,8 +559,8 @@ typedef struct snat_main_s u32 num_workers; u32 first_worker_index; u32 *workers; - snat_get_worker_function_t *worker_in2out_cb; - snat_get_worker_function_t *worker_out2in_cb; + snat_get_worker_in2out_function_t *worker_in2out_cb; + snat_get_worker_out2in_function_t *worker_out2in_cb; u16 port_per_thread; u32 num_snat_thread; @@ -629,16 +632,12 @@ typedef struct snat_main_s u32 in2out_fast_node_index; u32 in2out_slowpath_node_index; u32 in2out_slowpath_output_node_index; - u32 in2out_reass_node_index; u32 ed_in2out_node_index; u32 ed_in2out_slowpath_node_index; - u32 ed_in2out_reass_node_index; u32 out2in_node_index; u32 out2in_fast_node_index; - u32 out2in_reass_node_index; u32 ed_out2in_node_index; u32 ed_out2in_slowpath_node_index; - u32 ed_out2in_reass_node_index; u32 det_in2out_node_index; u32 det_out2in_node_index; @@ -756,7 +755,6 @@ format_function_t format_snat_key; format_function_t format_static_mapping_key; format_function_t format_snat_protocol; format_function_t format_nat_addr_and_port_alloc_alg; -format_function_t format_nat44_reass_trace; /* unformat functions */ unformat_function_t unformat_snat_protocol; @@ -848,7 +846,11 @@ unformat_function_t unformat_snat_protocol; @param t TCP header @return 1 if client initiating TCP connection */ -#define tcp_is_init(t) ((t->flags & TCP_FLAG_SYN) && !(t->flags & TCP_FLAG_ACK)) +always_inline bool +tcp_flags_is_init (u8 f) +{ + return (f & TCP_FLAG_SYN) && !(f & TCP_FLAG_ACK); +} /* logging */ #define nat_log_err(...) 
\ diff --git a/src/plugins/nat/nat44_classify.c b/src/plugins/nat/nat44_classify.c index b6ce4d7b494..f339770d8f5 100644 --- a/src/plugins/nat/nat44_classify.c +++ b/src/plugins/nat/nat44_classify.c @@ -21,12 +21,9 @@ #include #include #include -#include #include #define foreach_nat44_classify_error \ -_(MAX_REASS, "Maximum reassemblies exceeded") \ -_(MAX_FRAG, "Maximum fragments per reassembly exceeded") \ _(NEXT_IN2OUT, "next in2out") \ _(NEXT_OUT2IN, "next out2in") \ _(FRAG_CACHED, "fragment cached") @@ -87,7 +84,6 @@ nat44_classify_node_fn_inline (vlib_main_t * vm, nat44_classify_next_t next_index; snat_main_t *sm = &snat_main; snat_static_mapping_t *m; - u32 thread_index = vm->thread_index; u32 *fragments_to_drop = 0; u32 *fragments_to_loopback = 0; u32 next_in2out = 0, next_out2in = 0, frag_cached = 0; @@ -111,8 +107,6 @@ nat44_classify_node_fn_inline (vlib_main_t * vm, snat_address_t *ap; snat_session_key_t m_key0; clib_bihash_kv_8_8_t kv0, value0; - udp_header_t *udp0; - nat_reass_ip4_t *reass0; u8 cached0 = 0; /* speculatively enqueue b0 to the current next frame */ @@ -125,7 +119,6 @@ nat44_classify_node_fn_inline (vlib_main_t * vm, b0 = vlib_get_buffer (vm, bi0); ip0 = vlib_buffer_get_current (b0); - udp0 = ip4_next_header (ip0); /* *INDENT-OFF* */ vec_foreach (ap, sm->addresses) @@ -154,87 +147,16 @@ nat44_classify_node_fn_inline (vlib_main_t * vm, next0 = NAT44_CLASSIFY_NEXT_OUT2IN; goto enqueue0; } - if (!ip4_is_fragment (ip0) || ip4_is_first_fragment (ip0)) - { - /* process leading fragment/whole packet (with L4 header) */ - m_key0.port = clib_net_to_host_u16 (udp0->dst_port); - m_key0.protocol = ip_proto_to_snat_proto (ip0->protocol); - kv0.key = m_key0.as_u64; - if (!clib_bihash_search_8_8 - (&sm->static_mapping_by_external, &kv0, &value0)) - { - m = - pool_elt_at_index (sm->static_mappings, value0.value); - if (m->local_addr.as_u32 != m->external_addr.as_u32) - next0 = NAT44_CLASSIFY_NEXT_OUT2IN; - } - if (ip4_is_fragment (ip0)) - { - reass0 = 
nat_ip4_reass_find_or_create (ip0->src_address, - ip0->dst_address, - ip0->fragment_id, - ip0->protocol, - 1, - &fragments_to_drop); - if (PREDICT_FALSE (!reass0)) - { - next0 = NAT44_CLASSIFY_NEXT_DROP; - b0->error = - node->errors[NAT44_CLASSIFY_ERROR_MAX_REASS]; - nat_elog_notice ("maximum reassemblies exceeded"); - goto enqueue0; - } - /* save classification for future fragments and set past - * fragments to be looped over and reprocessed */ - if (next0 == NAT44_CLASSIFY_NEXT_OUT2IN) - reass0->classify_next = - NAT_REASS_IP4_CLASSIFY_NEXT_OUT2IN; - else - reass0->classify_next = - NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT; - nat_ip4_reass_get_frags (reass0, - &fragments_to_loopback); - } - } - else + m_key0.port = + clib_net_to_host_u16 (vnet_buffer (b0)->ip.reass.l4_dst_port); + m_key0.protocol = ip_proto_to_snat_proto (ip0->protocol); + kv0.key = m_key0.as_u64; + if (!clib_bihash_search_8_8 + (&sm->static_mapping_by_external, &kv0, &value0)) { - /* process non-first fragment */ - reass0 = nat_ip4_reass_find_or_create (ip0->src_address, - ip0->dst_address, - ip0->fragment_id, - ip0->protocol, - 1, - &fragments_to_drop); - if (PREDICT_FALSE (!reass0)) - { - next0 = NAT44_CLASSIFY_NEXT_DROP; - b0->error = - node->errors[NAT44_CLASSIFY_ERROR_MAX_REASS]; - nat_elog_notice ("maximum reassemblies exceeded"); - goto enqueue0; - } - if (reass0->classify_next == NAT_REASS_IP4_CLASSIFY_NONE) - /* first fragment still hasn't arrived */ - { - if (nat_ip4_reass_add_fragment - (thread_index, reass0, bi0, &fragments_to_drop)) - { - b0->error = - node->errors[NAT44_CLASSIFY_ERROR_MAX_FRAG]; - nat_elog_notice - ("maximum fragments per reassembly exceeded"); - next0 = NAT44_CLASSIFY_NEXT_DROP; - goto enqueue0; - } - cached0 = 1; - goto enqueue0; - } - else if (reass0->classify_next == - NAT_REASS_IP4_CLASSIFY_NEXT_OUT2IN) + m = pool_elt_at_index (sm->static_mappings, value0.value); + if (m->local_addr.as_u32 != m->external_addr.as_u32) next0 = NAT44_CLASSIFY_NEXT_OUT2IN; - else if 
(reass0->classify_next == - NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT) - next0 = NAT44_CLASSIFY_NEXT_IN2OUT; } } @@ -343,8 +265,6 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm, snat_session_key_t m_key0; clib_bihash_kv_8_8_t kv0, value0; clib_bihash_kv_16_8_t ed_kv0, ed_value0; - udp_header_t *udp0; - nat_reass_ip4_t *reass0; u8 cached0 = 0; /* speculatively enqueue b0 to the current next frame */ @@ -357,7 +277,6 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm, b0 = vlib_get_buffer (vm, bi0); ip0 = vlib_buffer_get_current (b0); - udp0 = ip4_next_header (ip0); if (!in_loopback) { @@ -369,108 +288,21 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm, if (ip0->protocol != IP_PROTOCOL_ICMP) { - if (!ip4_is_fragment (ip0) || ip4_is_first_fragment (ip0)) - { - /* process leading fragment/whole packet (with L4 header) */ - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - rx_fib_index0 = - fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, - sw_if_index0); - make_ed_kv (&ed_kv0, &ip0->src_address, - &ip0->dst_address, ip0->protocol, - rx_fib_index0, udp0->src_port, udp0->dst_port); - if (ip4_is_fragment (ip0)) - { - reass0 = - nat_ip4_reass_find_or_create (ip0->src_address, - ip0->dst_address, - ip0->fragment_id, - ip0->protocol, 1, - &fragments_to_drop); - if (PREDICT_FALSE (!reass0)) - { - next0 = NAT_NEXT_DROP; - b0->error = - node->errors[NAT44_CLASSIFY_ERROR_MAX_REASS]; - nat_elog_notice ("maximum reassemblies exceeded"); - goto enqueue0; - } - if (!clib_bihash_search_16_8 - (&tsm->in2out_ed, &ed_kv0, &ed_value0)) - { - /* session exists so classify as IN2OUT, - * save this information for future fragments and set - * past fragments to be looped over and reprocessed */ - reass0->sess_index = ed_value0.value; - reass0->classify_next = - NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT; - nat_ip4_reass_get_frags (reass0, - &fragments_to_loopback); - goto enqueue0; - } - else - { - /* session doesn't exist so continue in the code, - * save this information for 
future fragments and set - * past fragments to be looped over and reprocessed */ - reass0->flags |= - NAT_REASS_FLAG_CLASSIFY_ED_CONTINUE; - nat_ip4_reass_get_frags (reass0, - &fragments_to_loopback); - } - } - else - { - /* process whole packet */ - if (!clib_bihash_search_16_8 - (&tsm->in2out_ed, &ed_kv0, &ed_value0)) - goto enqueue0; - /* session doesn't exist so continue in code */ - } - } - else - { - /* process non-first fragment */ - reass0 = nat_ip4_reass_find_or_create (ip0->src_address, - ip0->dst_address, - ip0->fragment_id, - ip0->protocol, - 1, - &fragments_to_drop); - if (PREDICT_FALSE (!reass0)) - { - next0 = NAT_NEXT_DROP; - b0->error = - node->errors[NAT44_CLASSIFY_ERROR_MAX_REASS]; - nat_elog_notice ("maximum reassemblies exceeded"); - goto enqueue0; - } - /* check if first fragment has arrived */ - if (reass0->classify_next == NAT_REASS_IP4_CLASSIFY_NONE - && !(reass0->flags & - NAT_REASS_FLAG_CLASSIFY_ED_CONTINUE)) - { - /* first fragment still hasn't arrived, cache this fragment */ - if (nat_ip4_reass_add_fragment - (thread_index, reass0, bi0, &fragments_to_drop)) - { - b0->error = - node->errors[NAT44_CLASSIFY_ERROR_MAX_FRAG]; - nat_elog_notice - ("maximum fragments per reassembly exceeded"); - next0 = NAT_NEXT_DROP; - goto enqueue0; - } - cached0 = 1; - goto enqueue0; - } - if (reass0->classify_next == - NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT) - goto enqueue0; - /* flag NAT_REASS_FLAG_CLASSIFY_ED_CONTINUE is set - * so keep the default next0 and continue in code to - * potentially find other classification for this packet */ - } + /* process leading fragment/whole packet (with L4 header) */ + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = + fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, + sw_if_index0); + make_ed_kv (&ed_kv0, &ip0->src_address, + &ip0->dst_address, ip0->protocol, + rx_fib_index0, + vnet_buffer (b0)->ip.reass.l4_src_port, + vnet_buffer (b0)->ip.reass.l4_dst_port); + /* process whole packet */ + if 
(!clib_bihash_search_16_8 + (&tsm->in2out_ed, &ed_kv0, &ed_value0)) + goto enqueue0; + /* session doesn't exist so continue in code */ } /* *INDENT-OFF* */ @@ -500,85 +332,16 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm, next0 = NAT_NEXT_OUT2IN_ED_FAST_PATH; goto enqueue0; } - if (!ip4_is_fragment (ip0) || ip4_is_first_fragment (ip0)) - { - /* process leading fragment/whole packet (with L4 header) */ - m_key0.port = clib_net_to_host_u16 (udp0->dst_port); - m_key0.protocol = ip_proto_to_snat_proto (ip0->protocol); - kv0.key = m_key0.as_u64; - if (!clib_bihash_search_8_8 - (&sm->static_mapping_by_external, &kv0, &value0)) - { - m = - pool_elt_at_index (sm->static_mappings, value0.value); - if (m->local_addr.as_u32 != m->external_addr.as_u32) - next0 = NAT_NEXT_OUT2IN_ED_FAST_PATH; - } - if (ip4_is_fragment (ip0)) - { - reass0 = nat_ip4_reass_find_or_create (ip0->src_address, - ip0->dst_address, - ip0->fragment_id, - ip0->protocol, - 1, - &fragments_to_drop); - if (PREDICT_FALSE (!reass0)) - { - next0 = NAT_NEXT_DROP; - b0->error = - node->errors[NAT44_CLASSIFY_ERROR_MAX_REASS]; - nat_elog_notice ("maximum reassemblies exceeded"); - goto enqueue0; - } - /* save classification for future fragments and set past - * fragments to be looped over and reprocessed */ - if (next0 == NAT_NEXT_OUT2IN_ED_FAST_PATH) - reass0->classify_next = NAT_NEXT_OUT2IN_ED_REASS; - else - reass0->classify_next = NAT_NEXT_IN2OUT_ED_REASS; - nat_ip4_reass_get_frags (reass0, - &fragments_to_loopback); - } - } - else + m_key0.port = + clib_net_to_host_u16 (vnet_buffer (b0)->ip.reass.l4_dst_port); + m_key0.protocol = ip_proto_to_snat_proto (ip0->protocol); + kv0.key = m_key0.as_u64; + if (!clib_bihash_search_8_8 + (&sm->static_mapping_by_external, &kv0, &value0)) { - /* process non-first fragment */ - reass0 = nat_ip4_reass_find_or_create (ip0->src_address, - ip0->dst_address, - ip0->fragment_id, - ip0->protocol, - 1, - &fragments_to_drop); - if (PREDICT_FALSE (!reass0)) - { - next0 = 
NAT_NEXT_DROP; - b0->error = - node->errors[NAT44_CLASSIFY_ERROR_MAX_REASS]; - nat_elog_notice ("maximum reassemblies exceeded"); - goto enqueue0; - } - if (reass0->classify_next == NAT_REASS_IP4_CLASSIFY_NONE) - /* first fragment still hasn't arrived */ - { - if (nat_ip4_reass_add_fragment - (thread_index, reass0, bi0, &fragments_to_drop)) - { - b0->error = - node->errors[NAT44_CLASSIFY_ERROR_MAX_FRAG]; - nat_elog_notice - ("maximum fragments per reassembly exceeded"); - next0 = NAT_NEXT_DROP; - goto enqueue0; - } - cached0 = 1; - goto enqueue0; - } - else if (reass0->classify_next == - NAT_REASS_IP4_CLASSIFY_NEXT_OUT2IN) + m = pool_elt_at_index (sm->static_mappings, value0.value); + if (m->local_addr.as_u32 != m->external_addr.as_u32) next0 = NAT_NEXT_OUT2IN_ED_FAST_PATH; - else if (reass0->classify_next == - NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT) - next0 = NAT_NEXT_IN2OUT_ED_FAST_PATH; } } diff --git a/src/plugins/nat/nat44_hairpinning.c b/src/plugins/nat/nat44_hairpinning.c index 331e7ca96bd..69a19b80645 100644 --- a/src/plugins/nat/nat44_hairpinning.c +++ b/src/plugins/nat/nat44_hairpinning.c @@ -22,7 +22,6 @@ #include #include #include -#include typedef enum { @@ -211,7 +210,8 @@ snat_icmp_hairpinning (snat_main_t * sm, snat_session_t *s0; snat_static_mapping_t *m0; - if (icmp_is_error_message (icmp0)) + if (icmp_type_is_error_message + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags)) { ip4_header_t *inner_ip0 = 0; tcp_udp_header_t *l4_header = 0; @@ -391,7 +391,7 @@ nat44_ed_hairpinning_unknown_proto (snat_main_t * sm, snat_main_per_thread_data_t *tsm; if (sm->num_workers > 1) - ti = sm->worker_out2in_cb (ip, sm->outside_fib_index, 0); + ti = sm->worker_out2in_cb (b, ip, sm->outside_fib_index, 0); else ti = sm->num_workers; tsm = &sm->per_thread_data[ti]; diff --git a/src/plugins/nat/nat44_handoff.c b/src/plugins/nat/nat44_handoff.c index 277f2de57fe..d221e5ba7cd 100644 --- a/src/plugins/nat/nat44_handoff.c +++ b/src/plugins/nat/nat44_handoff.c @@ -82,7 
+82,6 @@ nat44_worker_handoff_fn_inline (vlib_main_t * vm, vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; snat_main_t *sm = &snat_main; - snat_get_worker_function_t *get_worker; u32 fq_index, thread_index = vm->thread_index; from = vlib_frame_vector_args (frame); @@ -93,12 +92,10 @@ nat44_worker_handoff_fn_inline (vlib_main_t * vm, if (is_in2out) { fq_index = is_output ? sm->fq_in2out_output_index : sm->fq_in2out_index; - get_worker = sm->worker_in2out_cb; } else { fq_index = sm->fq_out2in_index; - get_worker = sm->worker_out2in_cb; } while (n_left_from >= 4) @@ -147,10 +144,20 @@ nat44_worker_handoff_fn_inline (vlib_main_t * vm, rx_fib_index2 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index2); rx_fib_index3 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index3); - ti[0] = get_worker (ip0, rx_fib_index0, is_output); - ti[1] = get_worker (ip1, rx_fib_index1, is_output); - ti[2] = get_worker (ip2, rx_fib_index2, is_output); - ti[3] = get_worker (ip3, rx_fib_index3, is_output); + if (is_in2out) + { + ti[0] = sm->worker_in2out_cb (ip0, rx_fib_index0, is_output); + ti[1] = sm->worker_in2out_cb (ip1, rx_fib_index1, is_output); + ti[2] = sm->worker_in2out_cb (ip2, rx_fib_index2, is_output); + ti[3] = sm->worker_in2out_cb (ip3, rx_fib_index3, is_output); + } + else + { + ti[0] = sm->worker_out2in_cb (b[0], ip0, rx_fib_index0, is_output); + ti[1] = sm->worker_out2in_cb (b[1], ip1, rx_fib_index1, is_output); + ti[2] = sm->worker_out2in_cb (b[2], ip2, rx_fib_index2, is_output); + ti[3] = sm->worker_out2in_cb (b[3], ip3, rx_fib_index3, is_output); + } if (ti[0] == thread_index) same_worker++; @@ -194,7 +201,14 @@ nat44_worker_handoff_fn_inline (vlib_main_t * vm, sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_RX]; rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); - ti[0] = get_worker (ip0, rx_fib_index0, is_output); + if (is_in2out) + { + ti[0] = sm->worker_in2out_cb (ip0, rx_fib_index0, is_output); + } + else + { + ti[0] = 
sm->worker_out2in_cb (b[0], ip0, rx_fib_index0, is_output); + } if (ti[0] == thread_index) same_worker++; diff --git a/src/plugins/nat/nat64.c b/src/plugins/nat/nat64.c index e1afea6510e..405fc84c7b8 100644 --- a/src/plugins/nat/nat64.c +++ b/src/plugins/nat/nat64.c @@ -19,10 +19,11 @@ #include #include -#include #include #include #include +#include +#include nat64_main_t nat64_main; @@ -34,21 +35,25 @@ VNET_FEATURE_INIT (nat64_in2out, static) = { .arc_name = "ip6-unicast", .node_name = "nat64-in2out", .runs_before = VNET_FEATURES ("ip6-lookup"), + .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"), }; VNET_FEATURE_INIT (nat64_out2in, static) = { .arc_name = "ip4-unicast", .node_name = "nat64-out2in", .runs_before = VNET_FEATURES ("ip4-lookup"), + .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"), }; VNET_FEATURE_INIT (nat64_in2out_handoff, static) = { .arc_name = "ip6-unicast", .node_name = "nat64-in2out-handoff", .runs_before = VNET_FEATURES ("ip6-lookup"), + .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"), }; VNET_FEATURE_INIT (nat64_out2in_handoff, static) = { .arc_name = "ip4-unicast", .node_name = "nat64-out2in-handoff", .runs_before = VNET_FEATURES ("ip4-lookup"), + .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"), }; @@ -120,7 +125,7 @@ nat64_get_worker_in2out (ip6_address_t * addr) } u32 -nat64_get_worker_out2in (ip4_header_t * ip) +nat64_get_worker_out2in (vlib_buffer_t * b, ip4_header_t * ip) { nat64_main_t *nm = &nat64_main; snat_main_t *sm = nm->sm; @@ -132,41 +137,6 @@ nat64_get_worker_out2in (ip4_header_t * ip) udp = ip4_next_header (ip); port = udp->dst_port; - /* fragments */ - if (PREDICT_FALSE (ip4_is_fragment (ip))) - { - if (PREDICT_FALSE (nat_reass_is_drop_frag (0))) - return vlib_get_thread_index (); - - nat_reass_ip4_t *reass; - reass = nat_ip4_reass_find (ip->src_address, ip->dst_address, - ip->fragment_id, ip->protocol); - - if (reass && (reass->thread_index != (u32) ~ 0)) - return reass->thread_index; - - 
if (ip4_is_first_fragment (ip)) - { - reass = - nat_ip4_reass_create (ip->src_address, ip->dst_address, - ip->fragment_id, ip->protocol); - if (!reass) - goto no_reass; - - port = clib_net_to_host_u16 (port); - if (port > 1024) - reass->thread_index = - nm->sm->first_worker_index + - ((port - 1024) / sm->port_per_thread); - else - reass->thread_index = vlib_get_thread_index (); - return reass->thread_index; - } - else - return vlib_get_thread_index (); - } - -no_reass: /* unknown protocol */ if (PREDICT_FALSE (proto == ~0)) { @@ -193,10 +163,12 @@ no_reass: { icmp46_header_t *icmp = (icmp46_header_t *) udp; icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1); - if (!icmp_is_error_message (icmp)) - port = echo->identifier; + if (!icmp_type_is_error_message + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) + port = vnet_buffer (b)->ip.reass.l4_src_port; else { + /* if error message, then it's not fragmented and we can access it */ ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1); proto = ip_proto_to_snat_proto (inner_ip->protocol); void *l4_header = ip4_next_header (inner_ip); @@ -249,15 +221,9 @@ nat64_init (vlib_main_t * vm) node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out-slowpath"); nm->in2out_slowpath_node_index = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out-reass"); - nm->in2out_reass_node_index = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "nat64-out2in"); nm->out2in_node_index = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "nat64-out2in-reass"); - nm->out2in_reass_node_index = node->index; - /* set session timeouts to default values */ nm->udp_timeout = SNAT_UDP_TIMEOUT; nm->icmp_timeout = SNAT_ICMP_TIMEOUT; @@ -528,6 +494,19 @@ nat64_add_del_interface (u32 sw_if_index, u8 is_inside, u8 is_add) arc_name = is_inside ? 
"ip6-unicast" : "ip4-unicast"; + if (is_inside) + { + int rv = ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add); + if (rv) + return rv; + } + else + { + int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add); + if (rv) + return rv; + } + return vnet_feature_enable_disable (arc_name, feature_name, sw_if_index, is_add, 0, 0); } diff --git a/src/plugins/nat/nat64.h b/src/plugins/nat/nat64.h index 2bca1f9b73b..a95ded22893 100644 --- a/src/plugins/nat/nat64.h +++ b/src/plugins/nat/nat64.h @@ -115,10 +115,8 @@ typedef struct u32 in2out_node_index; u32 in2out_slowpath_node_index; - u32 in2out_reass_node_index; u32 out2in_node_index; - u32 out2in_reass_node_index; ip4_main_t *ip4_main; snat_main_t *sm; @@ -380,7 +378,7 @@ u32 nat64_get_worker_in2out (ip6_address_t * addr); * * @returns worker thread index. */ -u32 nat64_get_worker_out2in (ip4_header_t * ip); +u32 nat64_get_worker_out2in (vlib_buffer_t * b, ip4_header_t * ip); #endif /* __included_nat64_h__ */ diff --git a/src/plugins/nat/nat64_in2out.c b/src/plugins/nat/nat64_in2out.c index 8d1d734c459..8d4b1a89cad 100644 --- a/src/plugins/nat/nat64_in2out.c +++ b/src/plugins/nat/nat64_in2out.c @@ -18,7 +18,6 @@ */ #include -#include #include #include #include @@ -47,38 +46,12 @@ format_nat64_in2out_trace (u8 * s, va_list * args) return s; } -typedef struct -{ - u32 sw_if_index; - u32 next_index; - u8 cached; -} nat64_in2out_reass_trace_t; - -static u8 * -format_nat64_in2out_reass_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - nat64_in2out_reass_trace_t *t = - va_arg (*args, nat64_in2out_reass_trace_t *); - - s = - format (s, "NAT64-in2out-reass: sw_if_index %d, next index %d, status %s", - t->sw_if_index, t->next_index, - t->cached ? 
"cached" : "translated"); - - return s; -} - - #define foreach_nat64_in2out_error \ _(UNSUPPORTED_PROTOCOL, "unsupported protocol") \ _(IN2OUT_PACKETS, "good in2out packets processed") \ _(NO_TRANSLATION, "no translation") \ _(UNKNOWN, "unknown") \ _(DROP_FRAGMENT, "drop fragment") \ -_(MAX_REASS, "maximum reassemblies exceeded") \ -_(MAX_FRAG, "maximum fragments per reassembly exceeded") \ _(TCP_PACKETS, "TCP packets") \ _(UDP_PACKETS, "UDP packets") \ _(ICMP_PACKETS, "ICMP packets") \ @@ -108,7 +81,6 @@ typedef enum NAT64_IN2OUT_NEXT_IP6_LOOKUP, NAT64_IN2OUT_NEXT_DROP, NAT64_IN2OUT_NEXT_SLOWPATH, - NAT64_IN2OUT_NEXT_REASS, NAT64_IN2OUT_N_NEXT, } nat64_in2out_next_t; @@ -165,32 +137,75 @@ is_hairpinning (ip6_address_t * dst_addr) } static int -nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, - void *arg) +nat64_in2out_tcp_udp (vlib_main_t * vm, vlib_buffer_t * p, u16 l4_offset, + u16 frag_hdr_offset, nat64_in2out_set_ctx_t * ctx) { + ip6_header_t *ip6; + ip_csum_t csum = 0; + ip4_header_t *ip4; + u16 fragment_id; + u8 frag_more; + u16 frag_offset; nat64_main_t *nm = &nat64_main; - nat64_in2out_set_ctx_t *ctx = arg; nat64_db_bib_entry_t *bibe; nat64_db_st_entry_t *ste; - ip46_address_t saddr, daddr; + ip46_address_t old_saddr, old_daddr; + ip4_address_t new_daddr; u32 sw_if_index, fib_index; - udp_header_t *udp = ip6_next_header (ip6); - u8 proto = ip6->protocol; - u16 sport = udp->src_port; - u16 dport = udp->dst_port; + u8 proto = vnet_buffer (p)->ip.reass.ip_proto; + u16 sport = vnet_buffer (p)->ip.reass.l4_src_port; + u16 dport = vnet_buffer (p)->ip.reass.l4_dst_port; nat64_db_t *db = &nm->db[ctx->thread_index]; + ip6 = vlib_buffer_get_current (p); + + vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); + ip4 = vlib_buffer_get_current (p); + + u32 ip_version_traffic_class_and_flow_label = + ip6->ip_version_traffic_class_and_flow_label; + u16 payload_length = ip6->payload_length; + u8 hop_limit = ip6->hop_limit; + + old_saddr.as_u64[0] = 
ip6->src_address.as_u64[0]; + old_saddr.as_u64[1] = ip6->src_address.as_u64[1]; + old_daddr.as_u64[0] = ip6->dst_address.as_u64[0]; + old_daddr.as_u64[1] = ip6->dst_address.as_u64[1]; + + if (PREDICT_FALSE (frag_hdr_offset)) + { + //Only the first fragment + ip6_frag_hdr_t *hdr = + (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset); + fragment_id = frag_id_6to4 (hdr->identification); + frag_more = ip6_frag_hdr_more (hdr); + frag_offset = ip6_frag_hdr_offset (hdr); + } + else + { + fragment_id = 0; + frag_offset = 0; + frag_more = 0; + } + + ip4->ip_version_and_header_length = + IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip4->tos = ip6_translate_tos (ip_version_traffic_class_and_flow_label); + ip4->length = + u16_net_add (payload_length, sizeof (*ip4) + sizeof (*ip6) - l4_offset); + ip4->fragment_id = fragment_id; + ip4->flags_and_fragment_offset = + clib_host_to_net_u16 (frag_offset | + (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0)); + ip4->ttl = hop_limit; + ip4->protocol = (proto == IP_PROTOCOL_ICMP6) ? 
IP_PROTOCOL_ICMP : proto; + sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index); - saddr.as_u64[0] = ip6->src_address.as_u64[0]; - saddr.as_u64[1] = ip6->src_address.as_u64[1]; - daddr.as_u64[0] = ip6->dst_address.as_u64[0]; - daddr.as_u64[1] = ip6->dst_address.as_u64[1]; - ste = - nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto, + nat64_db_st_entry_find (db, &old_saddr, &old_daddr, sport, dport, proto, fib_index, 1); if (ste) @@ -201,7 +216,8 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, } else { - bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1); + bibe = + nat64_db_bib_entry_find (db, &old_saddr, sport, proto, fib_index, 1); if (!bibe) { @@ -214,7 +230,7 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, bibe = nat64_db_bib_entry_create (ctx->thread_index, db, - &ip6->src_address, &out_addr, sport, + &old_saddr.ip6, &out_addr, sport, out_port, fib_index, proto, 0); if (!bibe) return -1; @@ -223,10 +239,10 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, db->bib.bib_entries_num); } - nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); + nat64_extract_ip4 (&old_daddr.ip6, &new_daddr, fib_index); ste = nat64_db_st_entry_create (ctx->thread_index, db, bibe, - &ip6->dst_address, &daddr.ip4, dport); + &old_daddr.ip6, &new_daddr, dport); if (!ste) return -1; @@ -235,22 +251,36 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, } ip4->src_address.as_u32 = bibe->out_addr.as_u32; - udp->src_port = bibe->out_port; - ip4->dst_address.as_u32 = ste->out_r_addr.as_u32; - if (proto == IP_PROTOCOL_TCP) + ip4->checksum = ip4_header_checksum (ip4); + + if (!vnet_buffer (p)->ip.reass.is_non_first_fragment) { - u16 *checksum; - ip_csum_t csum; - tcp_header_t *tcp = ip6_next_header (ip6); + udp_header_t *udp = (udp_header_t *) (ip4 + 1); + udp->src_port = 
bibe->out_port; - nat64_tcp_session_set_state (ste, tcp, 1); - checksum = &tcp->checksum; - csum = ip_csum_sub_even (*checksum, sport); - csum = ip_csum_add_even (csum, udp->src_port); - mss_clamping (nm->sm, tcp, &csum); - *checksum = ip_csum_fold (csum); + //UDP checksum is optional over IPv4 + if (proto == IP_PROTOCOL_UDP) + { + udp->checksum = 0; + } + else + { + tcp_header_t *tcp = (tcp_header_t *) (ip4 + 1); + csum = ip_csum_sub_even (tcp->checksum, old_saddr.as_u64[0]); + csum = ip_csum_sub_even (csum, old_saddr.as_u64[1]); + csum = ip_csum_sub_even (csum, old_daddr.as_u64[0]); + csum = ip_csum_sub_even (csum, old_daddr.as_u64[1]); + csum = ip_csum_add_even (csum, ip4->dst_address.as_u32); + csum = ip_csum_add_even (csum, ip4->src_address.as_u32); + csum = ip_csum_sub_even (csum, sport); + csum = ip_csum_add_even (csum, udp->src_port); + mss_clamping (nm->sm, tcp, &csum); + tcp->checksum = ip_csum_fold (csum); + + nat64_tcp_session_set_state (ste, tcp, 1); + } } nat64_session_reset_timeout (ste, ctx->vm); @@ -480,16 +510,43 @@ unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg) } static int -nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, - void *arg) +nat64_in2out_unk_proto (vlib_main_t * vm, vlib_buffer_t * p, u8 l4_protocol, + u16 l4_offset, u16 frag_hdr_offset, + nat64_in2out_set_ctx_t * s_ctx) { + ip6_header_t *ip6; + ip4_header_t *ip4; + u16 fragment_id; + u16 frag_offset; + u8 frag_more; + + ip6 = vlib_buffer_get_current (p); + + ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4)); + + vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); + + if (PREDICT_FALSE (frag_hdr_offset)) + { + //Only the first fragment + ip6_frag_hdr_t *hdr = + (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset); + fragment_id = frag_id_6to4 (hdr->identification); + frag_offset = ip6_frag_hdr_offset (hdr); + frag_more = ip6_frag_hdr_more (hdr); + } + else + { + fragment_id = 0; + frag_offset = 0; + frag_more = 0; + } + nat64_main_t *nm 
= &nat64_main; - nat64_in2out_set_ctx_t *s_ctx = arg; nat64_db_bib_entry_t *bibe; nat64_db_st_entry_t *ste; ip46_address_t saddr, daddr, addr; u32 sw_if_index, fib_index; - u8 proto = ip6->protocol; int i; nat64_db_t *db = &nm->db[s_ctx->thread_index]; @@ -503,17 +560,19 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, daddr.as_u64[1] = ip6->dst_address.as_u64[1]; ste = - nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1); + nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, l4_protocol, fib_index, + 1); if (ste) { - bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, l4_protocol, ste->bibe_index); if (!bibe) return -1; } else { - bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1); + bibe = + nat64_db_bib_entry_find (db, &saddr, 0, l4_protocol, fib_index, 1); if (!bibe) { @@ -525,7 +584,7 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1], .out_addr.as_u32 = 0, .fib_index = fib_index, - .proto = proto, + .proto = l4_protocol, .thread_index = s_ctx->thread_index, }; @@ -537,7 +596,7 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, /* Verify if out address is not already in use for protocol */ clib_memset (&addr, 0, sizeof (addr)); addr.ip4.as_u32 = ctx.out_addr.as_u32; - if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0)) + if (nat64_db_bib_entry_find (db, &addr, 0, l4_protocol, 0, 0)) ctx.out_addr.as_u32 = 0; if (!ctx.out_addr.as_u32) @@ -545,7 +604,8 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, for (i = 0; i < vec_len (nm->addr_pool); i++) { addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32; - if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0)) + if (!nat64_db_bib_entry_find + (db, &addr, 0, l4_protocol, 0, 0)) break; } } @@ -556,7 +616,7 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, bibe = 
nat64_db_bib_entry_create (s_ctx->thread_index, db, &ip6->src_address, &ctx.out_addr, - 0, 0, fib_index, proto, 0); + 0, 0, fib_index, l4_protocol, 0); if (!bibe) return -1; @@ -580,27 +640,39 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, ip4->src_address.as_u32 = bibe->out_addr.as_u32; ip4->dst_address.as_u32 = ste->out_r_addr.as_u32; + ip4->ip_version_and_header_length = + IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip4->tos = ip6_translate_tos (ip6->ip_version_traffic_class_and_flow_label); + ip4->length = u16_net_add (ip6->payload_length, + sizeof (*ip4) + sizeof (*ip6) - l4_offset); + ip4->fragment_id = fragment_id; + ip4->flags_and_fragment_offset = + clib_host_to_net_u16 (frag_offset | + (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0)); + ip4->ttl = ip6->hop_limit; + ip4->protocol = l4_protocol; + ip4->checksum = ip4_header_checksum (ip4); + return 0; } - - static int nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, - ip6_header_t * ip6, u32 thread_index) + ip6_header_t * ip6, u32 l4_offset, + u32 thread_index) { nat64_main_t *nm = &nat64_main; nat64_db_bib_entry_t *bibe; nat64_db_st_entry_t *ste; ip46_address_t saddr, daddr; u32 sw_if_index, fib_index; - udp_header_t *udp = ip6_next_header (ip6); - tcp_header_t *tcp = ip6_next_header (ip6); - u8 proto = ip6->protocol; - u16 sport = udp->src_port; - u16 dport = udp->dst_port; - u16 *checksum; - ip_csum_t csum; + udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, l4_offset); + tcp_header_t *tcp = (tcp_header_t *) u8_ptr_add (ip6, l4_offset); + u8 proto = vnet_buffer (b)->ip.reass.ip_proto; + u16 sport = vnet_buffer (b)->ip.reass.l4_src_port; + u16 dport = vnet_buffer (b)->ip.reass.l4_dst_port; + u16 *checksum = NULL; + ip_csum_t csum = 0; nat64_db_t *db = &nm->db[thread_index]; sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; @@ -612,17 +684,17 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, daddr.as_u64[0] = 
ip6->dst_address.as_u64[0]; daddr.as_u64[1] = ip6->dst_address.as_u64[1]; - if (proto == IP_PROTOCOL_UDP) - checksum = &udp->checksum; - else - checksum = &tcp->checksum; - - csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]); - csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]); - csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]); - csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]); - csum = ip_csum_sub_even (csum, sport); - csum = ip_csum_sub_even (csum, dport); + if (!vnet_buffer (b)->ip.reass.is_non_first_fragment) + { + if (proto == IP_PROTOCOL_UDP) + checksum = &udp->checksum; + else + checksum = &tcp->checksum; + csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]); + csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]); + csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]); + } ste = nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto, @@ -674,7 +746,11 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, nat64_session_reset_timeout (ste, vm); - sport = udp->src_port = bibe->out_port; + if (!vnet_buffer (b)->ip.reass.is_non_first_fragment) + { + udp->src_port = bibe->out_port; + } + nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index); clib_memset (&daddr, 0, sizeof (daddr)); @@ -696,15 +772,20 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0]; ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1]; - udp->dst_port = bibe->in_port; - csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]); - csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]); - csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]); - csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]); - csum = ip_csum_add_even (csum, udp->src_port); - csum = ip_csum_add_even (csum, udp->dst_port); - *checksum = ip_csum_fold (csum); + if 
(!vnet_buffer (b)->ip.reass.is_non_first_fragment) + { + csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]); + csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]); + csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]); + csum = ip_csum_sub_even (csum, sport); + csum = ip_csum_sub_even (csum, dport); + udp->dst_port = bibe->in_port; + csum = ip_csum_add_even (csum, udp->src_port); + csum = ip_csum_add_even (csum, udp->dst_port); + *checksum = ip_csum_fold (csum); + } return 0; } @@ -990,7 +1071,7 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t *b0; u32 next0; ip6_header_t *ip60; - u16 l4_offset0, frag_offset0; + u16 l4_offset0, frag_hdr_offset0; u8 l4_protocol0; u32 proto0; nat64_in2out_set_ctx_t ctx0; @@ -1015,8 +1096,8 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_FALSE (ip6_parse - (ip60, b0->current_length, &l4_protocol0, &l4_offset0, - &frag_offset0))) + (vm, b0, ip60, b0->current_length, &l4_protocol0, &l4_offset0, + &frag_hdr_offset0))) { next0 = NAT64_IN2OUT_NEXT_DROP; b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN]; @@ -1051,7 +1132,9 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, goto trace0; } - if (ip6_to_ip4 (b0, nat64_in2out_unk_proto_set_cb, &ctx0)) + if (nat64_in2out_unk_proto + (vm, b0, l4_protocol0, l4_offset0, frag_hdr_offset0, + &ctx0)) { next0 = NAT64_IN2OUT_NEXT_DROP; b0->error = @@ -1070,14 +1153,6 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } } - if (PREDICT_FALSE - (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)) - { - next0 = NAT64_IN2OUT_NEXT_REASS; - fragments++; - goto trace0; - } - if (proto0 == SNAT_PROTOCOL_ICMP) { icmp_packets++; @@ -1095,7 +1170,7 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } if (icmp6_to_icmp - (b0, nat64_in2out_icmp_set_cb, &ctx0, + (vm, 
b0, nat64_in2out_icmp_set_cb, &ctx0, nat64_in2out_inner_icmp_set_cb, &ctx0)) { next0 = NAT64_IN2OUT_NEXT_DROP; @@ -1114,7 +1189,7 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, { next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP; if (nat64_in2out_tcp_udp_hairpinning - (vm, b0, ip60, thread_index)) + (vm, b0, ip60, l4_offset0, thread_index)) { next0 = NAT64_IN2OUT_NEXT_DROP; b0->error = @@ -1123,8 +1198,8 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, goto trace0; } - if (ip6_to_ip4_tcp_udp - (b0, nat64_in2out_tcp_udp_set_cb, &ctx0, 0)) + if (nat64_in2out_tcp_udp + (vm, b0, l4_offset0, frag_hdr_offset0, &ctx0)) { next0 = NAT64_IN2OUT_NEXT_DROP; b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION]; @@ -1191,7 +1266,6 @@ VLIB_REGISTER_NODE (nat64_in2out_node) = { [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup", [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup", [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath", - [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass", }, }; /* *INDENT-ON* */ @@ -1218,7 +1292,6 @@ VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = { [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup", [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup", [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath", - [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass", }, }; /* *INDENT-ON* */ @@ -1233,447 +1306,6 @@ typedef struct nat64_in2out_frag_set_ctx_t_ u8 first_frag; } nat64_in2out_frag_set_ctx_t; -static int -nat64_in2out_frag_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) -{ - nat64_main_t *nm = &nat64_main; - nat64_in2out_frag_set_ctx_t *ctx = arg; - nat64_db_st_entry_t *ste; - nat64_db_bib_entry_t *bibe; - udp_header_t *udp; - nat64_db_t *db = &nm->db[ctx->thread_index]; - - ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index); - if (!ste) - return -1; - - bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index); - if (!bibe) - return -1; - - nat64_session_reset_timeout (ste, 
ctx->vm); - - if (ctx->first_frag) - { - udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset); - - if (ctx->proto == IP_PROTOCOL_TCP) - { - u16 *checksum; - ip_csum_t csum; - tcp_header_t *tcp = (tcp_header_t *) udp; - - nat64_tcp_session_set_state (ste, tcp, 1); - checksum = &tcp->checksum; - csum = ip_csum_sub_even (*checksum, tcp->src_port); - csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[0]); - csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]); - csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]); - csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]); - csum = ip_csum_add_even (csum, bibe->out_port); - csum = ip_csum_add_even (csum, bibe->out_addr.as_u32); - csum = ip_csum_add_even (csum, ste->out_r_addr.as_u32); - *checksum = ip_csum_fold (csum); - } - - udp->src_port = bibe->out_port; - } - - ip4->src_address.as_u32 = bibe->out_addr.as_u32; - ip4->dst_address.as_u32 = ste->out_r_addr.as_u32; - - return 0; -} - -static int -nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6, - nat64_in2out_frag_set_ctx_t * ctx) -{ - nat64_main_t *nm = &nat64_main; - nat64_db_st_entry_t *ste; - nat64_db_bib_entry_t *bibe; - udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset); - tcp_header_t *tcp = (tcp_header_t *) udp; - u16 sport = udp->src_port; - u16 dport = udp->dst_port; - u16 *checksum; - ip_csum_t csum; - ip46_address_t daddr; - nat64_db_t *db = &nm->db[ctx->thread_index]; - - if (ctx->first_frag) - { - if (ctx->proto == IP_PROTOCOL_UDP) - checksum = &udp->checksum; - else - checksum = &tcp->checksum; - - csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]); - csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]); - csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]); - csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]); - csum = ip_csum_sub_even (csum, sport); - csum = ip_csum_sub_even (csum, dport); - } - - ste = nat64_db_st_entry_by_index (db, ctx->proto, 
ctx->sess_index); - if (!ste) - return -1; - - bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index); - if (!bibe) - return -1; - - if (ctx->proto == IP_PROTOCOL_TCP) - nat64_tcp_session_set_state (ste, tcp, 1); - - nat64_session_reset_timeout (ste, ctx->vm); - - sport = bibe->out_port; - dport = ste->r_port; - - nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, bibe->fib_index); - - clib_memset (&daddr, 0, sizeof (daddr)); - daddr.ip4.as_u32 = ste->out_r_addr.as_u32; - - bibe = 0; - /* *INDENT-OFF* */ - vec_foreach (db, nm->db) - { - bibe = nat64_db_bib_entry_find (db, &daddr, dport, ctx->proto, 0, 0); - - if (bibe) - break; - } - /* *INDENT-ON* */ - - if (!bibe) - return -1; - - ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0]; - ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1]; - - if (ctx->first_frag) - { - udp->dst_port = bibe->in_port; - udp->src_port = sport; - csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]); - csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]); - csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]); - csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]); - csum = ip_csum_add_even (csum, udp->src_port); - csum = ip_csum_add_even (csum, udp->dst_port); - *checksum = ip_csum_fold (csum); - } - - return 0; -} - -VLIB_NODE_FN (nat64_in2out_reass_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - u32 n_left_from, *from, *to_next; - nat64_in2out_next_t next_index; - u32 pkts_processed = 0, cached_fragments = 0; - u32 *fragments_to_drop = 0; - u32 *fragments_to_loopback = 0; - nat64_main_t *nm = &nat64_main; - u32 thread_index = vm->thread_index; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - 
vlib_buffer_t *b0; - u32 next0; - u8 cached0 = 0; - ip6_header_t *ip60; - u16 l4_offset0, frag_offset0; - u8 l4_protocol0; - nat_reass_ip6_t *reass0; - ip6_frag_hdr_t *frag0; - nat64_db_bib_entry_t *bibe0; - nat64_db_st_entry_t *ste0; - udp_header_t *udp0; - snat_protocol_t proto0; - u32 sw_if_index0, fib_index0; - ip46_address_t saddr0, daddr0; - nat64_in2out_frag_set_ctx_t ctx0; - nat64_db_t *db = &nm->db[thread_index]; - - /* speculatively enqueue b0 to the current next frame */ - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP; - - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - fib_index0 = - fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, - sw_if_index0); - - ctx0.thread_index = thread_index; - - if (PREDICT_FALSE (nat_reass_is_drop_frag (1))) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT]; - goto trace0; - } - - ip60 = (ip6_header_t *) vlib_buffer_get_current (b0); - - if (PREDICT_FALSE - (ip6_parse - (ip60, b0->current_length, &l4_protocol0, &l4_offset0, - &frag_offset0))) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN]; - goto trace0; - } - - if (PREDICT_FALSE - (!(l4_protocol0 == IP_PROTOCOL_TCP - || l4_protocol0 == IP_PROTOCOL_UDP))) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT]; - goto trace0; - } - - udp0 = (udp_header_t *) u8_ptr_add (ip60, l4_offset0); - frag0 = (ip6_frag_hdr_t *) u8_ptr_add (ip60, frag_offset0); - proto0 = ip_proto_to_snat_proto (l4_protocol0); - - reass0 = nat_ip6_reass_find_or_create (ip60->src_address, - ip60->dst_address, - frag0->identification, - l4_protocol0, - 1, &fragments_to_drop); - - if (PREDICT_FALSE (!reass0)) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_REASS]; - goto trace0; - } - 
- if (PREDICT_TRUE (ip6_frag_hdr_offset (frag0))) - { - ctx0.first_frag = 0; - if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0)) - { - if (nat_ip6_reass_add_fragment - (thread_index, reass0, bi0, &fragments_to_drop)) - { - b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_FRAG]; - next0 = NAT64_IN2OUT_NEXT_DROP; - goto trace0; - } - cached0 = 1; - goto trace0; - } - } - else - { - ctx0.first_frag = 1; - - saddr0.as_u64[0] = ip60->src_address.as_u64[0]; - saddr0.as_u64[1] = ip60->src_address.as_u64[1]; - daddr0.as_u64[0] = ip60->dst_address.as_u64[0]; - daddr0.as_u64[1] = ip60->dst_address.as_u64[1]; - - ste0 = - nat64_db_st_entry_find (db, &saddr0, &daddr0, - udp0->src_port, udp0->dst_port, - l4_protocol0, fib_index0, 1); - if (!ste0) - { - bibe0 = - nat64_db_bib_entry_find (db, &saddr0, udp0->src_port, - l4_protocol0, fib_index0, 1); - if (!bibe0) - { - u16 out_port0; - ip4_address_t out_addr0; - if (nat64_alloc_out_addr_and_port - (fib_index0, proto0, &out_addr0, &out_port0, - thread_index)) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = - node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION]; - goto trace0; - } - - bibe0 = - nat64_db_bib_entry_create (thread_index, db, - &ip60->src_address, - &out_addr0, udp0->src_port, - out_port0, fib_index0, - l4_protocol0, 0); - if (!bibe0) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = - node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION]; - goto trace0; - } - vlib_set_simple_counter (&nm->total_bibs, thread_index, - 0, db->bib.bib_entries_num); - } - nat64_extract_ip4 (&ip60->dst_address, &daddr0.ip4, - fib_index0); - ste0 = - nat64_db_st_entry_create (thread_index, db, bibe0, - &ip60->dst_address, &daddr0.ip4, - udp0->dst_port); - if (!ste0) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = - node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION]; - goto trace0; - } - - vlib_set_simple_counter (&nm->total_sessions, thread_index, - 0, db->st.st_entries_num); - } - reass0->sess_index = nat64_db_st_entry_get_index (db, ste0); - 
- nat_ip6_reass_get_frags (reass0, &fragments_to_loopback); - } - - ctx0.sess_index = reass0->sess_index; - ctx0.proto = l4_protocol0; - ctx0.vm = vm; - ctx0.l4_offset = l4_offset0; - - if (PREDICT_FALSE (is_hairpinning (&ip60->dst_address))) - { - next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP; - if (nat64_in2out_frag_hairpinning (b0, ip60, &ctx0)) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION]; - } - goto trace0; - } - else - { - if (ip6_to_ip4_fragmented (b0, nat64_in2out_frag_set_cb, &ctx0)) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN]; - goto trace0; - } - } - - trace0: - if (PREDICT_FALSE - ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) - { - nat64_in2out_reass_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - t->cached = cached0; - t->sw_if_index = sw_if_index0; - t->next_index = next0; - } - - if (cached0) - { - n_left_to_next++; - to_next--; - cached_fragments++; - } - else - { - pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP; - - /* verify speculative enqueue, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - if (n_left_from == 0 && vec_len (fragments_to_loopback)) - { - from = vlib_frame_vector_args (frame); - u32 len = vec_len (fragments_to_loopback); - if (len <= VLIB_FRAME_SIZE) - { - clib_memcpy_fast (from, fragments_to_loopback, - sizeof (u32) * len); - n_left_from = len; - vec_reset_length (fragments_to_loopback); - } - else - { - clib_memcpy_fast (from, fragments_to_loopback + - (len - VLIB_FRAME_SIZE), - sizeof (u32) * VLIB_FRAME_SIZE); - n_left_from = VLIB_FRAME_SIZE; - _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE; - } - } - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, nm->in2out_reass_node_index, - NAT64_IN2OUT_ERROR_PROCESSED_FRAGMENTS, - 
pkts_processed); - vlib_node_increment_counter (vm, nm->in2out_reass_node_index, - NAT64_IN2OUT_ERROR_CACHED_FRAGMENTS, - cached_fragments); - - nat_send_all_to_node (vm, fragments_to_drop, node, - &node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT], - NAT64_IN2OUT_NEXT_DROP); - - vec_free (fragments_to_drop); - vec_free (fragments_to_loopback); - return frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (nat64_in2out_reass_node) = { - .name = "nat64-in2out-reass", - .vector_size = sizeof (u32), - .format_trace = format_nat64_in2out_reass_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN (nat64_in2out_error_strings), - .error_strings = nat64_in2out_error_strings, - .n_next_nodes = NAT64_IN2OUT_N_NEXT, - /* edit / add dispositions here */ - .next_nodes = { - [NAT64_IN2OUT_NEXT_DROP] = "error-drop", - [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup", - [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup", - [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath", - [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass", - }, -}; -/* *INDENT-ON* */ #define foreach_nat64_in2out_handoff_error \ _(CONGESTION_DROP, "congestion drop") \ diff --git a/src/plugins/nat/nat64_out2in.c b/src/plugins/nat/nat64_out2in.c index e0dd407e0cf..6c0075102a1 100644 --- a/src/plugins/nat/nat64_out2in.c +++ b/src/plugins/nat/nat64_out2in.c @@ -18,7 +18,6 @@ */ #include -#include #include #include #include @@ -44,38 +43,12 @@ format_nat64_out2in_trace (u8 * s, va_list * args) return s; } -typedef struct -{ - u32 sw_if_index; - u32 next_index; - u8 cached; -} nat64_out2in_reass_trace_t; - -static u8 * -format_nat64_out2in_reass_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - nat64_out2in_reass_trace_t *t = - va_arg (*args, nat64_out2in_reass_trace_t *); - - s = - format (s, "NAT64-out2in-reass: sw_if_index %d, next index %d, status %s", - t->sw_if_index, 
t->next_index, - t->cached ? "cached" : "translated"); - - return s; -} - - #define foreach_nat64_out2in_error \ _(UNSUPPORTED_PROTOCOL, "unsupported protocol") \ _(OUT2IN_PACKETS, "good out2in packets processed") \ _(NO_TRANSLATION, "no translation") \ _(UNKNOWN, "unknown") \ _(DROP_FRAGMENT, "drop fragment") \ -_(MAX_REASS, "maximum reassemblies exceeded") \ -_(MAX_FRAG, "maximum fragments per reassembly exceeded") \ _(TCP_PACKETS, "TCP packets") \ _(UDP_PACKETS, "UDP packets") \ _(ICMP_PACKETS, "ICMP packets") \ @@ -104,7 +77,6 @@ typedef enum NAT64_OUT2IN_NEXT_IP6_LOOKUP, NAT64_OUT2IN_NEXT_IP4_LOOKUP, NAT64_OUT2IN_NEXT_DROP, - NAT64_OUT2IN_NEXT_REASS, NAT64_OUT2IN_N_NEXT, } nat64_out2in_next_t; @@ -116,25 +88,90 @@ typedef struct nat64_out2in_set_ctx_t_ } nat64_out2in_set_ctx_t; static int -nat64_out2in_tcp_udp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, - void *arg) +nat64_out2in_tcp_udp (vlib_main_t * vm, vlib_buffer_t * b, + nat64_out2in_set_ctx_t * ctx) { + ip4_header_t *ip4; + ip6_header_t *ip6; + ip_csum_t csum; + u16 *checksum = NULL; + ip6_frag_hdr_t *frag; + u32 frag_id; + ip4_address_t old_src, old_dst; + nat64_main_t *nm = &nat64_main; - nat64_out2in_set_ctx_t *ctx = arg; nat64_db_bib_entry_t *bibe; nat64_db_st_entry_t *ste; - ip46_address_t saddr, daddr; + ip46_address_t saddr; + ip46_address_t daddr; ip6_address_t ip6_saddr; - udp_header_t *udp = ip4_next_header (ip4); - tcp_header_t *tcp = ip4_next_header (ip4); - u8 proto = ip4->protocol; - u16 dport = udp->dst_port; - u16 sport = udp->src_port; + u8 proto = vnet_buffer (b)->ip.reass.ip_proto; + u16 dport = vnet_buffer (b)->ip.reass.l4_dst_port; + u16 sport = vnet_buffer (b)->ip.reass.l4_src_port; u32 sw_if_index, fib_index; - u16 *checksum; - ip_csum_t csum; nat64_db_t *db = &nm->db[ctx->thread_index]; + ip4 = vlib_buffer_get_current (b); + + udp_header_t *udp = ip4_next_header (ip4); + tcp_header_t *tcp = ip4_next_header (ip4); + if (!vnet_buffer (b)->ip.reass.is_non_first_fragment) + { + 
if (ip4->protocol == IP_PROTOCOL_UDP) + { + checksum = &udp->checksum; + //UDP checksum is optional over IPv4 but mandatory for IPv6 + //We do not check udp->length sanity but use our safe computed value instead + if (PREDICT_FALSE (!*checksum)) + { + u16 udp_len = + clib_host_to_net_u16 (ip4->length) - sizeof (*ip4); + csum = ip_incremental_checksum (0, udp, udp_len); + csum = + ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len)); + csum = + ip_csum_with_carry (csum, + clib_host_to_net_u16 (IP_PROTOCOL_UDP)); + csum = + ip_csum_with_carry (csum, *((u64 *) (&ip4->src_address))); + *checksum = ~ip_csum_fold (csum); + } + } + else + { + checksum = &tcp->checksum; + } + } + + old_src.as_u32 = ip4->src_address.as_u32; + old_dst.as_u32 = ip4->dst_address.as_u32; + + // Deal with fragmented packets + u16 frag_offset = ip4_get_fragment_offset (ip4); + if (PREDICT_FALSE (ip4_get_fragment_more (ip4) || frag_offset)) + { + ip6 = + (ip6_header_t *) u8_ptr_add (ip4, + sizeof (*ip4) - sizeof (*ip6) - + sizeof (*frag)); + frag = + (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); + frag_id = frag_id_4to6 (ip4->fragment_id); + vlib_buffer_advance (b, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); + } + else + { + ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6)); + vlib_buffer_advance (b, sizeof (*ip4) - sizeof (*ip6)); + frag = NULL; + } + + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); + ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4)); + ip6->hop_limit = ip4->ttl; + ip6->protocol = ip4->protocol; + sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); @@ -159,7 +196,7 @@ nat64_out2in_tcp_udp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, if (!bibe) return -1; - nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index); + nat64_compose_ip6 (&ip6_saddr, &old_src, bibe->fib_index); 
ste = nat64_db_st_entry_create (ctx->thread_index, db, bibe, &ip6_saddr, &saddr.ip4, sport); @@ -176,29 +213,48 @@ nat64_out2in_tcp_udp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0]; ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1]; - udp->dst_port = bibe->in_port; - if (proto == IP_PROTOCOL_UDP) - checksum = &udp->checksum; - else + vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index; + + nat64_session_reset_timeout (ste, ctx->vm); + + if (PREDICT_FALSE (frag != NULL)) { - checksum = &tcp->checksum; - nat64_tcp_session_set_state (ste, tcp, 0); + frag->next_hdr = ip6->protocol; + frag->identification = frag_id; + frag->rsv = 0; + frag->fragment_offset_and_more = + ip6_frag_hdr_offset_and_more (frag_offset, 1); + ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag)); } - csum = ip_csum_sub_even (*checksum, dport); - csum = ip_csum_add_even (csum, udp->dst_port); - *checksum = ip_csum_fold (csum); + if (!vnet_buffer (b)->ip.reass.is_non_first_fragment) + { + udp->dst_port = bibe->in_port; - vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index; + if (proto == IP_PROTOCOL_TCP) + { + nat64_tcp_session_set_state (ste, tcp, 0); + } - nat64_session_reset_timeout (ste, ctx->vm); + csum = ip_csum_sub_even (*checksum, dport); + csum = ip_csum_add_even (csum, udp->dst_port); + csum = ip_csum_sub_even (csum, old_src.as_u32); + csum = ip_csum_sub_even (csum, old_dst.as_u32); + csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]); + csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]); + csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]); + *checksum = ip_csum_fold (csum); + } return 0; } static int -nat64_out2in_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg) +nat64_out2in_icmp_set_cb (vlib_buffer_t * b, ip4_header_t * ip4, + ip6_header_t * 
ip6, void *arg) { nat64_main_t *nm = &nat64_main; nat64_out2in_set_ctx_t *ctx = arg; @@ -278,8 +334,8 @@ nat64_out2in_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg) } static int -nat64_out2in_inner_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, - void *arg) +nat64_out2in_inner_icmp_set_cb (vlib_buffer_t * b, ip4_header_t * ip4, + ip6_header_t * ip6, void *arg) { nat64_main_t *nm = &nat64_main; nat64_out2in_set_ctx_t *ctx = arg; @@ -370,11 +426,15 @@ nat64_out2in_inner_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, } static int -nat64_out2in_unk_proto_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, - void *arg) +nat64_out2in_unk_proto (vlib_main_t * vm, vlib_buffer_t * p, + nat64_out2in_set_ctx_t * ctx) { + ip4_header_t *ip4 = vlib_buffer_get_current (p); + ip6_header_t *ip6; + ip6_frag_hdr_t *frag; + u32 frag_id; + nat64_main_t *nm = &nat64_main; - nat64_out2in_set_ctx_t *ctx = arg; nat64_db_bib_entry_t *bibe; nat64_db_st_entry_t *ste; ip46_address_t saddr, daddr; @@ -383,6 +443,43 @@ nat64_out2in_unk_proto_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, u8 proto = ip4->protocol; nat64_db_t *db = &nm->db[ctx->thread_index]; + // Deal with fragmented packets + u16 frag_offset = ip4_get_fragment_offset (ip4); + if (PREDICT_FALSE (ip4_get_fragment_more (ip4) || frag_offset)) + { + ip6 = + (ip6_header_t *) u8_ptr_add (ip4, + sizeof (*ip4) - sizeof (*ip6) - + sizeof (*frag)); + frag = + (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); + frag_id = frag_id_4to6 (ip4->fragment_id); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); + } + else + { + ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6)); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6)); + frag = NULL; + } + + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); + ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4)); + ip6->hop_limit = ip4->ttl; + 
ip6->protocol = ip4->protocol; + + if (PREDICT_FALSE (frag != NULL)) + { + frag->next_hdr = ip6->protocol; + frag->identification = frag_id; + frag->rsv = 0; + frag->fragment_offset_and_more = + ip6_frag_hdr_offset_and_more (frag_offset, 1); + ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag)); + } + sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); @@ -482,7 +579,7 @@ VLIB_NODE_FN (nat64_out2in_node) (vlib_main_t * vm, if (PREDICT_FALSE (proto0 == ~0)) { - if (ip4_to_ip6 (b0, nat64_out2in_unk_proto_set_cb, &ctx0)) + if (nat64_out2in_unk_proto (vm, b0, &ctx0)) { next0 = NAT64_OUT2IN_NEXT_DROP; b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION]; @@ -491,13 +588,6 @@ VLIB_NODE_FN (nat64_out2in_node) (vlib_main_t * vm, goto trace0; } - if (PREDICT_FALSE (ip4_is_fragment (ip40))) - { - next0 = NAT64_OUT2IN_NEXT_REASS; - fragments++; - goto trace0; - } - if (proto0 == SNAT_PROTOCOL_ICMP) { icmp_packets++; @@ -517,7 +607,7 @@ VLIB_NODE_FN (nat64_out2in_node) (vlib_main_t * vm, else udp_packets++; - if (ip4_to_ip6_tcp_udp (b0, nat64_out2in_tcp_udp_set_cb, &ctx0)) + if (nat64_out2in_tcp_udp (vm, b0, &ctx0)) { udp0 = ip4_next_header (ip40); /* @@ -587,7 +677,6 @@ VLIB_REGISTER_NODE (nat64_out2in_node) = { [NAT64_OUT2IN_NEXT_DROP] = "error-drop", [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup", [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup", - [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass", }, }; /* *INDENT-ON* */ @@ -602,350 +691,6 @@ typedef struct nat64_out2in_frag_set_ctx_t_ u8 first_frag; } nat64_out2in_frag_set_ctx_t; -static int -nat64_out2in_frag_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg) -{ - nat64_main_t *nm = &nat64_main; - nat64_out2in_frag_set_ctx_t *ctx = arg; - nat64_db_st_entry_t *ste; - nat64_db_bib_entry_t *bibe; - udp_header_t *udp = ip4_next_header (ip4); - ip_csum_t csum; - u16 
*checksum; - nat64_db_t *db = &nm->db[ctx->thread_index]; - - ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index); - if (!ste) - return -1; - - bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index); - if (!bibe) - return -1; - - if (ctx->first_frag) - { - udp->dst_port = bibe->in_port; - - if (ip4->protocol == IP_PROTOCOL_UDP) - { - checksum = &udp->checksum; - - if (!checksum) - { - u16 udp_len = - clib_host_to_net_u16 (ip4->length) - sizeof (*ip4); - csum = ip_incremental_checksum (0, udp, udp_len); - csum = - ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len)); - csum = - ip_csum_with_carry (csum, - clib_host_to_net_u16 (IP_PROTOCOL_UDP)); - csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[0]); - csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[1]); - csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[0]); - csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[1]); - *checksum = ~ip_csum_fold (csum); - } - else - { - csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32); - csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32); - csum = ip_csum_sub_even (csum, bibe->out_port); - csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]); - csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]); - csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]); - csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]); - csum = ip_csum_add_even (csum, bibe->in_port); - *checksum = ip_csum_fold (csum); - } - } - else - { - tcp_header_t *tcp = ip4_next_header (ip4); - nat64_tcp_session_set_state (ste, tcp, 0); - checksum = &tcp->checksum; - csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32); - csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32); - csum = ip_csum_sub_even (csum, bibe->out_port); - csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]); - csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]); - csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]); - csum = ip_csum_add_even (csum, 
bibe->in_addr.as_u64[1]); - csum = ip_csum_add_even (csum, bibe->in_port); - *checksum = ip_csum_fold (csum); - } - - } - - ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0]; - ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1]; - - ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0]; - ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1]; - - vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index; - - nat64_session_reset_timeout (ste, ctx->vm); - - return 0; -} - -VLIB_NODE_FN (nat64_out2in_reass_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - u32 n_left_from, *from, *to_next; - nat64_out2in_next_t next_index; - u32 pkts_processed = 0, cached_fragments = 0; - u32 *fragments_to_drop = 0; - u32 *fragments_to_loopback = 0; - nat64_main_t *nm = &nat64_main; - u32 thread_index = vm->thread_index; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - u32 next0; - ip4_header_t *ip40; - u8 cached0 = 0; - u32 sw_if_index0, fib_index0; - udp_header_t *udp0; - nat_reass_ip4_t *reass0; - ip46_address_t saddr0, daddr0; - nat64_db_st_entry_t *ste0; - nat64_db_bib_entry_t *bibe0; - ip6_address_t ip6_saddr0; - nat64_out2in_frag_set_ctx_t ctx0; - nat64_db_t *db = &nm->db[thread_index]; - - /* speculatively enqueue b0 to the current next frame */ - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP; - - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - fib_index0 = - fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, - sw_if_index0); - - ctx0.thread_index = thread_index; - - if (PREDICT_FALSE (nat_reass_is_drop_frag 
(1))) - { - next0 = NAT64_OUT2IN_NEXT_DROP; - b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT]; - goto trace0; - } - - ip40 = vlib_buffer_get_current (b0); - - if (PREDICT_FALSE (!(ip40->protocol == IP_PROTOCOL_TCP - || ip40->protocol == IP_PROTOCOL_UDP))) - { - next0 = NAT64_OUT2IN_NEXT_DROP; - b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT]; - goto trace0; - } - - udp0 = ip4_next_header (ip40); - - reass0 = nat_ip4_reass_find_or_create (ip40->src_address, - ip40->dst_address, - ip40->fragment_id, - ip40->protocol, - 1, &fragments_to_drop); - - if (PREDICT_FALSE (!reass0)) - { - next0 = NAT64_OUT2IN_NEXT_DROP; - b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_REASS]; - goto trace0; - } - - if (PREDICT_FALSE (ip4_is_first_fragment (ip40))) - { - ctx0.first_frag = 1; - - clib_memset (&saddr0, 0, sizeof (saddr0)); - saddr0.ip4.as_u32 = ip40->src_address.as_u32; - clib_memset (&daddr0, 0, sizeof (daddr0)); - daddr0.ip4.as_u32 = ip40->dst_address.as_u32; - - ste0 = - nat64_db_st_entry_find (db, &daddr0, &saddr0, - udp0->dst_port, udp0->src_port, - ip40->protocol, fib_index0, 0); - if (!ste0) - { - bibe0 = - nat64_db_bib_entry_find (db, &daddr0, udp0->dst_port, - ip40->protocol, fib_index0, 0); - if (!bibe0) - { - next0 = NAT64_OUT2IN_NEXT_DROP; - b0->error = - node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION]; - goto trace0; - } - - nat64_compose_ip6 (&ip6_saddr0, &ip40->src_address, - bibe0->fib_index); - ste0 = - nat64_db_st_entry_create (thread_index, - db, bibe0, &ip6_saddr0, - &saddr0.ip4, udp0->src_port); - - if (!ste0) - { - next0 = NAT64_OUT2IN_NEXT_DROP; - b0->error = - node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION]; - goto trace0; - } - - vlib_set_simple_counter (&nm->total_sessions, thread_index, - 0, db->st.st_entries_num); - } - reass0->sess_index = nat64_db_st_entry_get_index (db, ste0); - reass0->thread_index = thread_index; - - nat_ip4_reass_get_frags (reass0, &fragments_to_loopback); - } - else - { - ctx0.first_frag = 0; - - if 
(PREDICT_FALSE (reass0->sess_index == (u32) ~ 0)) - { - if (nat_ip4_reass_add_fragment - (thread_index, reass0, bi0, &fragments_to_drop)) - { - b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_FRAG]; - next0 = NAT64_OUT2IN_NEXT_DROP; - goto trace0; - } - cached0 = 1; - goto trace0; - } - } - - ctx0.sess_index = reass0->sess_index; - ctx0.proto = ip40->protocol; - ctx0.vm = vm; - ctx0.b = b0; - - if (ip4_to_ip6_fragmented (b0, nat64_out2in_frag_set_cb, &ctx0)) - { - next0 = NAT64_OUT2IN_NEXT_DROP; - b0->error = node->errors[NAT64_OUT2IN_ERROR_UNKNOWN]; - goto trace0; - } - - trace0: - if (PREDICT_FALSE - ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) - { - nat64_out2in_reass_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - t->cached = cached0; - t->sw_if_index = sw_if_index0; - t->next_index = next0; - } - - if (cached0) - { - n_left_to_next++; - to_next--; - cached_fragments++; - } - else - { - pkts_processed += next0 != NAT64_OUT2IN_NEXT_DROP; - - /* verify speculative enqueue, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - if (n_left_from == 0 && vec_len (fragments_to_loopback)) - { - from = vlib_frame_vector_args (frame); - u32 len = vec_len (fragments_to_loopback); - if (len <= VLIB_FRAME_SIZE) - { - clib_memcpy_fast (from, fragments_to_loopback, - sizeof (u32) * len); - n_left_from = len; - vec_reset_length (fragments_to_loopback); - } - else - { - clib_memcpy_fast (from, fragments_to_loopback + - (len - VLIB_FRAME_SIZE), - sizeof (u32) * VLIB_FRAME_SIZE); - n_left_from = VLIB_FRAME_SIZE; - _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE; - } - } - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, nm->out2in_reass_node_index, - NAT64_OUT2IN_ERROR_PROCESSED_FRAGMENTS, - pkts_processed); - vlib_node_increment_counter (vm, nm->out2in_reass_node_index, - 
NAT64_OUT2IN_ERROR_CACHED_FRAGMENTS, - cached_fragments); - - nat_send_all_to_node (vm, fragments_to_drop, node, - &node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT], - NAT64_OUT2IN_NEXT_DROP); - - vec_free (fragments_to_drop); - vec_free (fragments_to_loopback); - return frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (nat64_out2in_reass_node) = { - .name = "nat64-out2in-reass", - .vector_size = sizeof (u32), - .format_trace = format_nat64_out2in_reass_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN (nat64_out2in_error_strings), - .error_strings = nat64_out2in_error_strings, - .n_next_nodes = NAT64_OUT2IN_N_NEXT, - /* edit / add dispositions here */ - .next_nodes = { - [NAT64_OUT2IN_NEXT_DROP] = "error-drop", - [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup", - [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup", - [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass", - }, -}; -/* *INDENT-ON* */ - #define foreach_nat64_out2in_handoff_error \ _(CONGESTION_DROP, "congestion drop") \ _(SAME_WORKER, "same worker") \ @@ -1010,7 +755,7 @@ VLIB_NODE_FN (nat64_out2in_handoff_node) (vlib_main_t * vm, ip4_header_t *ip0; ip0 = vlib_buffer_get_current (b[0]); - ti[0] = nat64_get_worker_out2in (ip0); + ti[0] = nat64_get_worker_out2in (b[0], ip0); if (ti[0] != thread_index) do_handoff++; diff --git a/src/plugins/nat/nat66.c b/src/plugins/nat/nat66.c index e5e783b31f7..3ac773c7da4 100644 --- a/src/plugins/nat/nat66.c +++ b/src/plugins/nat/nat66.c @@ -19,6 +19,7 @@ #include #include +#include nat66_main_t nat66_main; @@ -29,11 +30,13 @@ VNET_FEATURE_INIT (nat66_in2out, static) = { .arc_name = "ip6-unicast", .node_name = "nat66-in2out", .runs_before = VNET_FEATURES ("ip6-lookup"), + .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"), }; VNET_FEATURE_INIT (nat66_out2in, static) = { .arc_name = "ip6-unicast", .node_name = "nat66-out2in", .runs_before = VNET_FEATURES ("ip6-lookup"), + .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"), }; /* 
*INDENT-ON* */ @@ -99,6 +102,9 @@ nat66_interface_add_del (u32 sw_if_index, u8 is_inside, u8 is_add) } feature_name = is_inside ? "nat66-in2out" : "nat66-out2in"; + int rv = ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add); + if (rv) + return rv; return vnet_feature_enable_disable ("ip6-unicast", feature_name, sw_if_index, is_add, 0, 0); } diff --git a/src/plugins/nat/nat66_in2out.c b/src/plugins/nat/nat66_in2out.c index ac1f3298415..437d66550f6 100644 --- a/src/plugins/nat/nat66_in2out.c +++ b/src/plugins/nat/nat66_in2out.c @@ -156,7 +156,7 @@ VLIB_NODE_FN (nat66_in2out_node) (vlib_main_t * vm, if (PREDICT_FALSE (ip6_parse - (ip60, b0->current_length, &l4_protocol0, &l4_offset0, + (vm, b0, ip60, b0->current_length, &l4_protocol0, &l4_offset0, &frag_offset0))) { next0 = NAT66_IN2OUT_NEXT_DROP; diff --git a/src/plugins/nat/nat66_out2in.c b/src/plugins/nat/nat66_out2in.c index d404d9f71eb..8386cd3ca73 100644 --- a/src/plugins/nat/nat66_out2in.c +++ b/src/plugins/nat/nat66_out2in.c @@ -116,7 +116,7 @@ VLIB_NODE_FN (nat66_out2in_node) (vlib_main_t * vm, if (PREDICT_FALSE (ip6_parse - (ip60, b0->current_length, &l4_protocol0, &l4_offset0, + (vm, b0, ip60, b0->current_length, &l4_protocol0, &l4_offset0, &frag_offset0))) { next0 = NAT66_OUT2IN_NEXT_DROP; diff --git a/src/plugins/nat/nat_api.c b/src/plugins/nat/nat_api.c index b83ea0b49f8..6df1a851e48 100644 --- a/src/plugins/nat/nat_api.c +++ b/src/plugins/nat/nat_api.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -303,156 +302,6 @@ vl_api_nat_ipfix_enable_disable_t_print (vl_api_nat_ipfix_enable_disable_t * FINISH; } -static void -vl_api_nat_set_reass_t_handler (vl_api_nat_set_reass_t * mp) -{ - snat_main_t *sm = &snat_main; - vl_api_nat_set_reass_reply_t *rmp; - int rv = 0; - - rv = - nat_reass_set (ntohl (mp->timeout), ntohs (mp->max_reass), mp->max_frag, - mp->drop_frag, mp->is_ip6); - - REPLY_MACRO (VL_API_NAT_SET_REASS_REPLY); -} - -static void * 
-vl_api_nat_set_reass_t_print (vl_api_nat_set_reass_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: nat_set_reass "); - s = format (s, "timeout %d max_reass %d max_frag %d drop_frag %d is_ip6 %d", - clib_host_to_net_u32 (mp->timeout), - clib_host_to_net_u16 (mp->max_reass), - mp->max_frag, mp->drop_frag, mp->is_ip6); - - FINISH; -} - -static void -vl_api_nat_get_reass_t_handler (vl_api_nat_get_reass_t * mp) -{ - snat_main_t *sm = &snat_main; - vl_api_nat_get_reass_reply_t *rmp; - int rv = 0; - - /* *INDENT-OFF* */ - REPLY_MACRO2 (VL_API_NAT_GET_REASS_REPLY, - ({ - rmp->ip4_timeout = htonl (nat_reass_get_timeout(0)); - rmp->ip4_max_reass = htons (nat_reass_get_max_reass(0)); - rmp->ip4_max_frag = nat_reass_get_max_frag(0); - rmp->ip4_drop_frag = nat_reass_is_drop_frag(0); - rmp->ip6_timeout = htonl (nat_reass_get_timeout(1)); - rmp->ip6_max_reass = htons (nat_reass_get_max_reass(1)); - rmp->ip6_max_frag = nat_reass_get_max_frag(1); - rmp->ip6_drop_frag = nat_reass_is_drop_frag(1); - })) - /* *INDENT-ON* */ -} - -static void * -vl_api_nat_get_reass_t_print (vl_api_nat_get_reass_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: nat_get_reass"); - - FINISH; -} - -typedef struct nat_api_walk_ctx_t_ -{ - vl_api_registration_t *reg; - u32 context; -} nat_api_walk_ctx_t; - -static int -nat_ip4_reass_walk_api (nat_reass_ip4_t * reass, void *arg) -{ - vl_api_nat_reass_details_t *rmp; - snat_main_t *sm = &snat_main; - nat_api_walk_ctx_t *ctx = arg; - ip46_address_t ip_address; - - rmp = vl_msg_api_alloc (sizeof (*rmp)); - clib_memset (rmp, 0, sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_NAT_REASS_DETAILS + sm->msg_id_base); - rmp->context = ctx->context; - - clib_memcpy (&ip_address.ip4, &reass->key.src, 4); - ip_address_encode (&ip_address, IP46_TYPE_IP4, &rmp->src_addr); - - clib_memcpy (&ip_address.ip4, &reass->key.dst, 4); - ip_address_encode (&ip_address, IP46_TYPE_IP4, &rmp->dst_addr); - - rmp->proto = reass->key.proto; - rmp->frag_id = 
ntohl (reass->key.frag_id); - rmp->frag_n = reass->frag_n; - - vl_api_send_msg (ctx->reg, (u8 *) rmp); - - return 0; -} - -static int -nat_ip6_reass_walk_api (nat_reass_ip6_t * reass, void *arg) -{ - vl_api_nat_reass_details_t *rmp; - snat_main_t *sm = &snat_main; - nat_api_walk_ctx_t *ctx = arg; - ip46_address_t ip_address; - - rmp = vl_msg_api_alloc (sizeof (*rmp)); - clib_memset (rmp, 0, sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_NAT_REASS_DETAILS + sm->msg_id_base); - rmp->context = ctx->context; - - clib_memcpy (&ip_address.ip6, &reass->key.src, 16); - ip_address_encode (&ip_address, IP46_TYPE_IP6, &rmp->src_addr); - - clib_memcpy (&ip_address.ip6, &reass->key.dst, 16); - ip_address_encode (&ip_address, IP46_TYPE_IP6, &rmp->dst_addr); - - rmp->proto = reass->key.proto; - rmp->frag_id = ntohl (reass->key.frag_id); - rmp->frag_n = reass->frag_n; - - vl_api_send_msg (ctx->reg, (u8 *) rmp); - - return 0; -} - -static void -vl_api_nat_reass_dump_t_handler (vl_api_nat_reass_dump_t * mp) -{ - vl_api_registration_t *reg; - - reg = vl_api_client_index_to_registration (mp->client_index); - if (!reg) - return; - - nat_api_walk_ctx_t ctx = { - .reg = reg, - .context = mp->context, - }; - - nat_ip4_reass_walk (nat_ip4_reass_walk_api, &ctx); - nat_ip6_reass_walk (nat_ip6_reass_walk_api, &ctx); -} - -static void * -vl_api_nat_reass_dump_t_print (vl_api_nat_reass_dump_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: nat_reass_dump"); - - FINISH; -} - static void vl_api_nat_set_timeouts_t_handler (vl_api_nat_set_timeouts_t * mp) { @@ -3471,9 +3320,6 @@ _(NAT_SET_WORKERS, nat_set_workers) \ _(NAT_WORKER_DUMP, nat_worker_dump) \ _(NAT_SET_LOG_LEVEL, nat_set_log_level) \ _(NAT_IPFIX_ENABLE_DISABLE, nat_ipfix_enable_disable) \ -_(NAT_SET_REASS, nat_set_reass) \ -_(NAT_GET_REASS, nat_get_reass) \ -_(NAT_REASS_DUMP, nat_reass_dump) \ _(NAT_SET_TIMEOUTS, nat_set_timeouts) \ _(NAT_GET_TIMEOUTS, nat_get_timeouts) \ _(NAT_SET_ADDR_AND_PORT_ALLOC_ALG, 
nat_set_addr_and_port_alloc_alg) \ diff --git a/src/plugins/nat/nat_det_in2out.c b/src/plugins/nat/nat_det_in2out.c index 832a2bae947..384a1eb54b9 100644 --- a/src/plugins/nat/nat_det_in2out.c +++ b/src/plugins/nat/nat_det_in2out.c @@ -121,14 +121,16 @@ icmp_match_in2out_det (snat_main_t * sm, vlib_node_runtime_t * node, sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); - if (!icmp_is_error_message (icmp0)) + if (!icmp_type_is_error_message + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags)) { protocol = SNAT_PROTOCOL_ICMP; in_addr = ip0->src_address; - in_port = echo0->identifier; + in_port = vnet_buffer (b0)->ip.reass.l4_src_port; } else { + /* if error message, then it's not fragmented and we can access it */ inner_ip0 = (ip4_header_t *) (echo0 + 1); l4_header = ip4_next_header (inner_ip0); protocol = ip_proto_to_snat_proto (inner_ip0->protocol); @@ -213,8 +215,10 @@ icmp_match_in2out_det (snat_main_t * sm, vlib_node_runtime_t * node, } } - if (PREDICT_FALSE (icmp0->type != ICMP4_echo_request && - !icmp_is_error_message (icmp0))) + if (PREDICT_FALSE + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != ICMP4_echo_request + && !icmp_type_is_error_message (vnet_buffer (b0)->ip. 
+ reass.icmp_type_or_tcp_flags))) { b0->error = node->errors[NAT_DET_IN2OUT_ERROR_BAD_ICMP_TYPE]; next0 = NAT_DET_IN2OUT_NEXT_DROP; diff --git a/src/plugins/nat/nat_det_out2in.c b/src/plugins/nat/nat_det_out2in.c index c4bd096deb1..74210e17860 100644 --- a/src/plugins/nat/nat_det_out2in.c +++ b/src/plugins/nat/nat_det_out2in.c @@ -117,16 +117,18 @@ icmp_match_out2in_det (snat_main_t * sm, vlib_node_runtime_t * node, echo0 = (icmp_echo_header_t *) (icmp0 + 1); sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - if (!icmp_is_error_message (icmp0)) + if (!icmp_type_is_error_message + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags)) { protocol = SNAT_PROTOCOL_ICMP; key0.ext_host_addr = ip0->src_address; key0.ext_host_port = 0; - key0.out_port = echo0->identifier; + key0.out_port = vnet_buffer (b0)->ip.reass.l4_src_port; out_addr = ip0->dst_address; } else { + /* if error message, then it's not fragmented and we can access it */ inner_ip0 = (ip4_header_t *) (echo0 + 1); l4_header = ip4_next_header (inner_ip0); protocol = ip_proto_to_snat_proto (inner_ip0->protocol); @@ -191,8 +193,10 @@ icmp_match_out2in_det (snat_main_t * sm, vlib_node_runtime_t * node, goto out; } - if (PREDICT_FALSE (icmp0->type != ICMP4_echo_reply && - !icmp_is_error_message (icmp0))) + if (PREDICT_FALSE + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != ICMP4_echo_reply + && !icmp_type_is_error_message (vnet_buffer (b0)->ip. 
+ reass.icmp_type_or_tcp_flags))) { b0->error = node->errors[NAT_DET_OUT2IN_ERROR_BAD_ICMP_TYPE]; next0 = NAT_DET_OUT2IN_NEXT_DROP; diff --git a/src/plugins/nat/nat_format.c b/src/plugins/nat/nat_format.c index 7dcdff6c769..17f64b9b222 100644 --- a/src/plugins/nat/nat_format.c +++ b/src/plugins/nat/nat_format.c @@ -333,20 +333,6 @@ format_det_map_ses (u8 * s, va_list * args) return s; } -u8 * -format_nat44_reass_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - nat44_reass_trace_t *t = va_arg (*args, nat44_reass_trace_t *); - - s = format (s, "NAT44_REASS: sw_if_index %d, next index %d, status %s", - t->sw_if_index, t->next_index, - t->cached ? "cached" : "translated"); - - return s; -} - /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/nat/nat_inlines.h b/src/plugins/nat/nat_inlines.h index 2f68ed4a700..a58317acdf3 100644 --- a/src/plugins/nat/nat_inlines.h +++ b/src/plugins/nat/nat_inlines.h @@ -171,9 +171,9 @@ snat_proto_to_ip_proto (snat_protocol_t snat_proto) } static_always_inline u8 -icmp_is_error_message (icmp46_header_t * icmp) +icmp_type_is_error_message (u8 icmp_type) { - switch (icmp->type) + switch (icmp_type) { case ICMP4_destination_unreachable: case ICMP4_time_exceeded: @@ -323,25 +323,28 @@ nat44_delete_session (snat_main_t * sm, snat_session_t * ses, */ always_inline int nat44_set_tcp_session_state_i2o (snat_main_t * sm, snat_session_t * ses, - tcp_header_t * tcp, u32 thread_index) + vlib_buffer_t * b, u32 thread_index) { - if ((ses->state == 0) && (tcp->flags & TCP_FLAG_RST)) + u8 tcp_flags = vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags; + u32 tcp_ack_number = vnet_buffer (b)->ip.reass.tcp_ack_number; + u32 tcp_seq_number = vnet_buffer (b)->ip.reass.tcp_seq_number; + if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST)) ses->state = NAT44_SES_RST; - if ((ses->state == NAT44_SES_RST) && !(tcp->flags & 
TCP_FLAG_RST)) + if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST)) ses->state = 0; - if ((tcp->flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) && + if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) && (ses->state & NAT44_SES_O2I_SYN)) ses->state = 0; - if (tcp->flags & TCP_FLAG_SYN) + if (tcp_flags & TCP_FLAG_SYN) ses->state |= NAT44_SES_I2O_SYN; - if (tcp->flags & TCP_FLAG_FIN) + if (tcp_flags & TCP_FLAG_FIN) { - ses->i2o_fin_seq = clib_net_to_host_u32 (tcp->seq_number); + ses->i2o_fin_seq = clib_net_to_host_u32 (tcp_seq_number); ses->state |= NAT44_SES_I2O_FIN; } - if ((tcp->flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_O2I_FIN)) + if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_O2I_FIN)) { - if (clib_net_to_host_u32 (tcp->ack_number) > ses->o2i_fin_seq) + if (clib_net_to_host_u32 (tcp_ack_number) > ses->o2i_fin_seq) ses->state |= NAT44_SES_O2I_FIN_ACK; } if (nat44_is_ses_closed (ses) @@ -356,25 +359,26 @@ nat44_set_tcp_session_state_i2o (snat_main_t * sm, snat_session_t * ses, always_inline int nat44_set_tcp_session_state_o2i (snat_main_t * sm, snat_session_t * ses, - tcp_header_t * tcp, u32 thread_index) + u8 tcp_flags, u32 tcp_ack_number, + u32 tcp_seq_number, u32 thread_index) { - if ((ses->state == 0) && (tcp->flags & TCP_FLAG_RST)) + if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST)) ses->state = NAT44_SES_RST; - if ((ses->state == NAT44_SES_RST) && !(tcp->flags & TCP_FLAG_RST)) + if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST)) ses->state = 0; - if ((tcp->flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) && + if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) && (ses->state & NAT44_SES_O2I_SYN)) ses->state = 0; - if (tcp->flags & TCP_FLAG_SYN) + if (tcp_flags & TCP_FLAG_SYN) ses->state |= NAT44_SES_O2I_SYN; - if (tcp->flags & TCP_FLAG_FIN) + if (tcp_flags & TCP_FLAG_FIN) { - ses->o2i_fin_seq = clib_net_to_host_u32 (tcp->seq_number); + ses->o2i_fin_seq = 
clib_net_to_host_u32 (tcp_seq_number); ses->state |= NAT44_SES_O2I_FIN; } - if ((tcp->flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_FIN)) + if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_FIN)) { - if (clib_net_to_host_u32 (tcp->ack_number) > ses->i2o_fin_seq) + if (clib_net_to_host_u32 (tcp_ack_number) > ses->i2o_fin_seq) ses->state |= NAT44_SES_I2O_FIN_ACK; } if (nat44_is_ses_closed (ses)) @@ -466,7 +470,8 @@ make_sm_kv (clib_bihash_kv_8_8_t * kv, ip4_address_t * addr, u8 proto, } static_always_inline int -get_icmp_i2o_ed_key (ip4_header_t * ip0, nat_ed_ses_key_t * p_key0) +get_icmp_i2o_ed_key (vlib_buffer_t * b, ip4_header_t * ip0, + nat_ed_ses_key_t * p_key0) { icmp46_header_t *icmp0; nat_ed_ses_key_t key0; @@ -478,12 +483,13 @@ get_icmp_i2o_ed_key (ip4_header_t * ip0, nat_ed_ses_key_t * p_key0) icmp0 = (icmp46_header_t *) ip4_next_header (ip0); echo0 = (icmp_echo_header_t *) (icmp0 + 1); - if (!icmp_is_error_message (icmp0)) + if (!icmp_type_is_error_message + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) { key0.proto = IP_PROTOCOL_ICMP; key0.l_addr = ip0->src_address; key0.r_addr = ip0->dst_address; - key0.l_port = echo0->identifier; + key0.l_port = vnet_buffer (b)->ip.reass.l4_src_port; // TODO should this be src or dst? 
key0.r_port = 0; } else @@ -516,7 +522,8 @@ get_icmp_i2o_ed_key (ip4_header_t * ip0, nat_ed_ses_key_t * p_key0) static_always_inline int -get_icmp_o2i_ed_key (ip4_header_t * ip0, nat_ed_ses_key_t * p_key0) +get_icmp_o2i_ed_key (vlib_buffer_t * b, ip4_header_t * ip0, + nat_ed_ses_key_t * p_key0) { icmp46_header_t *icmp0; nat_ed_ses_key_t key0; @@ -528,12 +535,13 @@ get_icmp_o2i_ed_key (ip4_header_t * ip0, nat_ed_ses_key_t * p_key0) icmp0 = (icmp46_header_t *) ip4_next_header (ip0); echo0 = (icmp_echo_header_t *) (icmp0 + 1); - if (!icmp_is_error_message (icmp0)) + if (!icmp_type_is_error_message + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) { key0.proto = IP_PROTOCOL_ICMP; key0.l_addr = ip0->dst_address; key0.r_addr = ip0->src_address; - key0.l_port = echo0->identifier; + key0.l_port = vnet_buffer (b)->ip.reass.l4_src_port; // TODO should this be src or dst? key0.r_port = 0; } else diff --git a/src/plugins/nat/nat_reass.c b/src/plugins/nat/nat_reass.c deleted file mode 100755 index b518c0cb916..00000000000 --- a/src/plugins/nat/nat_reass.c +++ /dev/null @@ -1,893 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -/** - * @file - * @brief NAT plugin virtual fragmentation reassembly - */ - -#include -#include -#include - -nat_reass_main_t nat_reass_main; - -static u32 -nat_reass_get_nbuckets (u8 is_ip6) -{ - nat_reass_main_t *srm = &nat_reass_main; - u32 nbuckets; - u8 i; - - if (is_ip6) - nbuckets = (u32) (srm->ip6_max_reass / NAT_REASS_HT_LOAD_FACTOR); - else - nbuckets = (u32) (srm->ip4_max_reass / NAT_REASS_HT_LOAD_FACTOR); - - for (i = 0; i < 31; i++) - if ((1 << i) >= nbuckets) - break; - nbuckets = 1 << i; - - return nbuckets; -} - -static_always_inline void -nat_ip4_reass_get_frags_inline (nat_reass_ip4_t * reass, u32 ** bi) -{ - nat_reass_main_t *srm = &nat_reass_main; - u32 elt_index; - dlist_elt_t *elt; - - while ((elt_index = - clib_dlist_remove_head (srm->ip4_frags_list_pool, - reass->frags_per_reass_list_head_index)) != - ~0) - { - elt = pool_elt_at_index (srm->ip4_frags_list_pool, elt_index); - vec_add1 (*bi, elt->value); - reass->frag_n--; - pool_put_index (srm->ip4_frags_list_pool, elt_index); - } -} - -static_always_inline void -nat_ip6_reass_get_frags_inline (nat_reass_ip6_t * reass, u32 ** bi) -{ - nat_reass_main_t *srm = &nat_reass_main; - u32 elt_index; - dlist_elt_t *elt; - - while ((elt_index = - clib_dlist_remove_head (srm->ip6_frags_list_pool, - reass->frags_per_reass_list_head_index)) != - ~0) - { - elt = pool_elt_at_index (srm->ip6_frags_list_pool, elt_index); - vec_add1 (*bi, elt->value); - reass->frag_n--; - pool_put_index (srm->ip6_frags_list_pool, elt_index); - } -} - -int -nat_reass_set (u32 timeout, u16 max_reass, u8 max_frag, u8 drop_frag, - u8 is_ip6) -{ - nat_reass_main_t *srm = &nat_reass_main; - u32 nbuckets; - - if (is_ip6) - { - if (srm->ip6_max_reass != max_reass) - { - clib_spinlock_lock_if_init (&srm->ip6_reass_lock); - - srm->ip6_max_reass = max_reass; - pool_free (srm->ip6_reass_pool); - pool_alloc (srm->ip6_reass_pool, srm->ip4_max_reass); - nbuckets = nat_reass_get_nbuckets (0); - clib_bihash_free_48_8 
(&srm->ip6_reass_hash); - clib_bihash_init_48_8 (&srm->ip6_reass_hash, "nat-ip6-reass", - nbuckets, nbuckets * 1024); - - clib_spinlock_unlock_if_init (&srm->ip6_reass_lock); - } - srm->ip6_timeout = timeout; - srm->ip6_max_frag = max_frag; - srm->ip6_drop_frag = drop_frag; - } - else - { - if (srm->ip4_max_reass != max_reass) - { - clib_spinlock_lock_if_init (&srm->ip4_reass_lock); - - srm->ip4_max_reass = max_reass; - pool_free (srm->ip4_reass_pool); - pool_alloc (srm->ip4_reass_pool, srm->ip4_max_reass); - nbuckets = nat_reass_get_nbuckets (0); - clib_bihash_free_16_8 (&srm->ip4_reass_hash); - clib_bihash_init_16_8 (&srm->ip4_reass_hash, "nat-ip4-reass", - nbuckets, nbuckets * 1024); - clib_spinlock_unlock_if_init (&srm->ip4_reass_lock); - } - srm->ip4_timeout = timeout; - srm->ip4_max_frag = max_frag; - srm->ip4_drop_frag = drop_frag; - } - - return 0; -} - -u32 -nat_reass_get_timeout (u8 is_ip6) -{ - nat_reass_main_t *srm = &nat_reass_main; - - if (is_ip6) - return srm->ip6_timeout; - - return srm->ip4_timeout; -} - -u16 -nat_reass_get_max_reass (u8 is_ip6) -{ - nat_reass_main_t *srm = &nat_reass_main; - - if (is_ip6) - return srm->ip6_max_reass; - - return srm->ip4_max_reass; -} - -u8 -nat_reass_get_max_frag (u8 is_ip6) -{ - nat_reass_main_t *srm = &nat_reass_main; - - if (is_ip6) - return srm->ip6_max_frag; - - return srm->ip4_max_frag; -} - -u8 -nat_reass_is_drop_frag (u8 is_ip6) -{ - nat_reass_main_t *srm = &nat_reass_main; - - if (is_ip6) - return srm->ip6_drop_frag; - - return srm->ip4_drop_frag; -} - -static_always_inline nat_reass_ip4_t * -nat_ip4_reass_lookup (nat_reass_ip4_key_t * k, f64 now) -{ - nat_reass_main_t *srm = &nat_reass_main; - clib_bihash_kv_16_8_t kv, value; - nat_reass_ip4_t *reass; - - kv.key[0] = k->as_u64[0]; - kv.key[1] = k->as_u64[1]; - - if (clib_bihash_search_16_8 (&srm->ip4_reass_hash, &kv, &value)) - return 0; - - reass = pool_elt_at_index (srm->ip4_reass_pool, value.value); - if (now < reass->last_heard + (f64) 
srm->ip4_timeout) - return reass; - - return 0; -} - -nat_reass_ip4_t * -nat_ip4_reass_find (ip4_address_t src, ip4_address_t dst, u16 frag_id, - u8 proto) -{ - nat_reass_main_t *srm = &nat_reass_main; - nat_reass_ip4_t *reass = 0; - nat_reass_ip4_key_t k; - f64 now = vlib_time_now (srm->vlib_main); - - k.src.as_u32 = src.as_u32; - k.dst.as_u32 = dst.as_u32; - k.frag_id = frag_id; - k.proto = proto; - - clib_spinlock_lock_if_init (&srm->ip4_reass_lock); - reass = nat_ip4_reass_lookup (&k, now); - clib_spinlock_unlock_if_init (&srm->ip4_reass_lock); - - return reass; -} - -nat_reass_ip4_t * -nat_ip4_reass_create (ip4_address_t src, ip4_address_t dst, u16 frag_id, - u8 proto) -{ - nat_reass_main_t *srm = &nat_reass_main; - nat_reass_ip4_t *reass = 0; - dlist_elt_t *elt, *per_reass_list_head_elt; - u32 elt_index; - f64 now = vlib_time_now (srm->vlib_main); - nat_reass_ip4_key_t k; - clib_bihash_kv_16_8_t kv; - - clib_spinlock_lock_if_init (&srm->ip4_reass_lock); - - if (srm->ip4_reass_n >= srm->ip4_max_reass) - { - nat_elog_warn ("no free resassembly slot"); - goto unlock; - } - - pool_get (srm->ip4_reass_pool, reass); - pool_get (srm->ip4_reass_lru_list_pool, elt); - reass->lru_list_index = elt_index = elt - srm->ip4_reass_lru_list_pool; - clib_dlist_init (srm->ip4_reass_lru_list_pool, elt_index); - elt->value = reass - srm->ip4_reass_pool; - clib_dlist_addtail (srm->ip4_reass_lru_list_pool, - srm->ip4_reass_head_index, elt_index); - pool_get (srm->ip4_frags_list_pool, per_reass_list_head_elt); - reass->frags_per_reass_list_head_index = - per_reass_list_head_elt - srm->ip4_frags_list_pool; - clib_dlist_init (srm->ip4_frags_list_pool, - reass->frags_per_reass_list_head_index); - srm->ip4_reass_n++; - k.src.as_u32 = src.as_u32; - k.dst.as_u32 = dst.as_u32; - k.frag_id = frag_id; - k.proto = proto; - reass->key.as_u64[0] = kv.key[0] = k.as_u64[0]; - reass->key.as_u64[1] = kv.key[1] = k.as_u64[1]; - kv.value = reass - srm->ip4_reass_pool; - reass->sess_index = (u32) ~ 0; 
- reass->thread_index = (u32) ~ 0; - reass->last_heard = now; - reass->frag_n = 0; - reass->flags = 0; - reass->classify_next = NAT_REASS_IP4_CLASSIFY_NONE; - if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 1)) - nat_elog_warn ("ip4_reass_hash add key failed"); - -unlock: - clib_spinlock_unlock_if_init (&srm->ip4_reass_lock); - return reass; -} - -nat_reass_ip4_t * -nat_ip4_reass_find_or_create (ip4_address_t src, ip4_address_t dst, - u16 frag_id, u8 proto, u8 reset_timeout, - u32 ** bi_to_drop) -{ - nat_reass_main_t *srm = &nat_reass_main; - nat_reass_ip4_t *reass = 0; - nat_reass_ip4_key_t k; - f64 now = vlib_time_now (srm->vlib_main); - dlist_elt_t *oldest_elt, *elt; - dlist_elt_t *per_reass_list_head_elt; - u32 oldest_index, elt_index; - clib_bihash_kv_16_8_t kv, value; - - k.src.as_u32 = src.as_u32; - k.dst.as_u32 = dst.as_u32; - k.frag_id = frag_id; - k.proto = proto; - - clib_spinlock_lock_if_init (&srm->ip4_reass_lock); - - reass = nat_ip4_reass_lookup (&k, now); - if (reass) - { - if (reset_timeout) - { - reass->last_heard = now; - clib_dlist_remove (srm->ip4_reass_lru_list_pool, - reass->lru_list_index); - clib_dlist_addtail (srm->ip4_reass_lru_list_pool, - srm->ip4_reass_head_index, - reass->lru_list_index); - } - - if (reass->flags & NAT_REASS_FLAG_MAX_FRAG_DROP) - { - reass = 0; - goto unlock; - } - - goto unlock; - } - - if (srm->ip4_reass_n >= srm->ip4_max_reass) - { - oldest_index = - clib_dlist_remove_head (srm->ip4_reass_lru_list_pool, - srm->ip4_reass_head_index); - ASSERT (oldest_index != ~0); - oldest_elt = - pool_elt_at_index (srm->ip4_reass_lru_list_pool, oldest_index); - reass = pool_elt_at_index (srm->ip4_reass_pool, oldest_elt->value); - if (now < reass->last_heard + (f64) srm->ip4_timeout) - { - clib_dlist_addhead (srm->ip4_reass_lru_list_pool, - srm->ip4_reass_head_index, oldest_index); - nat_elog_warn ("no free resassembly slot"); - reass = 0; - goto unlock; - } - - clib_dlist_addtail (srm->ip4_reass_lru_list_pool, - 
srm->ip4_reass_head_index, oldest_index); - - kv.key[0] = reass->key.as_u64[0]; - kv.key[1] = reass->key.as_u64[1]; - if (!clib_bihash_search_16_8 (&srm->ip4_reass_hash, &kv, &value)) - { - if (value.value == (reass - srm->ip4_reass_pool)) - { - if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 0)) - { - reass = 0; - goto unlock; - } - } - } - - nat_ip4_reass_get_frags_inline (reass, bi_to_drop); - } - else - { - pool_get (srm->ip4_reass_pool, reass); - pool_get (srm->ip4_reass_lru_list_pool, elt); - reass->lru_list_index = elt_index = elt - srm->ip4_reass_lru_list_pool; - clib_dlist_init (srm->ip4_reass_lru_list_pool, elt_index); - elt->value = reass - srm->ip4_reass_pool; - clib_dlist_addtail (srm->ip4_reass_lru_list_pool, - srm->ip4_reass_head_index, elt_index); - pool_get (srm->ip4_frags_list_pool, per_reass_list_head_elt); - reass->frags_per_reass_list_head_index = - per_reass_list_head_elt - srm->ip4_frags_list_pool; - clib_dlist_init (srm->ip4_frags_list_pool, - reass->frags_per_reass_list_head_index); - srm->ip4_reass_n++; - } - - reass->key.as_u64[0] = kv.key[0] = k.as_u64[0]; - reass->key.as_u64[1] = kv.key[1] = k.as_u64[1]; - kv.value = reass - srm->ip4_reass_pool; - reass->sess_index = (u32) ~ 0; - reass->thread_index = (u32) ~ 0; - reass->last_heard = now; - reass->frag_n = 0; - reass->flags = 0; - reass->classify_next = NAT_REASS_IP4_CLASSIFY_NONE; - - if (clib_bihash_add_del_16_8 (&srm->ip4_reass_hash, &kv, 1)) - { - reass = 0; - goto unlock; - } - -unlock: - clib_spinlock_unlock_if_init (&srm->ip4_reass_lock); - return reass; -} - -int -nat_ip4_reass_add_fragment (u32 thread_index, nat_reass_ip4_t * reass, - u32 bi, u32 ** bi_to_drop) -{ - nat_reass_main_t *srm = &nat_reass_main; - dlist_elt_t *elt; - u32 elt_index; - - if (reass->frag_n >= srm->ip4_max_frag) - { - nat_ipfix_logging_max_fragments_ip4 (thread_index, srm->ip4_max_frag, - &reass->key.src); - reass->flags |= NAT_REASS_FLAG_MAX_FRAG_DROP; - nat_ip4_reass_get_frags_inline (reass, 
bi_to_drop); - return -1; - } - - clib_spinlock_lock_if_init (&srm->ip4_reass_lock); - - pool_get (srm->ip4_frags_list_pool, elt); - elt_index = elt - srm->ip4_frags_list_pool; - clib_dlist_init (srm->ip4_frags_list_pool, elt_index); - elt->value = bi; - clib_dlist_addtail (srm->ip4_frags_list_pool, - reass->frags_per_reass_list_head_index, elt_index); - reass->frag_n++; - - clib_spinlock_unlock_if_init (&srm->ip4_reass_lock); - - return 0; -} - -void -nat_ip4_reass_get_frags (nat_reass_ip4_t * reass, u32 ** bi) -{ - nat_reass_main_t *srm = &nat_reass_main; - - clib_spinlock_lock_if_init (&srm->ip4_reass_lock); - - nat_ip4_reass_get_frags_inline (reass, bi); - - clib_spinlock_unlock_if_init (&srm->ip4_reass_lock); -} - -void -nat_ip4_reass_walk (nat_ip4_reass_walk_fn_t fn, void *ctx) -{ - nat_reass_ip4_t *reass; - nat_reass_main_t *srm = &nat_reass_main; - f64 now = vlib_time_now (srm->vlib_main); - - /* *INDENT-OFF* */ - pool_foreach (reass, srm->ip4_reass_pool, - ({ - if (now < reass->last_heard + (f64) srm->ip4_timeout) - { - if (fn (reass, ctx)) - return; - } - })); - /* *INDENT-ON* */ -} - -static_always_inline nat_reass_ip6_t * -nat_ip6_reass_lookup (nat_reass_ip6_key_t * k, f64 now) -{ - nat_reass_main_t *srm = &nat_reass_main; - clib_bihash_kv_48_8_t kv, value; - nat_reass_ip6_t *reass; - - k->unused = 0; - kv.key[0] = k->as_u64[0]; - kv.key[1] = k->as_u64[1]; - kv.key[2] = k->as_u64[2]; - kv.key[3] = k->as_u64[3]; - kv.key[4] = k->as_u64[4]; - kv.key[5] = k->as_u64[5]; - - if (clib_bihash_search_48_8 (&srm->ip6_reass_hash, &kv, &value)) - return 0; - - reass = pool_elt_at_index (srm->ip6_reass_pool, value.value); - if (now < reass->last_heard + (f64) srm->ip6_timeout) - return reass; - - return 0; -} - -nat_reass_ip6_t * -nat_ip6_reass_find_or_create (ip6_address_t src, ip6_address_t dst, - u32 frag_id, u8 proto, u8 reset_timeout, - u32 ** bi_to_drop) -{ - nat_reass_main_t *srm = &nat_reass_main; - nat_reass_ip6_t *reass = 0; - nat_reass_ip6_key_t k; - f64 
now = vlib_time_now (srm->vlib_main); - dlist_elt_t *oldest_elt, *elt; - dlist_elt_t *per_reass_list_head_elt; - u32 oldest_index, elt_index; - clib_bihash_kv_48_8_t kv; - - k.src.as_u64[0] = src.as_u64[0]; - k.src.as_u64[1] = src.as_u64[1]; - k.dst.as_u64[0] = dst.as_u64[0]; - k.dst.as_u64[1] = dst.as_u64[1]; - k.frag_id = frag_id; - k.proto = proto; - k.unused = 0; - - clib_spinlock_lock_if_init (&srm->ip6_reass_lock); - - reass = nat_ip6_reass_lookup (&k, now); - if (reass) - { - if (reset_timeout) - { - reass->last_heard = now; - clib_dlist_remove (srm->ip6_reass_lru_list_pool, - reass->lru_list_index); - clib_dlist_addtail (srm->ip6_reass_lru_list_pool, - srm->ip6_reass_head_index, - reass->lru_list_index); - } - - if (reass->flags & NAT_REASS_FLAG_MAX_FRAG_DROP) - { - reass = 0; - goto unlock; - } - - goto unlock; - } - - if (srm->ip6_reass_n >= srm->ip6_max_reass) - { - oldest_index = - clib_dlist_remove_head (srm->ip6_reass_lru_list_pool, - srm->ip6_reass_head_index); - ASSERT (oldest_index != ~0); - oldest_elt = - pool_elt_at_index (srm->ip4_reass_lru_list_pool, oldest_index); - reass = pool_elt_at_index (srm->ip6_reass_pool, oldest_elt->value); - if (now < reass->last_heard + (f64) srm->ip6_timeout) - { - clib_dlist_addhead (srm->ip6_reass_lru_list_pool, - srm->ip6_reass_head_index, oldest_index); - nat_elog_warn ("no free resassembly slot"); - reass = 0; - goto unlock; - } - - clib_dlist_addtail (srm->ip6_reass_lru_list_pool, - srm->ip6_reass_head_index, oldest_index); - - kv.key[0] = k.as_u64[0]; - kv.key[1] = k.as_u64[1]; - kv.key[2] = k.as_u64[2]; - kv.key[3] = k.as_u64[3]; - kv.key[4] = k.as_u64[4]; - kv.key[5] = k.as_u64[5]; - if (clib_bihash_add_del_48_8 (&srm->ip6_reass_hash, &kv, 0)) - { - reass = 0; - goto unlock; - } - - nat_ip6_reass_get_frags_inline (reass, bi_to_drop); - } - else - { - pool_get (srm->ip6_reass_pool, reass); - pool_get (srm->ip6_reass_lru_list_pool, elt); - reass->lru_list_index = elt_index = elt - 
srm->ip6_reass_lru_list_pool; - clib_dlist_init (srm->ip6_reass_lru_list_pool, elt_index); - elt->value = reass - srm->ip6_reass_pool; - clib_dlist_addtail (srm->ip6_reass_lru_list_pool, - srm->ip6_reass_head_index, elt_index); - pool_get (srm->ip6_frags_list_pool, per_reass_list_head_elt); - reass->frags_per_reass_list_head_index = - per_reass_list_head_elt - srm->ip6_frags_list_pool; - clib_dlist_init (srm->ip6_frags_list_pool, - reass->frags_per_reass_list_head_index); - srm->ip6_reass_n++; - } - - reass->key.as_u64[0] = kv.key[0] = k.as_u64[0]; - reass->key.as_u64[1] = kv.key[1] = k.as_u64[1]; - reass->key.as_u64[2] = kv.key[2] = k.as_u64[2]; - reass->key.as_u64[3] = kv.key[3] = k.as_u64[3]; - reass->key.as_u64[4] = kv.key[4] = k.as_u64[4]; - reass->key.as_u64[5] = kv.key[5] = k.as_u64[5]; - kv.value = reass - srm->ip6_reass_pool; - reass->sess_index = (u32) ~ 0; - reass->last_heard = now; - - if (clib_bihash_add_del_48_8 (&srm->ip6_reass_hash, &kv, 1)) - { - reass = 0; - goto unlock; - } - -unlock: - clib_spinlock_unlock_if_init (&srm->ip6_reass_lock); - return reass; -} - -int -nat_ip6_reass_add_fragment (u32 thread_index, nat_reass_ip6_t * reass, - u32 bi, u32 ** bi_to_drop) -{ - nat_reass_main_t *srm = &nat_reass_main; - dlist_elt_t *elt; - u32 elt_index; - - if (reass->frag_n >= srm->ip6_max_frag) - { - nat_ipfix_logging_max_fragments_ip6 (thread_index, srm->ip6_max_frag, - &reass->key.src); - reass->flags |= NAT_REASS_FLAG_MAX_FRAG_DROP; - nat_ip6_reass_get_frags_inline (reass, bi_to_drop); - return -1; - } - - clib_spinlock_lock_if_init (&srm->ip6_reass_lock); - - pool_get (srm->ip6_frags_list_pool, elt); - elt_index = elt - srm->ip6_frags_list_pool; - clib_dlist_init (srm->ip6_frags_list_pool, elt_index); - elt->value = bi; - clib_dlist_addtail (srm->ip6_frags_list_pool, - reass->frags_per_reass_list_head_index, elt_index); - reass->frag_n++; - - clib_spinlock_unlock_if_init (&srm->ip6_reass_lock); - - return 0; -} - -void -nat_ip6_reass_get_frags 
(nat_reass_ip6_t * reass, u32 ** bi) -{ - nat_reass_main_t *srm = &nat_reass_main; - - clib_spinlock_lock_if_init (&srm->ip6_reass_lock); - - nat_ip6_reass_get_frags_inline (reass, bi); - - clib_spinlock_unlock_if_init (&srm->ip6_reass_lock); -} - -void -nat_ip6_reass_walk (nat_ip6_reass_walk_fn_t fn, void *ctx) -{ - nat_reass_ip6_t *reass; - nat_reass_main_t *srm = &nat_reass_main; - f64 now = vlib_time_now (srm->vlib_main); - - /* *INDENT-OFF* */ - pool_foreach (reass, srm->ip6_reass_pool, - ({ - if (now < reass->last_heard + (f64) srm->ip4_timeout) - { - if (fn (reass, ctx)) - return; - } - })); - /* *INDENT-ON* */ -} - -clib_error_t * -nat_reass_init (vlib_main_t * vm) -{ - nat_reass_main_t *srm = &nat_reass_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - clib_error_t *error = 0; - dlist_elt_t *head; - u32 nbuckets, head_index; - - srm->vlib_main = vm; - srm->vnet_main = vnet_get_main (); - - /* IPv4 */ - srm->ip4_timeout = NAT_REASS_TIMEOUT_DEFAULT; - srm->ip4_max_reass = NAT_MAX_REASS_DEAFULT; - srm->ip4_max_frag = NAT_MAX_FRAG_DEFAULT; - srm->ip4_drop_frag = 0; - srm->ip4_reass_n = 0; - - if (tm->n_vlib_mains > 1) - clib_spinlock_init (&srm->ip4_reass_lock); - - pool_alloc (srm->ip4_reass_pool, srm->ip4_max_reass); - - nbuckets = nat_reass_get_nbuckets (0); - clib_bihash_init_16_8 (&srm->ip4_reass_hash, "nat-ip4-reass", nbuckets, - nbuckets * 1024); - - pool_get (srm->ip4_reass_lru_list_pool, head); - srm->ip4_reass_head_index = head_index = - head - srm->ip4_reass_lru_list_pool; - clib_dlist_init (srm->ip4_reass_lru_list_pool, head_index); - - /* IPv6 */ - srm->ip6_timeout = NAT_REASS_TIMEOUT_DEFAULT; - srm->ip6_max_reass = NAT_MAX_REASS_DEAFULT; - srm->ip6_max_frag = NAT_MAX_FRAG_DEFAULT; - srm->ip6_drop_frag = 0; - srm->ip6_reass_n = 0; - - if (tm->n_vlib_mains > 1) - clib_spinlock_init (&srm->ip6_reass_lock); - - pool_alloc (srm->ip6_reass_pool, srm->ip6_max_reass); - - nbuckets = nat_reass_get_nbuckets (1); - clib_bihash_init_48_8 
(&srm->ip6_reass_hash, "nat-ip6-reass", nbuckets, - nbuckets * 1024); - - pool_get (srm->ip6_reass_lru_list_pool, head); - srm->ip6_reass_head_index = head_index = - head - srm->ip6_reass_lru_list_pool; - clib_dlist_init (srm->ip6_reass_lru_list_pool, head_index); - - return error; -} - -static clib_error_t * -nat_reass_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - clib_error_t *error = 0; - unformat_input_t _line_input, *line_input = &_line_input; - u32 timeout = 0, max_reass = 0, max_frag = 0; - u8 drop_frag = (u8) ~ 0, is_ip6 = 0; - int rv; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "max-reassemblies %u", &max_reass)) - ; - else if (unformat (line_input, "max-fragments %u", &max_frag)) - ; - else if (unformat (line_input, "timeout %u", &timeout)) - ; - else if (unformat (line_input, "enable")) - drop_frag = 0; - else if (unformat (line_input, "disable")) - drop_frag = 1; - else if (unformat (line_input, "ip4")) - is_ip6 = 0; - else if (unformat (line_input, "ip6")) - is_ip6 = 1; - else - { - error = clib_error_return (0, "unknown input '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (!timeout) - timeout = nat_reass_get_timeout (is_ip6); - if (!max_reass) - max_reass = nat_reass_get_max_reass (is_ip6); - if (!max_frag) - max_frag = nat_reass_get_max_frag (is_ip6); - if (drop_frag == (u8) ~ 0) - drop_frag = nat_reass_is_drop_frag (is_ip6); - - rv = - nat_reass_set (timeout, (u16) max_reass, (u8) max_frag, drop_frag, - is_ip6); - if (rv) - { - error = clib_error_return (0, "nat_set_reass return %d", rv); - goto done; - } - -done: - unformat_free (line_input); - - return error; -} - -static int -nat_ip4_reass_walk_cli (nat_reass_ip4_t * reass, void *ctx) -{ - vlib_main_t *vm = ctx; - u8 *flags_str = 0; - const char *classify_next_str; - 
- if (reass->flags & NAT_REASS_FLAG_MAX_FRAG_DROP) - flags_str = format (flags_str, "MAX_FRAG_DROP"); - if (reass->flags & NAT_REASS_FLAG_CLASSIFY_ED_CONTINUE) - { - if (flags_str) - flags_str = format (flags_str, " | "); - flags_str = format (flags_str, "CLASSIFY_ED_CONTINUE"); - } - if (reass->flags & NAT_REASS_FLAG_ED_DONT_TRANSLATE) - { - if (flags_str) - flags_str = format (flags_str, " | "); - flags_str = format (flags_str, "CLASSIFY_ED_DONT_TRANSLATE"); - } - if (!flags_str) - flags_str = format (flags_str, "0"); - flags_str = format (flags_str, "%c", 0); - - switch (reass->classify_next) - { - case NAT_REASS_IP4_CLASSIFY_NONE: - classify_next_str = "NONE"; - break; - case NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT: - classify_next_str = "IN2OUT"; - break; - case NAT_REASS_IP4_CLASSIFY_NEXT_OUT2IN: - classify_next_str = "OUT2IN"; - break; - default: - classify_next_str = "invalid value"; - } - - vlib_cli_output (vm, " src %U dst %U proto %u id 0x%04x cached %u " - "flags %s classify_next %s", - format_ip4_address, &reass->key.src, - format_ip4_address, &reass->key.dst, - reass->key.proto, - clib_net_to_host_u16 (reass->key.frag_id), reass->frag_n, - flags_str, classify_next_str); - - vec_free (flags_str); - - return 0; -} - -static int -nat_ip6_reass_walk_cli (nat_reass_ip6_t * reass, void *ctx) -{ - vlib_main_t *vm = ctx; - - vlib_cli_output (vm, " src %U dst %U proto %u id 0x%08x cached %u", - format_ip6_address, &reass->key.src, - format_ip6_address, &reass->key.dst, - reass->key.proto, - clib_net_to_host_u32 (reass->key.frag_id), reass->frag_n); - - return 0; -} - -static clib_error_t * -show_nat_reass_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - vlib_cli_output (vm, "NAT IPv4 virtual fragmentation reassembly is %s", - nat_reass_is_drop_frag (0) ? 
"DISABLED" : "ENABLED"); - vlib_cli_output (vm, " max-reassemblies %u", nat_reass_get_max_reass (0)); - vlib_cli_output (vm, " max-fragments %u", nat_reass_get_max_frag (0)); - vlib_cli_output (vm, " timeout %usec", nat_reass_get_timeout (0)); - vlib_cli_output (vm, " reassemblies:"); - nat_ip4_reass_walk (nat_ip4_reass_walk_cli, vm); - - vlib_cli_output (vm, "NAT IPv6 virtual fragmentation reassembly is %s", - nat_reass_is_drop_frag (1) ? "DISABLED" : "ENABLED"); - vlib_cli_output (vm, " max-reassemblies %u", nat_reass_get_max_reass (1)); - vlib_cli_output (vm, " max-fragments %u", nat_reass_get_max_frag (1)); - vlib_cli_output (vm, " timeout %usec", nat_reass_get_timeout (1)); - vlib_cli_output (vm, " reassemblies:"); - nat_ip6_reass_walk (nat_ip6_reass_walk_cli, vm); - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (nat_reass_command, static) = -{ - .path = "nat virtual-reassembly", - .short_help = "nat virtual-reassembly ip4|ip6 [max-reassemblies ] " - "[max-fragments ] [timeout ] [enable|disable]", - .function = nat_reass_command_fn, -}; - -VLIB_CLI_COMMAND (show_nat_reass_command, static) = -{ - .path = "show nat virtual-reassembly", - .short_help = "show nat virtual-reassembly", - .function = show_nat_reass_command_fn, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/nat/nat_reass.h b/src/plugins/nat/nat_reass.h deleted file mode 100644 index 11f9db5a252..00000000000 --- a/src/plugins/nat/nat_reass.h +++ /dev/null @@ -1,340 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * @file - * @brief NAT plugin virtual fragmentation reassembly - */ -#ifndef __included_nat_reass_h__ -#define __included_nat_reass_h__ - -#include -#include -#include -#include -#include - -#define NAT_REASS_TIMEOUT_DEFAULT 2 -#define NAT_MAX_REASS_DEAFULT 1024 -#define NAT_MAX_FRAG_DEFAULT 5 -#define NAT_REASS_HT_LOAD_FACTOR (0.75) - -#define NAT_REASS_FLAG_MAX_FRAG_DROP 1 -#define NAT_REASS_FLAG_CLASSIFY_ED_CONTINUE 2 -#define NAT_REASS_FLAG_ED_DONT_TRANSLATE 4 - -typedef struct -{ - union - { - struct - { - ip4_address_t src; - ip4_address_t dst; - /* align by making this 4 octets even though its a 2 octets field */ - u32 frag_id; - /* align by making this 4 octets even though its a 1 octet field */ - u32 proto; - }; - u64 as_u64[2]; - }; -} nat_reass_ip4_key_t; - -enum -{ - NAT_REASS_IP4_CLASSIFY_NONE, - NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT, - NAT_REASS_IP4_CLASSIFY_NEXT_OUT2IN -}; - -/* *INDENT-OFF* */ -typedef CLIB_PACKED(struct -{ - nat_reass_ip4_key_t key; - u32 lru_list_index; - u32 sess_index; - u32 thread_index; - f64 last_heard; - u32 frags_per_reass_list_head_index; - u8 frag_n; - u8 flags; - u8 classify_next; -}) nat_reass_ip4_t; -/* *INDENT-ON* */ - -typedef struct -{ - union - { - struct - { - ip6_address_t src; - ip6_address_t dst; - u32 frag_id; - /* align by making this 4 octets even though its a 1 octet field */ - u32 proto; - u64 unused; - }; - u64 as_u64[6]; - }; -} nat_reass_ip6_key_t; - -/* *INDENT-OFF* */ -typedef CLIB_PACKED(struct -{ - nat_reass_ip6_key_t key; - u32 lru_list_index; - u32 sess_index; - f64 
last_heard; - u32 frags_per_reass_list_head_index; - u8 frag_n; - u8 flags; -}) nat_reass_ip6_t; -/* *INDENT-ON* */ - -typedef struct -{ - /* IPv4 config */ - u32 ip4_timeout; - u16 ip4_max_reass; - u8 ip4_max_frag; - u8 ip4_drop_frag; - - /* IPv6 config */ - u32 ip6_timeout; - u16 ip6_max_reass; - u8 ip6_max_frag; - u8 ip6_drop_frag; - - /* IPv4 runtime */ - nat_reass_ip4_t *ip4_reass_pool; - clib_bihash_16_8_t ip4_reass_hash; - dlist_elt_t *ip4_reass_lru_list_pool; - dlist_elt_t *ip4_frags_list_pool; - u32 ip4_reass_head_index; - u16 ip4_reass_n; - clib_spinlock_t ip4_reass_lock; - - /* IPv6 runtime */ - nat_reass_ip6_t *ip6_reass_pool; - clib_bihash_48_8_t ip6_reass_hash; - dlist_elt_t *ip6_reass_lru_list_pool; - dlist_elt_t *ip6_frags_list_pool; - u32 ip6_reass_head_index; - u16 ip6_reass_n; - clib_spinlock_t ip6_reass_lock; - - /* convenience */ - vlib_main_t *vlib_main; - vnet_main_t *vnet_main; -} nat_reass_main_t; - -/** - * @brief Set NAT virtual fragmentation reassembly configuration. - * - * @param timeout Reassembly timeout. - * @param max_reass Maximum number of concurrent reassemblies. - * @param max_frag Maximum number of fragmets per reassembly - * @param drop_frag If zero translate fragments, otherwise drop fragments. - * @param is_ip6 1 if IPv6, 0 if IPv4. - * - * @returns 0 on success, non-zero value otherwise. - */ -int nat_reass_set (u32 timeout, u16 max_reass, u8 max_frag, u8 drop_frag, - u8 is_ip6); - -/** - * @brief Get reassembly timeout. - * - * @param is_ip6 1 if IPv6, 0 if IPv4. - * - * @returns reassembly timeout. - */ -u32 nat_reass_get_timeout (u8 is_ip6); - -/** - * @brief Get maximum number of concurrent reassemblies. - * - * @param is_ip6 1 if IPv6, 0 if IPv4. - * - * @returns maximum number of concurrent reassemblies. - */ -u16 nat_reass_get_max_reass (u8 is_ip6); - -/** - * @brief Get maximum number of fragmets per reassembly. - * - * @param is_ip6 1 if IPv6, 0 if IPv4. - * - * @returns maximum number of fragmets per reassembly. 
- */ -u8 nat_reass_get_max_frag (u8 is_ip6); - -/** - * @brief Get status of virtual fragmentation reassembly. - * - * @param is_ip6 1 if IPv6, 0 if IPv4. - * - * @returns zero if translate fragments, non-zero value if drop fragments. - */ -u8 nat_reass_is_drop_frag (u8 is_ip6); - -/** - * @brief Initialize NAT virtual fragmentation reassembly. - * - * @param vm vlib main. - * - * @return error code. - */ -clib_error_t *nat_reass_init (vlib_main_t * vm); - -/** - * @brief Find reassembly. - * - * @param src Source IPv4 address. - * @param dst Destination IPv4 address. - * @param frag_id Fragment ID. - * @param proto L4 protocol. - * - * @returns Reassembly data or 0 if not found. - */ -nat_reass_ip4_t *nat_ip4_reass_find (ip4_address_t src, - ip4_address_t dst, - u16 frag_id, u8 proto); - -/** - * @brief Create reassembly. - * - * @param src Source IPv4 address. - * @param dst Destination IPv4 address. - * @param frag_id Fragment ID. - * @param proto L4 protocol. - * - * @returns Reassembly data or 0 on failure. - */ -nat_reass_ip4_t *nat_ip4_reass_create (ip4_address_t src, ip4_address_t dst, - u16 frag_id, u8 proto); - -/** - * @brief Find or create reassembly. - * - * @param src Source IPv4 address. - * @param dst Destination IPv4 address. - * @param frag_id Fragment ID. - * @param proto L4 protocol. - * @param reset_timeout If non-zero value reset timeout. - * @param bi_to_drop Fragments to drop. - * - * @returns Reassembly data or 0 on failure. - */ -nat_reass_ip4_t *nat_ip4_reass_find_or_create (ip4_address_t src, - ip4_address_t dst, - u16 frag_id, u8 proto, - u8 reset_timeout, - u32 ** bi_to_drop); - -/** - * @brief Cache fragment. - * - * @param reass Reassembly data. - * @param bi Buffer index. - * @param bi_to_drop Fragments to drop. - * - * @returns 0 on success, non-zero value otherwise. - */ -int nat_ip4_reass_add_fragment (u32 thread_index, nat_reass_ip4_t * reass, - u32 bi, u32 ** bi_to_drop); - -/** - * @brief Get cached fragments. 
- * - * @param reass Reassembly data. - * @param bi Vector of buffer indexes. - */ -void nat_ip4_reass_get_frags (nat_reass_ip4_t * reass, u32 ** bi); - -/** - * @breif Call back function when walking IPv4 reassemblies, non-zero return - * value stop walk. - */ -typedef int (*nat_ip4_reass_walk_fn_t) (nat_reass_ip4_t * reass, void *ctx); - -/** - * @brief Walk IPv4 reassemblies. - * - * @param fn The function to invoke on each entry visited. - * @param ctx A context passed in the visit function. - */ -void nat_ip4_reass_walk (nat_ip4_reass_walk_fn_t fn, void *ctx); - -/** - * @brief Find or create reassembly. - * - * @param src Source IPv6 address. - * @param dst Destination IPv6 address. - * @param frag_id Fragment ID. - * @param proto L4 protocol. - * @param reset_timeout If non-zero value reset timeout. - * @param bi_to_drop Fragments to drop. - * - * @returns Reassembly data or 0 on failure. - */ -nat_reass_ip6_t *nat_ip6_reass_find_or_create (ip6_address_t src, - ip6_address_t dst, - u32 frag_id, u8 proto, - u8 reset_timeout, - u32 ** bi_to_drop); -/** - * @brief Cache fragment. - * - * @param reass Reassembly data. - * @param bi Buffer index. - * @param bi_to_drop Fragments to drop. - * - * @returns 0 on success, non-zero value otherwise. - */ -int nat_ip6_reass_add_fragment (u32 thread_index, nat_reass_ip6_t * reass, - u32 bi, u32 ** bi_to_drop); - -/** - * @brief Get cached fragments. - * - * @param reass Reassembly data. - * @param bi Vector of buffer indexes. - */ -void nat_ip6_reass_get_frags (nat_reass_ip6_t * reass, u32 ** bi); - -/** - * @breif Call back function when walking IPv6 reassemblies, non-zero return - * value stop walk. - */ -typedef int (*nat_ip6_reass_walk_fn_t) (nat_reass_ip6_t * reass, void *ctx); - -/** - * @brief Walk IPv6 reassemblies. - * - * @param fn The function to invoke on each entry visited. - * @param ctx A context passed in the visit function. 
- */ -void nat_ip6_reass_walk (nat_ip6_reass_walk_fn_t fn, void *ctx); - -#endif /* __included_nat_reass_h__ */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c index 6ee126658c8..e9ca88f1d68 100755 --- a/src/plugins/nat/out2in.c +++ b/src/plugins/nat/out2in.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -108,7 +107,6 @@ typedef enum SNAT_OUT2IN_NEXT_DROP, SNAT_OUT2IN_NEXT_LOOKUP, SNAT_OUT2IN_NEXT_ICMP_ERROR, - SNAT_OUT2IN_NEXT_REASS, SNAT_OUT2IN_N_NEXT, } snat_out2in_next_t; @@ -267,7 +265,7 @@ create_session_for_static_mapping (snat_main_t * sm, #ifndef CLIB_MARCH_VARIANT static_always_inline - snat_out2in_error_t icmp_get_key (ip4_header_t * ip0, + snat_out2in_error_t icmp_get_key (vlib_buffer_t * b, ip4_header_t * ip0, snat_session_key_t * p_key0) { icmp46_header_t *icmp0; @@ -280,11 +278,12 @@ static_always_inline icmp0 = (icmp46_header_t *) ip4_next_header (ip0); echo0 = (icmp_echo_header_t *) (icmp0 + 1); - if (!icmp_is_error_message (icmp0)) + if (!icmp_type_is_error_message + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) { key0.protocol = SNAT_PROTOCOL_ICMP; key0.addr = ip0->dst_address; - key0.port = echo0->identifier; + key0.port = vnet_buffer (b)->ip.reass.l4_src_port; // TODO should this be dst port? 
} else { @@ -332,7 +331,6 @@ icmp_match_out2in_slow (snat_main_t * sm, vlib_node_runtime_t * node, snat_session_key_t * p_value, u8 * p_dont_translate, void *d, void *e) { - icmp46_header_t *icmp0; u32 sw_if_index0; u32 rx_fib_index0; snat_session_key_t key0; @@ -345,13 +343,12 @@ icmp_match_out2in_slow (snat_main_t * sm, vlib_node_runtime_t * node, int err; u8 identity_nat; - icmp0 = (icmp46_header_t *) ip4_next_header (ip0); sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); key0.protocol = 0; - err = icmp_get_key (ip0, &key0); + err = icmp_get_key (b0, ip0, &key0); if (err != -1) { b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL]; @@ -390,9 +387,11 @@ icmp_match_out2in_slow (snat_main_t * sm, vlib_node_runtime_t * node, } } - if (PREDICT_FALSE (icmp0->type != ICMP4_echo_reply && - (icmp0->type != ICMP4_echo_request - || !is_addr_only))) + if (PREDICT_FALSE + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply + && (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request || !is_addr_only))) { b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE]; next0 = SNAT_OUT2IN_NEXT_DROP; @@ -417,9 +416,13 @@ icmp_match_out2in_slow (snat_main_t * sm, vlib_node_runtime_t * node, } else { - if (PREDICT_FALSE (icmp0->type != ICMP4_echo_reply && - icmp0->type != ICMP4_echo_request && - !icmp_is_error_message (icmp0))) + if (PREDICT_FALSE + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply + && vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request + && !icmp_type_is_error_message (vnet_buffer (b0)->ip. 
+ reass.icmp_type_or_tcp_flags))) { b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE]; next0 = SNAT_OUT2IN_NEXT_DROP; @@ -462,7 +465,6 @@ icmp_match_out2in_fast (snat_main_t * sm, vlib_node_runtime_t * node, snat_session_key_t * p_value, u8 * p_dont_translate, void *d, void *e) { - icmp46_header_t *icmp0; u32 sw_if_index0; u32 rx_fib_index0; snat_session_key_t key0; @@ -472,11 +474,10 @@ icmp_match_out2in_fast (snat_main_t * sm, vlib_node_runtime_t * node, u32 next0 = ~0; int err; - icmp0 = (icmp46_header_t *) ip4_next_header (ip0); sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); - err = icmp_get_key (ip0, &key0); + err = icmp_get_key (b0, ip0, &key0); if (err != -1) { b0->error = node->errors[err]; @@ -499,9 +500,12 @@ icmp_match_out2in_fast (snat_main_t * sm, vlib_node_runtime_t * node, goto out; } - if (PREDICT_FALSE (icmp0->type != ICMP4_echo_reply && - (icmp0->type != ICMP4_echo_request || !is_addr_only) && - !icmp_is_error_message (icmp0))) + if (PREDICT_FALSE + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != ICMP4_echo_reply + && (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request || !is_addr_only) + && !icmp_type_is_error_message (vnet_buffer (b0)->ip. 
+ reass.icmp_type_or_tcp_flags))) { b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE]; next0 = SNAT_OUT2IN_NEXT_DROP; @@ -575,72 +579,78 @@ icmp_out2in (snat_main_t * sm, dst_address /* changed member */ ); ip0->checksum = ip_csum_fold (sum0); - if (icmp0->checksum == 0) - icmp0->checksum = 0xffff; - if (!icmp_is_error_message (icmp0)) + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { - new_id0 = sm0.port; - if (PREDICT_FALSE (new_id0 != echo0->identifier)) + if (icmp0->checksum == 0) + icmp0->checksum = 0xffff; + + if (!icmp_type_is_error_message (icmp0->type)) { - old_id0 = echo0->identifier; new_id0 = sm0.port; - echo0->identifier = new_id0; - - sum0 = icmp0->checksum; - sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, - identifier /* changed member */ ); - icmp0->checksum = ip_csum_fold (sum0); + if (PREDICT_FALSE (new_id0 != echo0->identifier)) + { + old_id0 = echo0->identifier; + new_id0 = sm0.port; + echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = + ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier /* changed member */ ); + icmp0->checksum = ip_csum_fold (sum0); + } } - } - else - { - inner_ip0 = (ip4_header_t *) (echo0 + 1); - l4_header = ip4_next_header (inner_ip0); - - if (!ip4_header_checksum_is_valid (inner_ip0)) + else { - next0 = SNAT_OUT2IN_NEXT_DROP; - goto out; - } + inner_ip0 = (ip4_header_t *) (echo0 + 1); + l4_header = ip4_next_header (inner_ip0); - old_addr0 = inner_ip0->src_address.as_u32; - inner_ip0->src_address = sm0.addr; - new_addr0 = inner_ip0->src_address.as_u32; - - sum0 = icmp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - src_address /* changed member */ ); - icmp0->checksum = ip_csum_fold (sum0); + if (!ip4_header_checksum_is_valid (inner_ip0)) + { + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } - switch (protocol) - { - case SNAT_PROTOCOL_ICMP: - inner_icmp0 = (icmp46_header_t *) l4_header; - inner_echo0 = 
(icmp_echo_header_t *) (inner_icmp0 + 1); - - old_id0 = inner_echo0->identifier; - new_id0 = sm0.port; - inner_echo0->identifier = new_id0; + old_addr0 = inner_ip0->src_address.as_u32; + inner_ip0->src_address = sm0.addr; + new_addr0 = inner_ip0->src_address.as_u32; sum0 = icmp0->checksum; - sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, - identifier); + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + src_address /* changed member */ ); icmp0->checksum = ip_csum_fold (sum0); - break; - case SNAT_PROTOCOL_UDP: - case SNAT_PROTOCOL_TCP: - old_id0 = ((tcp_udp_header_t *) l4_header)->src_port; - new_id0 = sm0.port; - ((tcp_udp_header_t *) l4_header)->src_port = new_id0; - sum0 = icmp0->checksum; - sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t, - src_port); - icmp0->checksum = ip_csum_fold (sum0); - break; - default: - ASSERT (0); + switch (protocol) + { + case SNAT_PROTOCOL_ICMP: + inner_icmp0 = (icmp46_header_t *) l4_header; + inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1); + + old_id0 = inner_echo0->identifier; + new_id0 = sm0.port; + inner_echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = + ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + break; + case SNAT_PROTOCOL_UDP: + case SNAT_PROTOCOL_TCP: + old_id0 = ((tcp_udp_header_t *) l4_header)->src_port; + new_id0 = sm0.port; + ((tcp_udp_header_t *) l4_header)->src_port = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t, + src_port); + icmp0->checksum = ip_csum_fold (sum0); + break; + default: + ASSERT (0); + } } } @@ -816,13 +826,6 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm, goto trace0; } - if (PREDICT_FALSE (ip4_is_fragment (ip0))) - { - next0 = SNAT_OUT2IN_NEXT_REASS; - fragments++; - goto trace0; - } - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) { next0 = icmp_out2in_slow_path @@ -833,7 +836,7 @@ 
VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm, } key0.addr = ip0->dst_address; - key0.port = udp0->dst_port; + key0.port = vnet_buffer (b0)->ip.reass.l4_dst_port; key0.protocol = proto0; key0.fib_index = rx_fib_index0; @@ -851,10 +854,11 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm, * Send DHCP packets to the ipv4 stack, or we won't * be able to use dhcp client on the outside interface */ - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP - && (udp0->dst_port == - clib_host_to_net_u16 - (UDP_DST_PORT_dhcp_to_client)))) + if (PREDICT_FALSE + (proto0 == SNAT_PROTOCOL_UDP + && (vnet_buffer (b0)->ip.reass.l4_dst_port == + clib_host_to_net_u16 + (UDP_DST_PORT_dhcp_to_client)))) { vnet_feature_next (&next0, b0); goto trace0; @@ -897,34 +901,41 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm, dst_address /* changed member */ ); ip0->checksum = ip_csum_fold (sum0); - old_port0 = udp0->dst_port; - new_port0 = udp0->dst_port = s0->in2out.port; - if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP)) { - sum0 = tcp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, - dst_address /* changed member */ ); - - sum0 = ip_csum_update (sum0, old_port0, new_port0, - ip4_header_t /* cheat */ , - length /* changed member */ ); - tcp0->checksum = ip_csum_fold (sum0); - tcp_packets++; - } - else - { - if (PREDICT_FALSE (udp0->checksum)) + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { - sum0 = udp0->checksum; + old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port; + new_port0 = udp0->dst_port = s0->in2out.port; + sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ ); + sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t /* cheat */ , length /* changed member */ ); - udp0->checksum = ip_csum_fold (sum0); + tcp0->checksum = ip_csum_fold (sum0); + } + tcp_packets++; + } + else + { + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) + { + old_port0 = vnet_buffer 
(b0)->ip.reass.l4_dst_port; + new_port0 = udp0->dst_port = s0->in2out.port; + if (PREDICT_FALSE (udp0->checksum)) + { + sum0 = udp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ + ); + sum0 = + ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */ , + length /* changed member */ ); + udp0->checksum = ip_csum_fold (sum0); + } } udp_packets++; } @@ -989,13 +1000,6 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm, goto trace1; } - if (PREDICT_FALSE (ip4_is_fragment (ip1))) - { - next1 = SNAT_OUT2IN_NEXT_REASS; - fragments++; - goto trace1; - } - if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP)) { next1 = icmp_out2in_slow_path @@ -1006,7 +1010,7 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm, } key1.addr = ip1->dst_address; - key1.port = udp1->dst_port; + key1.port = vnet_buffer (b1)->ip.reass.l4_dst_port; key1.protocol = proto1; key1.fib_index = rx_fib_index1; @@ -1024,10 +1028,11 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm, * Send DHCP packets to the ipv4 stack, or we won't * be able to use dhcp client on the outside interface */ - if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_UDP - && (udp1->dst_port == - clib_host_to_net_u16 - (UDP_DST_PORT_dhcp_to_client)))) + if (PREDICT_FALSE + (proto1 == SNAT_PROTOCOL_UDP + && (vnet_buffer (b1)->ip.reass.l4_dst_port == + clib_host_to_net_u16 + (UDP_DST_PORT_dhcp_to_client)))) { vnet_feature_next (&next1, b1); goto trace1; @@ -1070,34 +1075,45 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm, dst_address /* changed member */ ); ip1->checksum = ip_csum_fold (sum1); - old_port1 = udp1->dst_port; - new_port1 = udp1->dst_port = s1->in2out.port; - if (PREDICT_TRUE (proto1 == SNAT_PROTOCOL_TCP)) { - sum1 = tcp1->checksum; - sum1 = ip_csum_update (sum1, old_addr1, new_addr1, - ip4_header_t, - dst_address /* changed member */ ); - - sum1 = ip_csum_update (sum1, old_port1, new_port1, - ip4_header_t /* cheat */ , - length /* changed 
member */ ); - tcp1->checksum = ip_csum_fold (sum1); - tcp_packets++; - } - else - { - if (PREDICT_FALSE (udp1->checksum)) + if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment) { - sum1 = udp1->checksum; + old_port1 = vnet_buffer (b1)->ip.reass.l4_dst_port; + new_port1 = udp1->dst_port = s1->in2out.port; + + sum1 = tcp1->checksum; sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t, dst_address /* changed member */ ); + sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t /* cheat */ , length /* changed member */ ); - udp1->checksum = ip_csum_fold (sum1); + tcp1->checksum = ip_csum_fold (sum1); + } + tcp_packets++; + } + else + { + if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment) + { + old_port1 = vnet_buffer (b1)->ip.reass.l4_dst_port; + new_port1 = udp1->dst_port = s1->in2out.port; + if (PREDICT_FALSE (udp1->checksum)) + { + + sum1 = udp1->checksum; + sum1 = + ip_csum_update (sum1, old_addr1, new_addr1, + ip4_header_t, + dst_address /* changed member */ ); + sum1 = + ip_csum_update (sum1, old_port1, new_port1, + ip4_header_t /* cheat */ , + length /* changed member */ ); + udp1->checksum = ip_csum_fold (sum1); + } } udp_packets++; } @@ -1199,13 +1215,6 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm, goto trace00; } - if (PREDICT_FALSE (ip4_is_fragment (ip0))) - { - next0 = SNAT_OUT2IN_NEXT_REASS; - fragments++; - goto trace00; - } - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) { next0 = icmp_out2in_slow_path @@ -1216,7 +1225,7 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm, } key0.addr = ip0->dst_address; - key0.port = udp0->dst_port; + key0.port = vnet_buffer (b0)->ip.reass.l4_dst_port; key0.protocol = proto0; key0.fib_index = rx_fib_index0; @@ -1234,10 +1243,11 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm, * Send DHCP packets to the ipv4 stack, or we won't * be able to use dhcp client on the outside interface */ - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP - && (udp0->dst_port == - 
clib_host_to_net_u16 - (UDP_DST_PORT_dhcp_to_client)))) + if (PREDICT_FALSE + (proto0 == SNAT_PROTOCOL_UDP + && (vnet_buffer (b0)->ip.reass.l4_dst_port == + clib_host_to_net_u16 + (UDP_DST_PORT_dhcp_to_client)))) { vnet_feature_next (&next0, b0); goto trace00; @@ -1280,34 +1290,42 @@ VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm, dst_address /* changed member */ ); ip0->checksum = ip_csum_fold (sum0); - old_port0 = udp0->dst_port; - new_port0 = udp0->dst_port = s0->in2out.port; - if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP)) { - sum0 = tcp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, - dst_address /* changed member */ ); - - sum0 = ip_csum_update (sum0, old_port0, new_port0, - ip4_header_t /* cheat */ , - length /* changed member */ ); - tcp0->checksum = ip_csum_fold (sum0); - tcp_packets++; - } - else - { - if (PREDICT_FALSE (udp0->checksum)) + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { - sum0 = udp0->checksum; + old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port; + new_port0 = udp0->dst_port = s0->in2out.port; + + sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ ); + sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t /* cheat */ , length /* changed member */ ); - udp0->checksum = ip_csum_fold (sum0); + tcp0->checksum = ip_csum_fold (sum0); + } + tcp_packets++; + } + else + { + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) + { + old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port; + new_port0 = udp0->dst_port = s0->in2out.port; + if (PREDICT_FALSE (udp0->checksum)) + { + sum0 = udp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ + ); + sum0 = + ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */ , + length /* changed member */ ); + udp0->checksum = ip_csum_fold (sum0); + } } udp_packets++; } @@ -1381,344 +1399,6 @@ VLIB_REGISTER_NODE 
(snat_out2in_node) = { [SNAT_OUT2IN_NEXT_DROP] = "error-drop", [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error", - [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass", - }, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FN (nat44_out2in_reass_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - u32 n_left_from, *from, *to_next; - snat_out2in_next_t next_index; - u32 pkts_processed = 0, cached_fragments = 0; - snat_main_t *sm = &snat_main; - f64 now = vlib_time_now (vm); - u32 thread_index = vm->thread_index; - snat_main_per_thread_data_t *per_thread_data = - &sm->per_thread_data[thread_index]; - u32 *fragments_to_drop = 0; - u32 *fragments_to_loopback = 0; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0; - vlib_buffer_t *b0; - u32 next0; - u8 cached0 = 0; - ip4_header_t *ip0; - nat_reass_ip4_t *reass0; - udp_header_t *udp0; - tcp_header_t *tcp0; - icmp46_header_t *icmp0; - snat_session_key_t key0, sm0; - clib_bihash_kv_8_8_t kv0, value0; - snat_session_t *s0 = 0; - u16 old_port0, new_port0; - ip_csum_t sum0; - u8 identity_nat0; - - /* speculatively enqueue b0 to the current next frame */ - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - next0 = SNAT_OUT2IN_NEXT_LOOKUP; - - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - rx_fib_index0 = - fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, - sw_if_index0); - - if (PREDICT_FALSE (nat_reass_is_drop_frag (0))) - { - next0 = SNAT_OUT2IN_NEXT_DROP; - b0->error = node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT]; - goto trace0; - } - - ip0 = 
(ip4_header_t *) vlib_buffer_get_current (b0); - udp0 = ip4_next_header (ip0); - tcp0 = (tcp_header_t *) udp0; - icmp0 = (icmp46_header_t *) udp0; - proto0 = ip_proto_to_snat_proto (ip0->protocol); - - reass0 = nat_ip4_reass_find_or_create (ip0->src_address, - ip0->dst_address, - ip0->fragment_id, - ip0->protocol, - 1, &fragments_to_drop); - - if (PREDICT_FALSE (!reass0)) - { - next0 = SNAT_OUT2IN_NEXT_DROP; - b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_REASS]; - nat_elog_notice ("maximum reassemblies exceeded"); - goto trace0; - } - - if (PREDICT_FALSE (ip4_is_first_fragment (ip0))) - { - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) - { - next0 = icmp_out2in_slow_path - (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, now, thread_index, &s0); - - if (PREDICT_TRUE (next0 != SNAT_OUT2IN_NEXT_DROP)) - { - if (s0) - reass0->sess_index = s0 - per_thread_data->sessions; - else - reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE; - reass0->thread_index = thread_index; - nat_ip4_reass_get_frags (reass0, - &fragments_to_loopback); - } - - goto trace0; - } - - key0.addr = ip0->dst_address; - key0.port = udp0->dst_port; - key0.protocol = proto0; - key0.fib_index = rx_fib_index0; - kv0.key = key0.as_u64; - - if (clib_bihash_search_8_8 - (&per_thread_data->out2in, &kv0, &value0)) - { - /* Try to match static mapping by external address and port, - destination address and port in packet */ - if (snat_static_mapping_match - (sm, key0, &sm0, 1, 0, 0, 0, 0, &identity_nat0)) - { - /* - * Send DHCP packets to the ipv4 stack, or we won't - * be able to use dhcp client on the outside interface - */ - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP - && (udp0->dst_port - == - clib_host_to_net_u16 - (UDP_DST_PORT_dhcp_to_client)))) - { - vnet_feature_next (&next0, b0); - goto trace0; - } - - if (!sm->forwarding_enabled) - { - b0->error = - node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; - next0 = SNAT_OUT2IN_NEXT_DROP; - } - else - { - reass0->flags |= 
NAT_REASS_FLAG_ED_DONT_TRANSLATE; - nat_ip4_reass_get_frags (reass0, - &fragments_to_loopback); - } - goto trace0; - } - - if (PREDICT_FALSE (identity_nat0)) - goto trace0; - - /* Create session initiated by host from external network */ - s0 = - create_session_for_static_mapping (sm, b0, sm0, key0, - node, thread_index, - now); - if (!s0) - { - b0->error = - node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; - next0 = SNAT_OUT2IN_NEXT_DROP; - goto trace0; - } - reass0->sess_index = s0 - per_thread_data->sessions; - reass0->thread_index = thread_index; - } - else - { - s0 = pool_elt_at_index (per_thread_data->sessions, - value0.value); - reass0->sess_index = value0.value; - } - nat_ip4_reass_get_frags (reass0, &fragments_to_loopback); - } - else - { - if (reass0->flags & NAT_REASS_FLAG_ED_DONT_TRANSLATE) - goto trace0; - if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0)) - { - if (nat_ip4_reass_add_fragment - (thread_index, reass0, bi0, &fragments_to_drop)) - { - b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_FRAG]; - nat_elog_notice - ("maximum fragments per reassembly exceeded"); - next0 = SNAT_OUT2IN_NEXT_DROP; - goto trace0; - } - cached0 = 1; - goto trace0; - } - s0 = pool_elt_at_index (per_thread_data->sessions, - reass0->sess_index); - } - - old_addr0 = ip0->dst_address.as_u32; - ip0->dst_address = s0->in2out.addr; - new_addr0 = ip0->dst_address.as_u32; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; - - sum0 = ip0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, - dst_address /* changed member */ ); - ip0->checksum = ip_csum_fold (sum0); - - if (PREDICT_FALSE (ip4_is_first_fragment (ip0))) - { - old_port0 = udp0->dst_port; - new_port0 = udp0->dst_port = s0->in2out.port; - - if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP)) - { - sum0 = tcp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, - dst_address /* changed member */ ); - - sum0 = ip_csum_update (sum0, old_port0, new_port0, - 
ip4_header_t /* cheat */ , - length /* changed member */ ); - tcp0->checksum = ip_csum_fold (sum0); - } - else if (udp0->checksum) - { - sum0 = udp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, - dst_address /* changed member */ ); - sum0 = ip_csum_update (sum0, old_port0, new_port0, - ip4_header_t /* cheat */ , - length /* changed member */ ); - udp0->checksum = ip_csum_fold (sum0); - } - } - - /* Accounting */ - nat44_session_update_counters (s0, now, - vlib_buffer_length_in_chain (vm, b0), - thread_index); - /* Per-user LRU list maintenance */ - nat44_session_update_lru (sm, s0, thread_index); - - trace0: - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) - { - nat44_reass_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - t->cached = cached0; - t->sw_if_index = sw_if_index0; - t->next_index = next0; - } - - if (cached0) - { - n_left_to_next++; - to_next--; - cached_fragments++; - } - else - { - pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; - - /* verify speculative enqueue, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - if (n_left_from == 0 && vec_len (fragments_to_loopback)) - { - from = vlib_frame_vector_args (frame); - u32 len = vec_len (fragments_to_loopback); - if (len <= VLIB_FRAME_SIZE) - { - clib_memcpy_fast (from, fragments_to_loopback, - sizeof (u32) * len); - n_left_from = len; - vec_reset_length (fragments_to_loopback); - } - else - { - clib_memcpy_fast (from, fragments_to_loopback + - (len - VLIB_FRAME_SIZE), - sizeof (u32) * VLIB_FRAME_SIZE); - n_left_from = VLIB_FRAME_SIZE; - _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE; - } - } - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, sm->out2in_reass_node_index, - SNAT_OUT2IN_ERROR_PROCESSED_FRAGMENTS, - pkts_processed); - 
vlib_node_increment_counter (vm, sm->out2in_reass_node_index, - SNAT_OUT2IN_ERROR_CACHED_FRAGMENTS, - cached_fragments); - - nat_send_all_to_node (vm, fragments_to_drop, node, - &node->errors[SNAT_OUT2IN_ERROR_DROP_FRAGMENT], - SNAT_OUT2IN_NEXT_DROP); - - vec_free (fragments_to_drop); - vec_free (fragments_to_loopback); - return frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (nat44_out2in_reass_node) = { - .name = "nat44-out2in-reass", - .vector_size = sizeof (u32), - .format_trace = format_nat44_reass_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(snat_out2in_error_strings), - .error_strings = snat_out2in_error_strings, - - .n_next_nodes = SNAT_OUT2IN_N_NEXT, - - /* edit / add dispositions here */ - .next_nodes = { - [SNAT_OUT2IN_NEXT_DROP] = "error-drop", - [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", - [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error", - [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass", }, }; /* *INDENT-ON* */ @@ -1919,7 +1599,6 @@ VLIB_REGISTER_NODE (snat_out2in_fast_node) = { [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", [SNAT_OUT2IN_NEXT_DROP] = "error-drop", [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error", - [SNAT_OUT2IN_NEXT_REASS] = "nat44-out2in-reass", }, }; /* *INDENT-ON* */ diff --git a/src/plugins/nat/out2in_ed.c b/src/plugins/nat/out2in_ed.c index cb1cbdad375..ee2f85aa080 100644 --- a/src/plugins/nat/out2in_ed.c +++ b/src/plugins/nat/out2in_ed.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -326,8 +325,8 @@ next_src_nat (snat_main_t * sm, ip4_header_t * ip, u8 proto, u16 src_port, } static void -create_bypass_for_fwd (snat_main_t * sm, ip4_header_t * ip, u32 rx_fib_index, - u32 thread_index) +create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip, + u32 rx_fib_index, u32 thread_index) { nat_ed_ses_key_t key; clib_bihash_kv_16_8_t kv, value; @@ -339,7 +338,7 @@ create_bypass_for_fwd (snat_main_t * sm, ip4_header_t * ip, u32 rx_fib_index, if 
(ip->protocol == IP_PROTOCOL_ICMP) { - if (get_icmp_o2i_ed_key (ip, &key)) + if (get_icmp_o2i_ed_key (b, ip, &key)) return; } else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP) @@ -414,7 +413,8 @@ create_bypass_for_fwd (snat_main_t * sm, ip4_header_t * ip, u32 rx_fib_index, if (ip->protocol == IP_PROTOCOL_TCP) { tcp_header_t *tcp = ip4_next_header (ip); - if (nat44_set_tcp_session_state_o2i (sm, s, tcp, thread_index)) + if (nat44_set_tcp_session_state_o2i + (sm, s, tcp->flags, tcp->ack_number, tcp->seq_number, thread_index)) return; } @@ -425,15 +425,15 @@ create_bypass_for_fwd (snat_main_t * sm, ip4_header_t * ip, u32 rx_fib_index, } static inline void -create_bypass_for_fwd_worker (snat_main_t * sm, ip4_header_t * ip, - u32 rx_fib_index) +create_bypass_for_fwd_worker (snat_main_t * sm, vlib_buffer_t * b, + ip4_header_t * ip, u32 rx_fib_index) { ip4_header_t ip_wkr = { .src_address = ip->dst_address, }; u32 thread_index = sm->worker_in2out_cb (&ip_wkr, rx_fib_index, 0); - create_bypass_for_fwd (sm, ip, rx_fib_index, thread_index); + create_bypass_for_fwd (sm, b, ip, rx_fib_index, thread_index); } #ifndef CLIB_MARCH_VARIANT @@ -444,7 +444,6 @@ icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node, u8 * p_dont_translate, void *d, void *e) { u32 next = ~0, sw_if_index, rx_fib_index; - icmp46_header_t *icmp; nat_ed_ses_key_t key; clib_bihash_kv_16_8_t kv, value; snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; @@ -452,11 +451,10 @@ icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node, u8 dont_translate = 0, is_addr_only, identity_nat; snat_session_key_t e_key, l_key; - icmp = (icmp46_header_t *) ip4_next_header (ip); sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); - if (get_icmp_o2i_ed_key (ip, &key)) + if (get_icmp_o2i_ed_key (b, ip, &key)) { b->error = node->errors[NAT_OUT2IN_ED_ERROR_UNSUPPORTED_PROTOCOL]; next = 
NAT_NEXT_DROP; @@ -499,15 +497,18 @@ icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node, goto out; } if (sm->num_workers > 1) - create_bypass_for_fwd_worker (sm, ip, rx_fib_index); + create_bypass_for_fwd_worker (sm, b, ip, rx_fib_index); else - create_bypass_for_fwd (sm, ip, rx_fib_index, thread_index); + create_bypass_for_fwd (sm, b, ip, rx_fib_index, thread_index); goto out; } } - if (PREDICT_FALSE (icmp->type != ICMP4_echo_reply && - (icmp->type != ICMP4_echo_request || !is_addr_only))) + if (PREDICT_FALSE + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply + && (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request || !is_addr_only))) { b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE]; next = NAT_NEXT_DROP; @@ -534,9 +535,13 @@ icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node, } else { - if (PREDICT_FALSE (icmp->type != ICMP4_echo_reply && - icmp->type != ICMP4_echo_request && - !icmp_is_error_message (icmp))) + if (PREDICT_FALSE + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply + && vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request + && !icmp_type_is_error_message (vnet_buffer (b)->ip. 
+ reass.icmp_type_or_tcp_flags))) { b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE]; next = NAT_NEXT_DROP; @@ -795,13 +800,6 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, goto trace00; } - if (ip4_is_fragment (ip0)) - { - next0 = NAT_NEXT_OUT2IN_ED_REASS; - fragments++; - goto trace00; - } - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) { next0 = NAT_NEXT_OUT2IN_ED_SLOW_PATH; @@ -810,8 +808,9 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, } make_ed_kv (&kv0, &ip0->dst_address, &ip0->src_address, - ip0->protocol, rx_fib_index0, udp0->dst_port, - udp0->src_port); + ip0->protocol, rx_fib_index0, + vnet_buffer (b0)->ip.reass.l4_dst_port, + vnet_buffer (b0)->ip.reass.l4_src_port); if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv0, &value0)) { @@ -820,7 +819,7 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, /* Try to match static mapping by external address and port, destination address and port in packet */ e_key0.addr = ip0->dst_address; - e_key0.port = udp0->dst_port; + e_key0.port = vnet_buffer (b0)->ip.reass.l4_dst_port; e_key0.protocol = proto0; e_key0.fib_index = rx_fib_index0; if (snat_static_mapping_match (sm, e_key0, &l_key0, 1, 0, @@ -832,10 +831,11 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, * Send DHCP packets to the ipv4 stack, or we won't * be able to use dhcp client on the outside interface */ - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP - && (udp0->dst_port == - clib_host_to_net_u16 - (UDP_DST_PORT_dhcp_to_client)))) + if (PREDICT_FALSE + (proto0 == SNAT_PROTOCOL_UDP + && (vnet_buffer (b0)->ip.reass.l4_dst_port == + clib_host_to_net_u16 + (UDP_DST_PORT_dhcp_to_client)))) { goto trace00; } @@ -848,18 +848,20 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, } else { - if (next_src_nat (sm, ip0, ip0->protocol, - udp0->src_port, udp0->dst_port, - thread_index, rx_fib_index0)) + if (next_src_nat + (sm, ip0, ip0->protocol, + vnet_buffer (b0)->ip.reass.l4_src_port, + vnet_buffer (b0)->ip.reass.l4_dst_port, + 
thread_index, rx_fib_index0)) { next0 = NAT_NEXT_IN2OUT_ED_FAST_PATH; goto trace00; } if (sm->num_workers > 1) - create_bypass_for_fwd_worker (sm, ip0, + create_bypass_for_fwd_worker (sm, b0, ip0, rx_fib_index0); else - create_bypass_for_fwd (sm, ip0, rx_fib_index0, + create_bypass_for_fwd (sm, b0, ip0, rx_fib_index0, thread_index); } goto trace00; @@ -868,7 +870,9 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE (identity_nat0)) goto trace00; - if ((proto0 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp0)) + if ((proto0 == SNAT_PROTOCOL_TCP) + && !tcp_flags_is_init (vnet_buffer (b0)->ip. + reass.icmp_type_or_tcp_flags)) { b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN]; next0 = NAT_NEXT_DROP; @@ -912,35 +916,47 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, src_address); ip0->checksum = ip_csum_fold (sum0); - old_port0 = udp0->dst_port; - new_port0 = udp0->dst_port = s0->in2out.port; + old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port; if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP)) { - sum0 = tcp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, - length); - if (is_twice_nat_session (s0)) + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { - sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, - s0->ext_host_nat_addr.as_u32, - ip4_header_t, dst_address); - sum0 = ip_csum_update (sum0, tcp0->src_port, - s0->ext_host_nat_port, ip4_header_t, - length); - tcp0->src_port = s0->ext_host_nat_port; - ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; + new_port0 = udp0->dst_port = s0->in2out.port; + sum0 = tcp0->checksum; + sum0 = + ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + dst_address); + sum0 = + ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, + length); + if (is_twice_nat_session (s0)) + { + sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, + s0->ext_host_nat_addr.as_u32, + 
ip4_header_t, dst_address); + sum0 = + ip_csum_update (sum0, + vnet_buffer (b0)->ip. + reass.l4_src_port, + s0->ext_host_nat_port, ip4_header_t, + length); + tcp0->src_port = s0->ext_host_nat_port; + ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; + } + tcp0->checksum = ip_csum_fold (sum0); } - tcp0->checksum = ip_csum_fold (sum0); tcp_packets++; if (nat44_set_tcp_session_state_o2i - (sm, s0, tcp0, thread_index)) + (sm, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags, + vnet_buffer (b0)->ip.reass.tcp_ack_number, + vnet_buffer (b0)->ip.reass.tcp_seq_number, thread_index)) goto trace00; } - else if (udp0->checksum) + else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment + && udp0->checksum) { + new_port0 = udp0->dst_port = s0->in2out.port; sum0 = udp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address); @@ -951,9 +967,11 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, s0->ext_host_nat_addr.as_u32, ip4_header_t, dst_address); - sum0 = ip_csum_update (sum0, udp0->src_port, - s0->ext_host_nat_port, ip4_header_t, - length); + sum0 = + ip_csum_update (sum0, + vnet_buffer (b0)->ip.reass.l4_src_port, + s0->ext_host_nat_port, ip4_header_t, + length); udp0->src_port = s0->ext_host_nat_port; ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; } @@ -962,10 +980,20 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, } else { - if (PREDICT_FALSE (is_twice_nat_session (s0))) + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { - udp0->src_port = s0->ext_host_nat_port; - ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; + new_port0 = udp0->dst_port = s0->in2out.port; + if (PREDICT_FALSE (is_twice_nat_session (s0))) + { + udp0->dst_port = s0->in2out.port; + if (is_twice_nat_session (s0)) + { + udp0->src_port = s0->ext_host_nat_port; + ip0->src_address.as_u32 = + s0->ext_host_nat_addr.as_u32; + } + udp0->checksum = 0; + } } udp_packets++; } @@ -1050,13 
+1078,6 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, goto trace01; } - if (ip4_is_fragment (ip1)) - { - next1 = NAT_NEXT_OUT2IN_ED_REASS; - fragments++; - goto trace01; - } - if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP)) { next1 = NAT_NEXT_OUT2IN_ED_SLOW_PATH; @@ -1065,8 +1086,9 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, } make_ed_kv (&kv1, &ip1->dst_address, &ip1->src_address, - ip1->protocol, rx_fib_index1, udp1->dst_port, - udp1->src_port); + ip1->protocol, rx_fib_index1, + vnet_buffer (b1)->ip.reass.l4_dst_port, + vnet_buffer (b1)->ip.reass.l4_src_port); if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv1, &value1)) { @@ -1075,7 +1097,7 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, /* Try to match static mapping by external address and port, destination address and port in packet */ e_key1.addr = ip1->dst_address; - e_key1.port = udp1->dst_port; + e_key1.port = vnet_buffer (b1)->ip.reass.l4_dst_port; e_key1.protocol = proto1; e_key1.fib_index = rx_fib_index1; if (snat_static_mapping_match (sm, e_key1, &l_key1, 1, 0, @@ -1087,10 +1109,11 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, * Send DHCP packets to the ipv4 stack, or we won't * be able to use dhcp client on the outside interface */ - if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_UDP - && (udp1->dst_port == - clib_host_to_net_u16 - (UDP_DST_PORT_dhcp_to_client)))) + if (PREDICT_FALSE + (proto1 == SNAT_PROTOCOL_UDP + && (vnet_buffer (b1)->ip.reass.l4_dst_port == + clib_host_to_net_u16 + (UDP_DST_PORT_dhcp_to_client)))) { goto trace01; } @@ -1103,18 +1126,20 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, } else { - if (next_src_nat (sm, ip1, ip1->protocol, - udp1->src_port, udp1->dst_port, - thread_index, rx_fib_index1)) + if (next_src_nat + (sm, ip1, ip1->protocol, + vnet_buffer (b1)->ip.reass.l4_src_port, + vnet_buffer (b1)->ip.reass.l4_dst_port, + thread_index, rx_fib_index1)) { next1 = NAT_NEXT_IN2OUT_ED_FAST_PATH; goto trace01; } if (sm->num_workers > 1) - 
create_bypass_for_fwd_worker (sm, ip1, + create_bypass_for_fwd_worker (sm, b1, ip1, rx_fib_index1); else - create_bypass_for_fwd (sm, ip1, rx_fib_index1, + create_bypass_for_fwd (sm, b1, ip1, rx_fib_index1, thread_index); } goto trace01; @@ -1123,7 +1148,9 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE (identity_nat1)) goto trace01; - if ((proto1 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp1)) + if ((proto1 == SNAT_PROTOCOL_TCP) + && !tcp_flags_is_init (vnet_buffer (b1)->ip. + reass.icmp_type_or_tcp_flags)) { b1->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN]; next1 = NAT_NEXT_DROP; @@ -1167,35 +1194,48 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, src_address); ip1->checksum = ip_csum_fold (sum1); - old_port1 = udp1->dst_port; - new_port1 = udp1->dst_port = s1->in2out.port; + old_port1 = vnet_buffer (b1)->ip.reass.l4_dst_port; if (PREDICT_TRUE (proto1 == SNAT_PROTOCOL_TCP)) { - sum1 = tcp1->checksum; - sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t, - dst_address); - sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t, - length); - if (is_twice_nat_session (s1)) + if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment) { - sum1 = ip_csum_update (sum1, ip1->src_address.as_u32, - s1->ext_host_nat_addr.as_u32, - ip4_header_t, dst_address); - sum1 = ip_csum_update (sum1, tcp1->src_port, - s1->ext_host_nat_port, ip4_header_t, - length); - tcp1->src_port = s1->ext_host_nat_port; - ip1->src_address.as_u32 = s1->ext_host_nat_addr.as_u32; + new_port1 = udp1->dst_port = s1->in2out.port; + + sum1 = tcp1->checksum; + sum1 = + ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t, + dst_address); + sum1 = + ip_csum_update (sum1, old_port1, new_port1, ip4_header_t, + length); + if (is_twice_nat_session (s1)) + { + sum1 = ip_csum_update (sum1, ip1->src_address.as_u32, + s1->ext_host_nat_addr.as_u32, + ip4_header_t, dst_address); + sum1 = + ip_csum_update (sum1, + vnet_buffer (b1)->ip. 
+ reass.l4_src_port, + s1->ext_host_nat_port, ip4_header_t, + length); + tcp1->src_port = s1->ext_host_nat_port; + ip1->src_address.as_u32 = s1->ext_host_nat_addr.as_u32; + } + tcp1->checksum = ip_csum_fold (sum1); } - tcp1->checksum = ip_csum_fold (sum1); tcp_packets++; if (nat44_set_tcp_session_state_o2i - (sm, s1, tcp1, thread_index)) + (sm, s1, vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags, + vnet_buffer (b1)->ip.reass.tcp_ack_number, + vnet_buffer (b1)->ip.reass.tcp_seq_number, thread_index)) goto trace01; } - else if (udp1->checksum) + else if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment + && udp1->checksum) { + new_port1 = udp1->dst_port = s1->in2out.port; sum1 = udp1->checksum; sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t, dst_address); @@ -1206,9 +1246,11 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, sum1 = ip_csum_update (sum1, ip1->src_address.as_u32, s1->ext_host_nat_addr.as_u32, ip4_header_t, dst_address); - sum1 = ip_csum_update (sum1, udp1->src_port, - s1->ext_host_nat_port, ip4_header_t, - length); + sum1 = + ip_csum_update (sum1, + vnet_buffer (b1)->ip.reass.l4_src_port, + s1->ext_host_nat_port, ip4_header_t, + length); udp1->src_port = s1->ext_host_nat_port; ip1->src_address.as_u32 = s1->ext_host_nat_addr.as_u32; } @@ -1217,10 +1259,19 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, } else { - if (PREDICT_FALSE (is_twice_nat_session (s1))) + if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment) { - udp1->src_port = s1->ext_host_nat_port; - ip1->src_address.as_u32 = s1->ext_host_nat_addr.as_u32; + if (PREDICT_FALSE (is_twice_nat_session (s1))) + { + udp1->dst_port = s1->in2out.port; + if (is_twice_nat_session (s1)) + { + udp1->src_port = s1->ext_host_nat_port; + ip1->src_address.as_u32 = + s1->ext_host_nat_addr.as_u32; + } + udp1->checksum = 0; + } } udp_packets++; } @@ -1341,13 +1392,6 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, goto trace0; } - if (ip4_is_fragment (ip0)) - { - next0 = 
NAT_NEXT_OUT2IN_ED_REASS; - fragments++; - goto trace0; - } - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) { next0 = NAT_NEXT_OUT2IN_ED_SLOW_PATH; @@ -1356,8 +1400,9 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, } make_ed_kv (&kv0, &ip0->dst_address, &ip0->src_address, - ip0->protocol, rx_fib_index0, udp0->dst_port, - udp0->src_port); + ip0->protocol, rx_fib_index0, + vnet_buffer (b0)->ip.reass.l4_dst_port, + vnet_buffer (b0)->ip.reass.l4_src_port); if (clib_bihash_search_16_8 (&tsm->out2in_ed, &kv0, &value0)) { @@ -1366,7 +1411,7 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, /* Try to match static mapping by external address and port, destination address and port in packet */ e_key0.addr = ip0->dst_address; - e_key0.port = udp0->dst_port; + e_key0.port = vnet_buffer (b0)->ip.reass.l4_dst_port; e_key0.protocol = proto0; e_key0.fib_index = rx_fib_index0; if (snat_static_mapping_match (sm, e_key0, &l_key0, 1, 0, @@ -1378,10 +1423,11 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, * Send DHCP packets to the ipv4 stack, or we won't * be able to use dhcp client on the outside interface */ - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP - && (udp0->dst_port == - clib_host_to_net_u16 - (UDP_DST_PORT_dhcp_to_client)))) + if (PREDICT_FALSE + (proto0 == SNAT_PROTOCOL_UDP + && (vnet_buffer (b0)->ip.reass.l4_dst_port == + clib_host_to_net_u16 + (UDP_DST_PORT_dhcp_to_client)))) { goto trace0; } @@ -1394,18 +1440,20 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, } else { - if (next_src_nat (sm, ip0, ip0->protocol, - udp0->src_port, udp0->dst_port, - thread_index, rx_fib_index0)) + if (next_src_nat + (sm, ip0, ip0->protocol, + vnet_buffer (b0)->ip.reass.l4_src_port, + vnet_buffer (b0)->ip.reass.l4_dst_port, + thread_index, rx_fib_index0)) { next0 = NAT_NEXT_IN2OUT_ED_FAST_PATH; goto trace0; } if (sm->num_workers > 1) - create_bypass_for_fwd_worker (sm, ip0, + create_bypass_for_fwd_worker (sm, b0, ip0, rx_fib_index0); else - create_bypass_for_fwd 
(sm, ip0, rx_fib_index0, + create_bypass_for_fwd (sm, b0, ip0, rx_fib_index0, thread_index); } goto trace0; @@ -1414,7 +1462,9 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE (identity_nat0)) goto trace0; - if ((proto0 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp0)) + if ((proto0 == SNAT_PROTOCOL_TCP) + && !tcp_flags_is_init (vnet_buffer (b0)->ip. + reass.icmp_type_or_tcp_flags)) { b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN]; next0 = NAT_NEXT_DROP; @@ -1458,35 +1508,47 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, src_address); ip0->checksum = ip_csum_fold (sum0); - old_port0 = udp0->dst_port; - new_port0 = udp0->dst_port = s0->in2out.port; + old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port; if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP)) { - sum0 = tcp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, - length); - if (is_twice_nat_session (s0)) + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { - sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, - s0->ext_host_nat_addr.as_u32, - ip4_header_t, dst_address); - sum0 = ip_csum_update (sum0, tcp0->src_port, - s0->ext_host_nat_port, ip4_header_t, - length); - tcp0->src_port = s0->ext_host_nat_port; - ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; + new_port0 = udp0->dst_port = s0->in2out.port; + sum0 = tcp0->checksum; + sum0 = + ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + dst_address); + sum0 = + ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, + length); + if (is_twice_nat_session (s0)) + { + sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, + s0->ext_host_nat_addr.as_u32, + ip4_header_t, dst_address); + sum0 = + ip_csum_update (sum0, + vnet_buffer (b0)->ip. 
+ reass.l4_src_port, + s0->ext_host_nat_port, ip4_header_t, + length); + tcp0->src_port = s0->ext_host_nat_port; + ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; + } + tcp0->checksum = ip_csum_fold (sum0); } - tcp0->checksum = ip_csum_fold (sum0); tcp_packets++; if (nat44_set_tcp_session_state_o2i - (sm, s0, tcp0, thread_index)) + (sm, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags, + vnet_buffer (b0)->ip.reass.tcp_ack_number, + vnet_buffer (b0)->ip.reass.tcp_seq_number, thread_index)) goto trace0; } - else if (udp0->checksum) + else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment + && udp0->checksum) { + new_port0 = udp0->dst_port = s0->in2out.port; sum0 = udp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address); @@ -1497,9 +1559,11 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, s0->ext_host_nat_addr.as_u32, ip4_header_t, dst_address); - sum0 = ip_csum_update (sum0, udp0->src_port, - s0->ext_host_nat_port, ip4_header_t, - length); + sum0 = + ip_csum_update (sum0, + vnet_buffer (b0)->ip.reass.l4_src_port, + s0->ext_host_nat_port, ip4_header_t, + length); udp0->src_port = s0->ext_host_nat_port; ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; } @@ -1508,10 +1572,14 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, } else { - if (PREDICT_FALSE (is_twice_nat_session (s0))) + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) { - udp0->src_port = s0->ext_host_nat_port; - ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; + new_port0 = udp0->dst_port = s0->in2out.port; + if (PREDICT_FALSE (is_twice_nat_session (s0))) + { + udp0->src_port = s0->ext_host_nat_port; + ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; + } } udp_packets++; } @@ -1565,380 +1633,6 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm, return frame->n_vectors; } -static inline uword -nat44_ed_out2in_reass_node_fn_inline (vlib_main_t * vm, - 
vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - u32 n_left_from, *from, *to_next; - nat_next_t next_index; - u32 pkts_processed = 0; - snat_main_t *sm = &snat_main; - f64 now = vlib_time_now (vm); - u32 thread_index = vm->thread_index; - snat_main_per_thread_data_t *per_thread_data = - &sm->per_thread_data[thread_index]; - u32 *fragments_to_drop = 0; - u32 *fragments_to_loopback = 0; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0; - vlib_buffer_t *b0; - u32 next0; - u8 cached0 = 0; - ip4_header_t *ip0; - nat_reass_ip4_t *reass0; - udp_header_t *udp0; - tcp_header_t *tcp0; - icmp46_header_t *icmp0; - clib_bihash_kv_16_8_t kv0, value0; - snat_session_t *s0 = 0; - u16 old_port0, new_port0; - ip_csum_t sum0; - snat_session_key_t e_key0, l_key0; - lb_nat_type_t lb0; - twice_nat_type_t twice_nat0; - u8 identity_nat0; - - /* speculatively enqueue b0 to the current next frame */ - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - next0 = nat_buffer_opaque (b0)->arc_next; - - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - rx_fib_index0 = - fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, - sw_if_index0); - - if (PREDICT_FALSE (nat_reass_is_drop_frag (0))) - { - next0 = NAT_NEXT_DROP; - b0->error = node->errors[NAT_OUT2IN_ED_ERROR_DROP_FRAGMENT]; - goto trace0; - } - - ip0 = (ip4_header_t *) vlib_buffer_get_current (b0); - udp0 = ip4_next_header (ip0); - tcp0 = (tcp_header_t *) udp0; - icmp0 = (icmp46_header_t *) udp0; - proto0 = ip_proto_to_snat_proto (ip0->protocol); - - reass0 = nat_ip4_reass_find_or_create (ip0->src_address, - 
ip0->dst_address, - ip0->fragment_id, - ip0->protocol, - 1, &fragments_to_drop); - - if (PREDICT_FALSE (!reass0)) - { - next0 = NAT_NEXT_DROP; - b0->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_REASS]; - nat_elog_notice ("maximum reassemblies exceeded"); - goto trace0; - } - - if (PREDICT_FALSE (ip4_is_first_fragment (ip0))) - { - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) - { - next0 = icmp_out2in_ed_slow_path - (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, now, thread_index, &s0); - - if (PREDICT_TRUE (next0 != NAT_NEXT_DROP)) - { - if (s0) - reass0->sess_index = s0 - per_thread_data->sessions; - else - reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE; - reass0->thread_index = thread_index; - nat_ip4_reass_get_frags (reass0, - &fragments_to_loopback); - } - - goto trace0; - } - - make_ed_kv (&kv0, &ip0->dst_address, &ip0->src_address, - ip0->protocol, rx_fib_index0, udp0->dst_port, - udp0->src_port); - - if (clib_bihash_search_16_8 - (&per_thread_data->out2in_ed, &kv0, &value0)) - { - /* Try to match static mapping by external address and port, - destination address and port in packet */ - e_key0.addr = ip0->dst_address; - e_key0.port = udp0->dst_port; - e_key0.protocol = proto0; - e_key0.fib_index = rx_fib_index0; - if (snat_static_mapping_match (sm, e_key0, &l_key0, 1, 0, - &twice_nat0, &lb0, 0, - &identity_nat0)) - { - /* - * Send DHCP packets to the ipv4 stack, or we won't - * be able to use dhcp client on the outside interface - */ - if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_UDP - && (udp0->dst_port - == - clib_host_to_net_u16 - (UDP_DST_PORT_dhcp_to_client)))) - { - goto trace0; - } - - if (!sm->forwarding_enabled) - { - b0->error = - node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION]; - next0 = NAT_NEXT_DROP; - } - else - { - if (next_src_nat (sm, ip0, ip0->protocol, - udp0->src_port, udp0->dst_port, - thread_index, rx_fib_index0)) - { - next0 = NAT_NEXT_IN2OUT_ED_FAST_PATH; - goto trace0; - } - if (sm->num_workers > 1) - 
create_bypass_for_fwd_worker (sm, ip0, - rx_fib_index0); - else - create_bypass_for_fwd (sm, ip0, rx_fib_index0, - thread_index); - reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE; - nat_ip4_reass_get_frags (reass0, - &fragments_to_loopback); - } - goto trace0; - } - - if (PREDICT_FALSE (identity_nat0)) - { - reass0->flags |= NAT_REASS_FLAG_ED_DONT_TRANSLATE; - goto trace0; - } - - if ((proto0 == SNAT_PROTOCOL_TCP) && !tcp_is_init (tcp0)) - { - b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN]; - next0 = NAT_NEXT_DROP; - goto trace0; - } - - /* Create session initiated by host from external network */ - s0 = create_session_for_static_mapping_ed (sm, b0, l_key0, - e_key0, node, - thread_index, - twice_nat0, lb0, - now); - if (!s0) - { - b0->error = - node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION]; - next0 = NAT_NEXT_DROP; - goto trace0; - } - reass0->sess_index = s0 - per_thread_data->sessions; - reass0->thread_index = thread_index; - } - else - { - s0 = pool_elt_at_index (per_thread_data->sessions, - value0.value); - reass0->sess_index = value0.value; - } - nat_ip4_reass_get_frags (reass0, &fragments_to_loopback); - } - else - { - if (reass0->flags & NAT_REASS_FLAG_ED_DONT_TRANSLATE) - goto trace0; - if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0)) - { - if (nat_ip4_reass_add_fragment - (thread_index, reass0, bi0, &fragments_to_drop)) - { - b0->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_FRAG]; - nat_elog_notice - ("maximum fragments per reassembly exceeded"); - next0 = NAT_NEXT_DROP; - goto trace0; - } - cached0 = 1; - goto trace0; - } - s0 = pool_elt_at_index (per_thread_data->sessions, - reass0->sess_index); - } - - old_addr0 = ip0->dst_address.as_u32; - ip0->dst_address = s0->in2out.addr; - new_addr0 = ip0->dst_address.as_u32; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; - - sum0 = ip0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, - dst_address /* changed member */ ); - if (PREDICT_FALSE 
(is_twice_nat_session (s0))) - sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, - s0->ext_host_nat_addr.as_u32, ip4_header_t, - src_address); - ip0->checksum = ip_csum_fold (sum0); - - if (PREDICT_FALSE (ip4_is_first_fragment (ip0))) - { - old_port0 = udp0->dst_port; - new_port0 = udp0->dst_port = s0->in2out.port; - - if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP)) - { - sum0 = tcp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, - dst_address /* changed member */ ); - - sum0 = ip_csum_update (sum0, old_port0, new_port0, - ip4_header_t /* cheat */ , - length /* changed member */ ); - if (is_twice_nat_session (s0)) - { - sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, - s0->ext_host_nat_addr.as_u32, - ip4_header_t, dst_address); - sum0 = ip_csum_update (sum0, tcp0->src_port, - s0->ext_host_nat_port, - ip4_header_t, length); - tcp0->src_port = s0->ext_host_nat_port; - ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; - } - tcp0->checksum = ip_csum_fold (sum0); - } - else if (udp0->checksum) - { - sum0 = udp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, - length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, - s0->ext_host_nat_addr.as_u32, - ip4_header_t, dst_address); - sum0 = ip_csum_update (sum0, udp0->src_port, - s0->ext_host_nat_port, - ip4_header_t, length); - udp0->src_port = s0->ext_host_nat_port; - ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; - } - udp0->checksum = ip_csum_fold (sum0); - } - else - { - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - udp0->src_port = s0->ext_host_nat_port; - ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; - } - } - } - - /* Accounting */ - nat44_session_update_counters (s0, now, - vlib_buffer_length_in_chain (vm, b0), - thread_index); - /* Per-user LRU list maintenance */ - 
nat44_session_update_lru (sm, s0, thread_index); - - trace0: - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) - { - nat44_reass_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - t->cached = cached0; - t->sw_if_index = sw_if_index0; - t->next_index = next0; - } - - if (cached0) - { - n_left_to_next++; - to_next--; - } - else - { - pkts_processed += next0 != NAT_NEXT_DROP; - - /* verify speculative enqueue, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - if (n_left_from == 0 && vec_len (fragments_to_loopback)) - { - from = vlib_frame_vector_args (frame); - u32 len = vec_len (fragments_to_loopback); - if (len <= VLIB_FRAME_SIZE) - { - clib_memcpy_fast (from, fragments_to_loopback, - sizeof (u32) * len); - n_left_from = len; - vec_reset_length (fragments_to_loopback); - } - else - { - clib_memcpy_fast (from, fragments_to_loopback + - (len - VLIB_FRAME_SIZE), - sizeof (u32) * VLIB_FRAME_SIZE); - n_left_from = VLIB_FRAME_SIZE; - _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE; - } - } - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, sm->ed_out2in_reass_node_index, - NAT_OUT2IN_ED_ERROR_OUT2IN_PACKETS, - pkts_processed); - - nat_send_all_to_node (vm, fragments_to_drop, node, - &node->errors[NAT_OUT2IN_ED_ERROR_DROP_FRAGMENT], - NAT_NEXT_DROP); - - vec_free (fragments_to_drop); - vec_free (fragments_to_loopback); - return frame->n_vectors; -} - VLIB_NODE_FN (nat44_ed_out2in_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -1979,25 +1673,6 @@ VLIB_REGISTER_NODE (nat44_ed_out2in_slowpath_node) = { }; /* *INDENT-ON* */ -VLIB_NODE_FN (nat44_ed_out2in_reass_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return nat44_ed_out2in_reass_node_fn_inline (vm, node, frame); -} - -/* *INDENT-OFF* */ 
-VLIB_REGISTER_NODE (nat44_ed_out2in_reass_node) = { - .name = "nat44-ed-out2in-reass", - .vector_size = sizeof (u32), - .sibling_of = "nat-default", - .format_trace = format_nat44_reass_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN(nat_out2in_ed_error_strings), - .error_strings = nat_out2in_ed_error_strings, -}; -/* *INDENT-ON* */ - static u8 * format_nat_pre_trace (u8 * s, va_list * args) { diff --git a/src/plugins/nat/test/test_nat.py b/src/plugins/nat/test/test_nat.py index 0daa61042c1..46b97c05dbe 100644 --- a/src/plugins/nat/test/test_nat.py +++ b/src/plugins/nat/test/test_nat.py @@ -31,6 +31,7 @@ from scapy.all import bind_layers, Packet, ByteEnumField, ShortField, \ IPField, IntField, LongField, XByteField, FlagsField, FieldLenField, \ PacketListField from ipaddress import IPv6Network +from util import ppc, ppp # NAT HA protocol event data @@ -168,10 +169,6 @@ class MethodHolder(VppTestCase): last_ip_address=addr.ip_address, vrf_id=0xFFFFFFFF, flags=addr.flags) - self.vapi.nat_set_reass(timeout=2, max_reass=1024, max_frag=5, - drop_frag=0) - self.vapi.nat_set_reass(timeout=2, max_reass=1024, max_frag=5, - drop_frag=0, is_ip6=1) self.verify_no_nat44_user() self.vapi.nat_set_timeouts(udp=300, tcp_established=7440, tcp_transitory=240, icmp=60) @@ -762,6 +759,7 @@ class MethodHolder(VppTestCase): proto=frags[0][IP].proto) if ip.proto == IP_PROTOS.tcp: p = (ip / TCP(buffer.getvalue())) + self.logger.debug(ppp("Reassembled:", p)) self.assert_tcp_checksum_valid(p) elif ip.proto == IP_PROTOS.udp: p = (ip / UDP(buffer.getvalue()[:8]) / @@ -792,6 +790,7 @@ class MethodHolder(VppTestCase): p = (ip / TCP(buffer.getvalue())) elif ip.nh == IP_PROTOS.udp: p = (ip / UDP(buffer.getvalue())) + self.logger.debug(ppp("Reassembled:", p)) self.assert_packet_checksums_valid(p) return p @@ -1154,9 +1153,6 @@ class MethodHolder(VppTestCase): data = b"A" * 16 + b"B" * 16 + b"C" * 3 self.port_in = random.randint(1025, 65535) - reass = self.vapi.nat_reass_dump() - 
reass_n_start = len(reass) - # in2out pkts = self.create_stream_frag(self.pg0, self.pg1.remote_ip4, @@ -1221,11 +1217,6 @@ class MethodHolder(VppTestCase): self.assertEqual(p[layer].id, self.port_in) self.assertEqual(data, p[Raw].load) - reass = self.vapi.nat_reass_dump() - reass_n_end = len(reass) - - self.assertEqual(reass_n_end - reass_n_start, 2) - def frag_in_order_in_plus_out(self, proto=IP_PROTOS.tcp): layer = self.proto2layer(proto) @@ -1236,9 +1227,6 @@ class MethodHolder(VppTestCase): self.port_in = random.randint(1025, 65535) for i in range(2): - reass = self.vapi.nat_reass_dump() - reass_n_start = len(reass) - # out2in pkts = self.create_stream_frag(self.pg0, self.server_out_addr, @@ -1290,11 +1278,6 @@ class MethodHolder(VppTestCase): self.assertEqual(p[layer].id, self.port_in) self.assertEqual(data, p[Raw].load) - reass = self.vapi.nat_reass_dump() - reass_n_end = len(reass) - - self.assertEqual(reass_n_end - reass_n_start, 2) - def reass_hairpinning(self, proto=IP_PROTOS.tcp): layer = self.proto2layer(proto) @@ -3581,25 +3564,6 @@ class TestNAT44(MethodHolder): self.verify_no_nat44_user() - def test_set_get_reass(self): - """ NAT44 set/get virtual fragmentation reassembly """ - reas_cfg1 = self.vapi.nat_get_reass() - - self.vapi.nat_set_reass(timeout=reas_cfg1.ip4_timeout + 5, - max_reass=reas_cfg1.ip4_max_reass * 2, - max_frag=reas_cfg1.ip4_max_frag * 2, - drop_frag=0) - - reas_cfg2 = self.vapi.nat_get_reass() - - self.assertEqual(reas_cfg1.ip4_timeout + 5, reas_cfg2.ip4_timeout) - self.assertEqual(reas_cfg1.ip4_max_reass * 2, reas_cfg2.ip4_max_reass) - self.assertEqual(reas_cfg1.ip4_max_frag * 2, reas_cfg2.ip4_max_frag) - - self.vapi.nat_set_reass(timeout=2, max_reass=1024, max_frag=5, - drop_frag=1) - self.assertTrue(self.vapi.nat_get_reass().ip4_drop_frag) - def test_frag_in_order(self): """ NAT44 translate fragments arriving in order """ @@ -3612,22 +3576,10 @@ class TestNAT44(MethodHolder): sw_if_index=self.pg1.sw_if_index, is_add=1) - 
reas_cfg1 = self.vapi.nat_get_reass() - # this test was intermittently failing in some cases - # until we temporarily bump the reassembly timeouts - self.vapi.nat_set_reass(timeout=20, max_reass=1024, max_frag=5, - drop_frag=0) - self.frag_in_order(proto=IP_PROTOS.tcp) self.frag_in_order(proto=IP_PROTOS.udp) self.frag_in_order(proto=IP_PROTOS.icmp) - # restore the reassembly timeouts - self.vapi.nat_set_reass(timeout=reas_cfg1.ip4_timeout, - max_reass=reas_cfg1.ip4_max_reass, - max_frag=reas_cfg1.ip4_max_frag, - drop_frag=reas_cfg1.ip4_drop_frag) - def test_frag_forwarding(self): """ NAT44 forwarding fragment test """ self.vapi.nat44_add_del_interface_addr( @@ -3772,60 +3724,6 @@ class TestNAT44(MethodHolder): self.assertGreaterEqual(tcp.sport, 1025) self.assertLessEqual(tcp.sport, 1027) - def test_ipfix_max_frags(self): - """ IPFIX logging maximum fragments pending reassembly exceeded """ - self.nat44_add_address(self.nat_addr) - flags = self.config_flags.NAT_IS_INSIDE - self.vapi.nat44_interface_add_del_feature( - sw_if_index=self.pg0.sw_if_index, - flags=flags, is_add=1) - self.vapi.nat44_interface_add_del_feature( - sw_if_index=self.pg1.sw_if_index, - is_add=1) - self.vapi.nat_set_reass(timeout=2, max_reass=1024, max_frag=1, - drop_frag=0) - self.vapi.set_ipfix_exporter(collector_address=self.pg3.remote_ip4, - src_address=self.pg3.local_ip4, - path_mtu=512, - template_interval=10) - self.vapi.nat_ipfix_enable_disable(domain_id=self.ipfix_domain_id, - src_port=self.ipfix_src_port, - enable=1) - - data = b"A" * 4 + b"B" * 16 + b"C" * 3 - self.tcp_port_in = random.randint(1025, 65535) - pkts = self.create_stream_frag(self.pg0, - self.pg1.remote_ip4, - self.tcp_port_in, - 20, - data) - pkts.reverse() - self.pg0.add_stream(pkts) - self.pg_enable_capture(self.pg_interfaces) - self.pg_start() - self.pg1.assert_nothing_captured() - sleep(1) - self.vapi.ipfix_flush() - capture = self.pg3.get_capture(9) - ipfix = IPFIXDecoder() - # first load template - for p in capture: 
- self.assertTrue(p.haslayer(IPFIX)) - self.assertEqual(p[IP].src, self.pg3.local_ip4) - self.assertEqual(p[IP].dst, self.pg3.remote_ip4) - self.assertEqual(p[UDP].sport, self.ipfix_src_port) - self.assertEqual(p[UDP].dport, 4739) - self.assertEqual(p[IPFIX].observationDomainID, - self.ipfix_domain_id) - if p.haslayer(Template): - ipfix.add_template(p.getlayer(Template)) - # verify events in data set - for p in capture: - if p.haslayer(Data): - data = ipfix.decode_data_set(p.getlayer(Set)) - self.verify_ipfix_max_fragments_ip4(data, 1, - self.pg0.remote_ip4n) - def test_multiple_outside_vrf(self): """ Multiple outside VRF """ vrf_id1 = 1 @@ -4323,7 +4221,6 @@ class TestNAT44(MethodHolder): self.logger.info(self.vapi.cli("show nat44 static mappings")) self.logger.info(self.vapi.cli("show nat44 interface address")) self.logger.info(self.vapi.cli("show nat44 sessions detail")) - self.logger.info(self.vapi.cli("show nat virtual-reassembly")) self.logger.info(self.vapi.cli("show nat44 hash tables detail")) self.logger.info(self.vapi.cli("show nat timeouts")) self.logger.info( @@ -4565,17 +4462,7 @@ class TestNAT44EndpointDependent(MethodHolder): sw_if_index=self.pg1.sw_if_index, is_add=1) self.vapi.nat44_forwarding_enable_disable(enable=True) - reas_cfg1 = self.vapi.nat_get_reass() - # this test was intermittently failing in some cases - # until we temporarily bump the reassembly timeouts - self.vapi.nat_set_reass(timeout=20, max_reass=1024, max_frag=5, - drop_frag=0) self.frag_in_order(proto=IP_PROTOS.tcp, dont_translate=True) - # restore the reassembly timeouts - self.vapi.nat_set_reass(timeout=reas_cfg1.ip4_timeout, - max_reass=reas_cfg1.ip4_max_reass, - max_frag=reas_cfg1.ip4_max_frag, - drop_frag=reas_cfg1.ip4_drop_frag) def test_frag_out_of_order(self): """ NAT44 translate fragments arriving out of order """ @@ -4643,9 +4530,6 @@ class TestNAT44EndpointDependent(MethodHolder): self.server_out_addr, proto=IP_PROTOS.icmp) - self.vapi.nat_set_reass(timeout=10, 
max_reass=1024, max_frag=5, - drop_frag=0) - self.frag_in_order_in_plus_out(proto=IP_PROTOS.tcp) self.frag_in_order_in_plus_out(proto=IP_PROTOS.udp) self.frag_in_order_in_plus_out(proto=IP_PROTOS.icmp) @@ -4690,9 +4574,6 @@ class TestNAT44EndpointDependent(MethodHolder): self.server_out_addr, proto=IP_PROTOS.icmp) - self.vapi.nat_set_reass(timeout=10, max_reass=1024, max_frag=5, - drop_frag=0) - self.frag_out_of_order_in_plus_out(proto=IP_PROTOS.tcp) self.frag_out_of_order_in_plus_out(proto=IP_PROTOS.udp) self.frag_out_of_order_in_plus_out(proto=IP_PROTOS.icmp) @@ -8756,9 +8637,6 @@ class TestNAT64(MethodHolder): self.vapi.nat64_add_del_interface(is_add=1, flags=0, sw_if_index=self.pg1.sw_if_index) - reass = self.vapi.nat_reass_dump() - reass_n_start = len(reass) - # in2out data = b'a' * 200 pkts = self.create_stream_frag_ip6(self.pg0, self.pg1.remote_ip4, @@ -8786,17 +8664,13 @@ class TestNAT64(MethodHolder): self.pg_enable_capture(self.pg_interfaces) self.pg_start() frags = self.pg0.get_capture(len(pkts)) + self.logger.debug(ppc("Captured:", frags)) src = self.compose_ip6(self.pg1.remote_ip4, '64:ff9b::', 96) p = self.reass_frags_and_verify_ip6(frags, src, self.pg0.remote_ip6) self.assertEqual(p[TCP].sport, 20) self.assertEqual(p[TCP].dport, self.tcp_port_in) self.assertEqual(data, p[Raw].load) - reass = self.vapi.nat_reass_dump() - reass_n_end = len(reass) - - self.assertEqual(reass_n_end - reass_n_start, 2) - def test_reass_hairpinning(self): """ NAT64 fragments hairpinning """ data = b'a' * 200 @@ -8835,6 +8709,7 @@ class TestNAT64(MethodHolder): self.pg_enable_capture(self.pg_interfaces) self.pg_start() frags = self.pg0.get_capture(len(pkts)) + self.logger.debug(ppc("Captured:", frags)) p = self.reass_frags_and_verify_ip6(frags, nat_addr_ip6, server.ip6) self.assertNotEqual(p[TCP].sport, client_in_port) self.assertEqual(p[TCP].dport, server_in_port) @@ -9007,57 +8882,6 @@ class TestNAT64(MethodHolder): data = ipfix.decode_data_set(p.getlayer(Set)) 
self.verify_ipfix_max_bibs(data, max_bibs) - def test_ipfix_max_frags(self): - """ IPFIX logging maximum fragments pending reassembly exceeded """ - self.vapi.nat64_add_del_pool_addr_range(start_addr=self.nat_addr, - end_addr=self.nat_addr, - vrf_id=0xFFFFFFFF, - is_add=1) - flags = self.config_flags.NAT_IS_INSIDE - self.vapi.nat64_add_del_interface(is_add=1, flags=flags, - sw_if_index=self.pg0.sw_if_index) - self.vapi.nat64_add_del_interface(is_add=1, flags=0, - sw_if_index=self.pg1.sw_if_index) - self.vapi.nat_set_reass(timeout=2, max_reass=1024, max_frag=1, - drop_frag=0, is_ip6=1) - self.vapi.set_ipfix_exporter(collector_address=self.pg3.remote_ip4, - src_address=self.pg3.local_ip4, - path_mtu=512, - template_interval=10) - self.vapi.nat_ipfix_enable_disable(domain_id=self.ipfix_domain_id, - src_port=self.ipfix_src_port, - enable=1) - - data = b'a' * 200 - pkts = self.create_stream_frag_ip6(self.pg0, self.pg1.remote_ip4, - self.tcp_port_in, 20, data) - pkts.reverse() - self.pg0.add_stream(pkts) - self.pg_enable_capture(self.pg_interfaces) - self.pg_start() - self.pg1.assert_nothing_captured() - sleep(1) - self.vapi.ipfix_flush() - capture = self.pg3.get_capture(9) - ipfix = IPFIXDecoder() - # first load template - for p in capture: - self.assertTrue(p.haslayer(IPFIX)) - self.assertEqual(p[IP].src, self.pg3.local_ip4) - self.assertEqual(p[IP].dst, self.pg3.remote_ip4) - self.assertEqual(p[UDP].sport, self.ipfix_src_port) - self.assertEqual(p[UDP].dport, 4739) - self.assertEqual(p[IPFIX].observationDomainID, - self.ipfix_domain_id) - if p.haslayer(Template): - ipfix.add_template(p.getlayer(Template)) - # verify events in data set - for p in capture: - if p.haslayer(Data): - data = ipfix.decode_data_set(p.getlayer(Set)) - self.verify_ipfix_max_fragments_ip6(data, 1, - self.pg0.remote_ip6n) - def test_ipfix_bib_ses(self): """ IPFIX logging NAT64 BIB/session create and delete events """ self.tcp_port_in = random.randint(1025, 65535) @@ -9257,7 +9081,6 @@ class 
TestNAT64(MethodHolder): self.logger.info(self.vapi.cli("show nat64 prefix")) self.logger.info(self.vapi.cli("show nat64 bib all")) self.logger.info(self.vapi.cli("show nat64 session table all")) - self.logger.info(self.vapi.cli("show nat virtual-reassembly")) class TestDSlite(MethodHolder): @@ -9625,6 +9448,7 @@ class TestNAT66(MethodHolder): self.pg_enable_capture(self.pg_interfaces) self.pg_start() capture = self.pg1.get_capture(len(pkts)) + for packet in capture: try: self.assertEqual(packet[IPv6].src, self.nat_addr) diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 59a8256b902..b174587efdd 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -185,9 +185,8 @@ typedef struct /* reassembly */ union { - /* group input/output/handoff to simplify the code, this way: - * we can handoff while keeping input variables intact - * and also we can write the output and still use next_index later */ + /* group input/output to simplify the code, this way + * we can handoff while keeping input variables intact */ struct { /* input variables */ @@ -201,23 +200,28 @@ typedef struct { u16 owner_thread_index; }; - /* output variables */ - struct + }; + /* output variables */ + struct + { + union { - union + /* shallow virtual reassembly output variables */ + struct { - /* shallow virtual reassembly output variables */ - struct - { - u8 ip_proto; /* protocol in ip header */ - u16 l4_src_port; /* tcp/udp/icmp src port */ - u16 l4_dst_port; /* tcp/udp/icmp dst port */ - }; - /* full reassembly output variables */ - struct - { - u16 estimated_mtu; /* estimated MTU calculated during reassembly */ - }; + u8 ip_proto; /* protocol in ip header */ + u8 icmp_type_or_tcp_flags; + u8 is_non_first_fragment; + u8 save_rewrite_length; + u16 l4_src_port; /* tcp/udp/icmp src port */ + u16 l4_dst_port; /* tcp/udp/icmp dst port */ + u32 tcp_ack_number; + u32 tcp_seq_number; + }; + /* full reassembly output variables */ + struct + { + u16 estimated_mtu; /* estimated MTU calculated during 
reassembly */ }; }; }; @@ -384,7 +388,10 @@ typedef struct STATIC_ASSERT (STRUCT_SIZE_OF (vnet_buffer_opaque_t, ip.save_rewrite_length) == STRUCT_SIZE_OF (vnet_buffer_opaque_t, - mpls.save_rewrite_length) + ip.reass.save_rewrite_length) + && STRUCT_SIZE_OF (vnet_buffer_opaque_t, + ip.reass.save_rewrite_length) == + STRUCT_SIZE_OF (vnet_buffer_opaque_t, mpls.save_rewrite_length) && STRUCT_SIZE_OF (vnet_buffer_opaque_t, mpls.save_rewrite_length) == 1 && VNET_REWRITE_TOTAL_BYTES < UINT8_MAX, diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h index 79cf22c4d70..1d3607ea34c 100644 --- a/src/vnet/ip/ip4_packet.h +++ b/src/vnet/ip/ip4_packet.h @@ -196,13 +196,13 @@ typedef union #define IP4_ROUTER_ALERT_OPTION 20 -always_inline int +always_inline u16 ip4_get_fragment_offset (const ip4_header_t * i) { return clib_net_to_host_u16 (i->flags_and_fragment_offset) & 0x1fff; } -always_inline int +always_inline u16 ip4_get_fragment_more (const ip4_header_t * i) { return clib_net_to_host_u16 (i->flags_and_fragment_offset) & diff --git a/src/vnet/ip/ip4_to_ip6.h b/src/vnet/ip/ip4_to_ip6.h index b1905e4154b..21538a90e44 100644 --- a/src/vnet/ip/ip4_to_ip6.h +++ b/src/vnet/ip/ip4_to_ip6.h @@ -25,8 +25,8 @@ /** * IPv4 to IPv6 set call back function type */ -typedef int (*ip4_to_ip6_set_fn_t) (ip4_header_t * ip4, ip6_header_t * ip6, - void *ctx); +typedef int (*ip4_to_ip6_set_fn_t) (vlib_buffer_t * b, ip4_header_t * ip4, + ip6_header_t * ip6, void *ctx); /* *INDENT-OFF* */ static u8 icmp_to_icmp6_updater_pointer_table[] = @@ -261,7 +261,7 @@ icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx, sizeof (*inner_frag)); ip6 = vlib_buffer_get_current (p); memmove (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4, - 20 + 8); + 20 + 8); ip4 = (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)); icmp = (icmp46_header_t *) (ip4 + 1); @@ -287,7 +287,7 @@ icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx, vlib_buffer_advance (p, -2 
* (sizeof (*ip6) - sizeof (*ip4))); ip6 = vlib_buffer_get_current (p); memmove (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4, - 20 + 8); + 20 + 8); ip4 = (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)); icmp = (icmp46_header_t *) u8_ptr_add (ip4, sizeof (*ip4)); @@ -340,7 +340,7 @@ icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx, inner_ip6->hop_limit = inner_ip4->ttl; inner_ip6->protocol = inner_ip4->protocol; - if ((rv = inner_fn (inner_ip4, inner_ip6, inner_ctx)) != 0) + if ((rv = inner_fn (p, inner_ip4, inner_ip6, inner_ctx)) != 0) return rv; if (PREDICT_FALSE (inner_frag != NULL)) @@ -411,7 +411,7 @@ icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx, ip6->hop_limit = ip4->ttl; ip6->protocol = IP_PROTOCOL_ICMP6; - if ((rv = fn (ip4, ip6, ctx)) != 0) + if ((rv = fn (p, ip4, ip6, ctx)) != 0) return rv; //Truncate when the packet exceeds the minimal IPv6 MTU @@ -437,217 +437,4 @@ icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx, return 0; } -/** - * @brief Translate IPv4 fragmented packet to IPv6. - * - * @param p Buffer to translate. - * @param fn The function to translate header. - * @param ctx A context passed in the header translate function. - * - * @returns 0 on success, non-zero value otherwise. - */ -always_inline int -ip4_to_ip6_fragmented (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx) -{ - ip4_header_t *ip4; - ip6_header_t *ip6; - ip6_frag_hdr_t *frag; - int rv; - - ip4 = vlib_buffer_get_current (p); - frag = (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); - ip6 = - (ip6_header_t *) u8_ptr_add (ip4, - sizeof (*ip4) - sizeof (*frag) - - sizeof (*ip6)); - vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); - - //We know that the protocol was one of ICMP, TCP or UDP - //because the first fragment was found and cached - frag->next_hdr = - (ip4->protocol == IP_PROTOCOL_ICMP) ? 
IP_PROTOCOL_ICMP6 : ip4->protocol; - frag->identification = frag_id_4to6 (ip4->fragment_id); - frag->rsv = 0; - frag->fragment_offset_and_more = - ip6_frag_hdr_offset_and_more (ip4_get_fragment_offset (ip4), - clib_net_to_host_u16 - (ip4->flags_and_fragment_offset) & - IP4_HEADER_FLAG_MORE_FRAGMENTS); - - ip6->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); - ip6->payload_length = - clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) - - sizeof (*ip4) + sizeof (*frag)); - ip6->hop_limit = ip4->ttl; - ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; - - if ((rv = fn (ip4, ip6, ctx)) != 0) - return rv; - - return 0; -} - -/** - * @brief Translate IPv4 UDP/TCP packet to IPv6. - * - * @param p Buffer to translate. - * @param fn The function to translate header. - * @param ctx A context passed in the header translate function. - * - * @returns 0 on success, non-zero value otherwise. - */ -always_inline int -ip4_to_ip6_tcp_udp (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx) -{ - ip4_header_t *ip4; - ip6_header_t *ip6; - ip_csum_t csum; - u16 *checksum; - ip6_frag_hdr_t *frag; - u32 frag_id; - int rv; - ip4_address_t old_src, old_dst; - - ip4 = vlib_buffer_get_current (p); - - if (ip4->protocol == IP_PROTOCOL_UDP) - { - udp_header_t *udp = ip4_next_header (ip4); - checksum = &udp->checksum; - - //UDP checksum is optional over IPv4 but mandatory for IPv6 - //We do not check udp->length sanity but use our safe computed value instead - if (PREDICT_FALSE (!*checksum)) - { - u16 udp_len = clib_host_to_net_u16 (ip4->length) - sizeof (*ip4); - csum = ip_incremental_checksum (0, udp, udp_len); - csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len)); - csum = - ip_csum_with_carry (csum, clib_host_to_net_u16 (IP_PROTOCOL_UDP)); - csum = ip_csum_with_carry (csum, *((u64 *) (&ip4->src_address))); - *checksum = ~ip_csum_fold (csum); - } - } - else - { - tcp_header_t *tcp = ip4_next_header (ip4); - checksum = 
&tcp->checksum; - } - - old_src.as_u32 = ip4->src_address.as_u32; - old_dst.as_u32 = ip4->dst_address.as_u32; - - // Deal with fragmented packets - if (PREDICT_FALSE (ip4->flags_and_fragment_offset & - clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS))) - { - ip6 = - (ip6_header_t *) u8_ptr_add (ip4, - sizeof (*ip4) - sizeof (*ip6) - - sizeof (*frag)); - frag = - (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); - frag_id = frag_id_4to6 (ip4->fragment_id); - vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); - } - else - { - ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6)); - vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6)); - frag = NULL; - } - - ip6->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); - ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4)); - ip6->hop_limit = ip4->ttl; - ip6->protocol = ip4->protocol; - - if (PREDICT_FALSE (frag != NULL)) - { - frag->next_hdr = ip6->protocol; - frag->identification = frag_id; - frag->rsv = 0; - frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (0, 1); - ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; - ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag)); - } - - if ((rv = fn (ip4, ip6, ctx)) != 0) - return rv; - - csum = ip_csum_sub_even (*checksum, old_src.as_u32); - csum = ip_csum_sub_even (csum, old_dst.as_u32); - csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]); - csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]); - csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]); - csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]); - *checksum = ip_csum_fold (csum); - - return 0; -} - -/** - * @brief Translate IPv4 packet to IPv6 (IP header only). - * - * @param p Buffer to translate. - * @param fn The function to translate header. - * @param ctx A context passed in the header translate function. 
- * - * @returns 0 on success, non-zero value otherwise. - */ -always_inline int -ip4_to_ip6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx) -{ - ip4_header_t *ip4; - ip6_header_t *ip6; - ip6_frag_hdr_t *frag; - u32 frag_id; - int rv; - - ip4 = vlib_buffer_get_current (p); - - // Deal with fragmented packets - if (PREDICT_FALSE (ip4->flags_and_fragment_offset & - clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS))) - { - ip6 = - (ip6_header_t *) u8_ptr_add (ip4, - sizeof (*ip4) - sizeof (*ip6) - - sizeof (*frag)); - frag = - (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); - frag_id = frag_id_4to6 (ip4->fragment_id); - vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); - } - else - { - ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6)); - vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6)); - frag = NULL; - } - - ip6->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); - ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4)); - ip6->hop_limit = ip4->ttl; - ip6->protocol = ip4->protocol; - - if (PREDICT_FALSE (frag != NULL)) - { - frag->next_hdr = ip6->protocol; - frag->identification = frag_id; - frag->rsv = 0; - frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (0, 1); - ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; - ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag)); - } - - if ((rv = fn (ip4, ip6, ctx)) != 0) - return rv; - - return 0; -} - #endif /* __included_ip4_to_ip6_h__ */ diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h index 2ba55b75e09..e823214dac9 100644 --- a/src/vnet/ip/ip6_packet.h +++ b/src/vnet/ip/ip6_packet.h @@ -579,6 +579,67 @@ ip6_ext_header_find (vlib_main_t * vm, vlib_buffer_t * b, return result; } +/* + * walk extension headers, looking for a specific extension header and last + * extension header, calculating length of all extension headers + * + * @param vm + * 
@param b buffer to limit search to + * @param ip6_header ipv6 header + * @param find_hdr extension header to look for (ignored if ext_hdr is NULL) + * @param length[out] length of all extension headers + * @param ext_hdr[out] extension header of type find_hdr (may be NULL) + * @param last_ext_hdr[out] last extension header (may be NULL) + * + * @return 0 on success, -1 on failure (ext headers crossing buffer boundary) + */ +always_inline int +ip6_walk_ext_hdr (vlib_main_t * vm, vlib_buffer_t * b, + const ip6_header_t * ip6_header, u8 find_hdr, u32 * length, + ip6_ext_header_t ** ext_hdr, + ip6_ext_header_t ** last_ext_hdr) +{ + if (!ip6_ext_hdr (ip6_header->protocol)) + { + *length = 0; + *ext_hdr = NULL; + *last_ext_hdr = NULL; + return 0; + } + *length = 0; + ip6_ext_header_t *h = (void *) (ip6_header + 1); + if (!vlib_object_within_buffer_data (vm, b, h, ip6_ext_header_len (h))) + { + return -1; + } + *length += ip6_ext_header_len (h); + *last_ext_hdr = h; + *ext_hdr = NULL; + if (ip6_header->protocol == find_hdr) + { + *ext_hdr = h; + } + while (ip6_ext_hdr (h->next_hdr)) + { + if (h->next_hdr == find_hdr) + { + h = ip6_ext_next_header (h); + *ext_hdr = h; + } + else + { + h = ip6_ext_next_header (h); + } + if (!vlib_object_within_buffer_data (vm, b, h, ip6_ext_header_len (h))) + { + return -1; + } + *length += ip6_ext_header_len (h); + *last_ext_hdr = h; + } + return 0; +} + /* *INDENT-OFF* */ typedef CLIB_PACKED (struct { u8 next_hdr; diff --git a/src/vnet/ip/ip6_to_ip4.h b/src/vnet/ip/ip6_to_ip4.h index d13a0c1f104..17a11e6df0a 100644 --- a/src/vnet/ip/ip6_to_ip4.h +++ b/src/vnet/ip/ip6_to_ip4.h @@ -24,8 +24,12 @@ /** * IPv6 to IPv4 set call back function type */ -typedef int (*ip6_to_ip4_set_fn_t) (ip6_header_t * ip6, ip4_header_t * ip4, - void *ctx); +typedef int (*ip6_to_ip4_icmp_set_fn_t) (ip6_header_t * ip6, + ip4_header_t * ip4, void *ctx); + +typedef int (*ip6_to_ip4_tcp_udp_set_fn_t) (vlib_buffer_t * b, + ip6_header_t * ip6, + ip4_header_t * ip4, 
void *ctx); /* *INDENT-OFF* */ static u8 icmp6_to_icmp_updater_pointer_table[] = @@ -47,6 +51,8 @@ static u8 icmp6_to_icmp_updater_pointer_table[] = /** * @brief Parse some useful information from IPv6 header. * + * @param vm vlib main + * @param b vlib buffer * @param ip6 IPv6 header. * @param buff_len Buffer length. * @param l4_protocol L4 protocol number. @@ -56,21 +62,37 @@ static u8 icmp6_to_icmp_updater_pointer_table[] = * @returns 0 on success, non-zero value otherwise. */ static_always_inline int -ip6_parse (const ip6_header_t * ip6, u32 buff_len, - u8 * l4_protocol, u16 * l4_offset, u16 * frag_hdr_offset) +ip6_parse (vlib_main_t * vm, vlib_buffer_t * b, const ip6_header_t * ip6, + u32 buff_len, u8 * l4_protocol, u16 * l4_offset, + u16 * frag_hdr_offset) { - if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) + ip6_ext_header_t *last_hdr, *frag_hdr; + u32 length; + if (ip6_walk_ext_hdr + (vm, b, ip6, IP_PROTOCOL_IPV6_FRAGMENTATION, &length, &frag_hdr, + &last_hdr)) { - *l4_protocol = ((ip6_frag_hdr_t *) (ip6 + 1))->next_hdr; - *frag_hdr_offset = sizeof (*ip6); - *l4_offset = sizeof (*ip6) + sizeof (ip6_frag_hdr_t); + return -1; + } + + if (length > 0) + { + if (frag_hdr) + { + *frag_hdr_offset = (u8 *) frag_hdr - (u8 *) ip6; + } + else + { + *frag_hdr_offset = 0; + } + *l4_protocol = last_hdr->next_hdr; } else { - *l4_protocol = ip6->protocol; *frag_hdr_offset = 0; - *l4_offset = sizeof (*ip6); + *l4_protocol = ip6->protocol; } + *l4_offset = sizeof (*ip6) + length; return (buff_len < (*l4_offset + 4)) || (clib_net_to_host_u16 (ip6->payload_length) < @@ -78,23 +100,32 @@ ip6_parse (const ip6_header_t * ip6, u32 buff_len, } /** - * @brief Get TCP/UDP port number or ICMP id from IPv6 packet. + * @brief Get L4 information like port number or ICMP id from IPv6 packet. * * @param ip6 IPv6 header. - * @param sender 1 get sender port, 0 get receiver port. * @param buffer_len Buffer length. 
+ * @param ip_protocol L4 protocol + * @param src_port L4 src port or icmp id + * @param dst_port L4 dst port or icmp id + * @param icmp_type_or_tcp_flags ICMP type or TCP flags, if applicable + * @param tcp_ack_number TCP ack number, if applicable + * @param tcp_seq_number TCP seq number, if applicable * - * @returns Port number on success, 0 otherwise. + * @returns 1 on success, 0 otherwise. */ always_inline u16 -ip6_get_port (ip6_header_t * ip6, u8 sender, u16 buffer_len) +ip6_get_port (vlib_main_t * vm, vlib_buffer_t * b, ip6_header_t * ip6, + u16 buffer_len, u8 * ip_protocol, u16 * src_port, + u16 * dst_port, u8 * icmp_type_or_tcp_flags, + u32 * tcp_ack_number, u32 * tcp_seq_number) { u8 l4_protocol; u16 l4_offset; u16 frag_offset; u8 *l4; - if (ip6_parse (ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset)) + if (ip6_parse + (vm, b, ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset)) return 0; if (frag_offset && @@ -102,26 +133,45 @@ ip6_get_port (ip6_header_t * ip6, u8 sender, u16 buffer_len) u8_ptr_add (ip6, frag_offset)))) return 0; //Can't deal with non-first fragment for now + if (ip_protocol) + { + *ip_protocol = l4_protocol; + } l4 = u8_ptr_add (ip6, l4_offset); if (l4_protocol == IP_PROTOCOL_TCP || l4_protocol == IP_PROTOCOL_UDP) { - return (sender) ? 
((udp_header_t *) (l4))->src_port : ((udp_header_t - *) - (l4))->dst_port; + if (src_port) + *src_port = ((udp_header_t *) (l4))->src_port; + if (dst_port) + *dst_port = ((udp_header_t *) (l4))->dst_port; + if (icmp_type_or_tcp_flags && l4_protocol == IP_PROTOCOL_TCP) + *icmp_type_or_tcp_flags = ((tcp_header_t *) (l4))->flags; + if (tcp_ack_number && l4_protocol == IP_PROTOCOL_TCP) + *tcp_ack_number = ((tcp_header_t *) (l4))->ack_number; + if (tcp_seq_number && l4_protocol == IP_PROTOCOL_TCP) + *tcp_seq_number = ((tcp_header_t *) (l4))->seq_number; } else if (l4_protocol == IP_PROTOCOL_ICMP6) { icmp46_header_t *icmp = (icmp46_header_t *) (l4); + if (icmp_type_or_tcp_flags) + *icmp_type_or_tcp_flags = ((icmp46_header_t *) (l4))->type; if (icmp->type == ICMP6_echo_request) { - return (sender) ? ((u16 *) (icmp))[2] : -1; + if (src_port) + *src_port = ((u16 *) (icmp))[2]; + if (dst_port) + *dst_port = ((u16 *) (icmp))[2]; } else if (icmp->type == ICMP6_echo_reply) { - return (sender) ? -1 : ((u16 *) (icmp))[2]; + if (src_port) + *src_port = ((u16 *) (icmp))[2]; + if (dst_port) + *dst_port = ((u16 *) (icmp))[2]; } } - return 0; + return 1; } /** @@ -223,14 +273,14 @@ icmp6_to_icmp_header (icmp46_header_t * icmp, ip6_header_t ** inner_ip6) /** * @brief Translate TOS value from IPv6 to IPv4. * - * @param ip6 IPv6 header. + * @param ip_version_traffic_class_and_flow_label in network byte order * * @returns IPv4 TOS value. */ static_always_inline u8 -ip6_translate_tos (const ip6_header_t * ip6) +ip6_translate_tos (u32 ip_version_traffic_class_and_flow_label) { - return (clib_net_to_host_u32 (ip6->ip_version_traffic_class_and_flow_label) + return (clib_net_to_host_u32 (ip_version_traffic_class_and_flow_label) & 0x0ff00000) >> 20; } @@ -246,8 +296,9 @@ ip6_translate_tos (const ip6_header_t * ip6) * @returns 0 on success, non-zero value otherwise. 
*/ always_inline int -icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx, - ip6_to_ip4_set_fn_t inner_fn, void *inner_ctx) +icmp6_to_icmp (vlib_main_t * vm, vlib_buffer_t * p, + ip6_to_ip4_icmp_set_fn_t fn, void *ctx, + ip6_to_ip4_icmp_set_fn_t inner_fn, void *inner_ctx) { ip6_header_t *ip6, *inner_ip6; ip4_header_t *ip4, *inner_ip4; @@ -285,7 +336,7 @@ icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx, // TO // [ IPv4][IC][ IPv4][L4 header ... - if (ip6_parse (inner_ip6, ip6_pay_len - 8, + if (ip6_parse (vm, p, inner_ip6, ip6_pay_len - 8, &inner_protocol, &inner_l4_offset, &inner_frag_offset)) return -1; @@ -336,7 +387,9 @@ icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx, inner_ip4->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - inner_ip4->tos = ip6_translate_tos (inner_ip6); + inner_ip4->tos = + ip6_translate_tos + (inner_ip6->ip_version_traffic_class_and_flow_label); inner_ip4->length = u16_net_add (inner_ip6->payload_length, sizeof (*ip4) + sizeof (*ip6) - inner_l4_offset); @@ -389,7 +442,7 @@ icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx, ip4->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - ip4->tos = ip6_translate_tos (ip6); + ip4->tos = ip6_translate_tos (ip6->ip_version_traffic_class_and_flow_label); ip4->fragment_id = 0; ip4->flags_and_fragment_offset = 0; ip4->ttl = ip6->hop_limit; @@ -412,220 +465,6 @@ icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx, return 0; } -/** - * @brief Translate IPv6 fragmented packet to IPv4. - * - * @param p Buffer to translate. - * @param fn The function to translate header. - * @param ctx A context passed in the header translate function. - * - * @returns 0 on success, non-zero value otherwise. 
- */ -always_inline int -ip6_to_ip4_fragmented (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx) -{ - ip6_header_t *ip6; - ip6_frag_hdr_t *frag; - ip4_header_t *ip4; - u16 frag_id; - u8 frag_more; - u16 frag_offset; - u8 l4_protocol; - u16 l4_offset; - int rv; - - ip6 = vlib_buffer_get_current (p); - - if (ip6_parse - (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset)) - return -1; - - frag = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset); - ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4)); - vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); - - frag_id = frag_id_6to4 (frag->identification); - frag_more = ip6_frag_hdr_more (frag); - frag_offset = ip6_frag_hdr_offset (frag); - - if ((rv = fn (ip6, ip4, ctx)) != 0) - return rv; - - ip4->ip_version_and_header_length = - IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - ip4->tos = ip6_translate_tos (ip6); - ip4->length = u16_net_add (ip6->payload_length, - sizeof (*ip4) - l4_offset + sizeof (*ip6)); - ip4->fragment_id = frag_id; - ip4->flags_and_fragment_offset = - clib_host_to_net_u16 (frag_offset | - (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0)); - ip4->ttl = ip6->hop_limit; - ip4->protocol = - (l4_protocol == IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : l4_protocol; - ip4->checksum = ip4_header_checksum (ip4); - - return 0; -} - -/** - * @brief Translate IPv6 UDP/TCP packet to IPv4. - * - * @param p Buffer to translate. - * @param fn The function to translate header. - * @param ctx A context passed in the header translate function. - * - * @returns 0 on success, non-zero value otherwise. 
- */ -always_inline int -ip6_to_ip4_tcp_udp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx, - u8 udp_checksum) -{ - ip6_header_t *ip6; - u16 *checksum; - ip_csum_t csum = 0; - ip4_header_t *ip4; - u16 fragment_id; - u16 flags; - u16 frag_offset; - u8 l4_protocol; - u16 l4_offset; - int rv; - ip6_address_t old_src, old_dst; - - ip6 = vlib_buffer_get_current (p); - - if (ip6_parse - (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset)) - return -1; - - if (l4_protocol == IP_PROTOCOL_TCP) - { - tcp_header_t *tcp = ip6_next_header (ip6); - checksum = &tcp->checksum; - } - else - { - udp_header_t *udp = ip6_next_header (ip6); - checksum = &udp->checksum; - } - - old_src.as_u64[0] = ip6->src_address.as_u64[0]; - old_src.as_u64[1] = ip6->src_address.as_u64[1]; - old_dst.as_u64[0] = ip6->dst_address.as_u64[0]; - old_dst.as_u64[1] = ip6->dst_address.as_u64[1]; - - ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4)); - - vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); - - if (PREDICT_FALSE (frag_offset)) - { - //Only the first fragment - ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset); - fragment_id = frag_id_6to4 (hdr->identification); - flags = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); - } - else - { - fragment_id = 0; - flags = 0; - } - - if ((rv = fn (ip6, ip4, ctx)) != 0) - return rv; - - ip4->ip_version_and_header_length = - IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - ip4->tos = ip6_translate_tos (ip6); - ip4->length = u16_net_add (ip6->payload_length, - sizeof (*ip4) + sizeof (*ip6) - l4_offset); - ip4->fragment_id = fragment_id; - ip4->flags_and_fragment_offset = flags; - ip4->ttl = ip6->hop_limit; - ip4->protocol = l4_protocol; - ip4->checksum = ip4_header_checksum (ip4); - - //UDP checksum is optional over IPv4 - if (!udp_checksum && l4_protocol == IP_PROTOCOL_UDP) - { - *checksum = 0; - } - else - { - csum = ip_csum_sub_even (*checksum, old_src.as_u64[0]); - csum = ip_csum_sub_even (csum, 
old_src.as_u64[1]); - csum = ip_csum_sub_even (csum, old_dst.as_u64[0]); - csum = ip_csum_sub_even (csum, old_dst.as_u64[1]); - csum = ip_csum_add_even (csum, ip4->dst_address.as_u32); - csum = ip_csum_add_even (csum, ip4->src_address.as_u32); - *checksum = ip_csum_fold (csum); - } - - return 0; -} - -/** - * @brief Translate IPv6 packet to IPv4 (IP header only). - * - * @param p Buffer to translate. - * @param fn The function to translate header. - * @param ctx A context passed in the header translate function. - * - * @returns 0 on success, non-zero value otherwise. - */ -always_inline int -ip6_to_ip4 (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx) -{ - ip6_header_t *ip6; - ip4_header_t *ip4; - u16 fragment_id; - u16 flags; - u16 frag_offset; - u8 l4_protocol; - u16 l4_offset; - int rv; - - ip6 = vlib_buffer_get_current (p); - - if (ip6_parse - (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset)) - return -1; - - ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4)); - - vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); - - if (PREDICT_FALSE (frag_offset)) - { - //Only the first fragment - ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset); - fragment_id = frag_id_6to4 (hdr->identification); - flags = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); - } - else - { - fragment_id = 0; - flags = 0; - } - - if ((rv = fn (ip6, ip4, ctx)) != 0) - return rv; - - ip4->ip_version_and_header_length = - IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - ip4->tos = ip6_translate_tos (ip6); - ip4->length = u16_net_add (ip6->payload_length, - sizeof (*ip4) + sizeof (*ip6) - l4_offset); - ip4->fragment_id = fragment_id; - ip4->flags_and_fragment_offset = flags; - ip4->ttl = ip6->hop_limit; - ip4->protocol = l4_protocol; - ip4->checksum = ip4_header_checksum (ip4); - - return 0; -} - #endif /* __included_ip6_to_ip4_h__ */ /* diff --git a/src/vnet/ip/reass/ip4_sv_reass.c b/src/vnet/ip/reass/ip4_sv_reass.c index 
d7130629219..b94e9b28cea 100644 --- a/src/vnet/ip/reass/ip4_sv_reass.c +++ b/src/vnet/ip/reass/ip4_sv_reass.c @@ -98,6 +98,9 @@ typedef struct bool is_complete; // ip protocol u8 ip_proto; + u8 icmp_type_or_tcp_flags; + u32 tcp_ack_number; + u32 tcp_seq_number; // l4 src port u16 l4_src_port; // l4 dst port @@ -151,6 +154,9 @@ typedef struct // reference count for enabling/disabling feature - per interface u32 *feature_use_refcount_per_intf; + // reference count for enabling/disabling output feature - per interface + u32 *output_feature_use_refcount_per_intf; + } ip4_sv_reass_main_t; extern ip4_sv_reass_main_t ip4_sv_reass_main; @@ -172,6 +178,7 @@ typedef enum REASS_FRAGMENT_CACHE, REASS_FINISH, REASS_FRAGMENT_FORWARD, + REASS_PASSTHROUGH, } ip4_sv_reass_trace_operation_e; typedef struct @@ -193,7 +200,10 @@ format_ip4_sv_reass_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_sv_reass_trace_t *t = va_arg (*args, ip4_sv_reass_trace_t *); - s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id); + if (REASS_PASSTHROUGH != t->action) + { + s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id); + } switch (t->action) { case REASS_FRAGMENT_CACHE: @@ -211,6 +221,9 @@ format_ip4_sv_reass_trace (u8 * s, va_list * args) t->ip_proto, clib_net_to_host_u16 (t->l4_src_port), clib_net_to_host_u16 (t->l4_dst_port)); break; + case REASS_PASSTHROUGH: + s = format (s, "[not-fragmented]"); + break; } return s; } @@ -223,13 +236,16 @@ ip4_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node, { vlib_buffer_t *b = vlib_get_buffer (vm, bi); ip4_sv_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0])); - t->reass_id = reass->id; + if (reass) + { + t->reass_id = reass->id; + t->op_id = reass->trace_op_counter; + ++reass->trace_op_counter; + } t->action = action; - t->op_id = reass->trace_op_counter; t->ip_proto = ip_proto; t->l4_src_port 
= l4_src_port; t->l4_dst_port = l4_dst_port; - ++reass->trace_op_counter; #if 0 static u8 *s = NULL; s = format (s, "%U", format_ip4_sv_reass_trace, NULL, NULL, t); @@ -358,19 +374,29 @@ ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm, always_inline ip4_sv_reass_rc_t ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, ip4_sv_reass_main_t * rm, ip4_sv_reass_per_thread_t * rt, - ip4_sv_reass_t * reass, u32 bi0) + ip4_header_t * ip0, ip4_sv_reass_t * reass, u32 bi0) { - vlib_buffer_t *fb = vlib_get_buffer (vm, bi0); + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); ip4_sv_reass_rc_t rc = IP4_SV_REASS_RC_OK; - ip4_header_t *fip = vlib_buffer_get_current (fb); - const u32 fragment_first = ip4_get_fragment_offset_bytes (fip); + const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0); if (0 == fragment_first) { - reass->ip_proto = fip->protocol; - reass->l4_src_port = ip4_get_port (fip, 1); - reass->l4_dst_port = ip4_get_port (fip, 0); + reass->ip_proto = ip0->protocol; + reass->l4_src_port = ip4_get_port (ip0, 1); + reass->l4_dst_port = ip4_get_port (ip0, 0); if (!reass->l4_src_port || !reass->l4_dst_port) return IP4_SV_REASS_RC_UNSUPP_IP_PROTO; + if (IP_PROTOCOL_TCP == reass->ip_proto) + { + reass->icmp_type_or_tcp_flags = ((tcp_header_t *) (ip0 + 1))->flags; + reass->tcp_ack_number = ((tcp_header_t *) (ip0 + 1))->ack_number; + reass->tcp_seq_number = ((tcp_header_t *) (ip0 + 1))->seq_number; + } + else if (IP_PROTOCOL_ICMP == reass->ip_proto) + { + reass->icmp_type_or_tcp_flags = + ((icmp46_header_t *) (ip0 + 1))->type; + } reass->is_complete = true; vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -383,7 +409,7 @@ ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, vec_add1 (reass->cached_buffers, bi0); if (!reass->is_complete) { - if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED)) + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { 
ip4_sv_reass_add_trace (vm, node, rm, reass, bi0, REASS_FRAGMENT_CACHE, ~0, ~0, ~0); @@ -398,7 +424,8 @@ ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, always_inline uword ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, bool is_feature, bool is_custom) + vlib_frame_t * frame, bool is_feature, + bool is_output_feature, bool is_custom) { u32 *from = vlib_frame_vector_args (frame); u32 n_left_from, n_left_to_next, *to_next, next_index; @@ -423,14 +450,52 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, bi0 = from[0]; b0 = vlib_get_buffer (vm, bi0); - ip4_header_t *ip0 = vlib_buffer_get_current (b0); + ip4_header_t *ip0 = + (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0), + is_output_feature * + vnet_buffer (b0)-> + ip.save_rewrite_length); if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0)) { // this is a regular packet - no fragmentation + if (is_custom) + { + next0 = vnet_buffer (b0)->ip.reass.next_index; + } + else + { + next0 = IP4_SV_REASSEMBLY_NEXT_INPUT; + } + vnet_buffer (b0)->ip.reass.save_rewrite_length = + vnet_buffer (b0)->ip.save_rewrite_length; + vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0; vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol; + if (IP_PROTOCOL_TCP == ip0->protocol) + { + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + ((tcp_header_t *) (ip0 + 1))->flags; + vnet_buffer (b0)->ip.reass.tcp_ack_number = + ((tcp_header_t *) (ip0 + 1))->ack_number; + vnet_buffer (b0)->ip.reass.tcp_seq_number = + ((tcp_header_t *) (ip0 + 1))->seq_number; + } + else if (IP_PROTOCOL_ICMP == ip0->protocol) + { + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + ((icmp46_header_t *) (ip0 + 1))->type; + } vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1); vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0); - next0 = IP4_SV_REASSEMBLY_NEXT_INPUT; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + 
ip4_sv_reass_add_trace (vm, node, rm, NULL, bi0, + REASS_PASSTHROUGH, + vnet_buffer (b0)->ip.reass.ip_proto, + vnet_buffer (b0)->ip. + reass.l4_src_port, + vnet_buffer (b0)->ip. + reass.l4_dst_port); + } goto packet_enqueue; } const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0); @@ -474,10 +539,27 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (reass->is_complete) { + if (is_custom) + { + next0 = vnet_buffer (b0)->ip.reass.next_index; + } + else + { + next0 = IP4_SV_REASSEMBLY_NEXT_INPUT; + } + vnet_buffer (b0)->ip.reass.save_rewrite_length = + vnet_buffer (b0)->ip.save_rewrite_length; + vnet_buffer (b0)->ip.reass.is_non_first_fragment = + ! !fragment_first; vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + reass->icmp_type_or_tcp_flags; + vnet_buffer (b0)->ip.reass.tcp_ack_number = + reass->tcp_ack_number; + vnet_buffer (b0)->ip.reass.tcp_seq_number = + reass->tcp_seq_number; vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; - next0 = IP4_SV_REASSEMBLY_NEXT_INPUT; error0 = IP4_ERROR_NONE; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -491,7 +573,7 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } ip4_sv_reass_rc_t rc = - ip4_sv_reass_update (vm, node, rm, rt, reass, bi0); + ip4_sv_reass_update (vm, node, rm, rt, ip0, reass, bi0); switch (rc) { case IP4_SV_REASS_RC_OK: @@ -538,7 +620,18 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, to_next[0] = bi0; to_next += 1; n_left_to_next -= 1; + ASSERT (vnet_buffer (b0)->ip.save_rewrite_length < (2 << 14)); + vnet_buffer (b0)->ip.reass.save_rewrite_length = + vnet_buffer (b0)->ip.save_rewrite_length; + vnet_buffer (b0)->ip.reass.is_non_first_fragment = + ! 
!ip4_get_fragment_offset (vlib_buffer_get_current (b0)); vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + reass->icmp_type_or_tcp_flags; + vnet_buffer (b0)->ip.reass.tcp_ack_number = + reass->tcp_ack_number; + vnet_buffer (b0)->ip.reass.tcp_seq_number = + reass->tcp_seq_number; vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -568,10 +661,6 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node, b0 = vlib_get_buffer (vm, bi0); vnet_feature_next (&next0, b0); } - if (is_custom) - { - next0 = vnet_buffer (b0)->ip.reass.next_index; - } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); @@ -599,6 +688,7 @@ VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm, vlib_frame_t * frame) { return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ , + false /* is_output_feature */ , false /* is_custom */ ); } @@ -625,6 +715,7 @@ VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm, vlib_frame_t * frame) { return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ , + false /* is_output_feature */ , false /* is_custom */ ); } @@ -654,6 +745,42 @@ VNET_FEATURE_INIT (ip4_sv_reass_feature) = { }; /* *INDENT-ON* */ +VLIB_NODE_FN (ip4_sv_reass_node_output_feature) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ , + true /* is_output_feature */ , + false /* is_custom */ ); +} + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = { + .name = "ip4-sv-reassembly-output-feature", + .vector_size = sizeof (u32), + .format_trace = format_ip4_sv_reass_trace, + .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings), + .error_strings = ip4_sv_reass_error_strings, + .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT, + 
.next_nodes = + { + [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input", + [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop", + [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff", + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VNET_FEATURE_INIT (ip4_sv_reass_output_feature) = { + .arc_name = "ip4-output", + .node_name = "ip4-sv-reassembly-output-feature", + .runs_before = 0, + .runs_after = 0, +}; +/* *INDENT-ON* */ + /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = { .name = "ip4-sv-reassembly-custom-next", @@ -677,6 +804,7 @@ VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm, vlib_frame_t * frame) { return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ , + false /* is_output_feature */ , true /* is_custom */ ); } @@ -824,6 +952,7 @@ ip4_sv_reass_init_function (vlib_main_t * vm) vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0); rm->feature_use_refcount_per_intf = NULL; + rm->output_feature_use_refcount_per_intf = NULL; return error; } @@ -1010,9 +1139,8 @@ VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = { vnet_api_error_t ip4_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable) { - return vnet_feature_enable_disable ("ip4-unicast", - "ip4-sv-reassembly-feature", - sw_if_index, enable_disable, 0, 0); + return ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, + enable_disable); } #endif /* CLIB_MARCH_VARIANT */ @@ -1177,7 +1305,8 @@ ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable) } else { - --rm->feature_use_refcount_per_intf[sw_if_index]; + if (rm->feature_use_refcount_per_intf[sw_if_index]) + --rm->feature_use_refcount_per_intf[sw_if_index]; if (!rm->feature_use_refcount_per_intf[sw_if_index]) return vnet_feature_enable_disable ("ip4-unicast", "ip4-sv-reassembly-feature", @@ -1192,6 +1321,35 @@ ip4_sv_reass_custom_register_next_node (uword node_index) return vlib_node_add_next (vlib_get_main (), ip4_sv_reass_custom_node.index, node_index); } + +int 
+ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, + int is_enable) +{ + ip4_sv_reass_main_t *rm = &ip4_sv_reass_main; + vec_validate (rm->output_feature_use_refcount_per_intf, sw_if_index); + if (is_enable) + { + if (!rm->output_feature_use_refcount_per_intf[sw_if_index]) + { + ++rm->output_feature_use_refcount_per_intf[sw_if_index]; + return vnet_feature_enable_disable ("ip4-output", + "ip4-sv-reassembly-output-feature", + sw_if_index, 1, 0, 0); + } + ++rm->output_feature_use_refcount_per_intf[sw_if_index]; + } + else + { + if (rm->output_feature_use_refcount_per_intf[sw_if_index]) + --rm->output_feature_use_refcount_per_intf[sw_if_index]; + if (!rm->output_feature_use_refcount_per_intf[sw_if_index]) + return vnet_feature_enable_disable ("ip4-output", + "ip4-sv-reassembly-output-feature", + sw_if_index, 0, 0, 0); + } + return 0; +} #endif /* diff --git a/src/vnet/ip/reass/ip4_sv_reass.h b/src/vnet/ip/reass/ip4_sv_reass.h index cf9f36502a0..e926dbeebcc 100644 --- a/src/vnet/ip/reass/ip4_sv_reass.h +++ b/src/vnet/ip/reass/ip4_sv_reass.h @@ -45,6 +45,8 @@ vnet_api_error_t ip4_sv_reass_enable_disable (u32 sw_if_index, int ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable); +int ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, + int is_enable); uword ip4_sv_reass_custom_register_next_node (uword node_index); diff --git a/src/vnet/ip/reass/ip6_sv_reass.c b/src/vnet/ip/reass/ip6_sv_reass.c index 0837f0606ec..442617703a1 100644 --- a/src/vnet/ip/reass/ip6_sv_reass.c +++ b/src/vnet/ip/reass/ip6_sv_reass.c @@ -96,6 +96,9 @@ typedef struct bool is_complete; // ip protocol u8 ip_proto; + u8 icmp_type_or_tcp_flags; + u32 tcp_ack_number; + u32 tcp_seq_number; // l4 src port u16 l4_src_port; // l4 dst port @@ -170,6 +173,7 @@ typedef enum REASS_FRAGMENT_CACHE, REASS_FINISH, REASS_FRAGMENT_FORWARD, + REASS_PASSTHROUGH, } ip6_sv_reass_trace_operation_e; typedef struct @@ -188,7 +192,10 @@ format_ip6_sv_reass_trace (u8 * s, 
va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip6_sv_reass_trace_t *t = va_arg (*args, ip6_sv_reass_trace_t *); - s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id); + if (REASS_PASSTHROUGH != t->action) + { + s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id); + } switch (t->action) { case REASS_FRAGMENT_CACHE: @@ -206,6 +213,9 @@ format_ip6_sv_reass_trace (u8 * s, va_list * args) t->ip_proto, clib_net_to_host_u16 (t->l4_src_port), clib_net_to_host_u16 (t->l4_dst_port)); break; + case REASS_PASSTHROUGH: + s = format (s, "[not-fragmented]"); + break; } return s; } @@ -219,13 +229,16 @@ ip6_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node, { vlib_buffer_t *b = vlib_get_buffer (vm, bi); ip6_sv_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0])); - t->reass_id = reass->id; + if (reass) + { + t->reass_id = reass->id; + t->op_id = reass->trace_op_counter; + ++reass->trace_op_counter; + } t->action = action; - t->op_id = reass->trace_op_counter; t->ip_proto = ip_proto; t->l4_src_port = l4_src_port; t->l4_dst_port = l4_dst_port; - ++reass->trace_op_counter; #if 0 static u8 *s = NULL; s = format (s, "%U", format_ip6_sv_reass_trace, NULL, NULL, t); @@ -391,18 +404,13 @@ ip6_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node, fvnb->ip.reass.next_range_bi = ~0; if (0 == fragment_first) { - ip6_ext_header_t *ext_hdr = (void *) frag_hdr; - while (ip6_ext_hdr (ext_hdr->next_hdr) - && vlib_object_within_buffer_data (vm, fb, ext_hdr, - ext_hdr->n_data_u64s * 8)) - { - ext_hdr = ip6_ext_next_header (ext_hdr); - } - reass->ip_proto = ext_hdr->next_hdr; - reass->l4_src_port = ip6_get_port (fip, 1, fb->current_length); - reass->l4_dst_port = ip6_get_port (fip, 0, fb->current_length); - if (!reass->l4_src_port || !reass->l4_dst_port) + if (!ip6_get_port + (vm, fb, fip, fb->current_length, &reass->ip_proto, + 
&reass->l4_src_port, &reass->l4_dst_port, + &reass->icmp_type_or_tcp_flags, &reass->tcp_ack_number, + &reass->tcp_seq_number)) return IP6_SV_REASS_RC_UNSUPP_IP_PROTO; + reass->is_complete = true; vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -538,12 +546,34 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, if (!frag_hdr) { // this is a regular packet - no fragmentation - vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol; - vnet_buffer (b0)->ip.reass.l4_src_port = - ip6_get_port (ip0, 1, b0->current_length); - vnet_buffer (b0)->ip.reass.l4_dst_port = - ip6_get_port (ip0, 0, b0->current_length); + if (!ip6_get_port + (vm, b0, ip0, b0->current_length, + &(vnet_buffer (b0)->ip.reass.ip_proto), + &(vnet_buffer (b0)->ip.reass.l4_src_port), + &(vnet_buffer (b0)->ip.reass.l4_dst_port), + &(vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags), + &(vnet_buffer (b0)->ip.reass.tcp_ack_number), + &(vnet_buffer (b0)->ip.reass.tcp_seq_number))) + { + error0 = IP6_ERROR_REASS_UNSUPP_IP_PROTO; + next0 = IP6_SV_REASSEMBLY_NEXT_DROP; + goto packet_enqueue; + } + ASSERT (vnet_buffer (b0)->ip.save_rewrite_length < (2 << 14)); + vnet_buffer (b0)->ip.reass.save_rewrite_length = + vnet_buffer (b0)->ip.save_rewrite_length; + vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0; next0 = IP6_SV_REASSEMBLY_NEXT_INPUT; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + ip6_sv_reass_add_trace (vm, node, rm, NULL, bi0, + REASS_PASSTHROUGH, + vnet_buffer (b0)->ip.reass.ip_proto, + vnet_buffer (b0)->ip. + reass.l4_src_port, + vnet_buffer (b0)->ip. 
+ reass.l4_dst_port); + } goto packet_enqueue; } vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset = @@ -601,7 +631,18 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, if (reass->is_complete) { + ASSERT (vnet_buffer (b0)->ip.save_rewrite_length < (2 << 14)); + vnet_buffer (b0)->ip.reass.save_rewrite_length = + vnet_buffer (b0)->ip.save_rewrite_length; + vnet_buffer (b0)->ip.reass.is_non_first_fragment = + ! !ip6_frag_hdr_offset (frag_hdr); vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + reass->icmp_type_or_tcp_flags; + vnet_buffer (b0)->ip.reass.tcp_ack_number = + reass->tcp_ack_number; + vnet_buffer (b0)->ip.reass.tcp_seq_number = + reass->tcp_seq_number; vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; next0 = IP6_SV_REASSEMBLY_NEXT_INPUT; @@ -668,7 +709,21 @@ ip6_sv_reassembly_inline (vlib_main_t * vm, { vnet_feature_next (&next0, b0); } + frag_hdr = + vlib_buffer_get_current (b0) + + vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset; + ASSERT (vnet_buffer (b0)->ip.save_rewrite_length < (2 << 14)); + vnet_buffer (b0)->ip.reass.save_rewrite_length = + vnet_buffer (b0)->ip.save_rewrite_length; + vnet_buffer (b0)->ip.reass.is_non_first_fragment = + ! 
!ip6_frag_hdr_offset (frag_hdr); vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto; + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags = + reass->icmp_type_or_tcp_flags; + vnet_buffer (b0)->ip.reass.tcp_ack_number = + reass->tcp_ack_number; + vnet_buffer (b0)->ip.reass.tcp_seq_number = + reass->tcp_seq_number; vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port; vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -1124,9 +1179,8 @@ VLIB_CLI_COMMAND (show_ip6_sv_reassembly_cmd, static) = { vnet_api_error_t ip6_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable) { - return vnet_feature_enable_disable ("ip6-unicast", - "ip6-sv-reassembly-feature", - sw_if_index, enable_disable, 0, 0); + return ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, + enable_disable); } #endif /* CLIB_MARCH_VARIANT */ diff --git a/src/vnet/vxlan-gbp/vxlan_gbp.h b/src/vnet/vxlan-gbp/vxlan_gbp.h index d17f75799d6..c422d54af4c 100644 --- a/src/vnet/vxlan-gbp/vxlan_gbp.h +++ b/src/vnet/vxlan-gbp/vxlan_gbp.h @@ -127,6 +127,7 @@ typedef struct u32 dev_instance; /* Real device instance in tunnel vector */ u32 user_instance; /* Instance name being shown to user */ + VNET_DECLARE_REWRITE; } vxlan_gbp_tunnel_t; -- 2.16.6