X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Fnat%2Fnat64_in2out.c;h=8d4b1a89cad26cd1c2c47e271fc582f556a51677;hb=f126e746f;hp=718c69d7bdf2e7672434afc7fe94423c7a4b1d58;hpb=8fed4240be68b2b4b4b6c531233044f3f1ac70c4;p=vpp.git diff --git a/src/plugins/nat/nat64_in2out.c b/src/plugins/nat/nat64_in2out.c index 718c69d7bdf..8d4b1a89cad 100644 --- a/src/plugins/nat/nat64_in2out.c +++ b/src/plugins/nat/nat64_in2out.c @@ -18,7 +18,6 @@ */ #include -#include #include #include #include @@ -47,42 +46,19 @@ format_nat64_in2out_trace (u8 * s, va_list * args) return s; } -typedef struct -{ - u32 sw_if_index; - u32 next_index; - u8 cached; -} nat64_in2out_reass_trace_t; - -static u8 * -format_nat64_in2out_reass_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - nat64_in2out_reass_trace_t *t = - va_arg (*args, nat64_in2out_reass_trace_t *); - - s = - format (s, "NAT64-in2out-reass: sw_if_index %d, next index %d, status %s", - t->sw_if_index, t->next_index, - t->cached ? "cached" : "translated"); - - return s; -} - -vlib_node_registration_t nat64_in2out_node; -vlib_node_registration_t nat64_in2out_slowpath_node; -vlib_node_registration_t nat64_in2out_reass_node; -vlib_node_registration_t nat64_in2out_handoff_node; - #define foreach_nat64_in2out_error \ _(UNSUPPORTED_PROTOCOL, "unsupported protocol") \ _(IN2OUT_PACKETS, "good in2out packets processed") \ _(NO_TRANSLATION, "no translation") \ _(UNKNOWN, "unknown") \ -_(DROP_FRAGMENT, "Drop fragment") \ -_(MAX_REASS, "Maximum reassemblies exceeded") \ -_(MAX_FRAG, "Maximum fragments per reassembly exceeded") +_(DROP_FRAGMENT, "drop fragment") \ +_(TCP_PACKETS, "TCP packets") \ +_(UDP_PACKETS, "UDP packets") \ +_(ICMP_PACKETS, "ICMP packets") \ +_(OTHER_PACKETS, "other protocol packets") \ +_(FRAGMENTS, "fragments") \ +_(CACHED_FRAGMENTS, "cached fragments") \ +_(PROCESSED_FRAGMENTS, "processed fragments") typedef enum @@ -105,7 +81,6 @@ typedef enum NAT64_IN2OUT_NEXT_IP6_LOOKUP, NAT64_IN2OUT_NEXT_DROP, NAT64_IN2OUT_NEXT_SLOWPATH, - NAT64_IN2OUT_NEXT_REASS, NAT64_IN2OUT_N_NEXT, } nat64_in2out_next_t; @@ -162,32 +137,75 @@ is_hairpinning (ip6_address_t * dst_addr) } static int -nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, - void *arg) +nat64_in2out_tcp_udp (vlib_main_t * vm, vlib_buffer_t * p, u16 l4_offset, + u16 frag_hdr_offset, nat64_in2out_set_ctx_t * ctx) { + ip6_header_t *ip6; + ip_csum_t csum = 0; + ip4_header_t *ip4; + u16 fragment_id; + u8 frag_more; + u16 frag_offset; nat64_main_t *nm = &nat64_main; - nat64_in2out_set_ctx_t *ctx = arg; nat64_db_bib_entry_t *bibe; nat64_db_st_entry_t *ste; - ip46_address_t saddr, daddr; + ip46_address_t old_saddr, old_daddr; + ip4_address_t new_daddr; u32 sw_if_index, fib_index; - udp_header_t *udp = ip6_next_header (ip6); - u8 proto = ip6->protocol; - u16 sport = udp->src_port; - u16 dport = udp->dst_port; + u8 proto = vnet_buffer (p)->ip.reass.ip_proto; + u16 sport = vnet_buffer (p)->ip.reass.l4_src_port; + u16 dport = vnet_buffer (p)->ip.reass.l4_dst_port; nat64_db_t *db = &nm->db[ctx->thread_index]; + ip6 = vlib_buffer_get_current (p); + + vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); + ip4 = vlib_buffer_get_current (p); + + u32 ip_version_traffic_class_and_flow_label = + ip6->ip_version_traffic_class_and_flow_label; + u16 payload_length = ip6->payload_length; + u8 hop_limit = ip6->hop_limit; + + old_saddr.as_u64[0] = ip6->src_address.as_u64[0]; + old_saddr.as_u64[1] = ip6->src_address.as_u64[1]; + old_daddr.as_u64[0] = ip6->dst_address.as_u64[0]; + old_daddr.as_u64[1] = ip6->dst_address.as_u64[1]; + + if (PREDICT_FALSE (frag_hdr_offset)) + { + //Only the first fragment + ip6_frag_hdr_t *hdr = + (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset); + fragment_id = frag_id_6to4 (hdr->identification); + frag_more = ip6_frag_hdr_more (hdr); + frag_offset = ip6_frag_hdr_offset (hdr); + } + else + { + fragment_id = 0; + frag_offset = 0; + frag_more = 0; + } + + ip4->ip_version_and_header_length = + IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip4->tos = ip6_translate_tos (ip_version_traffic_class_and_flow_label); + ip4->length = + u16_net_add (payload_length, sizeof (*ip4) + sizeof (*ip6) - l4_offset); + ip4->fragment_id = fragment_id; + ip4->flags_and_fragment_offset = + clib_host_to_net_u16 (frag_offset | + (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0)); + ip4->ttl = hop_limit; + ip4->protocol = (proto == IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : proto; + sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index); - saddr.as_u64[0] = ip6->src_address.as_u64[0]; - saddr.as_u64[1] = ip6->src_address.as_u64[1]; - daddr.as_u64[0] = ip6->dst_address.as_u64[0]; - daddr.as_u64[1] = ip6->dst_address.as_u64[1]; - ste = - nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto, + nat64_db_st_entry_find (db, &old_saddr, &old_daddr, sport, dport, proto, fib_index, 1); if (ste) @@ -198,7 +216,8 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, } else { - bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1); + bibe = + nat64_db_bib_entry_find (db, &old_saddr, sport, proto, fib_index, 1); if (!bibe) { @@ -210,36 +229,58 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, return -1; bibe = - nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr, - sport, out_port, fib_index, proto, 0); + nat64_db_bib_entry_create (ctx->thread_index, db, + &old_saddr.ip6, &out_addr, sport, + out_port, fib_index, proto, 0); if (!bibe) return -1; + + vlib_set_simple_counter (&nm->total_bibs, ctx->thread_index, 0, + db->bib.bib_entries_num); } - nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); + nat64_extract_ip4 (&old_daddr.ip6, &new_daddr, fib_index); ste = - nat64_db_st_entry_create (db, bibe, &ip6->dst_address, - &daddr.ip4, dport); + nat64_db_st_entry_create (ctx->thread_index, db, bibe, + &old_daddr.ip6, &new_daddr, dport); if (!ste) return -1; + + vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0, + db->st.st_entries_num); } ip4->src_address.as_u32 = bibe->out_addr.as_u32; - udp->src_port = bibe->out_port; - ip4->dst_address.as_u32 = ste->out_r_addr.as_u32; - if (proto == IP_PROTOCOL_TCP) + ip4->checksum = ip4_header_checksum (ip4); + + if (!vnet_buffer (p)->ip.reass.is_non_first_fragment) { - u16 *checksum; - ip_csum_t csum; - tcp_header_t *tcp = ip6_next_header (ip6); + udp_header_t *udp = (udp_header_t *) (ip4 + 1); + udp->src_port = bibe->out_port; - nat64_tcp_session_set_state (ste, tcp, 1); - checksum = &tcp->checksum; - csum = ip_csum_sub_even (*checksum, sport); - csum = ip_csum_add_even (csum, udp->src_port); - *checksum = ip_csum_fold (csum); + //UDP checksum is optional over IPv4 + if (proto == IP_PROTOCOL_UDP) + { + udp->checksum = 0; + } + else + { + tcp_header_t *tcp = (tcp_header_t *) (ip4 + 1); + csum = ip_csum_sub_even (tcp->checksum, old_saddr.as_u64[0]); + csum = ip_csum_sub_even (csum, old_saddr.as_u64[1]); + csum = ip_csum_sub_even (csum, old_daddr.as_u64[0]); + csum = ip_csum_sub_even (csum, old_daddr.as_u64[1]); + csum = ip_csum_add_even (csum, ip4->dst_address.as_u32); + csum = ip_csum_add_even (csum, ip4->src_address.as_u32); + csum = ip_csum_sub_even (csum, sport); + csum = ip_csum_add_even (csum, udp->src_port); + mss_clamping (nm->sm, tcp, &csum); + tcp->checksum = ip_csum_fold (csum); + + nat64_tcp_session_set_state (ste, tcp, 1); + } } nat64_session_reset_timeout (ste, ctx->vm); @@ -299,19 +340,26 @@ nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) return -1; bibe = - nat64_db_bib_entry_create (db, &ip6->src_address, - &out_addr, in_id, out_id, - fib_index, IP_PROTOCOL_ICMP, 0); + nat64_db_bib_entry_create (ctx->thread_index, db, + &ip6->src_address, &out_addr, + in_id, out_id, fib_index, + IP_PROTOCOL_ICMP, 0); if (!bibe) return -1; + + vlib_set_simple_counter (&nm->total_bibs, ctx->thread_index, 0, + db->bib.bib_entries_num); } nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (db, bibe, &ip6->dst_address, - &daddr.ip4, 0); + nat64_db_st_entry_create (ctx->thread_index, db, bibe, + &ip6->dst_address, &daddr.ip4, 0); if (!ste) return -1; + + vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0, + db->st.st_entries_num); } nat64_session_reset_timeout (ste, ctx->vm); @@ -435,18 +483,18 @@ unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg) ip46_address_t saddr, daddr; nat64_db_t *db = &nm->db[ctx->thread_index]; - if (ip46_address_is_equal (&ste->in_r_addr, &ctx->dst_addr)) + if (ip6_address_is_equal (&ste->in_r_addr, &ctx->dst_addr)) { bibe = nat64_db_bib_entry_by_index (db, ste->proto, ste->bibe_index); if (!bibe) return -1; - if (ip46_address_is_equal (&bibe->in_addr, &ctx->src_addr) + if (ip6_address_is_equal (&bibe->in_addr, &ctx->src_addr) && bibe->fib_index == ctx->fib_index) { - memset (&saddr, 0, sizeof (saddr)); + clib_memset (&saddr, 0, sizeof (saddr)); saddr.ip4.as_u32 = bibe->out_addr.as_u32; - memset (&daddr, 0, sizeof (daddr)); + clib_memset (&daddr, 0, sizeof (daddr)); nat64_extract_ip4 (&ctx->dst_addr, &daddr.ip4, ctx->fib_index); if (nat64_db_st_entry_find @@ -462,16 +510,43 @@ unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg) } static int -nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, - void *arg) +nat64_in2out_unk_proto (vlib_main_t * vm, vlib_buffer_t * p, u8 l4_protocol, + u16 l4_offset, u16 frag_hdr_offset, + nat64_in2out_set_ctx_t * s_ctx) { + ip6_header_t *ip6; + ip4_header_t *ip4; + u16 fragment_id; + u16 frag_offset; + u8 frag_more; + + ip6 = vlib_buffer_get_current (p); + + ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4)); + + vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); + + if (PREDICT_FALSE (frag_hdr_offset)) + { + //Only the first fragment + ip6_frag_hdr_t *hdr = + (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset); + fragment_id = frag_id_6to4 (hdr->identification); + frag_offset = ip6_frag_hdr_offset (hdr); + frag_more = ip6_frag_hdr_more (hdr); + } + else + { + fragment_id = 0; + frag_offset = 0; + frag_more = 0; + } + nat64_main_t *nm = &nat64_main; - nat64_in2out_set_ctx_t *s_ctx = arg; nat64_db_bib_entry_t *bibe; nat64_db_st_entry_t *ste; ip46_address_t saddr, daddr, addr; u32 sw_if_index, fib_index; - u8 proto = ip6->protocol; int i; nat64_db_t *db = &nm->db[s_ctx->thread_index]; @@ -485,17 +560,19 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, daddr.as_u64[1] = ip6->dst_address.as_u64[1]; ste = - nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1); + nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, l4_protocol, fib_index, + 1); if (ste) { - bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, l4_protocol, ste->bibe_index); if (!bibe) return -1; } else { - bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1); + bibe = + nat64_db_bib_entry_find (db, &saddr, 0, l4_protocol, fib_index, 1); if (!bibe) { @@ -507,7 +584,7 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1], .out_addr.as_u32 = 0, .fib_index = fib_index, - .proto = proto, + .proto = l4_protocol, .thread_index = s_ctx->thread_index, }; @@ -517,9 +594,9 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx); /* Verify if out address is not already in use for protocol */ - memset (&addr, 0, sizeof (addr)); + clib_memset (&addr, 0, sizeof (addr)); addr.ip4.as_u32 = ctx.out_addr.as_u32; - if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0)) + if (nat64_db_bib_entry_find (db, &addr, 0, l4_protocol, 0, 0)) ctx.out_addr.as_u32 = 0; if (!ctx.out_addr.as_u32) @@ -527,7 +604,8 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, for (i = 0; i < vec_len (nm->addr_pool); i++) { addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32; - if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0)) + if (!nat64_db_bib_entry_find + (db, &addr, 0, l4_protocol, 0, 0)) break; } } @@ -536,18 +614,25 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, return -1; bibe = - nat64_db_bib_entry_create (db, &ip6->src_address, - &ctx.out_addr, 0, 0, fib_index, proto, - 0); + nat64_db_bib_entry_create (s_ctx->thread_index, db, + &ip6->src_address, &ctx.out_addr, + 0, 0, fib_index, l4_protocol, 0); if (!bibe) return -1; + + vlib_set_simple_counter (&nm->total_bibs, s_ctx->thread_index, 0, + db->bib.bib_entries_num); } nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0); + nat64_db_st_entry_create (s_ctx->thread_index, db, bibe, + &ip6->dst_address, &daddr.ip4, 0); if (!ste) return -1; + + vlib_set_simple_counter (&nm->total_sessions, s_ctx->thread_index, 0, + db->st.st_entries_num); } nat64_session_reset_timeout (ste, s_ctx->vm); @@ -555,27 +640,39 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, ip4->src_address.as_u32 = bibe->out_addr.as_u32; ip4->dst_address.as_u32 = ste->out_r_addr.as_u32; + ip4->ip_version_and_header_length = + IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip4->tos = ip6_translate_tos (ip6->ip_version_traffic_class_and_flow_label); + ip4->length = u16_net_add (ip6->payload_length, + sizeof (*ip4) + sizeof (*ip6) - l4_offset); + ip4->fragment_id = fragment_id; + ip4->flags_and_fragment_offset = + clib_host_to_net_u16 (frag_offset | + (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0)); + ip4->ttl = ip6->hop_limit; + ip4->protocol = l4_protocol; + ip4->checksum = ip4_header_checksum (ip4); + return 0; } - - static int nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, - ip6_header_t * ip6, u32 thread_index) + ip6_header_t * ip6, u32 l4_offset, + u32 thread_index) { nat64_main_t *nm = &nat64_main; nat64_db_bib_entry_t *bibe; nat64_db_st_entry_t *ste; ip46_address_t saddr, daddr; u32 sw_if_index, fib_index; - udp_header_t *udp = ip6_next_header (ip6); - tcp_header_t *tcp = ip6_next_header (ip6); - u8 proto = ip6->protocol; - u16 sport = udp->src_port; - u16 dport = udp->dst_port; - u16 *checksum; - ip_csum_t csum; + udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, l4_offset); + tcp_header_t *tcp = (tcp_header_t *) u8_ptr_add (ip6, l4_offset); + u8 proto = vnet_buffer (b)->ip.reass.ip_proto; + u16 sport = vnet_buffer (b)->ip.reass.l4_src_port; + u16 dport = vnet_buffer (b)->ip.reass.l4_dst_port; + u16 *checksum = NULL; + ip_csum_t csum = 0; nat64_db_t *db = &nm->db[thread_index]; sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; @@ -587,17 +684,17 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, daddr.as_u64[0] = ip6->dst_address.as_u64[0]; daddr.as_u64[1] = ip6->dst_address.as_u64[1]; - if (proto == IP_PROTOCOL_UDP) - checksum = &udp->checksum; - else - checksum = &tcp->checksum; - - csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]); - csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]); - csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]); - csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]); - csum = ip_csum_sub_even (csum, sport); - csum = ip_csum_sub_even (csum, dport); + if (!vnet_buffer (b)->ip.reass.is_non_first_fragment) + { + if (proto == IP_PROTOCOL_UDP) + checksum = &udp->checksum; + else + checksum = &tcp->checksum; + csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]); + csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]); + csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]); + } ste = nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto, @@ -623,18 +720,25 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, return -1; bibe = - nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr, - sport, out_port, fib_index, proto, 0); + nat64_db_bib_entry_create (thread_index, db, &ip6->src_address, + &out_addr, sport, out_port, fib_index, + proto, 0); if (!bibe) return -1; + + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); } nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (db, bibe, &ip6->dst_address, + nat64_db_st_entry_create (thread_index, db, bibe, &ip6->dst_address, &daddr.ip4, dport); if (!ste) return -1; + + vlib_set_simple_counter (&nm->total_sessions, thread_index, 0, + db->st.st_entries_num); } if (proto == IP_PROTOCOL_TCP) @@ -642,10 +746,14 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, nat64_session_reset_timeout (ste, vm); - sport = udp->src_port = bibe->out_port; + if (!vnet_buffer (b)->ip.reass.is_non_first_fragment) + { + udp->src_port = bibe->out_port; + } + nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index); - memset (&daddr, 0, sizeof (daddr)); + clib_memset (&daddr, 0, sizeof (daddr)); daddr.ip4.as_u32 = ste->out_r_addr.as_u32; bibe = 0; @@ -664,15 +772,20 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0]; ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1]; - udp->dst_port = bibe->in_port; - csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]); - csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]); - csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]); - csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]); - csum = ip_csum_add_even (csum, udp->src_port); - csum = ip_csum_add_even (csum, udp->dst_port); - *checksum = ip_csum_fold (csum); + if (!vnet_buffer (b)->ip.reass.is_non_first_fragment) + { + csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]); + csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]); + csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]); + csum = ip_csum_sub_even (csum, sport); + csum = ip_csum_sub_even (csum, dport); + udp->dst_port = bibe->in_port; + csum = ip_csum_add_even (csum, udp->src_port); + csum = ip_csum_add_even (csum, udp->dst_port); + *checksum = ip_csum_fold (csum); + } return 0; } @@ -745,8 +858,8 @@ nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, dport = udp->dst_port = bibe->out_port; nat64_compose_ip6 (&inner_ip6->dst_address, &bibe->out_addr, fib_index); - memset (&saddr, 0, sizeof (saddr)); - memset (&daddr, 0, sizeof (daddr)); + clib_memset (&saddr, 0, sizeof (saddr)); + clib_memset (&daddr, 0, sizeof (daddr)); saddr.ip4.as_u32 = ste->out_r_addr.as_u32; daddr.ip4.as_u32 = bibe->out_addr.as_u32; @@ -858,7 +971,7 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx); /* Verify if out address is not already in use for protocol */ - memset (&addr, 0, sizeof (addr)); + clib_memset (&addr, 0, sizeof (addr)); addr.ip4.as_u32 = ctx.out_addr.as_u32; if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0)) ctx.out_addr.as_u32 = 0; @@ -877,25 +990,32 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, return -1; bibe = - nat64_db_bib_entry_create (db, &ip6->src_address, + nat64_db_bib_entry_create (thread_index, db, &ip6->src_address, &ctx.out_addr, 0, 0, fib_index, proto, 0); if (!bibe) return -1; + + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); } nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0); + nat64_db_st_entry_create (thread_index, db, bibe, &ip6->dst_address, + &daddr.ip4, 0); if (!ste) return -1; + + vlib_set_simple_counter (&nm->total_sessions, thread_index, 0, + db->st.st_entries_num); } nat64_session_reset_timeout (ste, vm); nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index); - memset (&daddr, 0, sizeof (daddr)); + clib_memset (&daddr, 0, sizeof (daddr)); daddr.ip4.as_u32 = ste->out_r_addr.as_u32; bibe = 0; @@ -927,9 +1047,13 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u32 pkts_processed = 0; u32 stats_node_index; u32 thread_index = vm->thread_index; + nat64_main_t *nm = &nat64_main; + + u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets = + 0, fragments = 0; stats_node_index = - is_slow_path ? nat64_in2out_slowpath_node.index : nat64_in2out_node.index; + is_slow_path ? nm->in2out_slowpath_node_index : nm->in2out_node_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -947,7 +1071,7 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t *b0; u32 next0; ip6_header_t *ip60; - u16 l4_offset0, frag_offset0; + u16 l4_offset0, frag_hdr_offset0; u8 l4_protocol0; u32 proto0; nat64_in2out_set_ctx_t ctx0; @@ -972,8 +1096,8 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_FALSE (ip6_parse - (ip60, b0->current_length, &l4_protocol0, &l4_offset0, - &frag_offset0))) + (vm, b0, ip60, b0->current_length, &l4_protocol0, &l4_offset0, + &frag_hdr_offset0))) { next0 = NAT64_IN2OUT_NEXT_DROP; b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN]; @@ -994,6 +1118,7 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, { if (PREDICT_TRUE (proto0 == ~0)) { + other_packets++; if (is_hairpinning (&ip60->dst_address)) { next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP; @@ -1007,7 +1132,9 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, goto trace0; } - if (ip6_to_ip4 (b0, nat64_in2out_unk_proto_set_cb, &ctx0)) + if (nat64_in2out_unk_proto + (vm, b0, l4_protocol0, l4_offset0, frag_hdr_offset0, + &ctx0)) { next0 = NAT64_IN2OUT_NEXT_DROP; b0->error = @@ -1026,15 +1153,9 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } } - if (PREDICT_FALSE - (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)) - { - next0 = NAT64_IN2OUT_NEXT_REASS; - goto trace0; - } - if (proto0 == SNAT_PROTOCOL_ICMP) { + icmp_packets++; if (is_hairpinning (&ip60->dst_address)) { next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP; @@ -1049,7 +1170,7 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } if (icmp6_to_icmp - (b0, nat64_in2out_icmp_set_cb, &ctx0, + (vm, b0, nat64_in2out_icmp_set_cb, &ctx0, nat64_in2out_inner_icmp_set_cb, &ctx0)) { next0 = NAT64_IN2OUT_NEXT_DROP; @@ -1059,11 +1180,16 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } else if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP) { + if (proto0 == SNAT_PROTOCOL_TCP) + tcp_packets++; + else + udp_packets++; + if (is_hairpinning (&ip60->dst_address)) { next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP; if (nat64_in2out_tcp_udp_hairpinning - (vm, b0, ip60, thread_index)) + (vm, b0, ip60, l4_offset0, thread_index)) { next0 = NAT64_IN2OUT_NEXT_DROP; b0->error = @@ -1072,8 +1198,8 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, goto trace0; } - if (ip6_to_ip4_tcp_udp - (b0, nat64_in2out_tcp_udp_set_cb, &ctx0, 0)) + if (nat64_in2out_tcp_udp + (vm, b0, l4_offset0, frag_hdr_offset0, &ctx0)) { next0 = NAT64_IN2OUT_NEXT_DROP; b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION]; @@ -1092,7 +1218,7 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, t->is_slow_path = is_slow_path; } - pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP; + pkts_processed += next0 == NAT64_IN2OUT_NEXT_IP4_LOOKUP; /* verify speculative enqueue, maybe switch current next frame */ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, @@ -1103,19 +1229,30 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_node_increment_counter (vm, stats_node_index, NAT64_IN2OUT_ERROR_IN2OUT_PACKETS, pkts_processed); + vlib_node_increment_counter (vm, stats_node_index, + NAT64_IN2OUT_ERROR_TCP_PACKETS, tcp_packets); + vlib_node_increment_counter (vm, stats_node_index, + NAT64_IN2OUT_ERROR_UDP_PACKETS, udp_packets); + vlib_node_increment_counter (vm, stats_node_index, + NAT64_IN2OUT_ERROR_ICMP_PACKETS, icmp_packets); + vlib_node_increment_counter (vm, stats_node_index, + NAT64_IN2OUT_ERROR_OTHER_PACKETS, + other_packets); + vlib_node_increment_counter (vm, stats_node_index, + NAT64_IN2OUT_ERROR_FRAGMENTS, fragments); + return frame->n_vectors; } -static uword -nat64_in2out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (nat64_in2out_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { return nat64_in2out_node_fn_inline (vm, node, frame, 0); } /* *INDENT-OFF* */ VLIB_REGISTER_NODE (nat64_in2out_node) = { - .function = nat64_in2out_node_fn, .name = "nat64-in2out", .vector_size = sizeof (u32), .format_trace = format_nat64_in2out_trace, @@ -1129,23 +1266,19 @@ VLIB_REGISTER_NODE (nat64_in2out_node) = { [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup", [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup", [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath", - [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass", }, }; /* *INDENT-ON* */ -VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_node, nat64_in2out_node_fn); - -static uword -nat64_in2out_slowpath_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (nat64_in2out_slowpath_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { return nat64_in2out_node_fn_inline (vm, node, frame, 1); } /* *INDENT-OFF* */ VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = { - .function = nat64_in2out_slowpath_node_fn, .name = "nat64-in2out-slowpath", .vector_size = sizeof (u32), .format_trace = format_nat64_in2out_trace, @@ -1159,14 +1292,10 @@ VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = { [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup", [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup", [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath", - [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass", }, }; /* *INDENT-ON* */ -VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_slowpath_node, - nat64_in2out_slowpath_node_fn); - typedef struct nat64_in2out_frag_set_ctx_t_ { vlib_main_t *vm; @@ -1177,447 +1306,29 @@ typedef struct nat64_in2out_frag_set_ctx_t_ u8 first_frag; } nat64_in2out_frag_set_ctx_t; -static int -nat64_in2out_frag_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) -{ - nat64_main_t *nm = &nat64_main; - nat64_in2out_frag_set_ctx_t *ctx = arg; - nat64_db_st_entry_t *ste; - nat64_db_bib_entry_t *bibe; - udp_header_t *udp; - nat64_db_t *db = &nm->db[ctx->thread_index]; - - ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index); - if (!ste) - return -1; - - bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index); - if (!bibe) - return -1; - - nat64_session_reset_timeout (ste, ctx->vm); - - if (ctx->first_frag) - { - udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset); - - if (ctx->proto == IP_PROTOCOL_TCP) - { - u16 *checksum; - ip_csum_t csum; - tcp_header_t *tcp = (tcp_header_t *) udp; - - nat64_tcp_session_set_state (ste, tcp, 1); - checksum = &tcp->checksum; - csum = ip_csum_sub_even (*checksum, tcp->src_port); - csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[0]); - csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]); - csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]); - csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]); - csum = ip_csum_add_even (csum, bibe->out_port); - csum = ip_csum_add_even (csum, bibe->out_addr.as_u32); - csum = ip_csum_add_even (csum, ste->out_r_addr.as_u32); - *checksum = ip_csum_fold (csum); - } - - udp->src_port = bibe->out_port; - } - - ip4->src_address.as_u32 = bibe->out_addr.as_u32; - ip4->dst_address.as_u32 = ste->out_r_addr.as_u32; - - return 0; -} - -static int -nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6, - nat64_in2out_frag_set_ctx_t * ctx) -{ - nat64_main_t *nm = &nat64_main; - nat64_db_st_entry_t *ste; - nat64_db_bib_entry_t *bibe; - udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, ctx->l4_offset); - tcp_header_t *tcp = (tcp_header_t *) udp; - u16 sport = udp->src_port; - u16 dport = udp->dst_port; - u16 *checksum; - ip_csum_t csum; - ip46_address_t daddr; - nat64_db_t *db = &nm->db[ctx->thread_index]; - - if (ctx->first_frag) - { - if (ctx->proto == IP_PROTOCOL_UDP) - checksum = &udp->checksum; - else - checksum = &tcp->checksum; - - csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]); - csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]); - csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]); - csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]); - csum = ip_csum_sub_even (csum, sport); - csum = ip_csum_sub_even (csum, dport); - } - - ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index); - if (!ste) - return -1; - - bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index); - if (!bibe) - return -1; - - if (ctx->proto == IP_PROTOCOL_TCP) - nat64_tcp_session_set_state (ste, tcp, 1); - - nat64_session_reset_timeout (ste, ctx->vm); - - sport = bibe->out_port; - dport = ste->r_port; - - nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, bibe->fib_index); - - memset (&daddr, 0, sizeof (daddr)); - daddr.ip4.as_u32 = ste->out_r_addr.as_u32; - - bibe = 0; - /* *INDENT-OFF* */ - vec_foreach (db, nm->db) - { - bibe = nat64_db_bib_entry_find (db, &daddr, dport, ctx->proto, 0, 0); - - if (bibe) - break; - } - /* *INDENT-ON* */ - - if (!bibe) - return -1; - - ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0]; - ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1]; - - if (ctx->first_frag) - { - udp->dst_port = bibe->in_port; - udp->src_port = sport; - csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]); - csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]); - csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]); - csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]); - csum = ip_csum_add_even (csum, udp->src_port); - csum = ip_csum_add_even (csum, udp->dst_port); - *checksum = ip_csum_fold (csum); - } - return 0; -} +#define foreach_nat64_in2out_handoff_error \ +_(CONGESTION_DROP, "congestion drop") \ +_(SAME_WORKER, "same worker") \ +_(DO_HANDOFF, "do handoff") -static uword -nat64_in2out_reass_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +typedef enum { - u32 n_left_from, *from, *to_next; - nat64_in2out_next_t next_index; - u32 pkts_processed = 0; - u32 *fragments_to_drop = 0; - u32 *fragments_to_loopback = 0; - nat64_main_t *nm = &nat64_main; - u32 thread_index = vm->thread_index; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - u32 next0; - u8 cached0 = 0; - ip6_header_t *ip60; - u16 l4_offset0, frag_offset0; - u8 l4_protocol0; - nat_reass_ip6_t *reass0; - ip6_frag_hdr_t *frag0; - nat64_db_bib_entry_t *bibe0; - nat64_db_st_entry_t *ste0; - udp_header_t *udp0; - snat_protocol_t proto0; - u32 sw_if_index0, fib_index0; - ip46_address_t saddr0, daddr0; - nat64_in2out_frag_set_ctx_t ctx0; - nat64_db_t *db = &nm->db[thread_index]; - - /* speculatively enqueue b0 to the current next frame */ - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP; - - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - fib_index0 = - fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, - sw_if_index0); - - ctx0.thread_index = thread_index; - - if (PREDICT_FALSE (nat_reass_is_drop_frag (1))) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT]; - goto trace0; - } - - ip60 = (ip6_header_t *) vlib_buffer_get_current (b0); - - if (PREDICT_FALSE - (ip6_parse - (ip60, b0->current_length, &l4_protocol0, &l4_offset0, - &frag_offset0))) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN]; - goto trace0; - } - - if (PREDICT_FALSE - (!(l4_protocol0 == IP_PROTOCOL_TCP - || l4_protocol0 == IP_PROTOCOL_UDP))) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT]; - goto trace0; - } - - udp0 = (udp_header_t *) u8_ptr_add (ip60, l4_offset0); - frag0 = (ip6_frag_hdr_t *) u8_ptr_add (ip60, frag_offset0); - proto0 = ip_proto_to_snat_proto (l4_protocol0); - - reass0 = nat_ip6_reass_find_or_create (ip60->src_address, - ip60->dst_address, - frag0->identification, - l4_protocol0, - 1, &fragments_to_drop); - - if (PREDICT_FALSE (!reass0)) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_REASS]; - goto trace0; - } - - if (PREDICT_TRUE (ip6_frag_hdr_offset (frag0))) - { - ctx0.first_frag = 0; - if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0)) - { - if (nat_ip6_reass_add_fragment (reass0, bi0)) - { - b0->error = node->errors[NAT64_IN2OUT_ERROR_MAX_FRAG]; - next0 = NAT64_IN2OUT_NEXT_DROP; - goto trace0; - } - cached0 = 1; - goto trace0; - } - } - else - { - ctx0.first_frag = 1; - - saddr0.as_u64[0] = ip60->src_address.as_u64[0]; - saddr0.as_u64[1] = ip60->src_address.as_u64[1]; - daddr0.as_u64[0] = ip60->dst_address.as_u64[0]; - daddr0.as_u64[1] = ip60->dst_address.as_u64[1]; - - ste0 = - nat64_db_st_entry_find (db, &saddr0, &daddr0, - udp0->src_port, udp0->dst_port, - l4_protocol0, fib_index0, 1); - if (!ste0) - { - bibe0 = - nat64_db_bib_entry_find (db, &saddr0, udp0->src_port, - l4_protocol0, fib_index0, 1); - if (!bibe0) - { - u16 out_port0; - ip4_address_t out_addr0; - if (nat64_alloc_out_addr_and_port - (fib_index0, proto0, &out_addr0, &out_port0, - thread_index)) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = - node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION]; - goto trace0; - } - - bibe0 = - nat64_db_bib_entry_create (db, - &ip60->src_address, - &out_addr0, udp0->src_port, - out_port0, fib_index0, - l4_protocol0, 0); - if (!bibe0) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = - node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION]; - goto trace0; - } - } - nat64_extract_ip4 (&ip60->dst_address, &daddr0.ip4, - fib_index0); - ste0 = - nat64_db_st_entry_create (db, bibe0, - &ip60->dst_address, &daddr0.ip4, - udp0->dst_port); - if (!ste0) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = - node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION]; - goto trace0; - } - } - reass0->sess_index = nat64_db_st_entry_get_index (db, ste0); - - nat_ip6_reass_get_frags (reass0, &fragments_to_loopback); - } - - ctx0.sess_index = reass0->sess_index; - ctx0.proto = l4_protocol0; - ctx0.vm = vm; - ctx0.l4_offset = l4_offset0; - - if (PREDICT_FALSE (is_hairpinning (&ip60->dst_address))) - { - next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP; - if (nat64_in2out_frag_hairpinning (b0, ip60, &ctx0)) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION]; - } - goto trace0; - } - else - { - if (ip6_to_ip4_fragmented (b0, nat64_in2out_frag_set_cb, &ctx0)) - { - next0 = NAT64_IN2OUT_NEXT_DROP; - b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN]; - goto trace0; - } - } - - trace0: - if (PREDICT_FALSE - ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) - { - nat64_in2out_reass_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - t->cached = cached0; - t->sw_if_index = sw_if_index0; - t->next_index = next0; - } - - if (cached0) - { - n_left_to_next++; - to_next--; - } - else - { - pkts_processed += next0 != NAT64_IN2OUT_NEXT_DROP; - - /* verify speculative enqueue, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - if (n_left_from == 0 && vec_len (fragments_to_loopback)) - { - from = vlib_frame_vector_args (frame); - u32 len = vec_len (fragments_to_loopback); - if (len <= VLIB_FRAME_SIZE) - { - clib_memcpy (from, fragments_to_loopback, - sizeof (u32) * len); - n_left_from = len; - vec_reset_length (fragments_to_loopback); - } - else - { - clib_memcpy (from, - fragments_to_loopback + (len - - VLIB_FRAME_SIZE), - sizeof (u32) * VLIB_FRAME_SIZE); - n_left_from = VLIB_FRAME_SIZE; - _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE; - } - } - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, nat64_in2out_reass_node.index, - NAT64_IN2OUT_ERROR_IN2OUT_PACKETS, - pkts_processed); - - nat_send_all_to_node (vm, fragments_to_drop, node, - &node->errors[NAT64_IN2OUT_ERROR_DROP_FRAGMENT], - NAT64_IN2OUT_NEXT_DROP); - - vec_free (fragments_to_drop); - vec_free (fragments_to_loopback); - return frame->n_vectors; -} +#define _(sym,str) NAT64_IN2OUT_HANDOFF_ERROR_##sym, + foreach_nat64_in2out_handoff_error +#undef _ + NAT64_IN2OUT_HANDOFF_N_ERROR, +} nat64_in2out_handoff_error_t; -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (nat64_in2out_reass_node) = { - .function = nat64_in2out_reass_node_fn, - .name = "nat64-in2out-reass", - .vector_size = sizeof (u32), - .format_trace = format_nat64_in2out_reass_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN (nat64_in2out_error_strings), - .error_strings = nat64_in2out_error_strings, - .n_next_nodes = NAT64_IN2OUT_N_NEXT, - /* edit / add dispositions here */ - .next_nodes = { - [NAT64_IN2OUT_NEXT_DROP] = "error-drop", - [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup", - [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup", - [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath", - [NAT64_IN2OUT_NEXT_REASS] = "nat64-in2out-reass", - }, +static char *nat64_in2out_handoff_error_strings[] = { +#define _(sym,string) string, + foreach_nat64_in2out_handoff_error +#undef _ }; -/* *INDENT-ON* */ - -VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_reass_node, - nat64_in2out_reass_node_fn); typedef struct { u32 next_worker_index; - u8 do_handoff; } nat64_in2out_handoff_trace_t; static u8 * @@ -1627,190 +1338,86 @@ format_nat64_in2out_handoff_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); nat64_in2out_handoff_trace_t *t = va_arg (*args, nat64_in2out_handoff_trace_t *); - char *m; - m = t->do_handoff ? "next worker" : "same worker"; - s = format (s, "NAT64-IN2OUT-HANDOFF: %s %d", m, t->next_worker_index); + s = + format (s, "NAT64-IN2OUT-HANDOFF: next-worker %d", t->next_worker_index); return s; } -static inline uword -nat64_in2out_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame) +VLIB_NODE_FN (nat64_in2out_handoff_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { nat64_main_t *nm = &nat64_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0; - static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index; - static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index - = 0; - vlib_frame_queue_elt_t *hf = 0; - vlib_frame_queue_t *fq; - vlib_frame_t *f = 0, *d = 0; - int i; - u32 n_left_to_next_worker = 0, *to_next_worker = 0; - u32 next_worker_index = 0; - u32 current_worker_index = ~0; - u32 thread_index = vm->thread_index; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + u32 n_enq, n_left_from, *from; + u16 thread_indices[VLIB_FRAME_SIZE], *ti; u32 fq_index; - u32 to_node_index; - - fq_index = nm->fq_in2out_index; - to_node_index = nat64_in2out_node.index; - - if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0)) - { - vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1); - - vec_validate_init_empty (congested_handoff_queue_by_worker_index, - tm->n_vlib_mains - 1, - (vlib_frame_queue_t *) (~0)); - } + u32 thread_index = vm->thread_index; + u32 do_handoff = 0, same_worker = 0; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; + vlib_get_buffers (vm, from, bufs, n_left_from); + + b = bufs; + ti = thread_indices; + + fq_index = nm->fq_in2out_index; while (n_left_from > 0) { - u32 bi0; - vlib_buffer_t *b0; ip6_header_t *ip0; - u8 do_handoff; - - bi0 = from[0]; - from += 1; - n_left_from -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - ip0 = vlib_buffer_get_current (b0); - - next_worker_index = nat64_get_worker_in2out (&ip0->src_address); - - if (PREDICT_FALSE (next_worker_index != thread_index)) - { - do_handoff = 1; - - if (next_worker_index != current_worker_index) - { - fq = - is_vlib_frame_queue_congested (fq_index, next_worker_index, - 30, - congested_handoff_queue_by_worker_index); - - if (fq) - { - /* if this is 1st frame */ - if (!d) - { - d = vlib_get_frame_to_node (vm, nm->error_node_index); - to_next_drop = vlib_frame_vector_args (d); - } - to_next_drop[0] = bi0; - to_next_drop += 1; - d->n_vectors++; - goto trace0; - } - - if (hf) - hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; - - hf = - vlib_get_worker_handoff_queue_elt (fq_index, - next_worker_index, - handoff_queue_elt_by_worker_index); - n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors; - to_next_worker = &hf->buffer_index[hf->n_vectors]; - current_worker_index = next_worker_index; - } - - ASSERT (to_next_worker != 0); + ip0 = vlib_buffer_get_current (b[0]); + ti[0] = nat64_get_worker_in2out (&ip0->src_address); - /* enqueue to correct worker thread */ - to_next_worker[0] = bi0; - to_next_worker++; - n_left_to_next_worker--; - - if (n_left_to_next_worker == 0) - { - hf->n_vectors = VLIB_FRAME_SIZE; - vlib_put_frame_queue_elt (hf); - current_worker_index = ~0; - handoff_queue_elt_by_worker_index[next_worker_index] = 0; - hf = 0; - } - } + if (ti[0] != thread_index) + do_handoff++; else - { - do_handoff = 0; - /* if this is 1st frame */ - if (!f) - { - f = vlib_get_frame_to_node (vm, to_node_index); - to_next = vlib_frame_vector_args (f); - } + same_worker++; - to_next[0] = bi0; - to_next += 1; - f->n_vectors++; - } - - trace0: if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) + && (b[0]->flags & VLIB_BUFFER_IS_TRACED))) { nat64_in2out_handoff_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - t->next_worker_index = next_worker_index; - t->do_handoff = do_handoff; + vlib_add_trace (vm, node, b[0], sizeof (*t)); + t->next_worker_index = ti[0]; } - } - - if (f) - vlib_put_frame_to_node (vm, to_node_index, f); - if (d) - vlib_put_frame_to_node (vm, nm->error_node_index, d); + n_left_from -= 1; + ti += 1; + b += 1; + } - if (hf) - hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + n_enq = + vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices, + frame->n_vectors, 1); + + if (n_enq < frame->n_vectors) + vlib_node_increment_counter (vm, node->node_index, + NAT64_IN2OUT_HANDOFF_ERROR_CONGESTION_DROP, + frame->n_vectors - n_enq); + vlib_node_increment_counter (vm, node->node_index, + NAT64_IN2OUT_HANDOFF_ERROR_SAME_WORKER, + same_worker); + vlib_node_increment_counter (vm, node->node_index, + NAT64_IN2OUT_HANDOFF_ERROR_DO_HANDOFF, + do_handoff); - /* Ship frames to the worker nodes */ - for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++) - { - if (handoff_queue_elt_by_worker_index[i]) - { - hf = handoff_queue_elt_by_worker_index[i]; - /* - * It works better to let the handoff node - * rate-adapt, always ship the handoff queue element. - */ - if (1 || hf->n_vectors == hf->last_n_vectors) - { - vlib_put_frame_queue_elt (hf); - handoff_queue_elt_by_worker_index[i] = 0; - } - else - hf->last_n_vectors = hf->n_vectors; - } - congested_handoff_queue_by_worker_index[i] = - (vlib_frame_queue_t *) (~0); - } - hf = 0; - current_worker_index = ~0; return frame->n_vectors; } /* *INDENT-OFF* */ VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = { - .function = nat64_in2out_handoff_node_fn, .name = "nat64-in2out-handoff", .vector_size = sizeof (u32), .format_trace = format_nat64_in2out_handoff_trace, .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(nat64_in2out_handoff_error_strings), + .error_strings = nat64_in2out_handoff_error_strings, .n_next_nodes = 1, @@ -1820,9 +1427,6 @@ VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = { }; /* *INDENT-ON* */ -VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_handoff_node, - nat64_in2out_handoff_node_fn); - /* * fd.io coding-style-patch-verification: ON *