*/
#include <nat/nat64.h>
-#include <nat/nat_reass.h>
#include <nat/nat_inlines.h>
#include <vnet/ip/ip4_to_ip6.h>
#include <vnet/fib/ip4_fib.h>
return s;
}
-typedef struct
-{
- u32 sw_if_index;
- u32 next_index;
- u8 cached;
-} nat64_out2in_reass_trace_t;
-
-static u8 *
-format_nat64_out2in_reass_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- nat64_out2in_reass_trace_t *t =
- va_arg (*args, nat64_out2in_reass_trace_t *);
-
- s =
- format (s, "NAT64-out2in-reass: sw_if_index %d, next index %d, status %s",
- t->sw_if_index, t->next_index,
- t->cached ? "cached" : "translated");
-
- return s;
-}
-
-vlib_node_registration_t nat64_out2in_node;
-vlib_node_registration_t nat64_out2in_reass_node;
-vlib_node_registration_t nat64_out2in_handoff_node;
-
#define foreach_nat64_out2in_error \
-_(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \
-_(OUT2IN_PACKETS, "Good out2in packets processed") \
-_(NO_TRANSLATION, "No translation") \
+_(UNSUPPORTED_PROTOCOL, "unsupported protocol") \
+_(OUT2IN_PACKETS, "good out2in packets processed") \
+_(NO_TRANSLATION, "no translation") \
_(UNKNOWN, "unknown") \
-_(DROP_FRAGMENT, "Drop fragment") \
-_(MAX_REASS, "Maximum reassemblies exceeded") \
-_(MAX_FRAG, "Maximum fragments per reassembly exceeded")
+_(DROP_FRAGMENT, "drop fragment") \
+_(TCP_PACKETS, "TCP packets") \
+_(UDP_PACKETS, "UDP packets") \
+_(ICMP_PACKETS, "ICMP packets") \
+_(OTHER_PACKETS, "other protocol packets") \
+_(FRAGMENTS, "fragments") \
+_(CACHED_FRAGMENTS, "cached fragments") \
+_(PROCESSED_FRAGMENTS, "processed fragments")
typedef enum
NAT64_OUT2IN_NEXT_IP6_LOOKUP,
NAT64_OUT2IN_NEXT_IP4_LOOKUP,
NAT64_OUT2IN_NEXT_DROP,
- NAT64_OUT2IN_NEXT_REASS,
NAT64_OUT2IN_N_NEXT,
} nat64_out2in_next_t;
} nat64_out2in_set_ctx_t;
static int
-nat64_out2in_tcp_udp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
- void *arg)
+nat64_out2in_tcp_udp (vlib_main_t * vm, vlib_buffer_t * b,
+ nat64_out2in_set_ctx_t * ctx)
{
+ ip4_header_t *ip4;
+ ip6_header_t *ip6;
+ ip_csum_t csum;
+ u16 *checksum = NULL;
+ ip6_frag_hdr_t *frag;
+ u32 frag_id;
+ ip4_address_t old_src, old_dst;
+
nat64_main_t *nm = &nat64_main;
- nat64_out2in_set_ctx_t *ctx = arg;
nat64_db_bib_entry_t *bibe;
nat64_db_st_entry_t *ste;
- ip46_address_t saddr, daddr;
+ ip46_address_t saddr;
+ ip46_address_t daddr;
ip6_address_t ip6_saddr;
- udp_header_t *udp = ip4_next_header (ip4);
- tcp_header_t *tcp = ip4_next_header (ip4);
- u8 proto = ip4->protocol;
- u16 dport = udp->dst_port;
- u16 sport = udp->src_port;
+ u8 proto = vnet_buffer (b)->ip.reass.ip_proto;
+ u16 dport = vnet_buffer (b)->ip.reass.l4_dst_port;
+ u16 sport = vnet_buffer (b)->ip.reass.l4_src_port;
u32 sw_if_index, fib_index;
- u16 *checksum;
- ip_csum_t csum;
nat64_db_t *db = &nm->db[ctx->thread_index];
+ ip4 = vlib_buffer_get_current (b);
+
+ udp_header_t *udp = ip4_next_header (ip4);
+ tcp_header_t *tcp = ip4_next_header (ip4);
+ if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
+ {
+ if (ip4->protocol == IP_PROTOCOL_UDP)
+ {
+ checksum = &udp->checksum;
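+ // The UDP checksum is optional over IPv4 but mandatory over IPv6,
+ // so a zero (absent) checksum is computed here before translation.
+ // udp->length is not sanity-checked; the length derived from the
+ // IPv4 header is used instead.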
+ if (PREDICT_FALSE (!*checksum))
+ {
+ u16 udp_len =
+ clib_host_to_net_u16 (ip4->length) - sizeof (*ip4);
+ csum = ip_incremental_checksum (0, udp, udp_len);
+ csum =
+ ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
+ csum =
+ ip_csum_with_carry (csum,
+ clib_host_to_net_u16 (IP_PROTOCOL_UDP));
+ csum =
+ ip_csum_with_carry (csum, *((u64 *) (&ip4->src_address)));
+ *checksum = ~ip_csum_fold (csum);
+ }
+ }
+ else
+ {
+ checksum = &tcp->checksum;
+ }
+ }
+
+ old_src.as_u32 = ip4->src_address.as_u32;
+ old_dst.as_u32 = ip4->dst_address.as_u32;
+
+ // Fragmented packet: place the IPv6 header so that an IPv6 fragment
+ // header fits between it and the payload
+ u16 frag_offset = ip4_get_fragment_offset (ip4);
+ if (PREDICT_FALSE (ip4_get_fragment_more (ip4) || frag_offset))
+ {
+ ip6 =
+ (ip6_header_t *) u8_ptr_add (ip4,
+ sizeof (*ip4) - sizeof (*ip6) -
+ sizeof (*frag));
+ frag =
+ (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag));
+ frag_id = frag_id_4to6 (ip4->fragment_id);
+ vlib_buffer_advance (b, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag));
+ }
+ else
+ {
+ ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6));
+ vlib_buffer_advance (b, sizeof (*ip4) - sizeof (*ip6));
+ frag = NULL;
+ }
+
+ ip6->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
+ ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4));
+ ip6->hop_limit = ip4->ttl;
+ ip6->protocol = ip4->protocol;
+
sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
- memset (&saddr, 0, sizeof (saddr));
+ clib_memset (&saddr, 0, sizeof (saddr));
saddr.ip4.as_u32 = ip4->src_address.as_u32;
- memset (&daddr, 0, sizeof (daddr));
+ clib_memset (&daddr, 0, sizeof (daddr));
daddr.ip4.as_u32 = ip4->dst_address.as_u32;
ste =
if (!bibe)
return -1;
- nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
+ nat64_compose_ip6 (&ip6_saddr, &old_src, bibe->fib_index);
ste =
- nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, sport);
+ nat64_db_st_entry_create (ctx->thread_index, db, bibe, &ip6_saddr,
+ &saddr.ip4, sport);
+
+ if (!ste)
+ return -1;
+
+ vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
+ db->st.st_entries_num);
}
ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
- udp->dst_port = bibe->in_port;
- if (proto == IP_PROTOCOL_UDP)
- checksum = &udp->checksum;
- else
+ vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
+
+ nat64_session_reset_timeout (ste, ctx->vm);
+
+ if (PREDICT_FALSE (frag != NULL))
{
- checksum = &tcp->checksum;
- nat64_tcp_session_set_state (ste, tcp, 0);
+ frag->next_hdr = ip6->protocol;
+ frag->identification = frag_id;
+ frag->rsv = 0;
+ frag->fragment_offset_and_more =
+ ip6_frag_hdr_offset_and_more (frag_offset, 1);
+ ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+ ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag));
}
- csum = ip_csum_sub_even (*checksum, dport);
- csum = ip_csum_add_even (csum, udp->dst_port);
- *checksum = ip_csum_fold (csum);
+ if (!vnet_buffer (b)->ip.reass.is_non_first_fragment)
+ {
+ udp->dst_port = bibe->in_port;
- vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
+ if (proto == IP_PROTOCOL_TCP)
+ {
+ nat64_tcp_session_set_state (ste, tcp, 0);
+ }
- nat64_session_reset_timeout (ste, ctx->vm);
+ csum = ip_csum_sub_even (*checksum, dport);
+ csum = ip_csum_add_even (csum, udp->dst_port);
+ csum = ip_csum_sub_even (csum, old_src.as_u32);
+ csum = ip_csum_sub_even (csum, old_dst.as_u32);
+ csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
+ csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
+ csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
+ csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
+ *checksum = ip_csum_fold (csum);
+ }
return 0;
}
static int
-nat64_out2in_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
+nat64_out2in_icmp_set_cb (vlib_buffer_t * b, ip4_header_t * ip4,
+ ip6_header_t * ip6, void *arg)
{
nat64_main_t *nm = &nat64_main;
nat64_out2in_set_ctx_t *ctx = arg;
sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
- memset (&saddr, 0, sizeof (saddr));
+ clib_memset (&saddr, 0, sizeof (saddr));
saddr.ip4.as_u32 = ip4->src_address.as_u32;
- memset (&daddr, 0, sizeof (daddr));
+ clib_memset (&daddr, 0, sizeof (daddr));
daddr.ip4.as_u32 = ip4->dst_address.as_u32;
if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply)
nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
ste =
- nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, 0);
+ nat64_db_st_entry_create (ctx->thread_index, db,
+ bibe, &ip6_saddr, &saddr.ip4, 0);
+
+ if (!ste)
+ return -1;
+
+ vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
+ db->st.st_entries_num);
}
nat64_session_reset_timeout (ste, ctx->vm);
}
static int
-nat64_out2in_inner_icmp_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
- void *arg)
+nat64_out2in_inner_icmp_set_cb (vlib_buffer_t * b, ip4_header_t * ip4,
+ ip6_header_t * ip6, void *arg)
{
nat64_main_t *nm = &nat64_main;
nat64_out2in_set_ctx_t *ctx = arg;
fib_index =
fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
- memset (&saddr, 0, sizeof (saddr));
+ clib_memset (&saddr, 0, sizeof (saddr));
saddr.ip4.as_u32 = ip4->src_address.as_u32;
- memset (&daddr, 0, sizeof (daddr));
+ clib_memset (&daddr, 0, sizeof (daddr));
daddr.ip4.as_u32 = ip4->dst_address.as_u32;
if (proto == IP_PROTOCOL_ICMP6)
}
static int
-nat64_out2in_unk_proto_set_cb (ip4_header_t * ip4, ip6_header_t * ip6,
- void *arg)
+nat64_out2in_unk_proto (vlib_main_t * vm, vlib_buffer_t * p,
+ nat64_out2in_set_ctx_t * ctx)
{
+ ip4_header_t *ip4 = vlib_buffer_get_current (p);
+ ip6_header_t *ip6;
+ ip6_frag_hdr_t *frag;
+ u32 frag_id;
+
nat64_main_t *nm = &nat64_main;
- nat64_out2in_set_ctx_t *ctx = arg;
nat64_db_bib_entry_t *bibe;
nat64_db_st_entry_t *ste;
ip46_address_t saddr, daddr;
u8 proto = ip4->protocol;
nat64_db_t *db = &nm->db[ctx->thread_index];
+ // Fragmented packet: place the IPv6 header so that an IPv6 fragment
+ // header fits between it and the payload
+ u16 frag_offset = ip4_get_fragment_offset (ip4);
+ if (PREDICT_FALSE (ip4_get_fragment_more (ip4) || frag_offset))
+ {
+ ip6 =
+ (ip6_header_t *) u8_ptr_add (ip4,
+ sizeof (*ip4) - sizeof (*ip6) -
+ sizeof (*frag));
+ frag =
+ (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag));
+ frag_id = frag_id_4to6 (ip4->fragment_id);
+ vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag));
+ }
+ else
+ {
+ ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6));
+ vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6));
+ frag = NULL;
+ }
+
+ ip6->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
+ ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4));
+ ip6->hop_limit = ip4->ttl;
+ ip6->protocol = ip4->protocol;
+
+ if (PREDICT_FALSE (frag != NULL))
+ {
+ frag->next_hdr = ip6->protocol;
+ frag->identification = frag_id;
+ frag->rsv = 0;
+ frag->fragment_offset_and_more =
+ ip6_frag_hdr_offset_and_more (frag_offset, 1);
+ ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+ ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag));
+ }
+
sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX];
fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
- memset (&saddr, 0, sizeof (saddr));
+ clib_memset (&saddr, 0, sizeof (saddr));
saddr.ip4.as_u32 = ip4->src_address.as_u32;
- memset (&daddr, 0, sizeof (daddr));
+ clib_memset (&daddr, 0, sizeof (daddr));
daddr.ip4.as_u32 = ip4->dst_address.as_u32;
ste =
return -1;
nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index);
- ste = nat64_db_st_entry_create (db, bibe, &ip6_saddr, &saddr.ip4, 0);
+ ste = nat64_db_st_entry_create (ctx->thread_index, db,
+ bibe, &ip6_saddr, &saddr.ip4, 0);
+
+ if (!ste)
+ return -1;
+
+ vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0,
+ db->st.st_entries_num);
}
nat64_session_reset_timeout (ste, ctx->vm);
return 0;
}
-static uword
-nat64_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (nat64_out2in_node) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
{
u32 n_left_from, *from, *to_next;
nat64_out2in_next_t next_index;
+ nat64_main_t *nm = &nat64_main;
u32 pkts_processed = 0;
u32 thread_index = vm->thread_index;
+ u32 tcp_packets = 0, udp_packets = 0, icmp_packets = 0, other_packets =
+ 0, fragments = 0;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
if (PREDICT_FALSE (proto0 == ~0))
{
- if (ip4_to_ip6 (b0, nat64_out2in_unk_proto_set_cb, &ctx0))
+ if (nat64_out2in_unk_proto (vm, b0, &ctx0))
{
next0 = NAT64_OUT2IN_NEXT_DROP;
b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
}
- goto trace0;
- }
-
- if (PREDICT_FALSE (ip4_is_fragment (ip40)))
- {
- next0 = NAT64_OUT2IN_NEXT_REASS;
+ other_packets++;
goto trace0;
}
if (proto0 == SNAT_PROTOCOL_ICMP)
{
+ icmp_packets++;
if (icmp_to_icmp6
(b0, nat64_out2in_icmp_set_cb, &ctx0,
nat64_out2in_inner_icmp_set_cb, &ctx0))
}
else
{
- if (ip4_to_ip6_tcp_udp (b0, nat64_out2in_tcp_udp_set_cb, &ctx0))
+ if (proto0 == SNAT_PROTOCOL_TCP)
+ tcp_packets++;
+ else
+ udp_packets++;
+
+ if (nat64_out2in_tcp_udp (vm, b0, &ctx0))
{
udp0 = ip4_next_header (ip40);
/*
t->next_index = next0;
}
- pkts_processed += next0 != NAT64_OUT2IN_NEXT_DROP;
+ pkts_processed += next0 == NAT64_OUT2IN_NEXT_IP6_LOOKUP;
/* verify speculative enqueue, maybe switch current next frame */
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
- vlib_node_increment_counter (vm, nat64_out2in_node.index,
+ vlib_node_increment_counter (vm, nm->out2in_node_index,
NAT64_OUT2IN_ERROR_OUT2IN_PACKETS,
pkts_processed);
+ vlib_node_increment_counter (vm, nm->out2in_node_index,
+ NAT64_OUT2IN_ERROR_TCP_PACKETS, tcp_packets);
+ vlib_node_increment_counter (vm, nm->out2in_node_index,
+ NAT64_OUT2IN_ERROR_UDP_PACKETS, udp_packets);
+ vlib_node_increment_counter (vm, nm->out2in_node_index,
+ NAT64_OUT2IN_ERROR_ICMP_PACKETS, icmp_packets);
+ vlib_node_increment_counter (vm, nm->out2in_node_index,
+ NAT64_OUT2IN_ERROR_OTHER_PACKETS,
+ other_packets);
+ vlib_node_increment_counter (vm, nm->out2in_node_index,
+ NAT64_OUT2IN_ERROR_FRAGMENTS, fragments);
+
return frame->n_vectors;
}
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_out2in_node) = {
- .function = nat64_out2in_node_fn,
.name = "nat64-out2in",
.vector_size = sizeof (u32),
.format_trace = format_nat64_out2in_trace,
[NAT64_OUT2IN_NEXT_DROP] = "error-drop",
[NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
[NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
- [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
},
};
/* *INDENT-ON* */
-VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_node, nat64_out2in_node_fn);
-
typedef struct nat64_out2in_frag_set_ctx_t_
{
vlib_main_t *vm;
u8 first_frag;
} nat64_out2in_frag_set_ctx_t;
-static int
-nat64_out2in_frag_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
-{
- nat64_main_t *nm = &nat64_main;
- nat64_out2in_frag_set_ctx_t *ctx = arg;
- nat64_db_st_entry_t *ste;
- nat64_db_bib_entry_t *bibe;
- udp_header_t *udp = ip4_next_header (ip4);
- ip_csum_t csum;
- u16 *checksum;
- nat64_db_t *db = &nm->db[ctx->thread_index];
-
- ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index);
- if (!ste)
- return -1;
-
- bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index);
- if (!bibe)
- return -1;
-
- if (ctx->first_frag)
- {
- udp->dst_port = bibe->in_port;
-
- if (ip4->protocol == IP_PROTOCOL_UDP)
- {
- checksum = &udp->checksum;
-
- if (!checksum)
- {
- u16 udp_len =
- clib_host_to_net_u16 (ip4->length) - sizeof (*ip4);
- csum = ip_incremental_checksum (0, udp, udp_len);
- csum =
- ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
- csum =
- ip_csum_with_carry (csum,
- clib_host_to_net_u16 (IP_PROTOCOL_UDP));
- csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[0]);
- csum = ip_csum_with_carry (csum, ste->in_r_addr.as_u64[1]);
- csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[0]);
- csum = ip_csum_with_carry (csum, bibe->in_addr.as_u64[1]);
- *checksum = ~ip_csum_fold (csum);
- }
- else
- {
- csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
- csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
- csum = ip_csum_sub_even (csum, bibe->out_port);
- csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
- csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
- csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
- csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
- csum = ip_csum_add_even (csum, bibe->in_port);
- *checksum = ip_csum_fold (csum);
- }
- }
- else
- {
- tcp_header_t *tcp = ip4_next_header (ip4);
- nat64_tcp_session_set_state (ste, tcp, 0);
- checksum = &tcp->checksum;
- csum = ip_csum_sub_even (*checksum, bibe->out_addr.as_u32);
- csum = ip_csum_sub_even (csum, ste->out_r_addr.as_u32);
- csum = ip_csum_sub_even (csum, bibe->out_port);
- csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[0]);
- csum = ip_csum_add_even (csum, ste->in_r_addr.as_u64[1]);
- csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[0]);
- csum = ip_csum_add_even (csum, bibe->in_addr.as_u64[1]);
- csum = ip_csum_add_even (csum, bibe->in_port);
- *checksum = ip_csum_fold (csum);
- }
-
- }
+#define foreach_nat64_out2in_handoff_error \
+_(CONGESTION_DROP, "congestion drop") \
+_(SAME_WORKER, "same worker") \
+_(DO_HANDOFF, "do handoff")
- ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0];
- ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1];
-
- ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0];
- ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1];
-
- vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index;
-
- nat64_session_reset_timeout (ste, ctx->vm);
-
- return 0;
-}
-
-static uword
-nat64_out2in_reass_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+typedef enum
{
- u32 n_left_from, *from, *to_next;
- nat64_out2in_next_t next_index;
- u32 pkts_processed = 0;
- u32 *fragments_to_drop = 0;
- u32 *fragments_to_loopback = 0;
- nat64_main_t *nm = &nat64_main;
- u32 thread_index = vm->thread_index;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- u32 next0;
- ip4_header_t *ip40;
- u8 cached0 = 0;
- u32 sw_if_index0, fib_index0;
- udp_header_t *udp0;
- nat_reass_ip4_t *reass0;
- ip46_address_t saddr0, daddr0;
- nat64_db_st_entry_t *ste0;
- nat64_db_bib_entry_t *bibe0;
- ip6_address_t ip6_saddr0;
- nat64_out2in_frag_set_ctx_t ctx0;
- nat64_db_t *db = &nm->db[thread_index];
-
- /* speculatively enqueue b0 to the current next frame */
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP;
-
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- fib_index0 =
- fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
- sw_if_index0);
-
- ctx0.thread_index = thread_index;
-
- if (PREDICT_FALSE (nat_reass_is_drop_frag (1)))
- {
- next0 = NAT64_OUT2IN_NEXT_DROP;
- b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
- goto trace0;
- }
-
- ip40 = vlib_buffer_get_current (b0);
-
- if (PREDICT_FALSE (!(ip40->protocol == IP_PROTOCOL_TCP
- || ip40->protocol == IP_PROTOCOL_UDP)))
- {
- next0 = NAT64_OUT2IN_NEXT_DROP;
- b0->error = node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT];
- goto trace0;
- }
-
- udp0 = ip4_next_header (ip40);
-
- reass0 = nat_ip4_reass_find_or_create (ip40->src_address,
- ip40->dst_address,
- ip40->fragment_id,
- ip40->protocol,
- 1, &fragments_to_drop);
-
- if (PREDICT_FALSE (!reass0))
- {
- next0 = NAT64_OUT2IN_NEXT_DROP;
- b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_REASS];
- goto trace0;
- }
-
- if (PREDICT_FALSE (ip4_is_first_fragment (ip40)))
- {
- ctx0.first_frag = 1;
-
- memset (&saddr0, 0, sizeof (saddr0));
- saddr0.ip4.as_u32 = ip40->src_address.as_u32;
- memset (&daddr0, 0, sizeof (daddr0));
- daddr0.ip4.as_u32 = ip40->dst_address.as_u32;
-
- ste0 =
- nat64_db_st_entry_find (db, &daddr0, &saddr0,
- udp0->dst_port, udp0->src_port,
- ip40->protocol, fib_index0, 0);
- if (!ste0)
- {
- bibe0 =
- nat64_db_bib_entry_find (db, &daddr0, udp0->dst_port,
- ip40->protocol, fib_index0, 0);
- if (!bibe0)
- {
- next0 = NAT64_OUT2IN_NEXT_DROP;
- b0->error =
- node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
- goto trace0;
- }
-
- nat64_compose_ip6 (&ip6_saddr0, &ip40->src_address,
- bibe0->fib_index);
- ste0 =
- nat64_db_st_entry_create (db, bibe0, &ip6_saddr0,
- &saddr0.ip4, udp0->src_port);
-
- if (!ste0)
- {
- next0 = NAT64_OUT2IN_NEXT_DROP;
- b0->error =
- node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION];
- goto trace0;
- }
- }
- reass0->sess_index = nat64_db_st_entry_get_index (db, ste0);
- reass0->thread_index = thread_index;
-
- nat_ip4_reass_get_frags (reass0, &fragments_to_loopback);
- }
- else
- {
- ctx0.first_frag = 0;
-
- if (PREDICT_FALSE (reass0->sess_index == (u32) ~ 0))
- {
- if (nat_ip4_reass_add_fragment (reass0, bi0))
- {
- b0->error = node->errors[NAT64_OUT2IN_ERROR_MAX_FRAG];
- next0 = NAT64_OUT2IN_NEXT_DROP;
- goto trace0;
- }
- cached0 = 1;
- goto trace0;
- }
- }
-
- ctx0.sess_index = reass0->sess_index;
- ctx0.proto = ip40->protocol;
- ctx0.vm = vm;
- ctx0.b = b0;
-
- if (ip4_to_ip6_fragmented (b0, nat64_out2in_frag_set_cb, &ctx0))
- {
- next0 = NAT64_OUT2IN_NEXT_DROP;
- b0->error = node->errors[NAT64_OUT2IN_ERROR_UNKNOWN];
- goto trace0;
- }
-
- trace0:
- if (PREDICT_FALSE
- ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- nat64_out2in_reass_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->cached = cached0;
- t->sw_if_index = sw_if_index0;
- t->next_index = next0;
- }
-
- if (cached0)
- {
- n_left_to_next++;
- to_next--;
- }
- else
- {
- pkts_processed += next0 != NAT64_OUT2IN_NEXT_DROP;
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- if (n_left_from == 0 && vec_len (fragments_to_loopback))
- {
- from = vlib_frame_vector_args (frame);
- u32 len = vec_len (fragments_to_loopback);
- if (len <= VLIB_FRAME_SIZE)
- {
- clib_memcpy (from, fragments_to_loopback,
- sizeof (u32) * len);
- n_left_from = len;
- vec_reset_length (fragments_to_loopback);
- }
- else
- {
- clib_memcpy (from,
- fragments_to_loopback + (len -
- VLIB_FRAME_SIZE),
- sizeof (u32) * VLIB_FRAME_SIZE);
- n_left_from = VLIB_FRAME_SIZE;
- _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
- }
- }
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- vlib_node_increment_counter (vm, nat64_out2in_reass_node.index,
- NAT64_OUT2IN_ERROR_OUT2IN_PACKETS,
- pkts_processed);
-
- nat_send_all_to_node (vm, fragments_to_drop, node,
- &node->errors[NAT64_OUT2IN_ERROR_DROP_FRAGMENT],
- NAT64_OUT2IN_NEXT_DROP);
-
- vec_free (fragments_to_drop);
- vec_free (fragments_to_loopback);
- return frame->n_vectors;
-}
+#define _(sym,str) NAT64_OUT2IN_HANDOFF_ERROR_##sym,
+ foreach_nat64_out2in_handoff_error
+#undef _
+ NAT64_OUT2IN_HANDOFF_N_ERROR,
+} nat64_out2in_handoff_error_t;
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (nat64_out2in_reass_node) = {
- .function = nat64_out2in_reass_node_fn,
- .name = "nat64-out2in-reass",
- .vector_size = sizeof (u32),
- .format_trace = format_nat64_out2in_reass_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN (nat64_out2in_error_strings),
- .error_strings = nat64_out2in_error_strings,
- .n_next_nodes = NAT64_OUT2IN_N_NEXT,
- /* edit / add dispositions here */
- .next_nodes = {
- [NAT64_OUT2IN_NEXT_DROP] = "error-drop",
- [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
- [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
- [NAT64_OUT2IN_NEXT_REASS] = "nat64-out2in-reass",
- },
+static char *nat64_out2in_handoff_error_strings[] = {
+#define _(sym,string) string,
+ foreach_nat64_out2in_handoff_error
+#undef _
};
-/* *INDENT-ON* */
-
-VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_reass_node,
- nat64_out2in_reass_node_fn);
typedef struct
{
u32 next_worker_index;
- u8 do_handoff;
} nat64_out2in_handoff_trace_t;
static u8 *
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
nat64_out2in_handoff_trace_t *t =
va_arg (*args, nat64_out2in_handoff_trace_t *);
- char *m;
- m = t->do_handoff ? "next worker" : "same worker";
- s = format (s, "NAT64-OUT2IN-HANDOFF: %s %d", m, t->next_worker_index);
+ s =
+ format (s, "NAT64-OUT2IN-HANDOFF: next-worker %d", t->next_worker_index);
return s;
}
-static inline uword
-nat64_out2in_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (nat64_out2in_handoff_node) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
{
nat64_main_t *nm = &nat64_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0;
- static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
- static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
- = 0;
- vlib_frame_queue_elt_t *hf = 0;
- vlib_frame_queue_t *fq;
- vlib_frame_t *f = 0, *d = 0;
- int i;
- u32 n_left_to_next_worker = 0, *to_next_worker = 0;
- u32 next_worker_index = 0;
- u32 current_worker_index = ~0;
- u32 thread_index = vm->thread_index;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+ u32 n_enq, n_left_from, *from;
+ u16 thread_indices[VLIB_FRAME_SIZE], *ti;
u32 fq_index;
- u32 to_node_index;
-
- fq_index = nm->fq_out2in_index;
- to_node_index = nat64_out2in_node.index;
-
- if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
- {
- vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
-
- vec_validate_init_empty (congested_handoff_queue_by_worker_index,
- tm->n_vlib_mains - 1,
- (vlib_frame_queue_t *) (~0));
- }
+ u32 thread_index = vm->thread_index;
+ u32 do_handoff = 0, same_worker = 0;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+
+ b = bufs;
+ ti = thread_indices;
+
+ fq_index = nm->fq_out2in_index;
while (n_left_from > 0)
{
- u32 bi0;
- vlib_buffer_t *b0;
ip4_header_t *ip0;
- u8 do_handoff;
-
- bi0 = from[0];
- from += 1;
- n_left_from -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
-
- ip0 = vlib_buffer_get_current (b0);
-
- next_worker_index = nat64_get_worker_out2in (ip0);
-
- if (PREDICT_FALSE (next_worker_index != thread_index))
- {
- do_handoff = 1;
-
- if (next_worker_index != current_worker_index)
- {
- fq =
- is_vlib_frame_queue_congested (fq_index, next_worker_index,
- 30,
- congested_handoff_queue_by_worker_index);
-
- if (fq)
- {
- /* if this is 1st frame */
- if (!d)
- {
- d = vlib_get_frame_to_node (vm, nm->error_node_index);
- to_next_drop = vlib_frame_vector_args (d);
- }
-
- to_next_drop[0] = bi0;
- to_next_drop += 1;
- d->n_vectors++;
- goto trace0;
- }
-
- if (hf)
- hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
-
- hf =
- vlib_get_worker_handoff_queue_elt (fq_index,
- next_worker_index,
- handoff_queue_elt_by_worker_index);
- n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
- to_next_worker = &hf->buffer_index[hf->n_vectors];
- current_worker_index = next_worker_index;
- }
-
- ASSERT (to_next_worker != 0);
- /* enqueue to correct worker thread */
- to_next_worker[0] = bi0;
- to_next_worker++;
- n_left_to_next_worker--;
+ ip0 = vlib_buffer_get_current (b[0]);
+ ti[0] = nat64_get_worker_out2in (b[0], ip0);
- if (n_left_to_next_worker == 0)
- {
- hf->n_vectors = VLIB_FRAME_SIZE;
- vlib_put_frame_queue_elt (hf);
- current_worker_index = ~0;
- handoff_queue_elt_by_worker_index[next_worker_index] = 0;
- hf = 0;
- }
- }
+ if (ti[0] != thread_index)
+ do_handoff++;
else
- {
- do_handoff = 0;
- /* if this is 1st frame */
- if (!f)
- {
- f = vlib_get_frame_to_node (vm, to_node_index);
- to_next = vlib_frame_vector_args (f);
- }
+ same_worker++;
- to_next[0] = bi0;
- to_next += 1;
- f->n_vectors++;
- }
-
- trace0:
if (PREDICT_FALSE
((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
{
nat64_out2in_handoff_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->next_worker_index = next_worker_index;
- t->do_handoff = do_handoff;
+ vlib_add_trace (vm, node, b[0], sizeof (*t));
+ t->next_worker_index = ti[0];
}
- }
- if (f)
- vlib_put_frame_to_node (vm, to_node_index, f);
-
- if (d)
- vlib_put_frame_to_node (vm, nm->error_node_index, d);
+ n_left_from -= 1;
+ ti += 1;
+ b += 1;
+ }
- if (hf)
- hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
+ n_enq =
+ vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
+ frame->n_vectors, 1);
+
+ if (n_enq < frame->n_vectors)
+ vlib_node_increment_counter (vm, node->node_index,
+ NAT64_OUT2IN_HANDOFF_ERROR_CONGESTION_DROP,
+ frame->n_vectors - n_enq);
+ vlib_node_increment_counter (vm, node->node_index,
+ NAT64_OUT2IN_HANDOFF_ERROR_SAME_WORKER,
+ same_worker);
+ vlib_node_increment_counter (vm, node->node_index,
+ NAT64_OUT2IN_HANDOFF_ERROR_DO_HANDOFF,
+ do_handoff);
- /* Ship frames to the worker nodes */
- for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
- {
- if (handoff_queue_elt_by_worker_index[i])
- {
- hf = handoff_queue_elt_by_worker_index[i];
- /*
- * It works better to let the handoff node
- * rate-adapt, always ship the handoff queue element.
- */
- if (1 || hf->n_vectors == hf->last_n_vectors)
- {
- vlib_put_frame_queue_elt (hf);
- handoff_queue_elt_by_worker_index[i] = 0;
- }
- else
- hf->last_n_vectors = hf->n_vectors;
- }
- congested_handoff_queue_by_worker_index[i] =
- (vlib_frame_queue_t *) (~0);
- }
- hf = 0;
- current_worker_index = ~0;
return frame->n_vectors;
}
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_out2in_handoff_node) = {
- .function = nat64_out2in_handoff_node_fn,
.name = "nat64-out2in-handoff",
.vector_size = sizeof (u32),
.format_trace = format_nat64_out2in_handoff_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(nat64_out2in_handoff_error_strings),
+ .error_strings = nat64_out2in_handoff_error_strings,
.n_next_nodes = 1,
};
/* *INDENT-ON* */
-VLIB_NODE_FUNCTION_MULTIARCH (nat64_out2in_handoff_node,
- nat64_out2in_handoff_node_fn);
/*
* fd.io coding-style-patch-verification: ON
*