From 1146ff4bcd336d8efc19405f1d83914e6115a01f Mon Sep 17 00:00:00 2001
From: Simon Zhang
Date: Mon, 2 Sep 2019 22:54:00 +0800
Subject: [PATCH] tcp: enable gso in tcp hoststack

Type: feature

Change-Id: If68d07fbe8c6f7fffd2f93c7e854367082927e4f
Signed-off-by: Simon Zhang
---
Reviewer notes (not part of the commit message):
 - tcp_session_cal_goal_size: the result is now clamped to at least
   snd_mss, so a small or zero send window can no longer produce a
   sub-mss or zero segment size.
 - Doc comments were added to the three new helpers; hunk offsets and
   the diffstat below are adjusted accordingly.
 - E-mail addresses and the #include targets in the tcp_input.c hunk are
   missing from this copy of the patch and are left as found.
 - Whitespace in context lines is reconstructed with spaces; apply with
   "git apply --ignore-whitespace".

 src/vnet/ip/ip4.h            |  2 ++
 src/vnet/session/transport.c |  7 ++++---
 src/vnet/tcp/tcp.c           | 25 +++++++++++++++++++++++++
 src/vnet/tcp/tcp.h           |  2 ++
 src/vnet/tcp/tcp_input.c     | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/vnet/tcp/tcp_output.c    | 40 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 121 insertions(+), 3 deletions(-)

diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h
index 0ead3faa1b8..9da5926e13f 100644
--- a/src/vnet/ip/ip4.h
+++ b/src/vnet/ip/ip4.h
@@ -412,6 +412,8 @@ vlib_buffer_push_ip4 (vlib_main_t * vm, vlib_buffer_t * b,
       vnet_buffer (b)->l3_hdr_offset = (u8 *) ih - b->data;
       vnet_buffer (b)->l4_hdr_offset = vnet_buffer (b)->l3_hdr_offset +
         sizeof (*ih);
+      b->flags |=
+        VNET_BUFFER_F_L3_HDR_OFFSET_VALID | VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
     }
   else
     ih->checksum = ip4_header_checksum (ih);
diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c
index c1c908eae1a..6caa4d38cd7 100644
--- a/src/vnet/session/transport.c
+++ b/src/vnet/session/transport.c
@@ -49,7 +49,7 @@ static double transport_pacer_period;
 
 #define TRANSPORT_PACER_MIN_MSS 1460
 #define TRANSPORT_PACER_MIN_BURST TRANSPORT_PACER_MIN_MSS
-#define TRANSPORT_PACER_MAX_BURST (32 * TRANSPORT_PACER_MIN_MSS)
+#define TRANSPORT_PACER_MAX_BURST (43 * TRANSPORT_PACER_MIN_MSS)
 
 u8 *
 format_transport_proto (u8 * s, va_list * args)
@@ -656,9 +656,10 @@ transport_connection_snd_space (transport_connection_t * tc, u64 time_now,
     {
       time_now >>= SPACER_CPU_TICKS_PER_PERIOD_SHIFT;
       max_paced_burst = spacer_max_burst (&tc->pacer, time_now);
-      max_paced_burst = (max_paced_burst < mss) ? 0 : max_paced_burst;
+      max_paced_burst =
+        (max_paced_burst < TRANSPORT_PACER_MIN_BURST) ? 0 : max_paced_burst;
       snd_space = clib_min (snd_space, max_paced_burst);
-      snd_space = snd_space - snd_space % mss;
+      return snd_space >= mss ? snd_space - snd_space % mss : snd_space;
     }
   return snd_space;
 }
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 5090b7dd2d7..ad9bbff5c0e 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -1177,6 +1177,26 @@ tcp_half_open_session_get_transport (u32 conn_index)
   return &tc->connection;
 }
 
+/**
+ * Compute the segment "goal size" reported to the session layer for
+ * connections whose tx interface supports TSO.
+ *
+ * The goal is TCP_MAX_GSO_SZ less snd_mss, capped at half the send window
+ * and never smaller than snd_mss. The lower bound matters because the
+ * value is handed out as the connection's mss: a small or zero window
+ * must not turn it into a sub-mss or zero segment size.
+ */
+static u16
+tcp_session_cal_goal_size (tcp_connection_t * tc)
+{
+  u16 goal_size;
+
+  goal_size = TCP_MAX_GSO_SZ - tc->snd_mss % TCP_MAX_GSO_SZ;
+  goal_size = clib_min (goal_size, tc->snd_wnd / 2);
+
+  return goal_size > tc->snd_mss ? goal_size : tc->snd_mss;
+}
+
 /**
  * Compute maximum segment size for session layer.
  *
@@ -1194,6 +1214,11 @@ tcp_session_send_mss (transport_connection_t * trans_conn)
    * the current state of the connection. */
   tcp_update_burst_snd_vars (tc);
 
+  if (PREDICT_FALSE (tc->is_tso))
+    {
+      return tcp_session_cal_goal_size (tc);
+    }
+
   return tc->snd_mss;
 }
 
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 9e13de4472a..99735f2af70 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -31,6 +31,7 @@
 #define TCP_FIB_RECHECK_PERIOD 1 * THZ /**< Recheck every 1s */
 #define TCP_MAX_OPTION_SPACE 40
 #define TCP_CC_DATA_SZ 24
+#define TCP_MAX_GSO_SZ 65536
 
 #define TCP_DUPACK_THRESHOLD 3
 #define TCP_IW_N_SEGMENTS 10
@@ -304,6 +305,7 @@ typedef struct _tcp_connection
   transport_connection_t connection; /**< Common transport data. First! */
 
   u8 state; /**< TCP state as per tcp_state_t */
+  u8 is_tso; /**< Set when the tx interface supports TSO/GSO */
   u16 flags; /**< Connection flags (see tcp_conn_flags_e) */
 
   u32 timers[TCP_N_TIMERS]; /**< Timer handles into timer wheel */
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 1a9eff51a72..9c1f6309b96 100755
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -14,6 +14,8 @@
  */
 
 #include
+#include
+#include
 #include
 #include
 #include
@@ -2292,6 +2294,48 @@ tcp_lookup_connection (u32 fib_index, vlib_buffer_t * b, u8 thread_index,
   return tc;
 }
 
+/**
+ * Record on the connection whether its tx path can use TSO/GSO.
+ *
+ * Looks up the remote address in the connection's FIB, takes bucket 0 of
+ * the resulting load-balance object and sets tc->is_tso from the
+ * VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO flag of the derived hw interface.
+ *
+ * NOTE(review): the bucket's dpoi_index is used directly as a sw_if_index
+ * and only bucket 0 is inspected - confirm both are valid for every DPO
+ * type this lookup can return.
+ */
+always_inline void
+tcp_check_tx_offload (tcp_connection_t * tc, int is_ipv4)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  const dpo_id_t *dpo;
+  const load_balance_t *lb;
+  vnet_hw_interface_t *hw_if;
+  u32 sw_if_idx, lb_idx;
+
+  if (is_ipv4)
+    {
+      ip4_address_t *dst_addr = &(tc->c_rmt_ip.ip4);
+      lb_idx = ip4_fib_forwarding_lookup (tc->c_fib_index, dst_addr);
+    }
+  else
+    {
+      ip6_address_t *dst_addr = &(tc->c_rmt_ip.ip6);
+      lb_idx = ip6_fib_table_fwding_lookup (tc->c_fib_index, dst_addr);
+    }
+
+  lb = load_balance_get (lb_idx);
+  dpo = load_balance_get_bucket_i (lb, 0);
+
+  sw_if_idx = dpo->dpoi_index;
+  hw_if = vnet_get_sup_hw_interface (vnm, sw_if_idx);
+
+  tc->is_tso =
+    ((hw_if->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) == 0) ? 0 : 1;
+}
+
+
 always_inline uword
 tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                        vlib_frame_t * from_frame, int is_ip4)
@@ -2508,6 +2552,8 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
           goto drop;
         }
 
+      tcp_check_tx_offload (new_tc0, is_ip4);
+
       /* Read data, if any */
       if (PREDICT_FALSE (vnet_buffer (b0)->tcp.data_len))
         {
@@ -2694,6 +2740,8 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
           tc0->state = TCP_STATE_ESTABLISHED;
           TCP_EVT (TCP_EVT_STATE_CHANGE, tc0);
 
+          tcp_check_tx_offload (tc0, is_ip4);
+
           /* Initialize session variables */
           tc0->snd_una = vnet_buffer (b0)->tcp.ack_number;
           tc0->snd_wnd = clib_net_to_host_u16 (tcp0->window)
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index ff281b5661d..15aa85712e3 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -2126,10 +2126,44 @@ tcp_output_push_ip (vlib_main_t * vm, vlib_buffer_t * b0,
         b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
       vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data;
       vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
+      b0->flags |=
+        VNET_BUFFER_F_L3_HDR_OFFSET_VALID | VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
       th0->checksum = 0;
     }
 }
 
+/**
+ * Flag a buffer for GSO when it carries more than one mss of payload.
+ *
+ * Only buffers with VLIB_BUFFER_TOTAL_LENGTH_VALID set are considered.
+ * The payload length is the whole chain length less the TCP header and
+ * options; when it exceeds snd_mss the buffer gets VNET_BUFFER_F_GSO and
+ * its gso_l4_hdr_sz / gso_size fields are filled in.
+ *
+ * NOTE(review): callers invoke this after tcp_output_push_ip, so
+ * current_length presumably already includes the IP header - confirm
+ * data_len is not overstated by that amount.
+ */
+always_inline void
+tcp_check_if_gso (tcp_connection_t * tc, vlib_buffer_t * b)
+{
+  if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID)))
+    return;
+  u16 data_len =
+    b->current_length + b->total_length_not_including_first_buffer -
+    sizeof (tcp_header_t) - tc->snd_opts_len;
+
+  if (data_len > tc->snd_mss)
+    {
+      ASSERT ((b->flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID) != 0);
+      ASSERT ((b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID) != 0);
+      b->flags |= VNET_BUFFER_F_GSO;
+      vnet_buffer2 (b)->gso_l4_hdr_sz =
+        sizeof (tcp_header_t) + tc->snd_opts_len;
+      vnet_buffer2 (b)->gso_size = tc->snd_mss;
+    }
+}
+
 always_inline void
 tcp_output_handle_packet (tcp_connection_t * tc0, vlib_buffer_t * b0,
                           vlib_node_runtime_t * error_node, u16 * next0,
@@ -2213,6 +2247,9 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
           tcp_output_push_ip (vm, b[0], tc0, is_ip4);
           tcp_output_push_ip (vm, b[1], tc1, is_ip4);
 
+          tcp_check_if_gso (tc0, b[0]);
+          tcp_check_if_gso (tc1, b[1]);
+
           tcp_output_handle_packet (tc0, b[0], error_node, &next[0], is_ip4);
           tcp_output_handle_packet (tc1, b[1], error_node, &next[1], is_ip4);
         }
@@ -2221,6 +2258,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
           if (tc0 != 0)
             {
               tcp_output_push_ip (vm, b[0], tc0, is_ip4);
+              tcp_check_if_gso (tc0, b[0]);
               tcp_output_handle_packet (tc0, b[0], error_node, &next[0],
                                         is_ip4);
             }
@@ -2232,6 +2270,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
           if (tc1 != 0)
             {
               tcp_output_push_ip (vm, b[1], tc1, is_ip4);
+              tcp_check_if_gso (tc1, b[1]);
               tcp_output_handle_packet (tc1, b[1], error_node, &next[1],
                                         is_ip4);
             }
@@ -2262,6 +2301,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
       if (PREDICT_TRUE (tc0 != 0))
         {
           tcp_output_push_ip (vm, b[0], tc0, is_ip4);
+          tcp_check_if_gso (tc0, b[0]);
           tcp_output_handle_packet (tc0, b[0], error_node, &next[0], is_ip4);
         }
       else
-- 
2.16.6