From: Florin Coras Date: Fri, 14 Feb 2020 23:41:25 +0000 (+0000) Subject: tcp: pace timer handling X-Git-Tag: v20.09-rc0~569 X-Git-Url: https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commitdiff_plain;h=aa3886993c13d71d93ef01dc73b79985d6ec997f tcp: pace timer handling Type: improvement Signed-off-by: Florin Coras Change-Id: I93067054631d6ae2411a7b08d7b681aed7a121b2 --- diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index e34f773d7d3..f24ddb3a879 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -1288,103 +1288,23 @@ tcp_session_tx_fifo_offset (transport_connection_t * trans_conn) } static void -tcp_update_time (f64 now, u8 thread_index) -{ - tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index); - - tcp_set_time_now (wrk); - tw_timer_expire_timers_16t_2w_512sl (&wrk->timer_wheel, now); - tcp_flush_frames_to_output (wrk); -} - -static void -tcp_session_flush_data (transport_connection_t * tconn) -{ - tcp_connection_t *tc = (tcp_connection_t *) tconn; - if (tc->flags & TCP_CONN_PSH_PENDING) - return; - tc->flags |= TCP_CONN_PSH_PENDING; - tc->psh_seq = tc->snd_una + transport_max_tx_dequeue (tconn) - 1; -} - -/* *INDENT-OFF* */ -const static transport_proto_vft_t tcp_proto = { - .enable = vnet_tcp_enable_disable, - .start_listen = tcp_session_bind, - .stop_listen = tcp_session_unbind, - .push_header = tcp_session_push_header, - .get_connection = tcp_session_get_transport, - .get_listener = tcp_session_get_listener, - .get_half_open = tcp_half_open_session_get_transport, - .connect = tcp_session_open, - .close = tcp_session_close, - .cleanup = tcp_session_cleanup, - .reset = tcp_session_reset, - .send_mss = tcp_session_send_mss, - .send_space = tcp_session_send_space, - .update_time = tcp_update_time, - .tx_fifo_offset = tcp_session_tx_fifo_offset, - .flush_data = tcp_session_flush_data, - .custom_tx = tcp_session_custom_tx, - .format_connection = format_tcp_session, - .format_listener = format_tcp_listener_session, - .format_half_open = format_tcp_half_open_session, - .transport_options = { - .tx_type = TRANSPORT_TX_PEEK, - .service_type = TRANSPORT_SERVICE_VC, - }, -}; -/* *INDENT-ON* */ - -void -tcp_connection_tx_pacer_update (tcp_connection_t * tc) -{ - if (!transport_connection_is_tx_paced (&tc->connection)) - return; - - f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us); - - transport_connection_tx_pacer_update (&tc->connection, - tcp_cc_get_pacing_rate (tc), - srtt * CLIB_US_TIME_FREQ); -} - -void -tcp_connection_tx_pacer_reset (tcp_connection_t * tc, u32 window, - u32 start_bucket) -{ - f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us); - transport_connection_tx_pacer_reset (&tc->connection, - tcp_cc_get_pacing_rate (tc), - start_bucket, - srtt * CLIB_US_TIME_FREQ); -} - -static void -tcp_timer_waitclose_handler (u32 conn_index, u32 thread_index) +tcp_timer_waitclose_handler (tcp_connection_t * tc) { - tcp_connection_t *tc; - - tc = tcp_connection_get (conn_index, thread_index); - if (!tc) - return; - switch (tc->state) { case TCP_STATE_CLOSE_WAIT: tcp_connection_timers_reset (tc); session_transport_closed_notify (&tc->connection); - + /* App never returned with a close */ if (!(tc->flags & TCP_CONN_FINPNDG)) { - clib_warning ("close-wait with fin sent"); tcp_connection_set_state (tc, TCP_STATE_CLOSED); tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); + tcp_worker_stats_inc (tc->c_thread_index, to_closewait, 1); break; } - /* Session didn't come back with a close. Send FIN either way - * and switch to LAST_ACK. */ + /* Send FIN either way and switch to LAST_ACK. */ tcp_cong_recovery_off (tc); /* Make sure we don't try to send unsent data */ tc->snd_nxt = tc->snd_una; @@ -1393,7 +1313,7 @@ tcp_timer_waitclose_handler (u32 conn_index, u32 thread_index) /* Make sure we don't wait in LAST ACK forever */ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time); - tcp_worker_stats_inc (thread_index, to_closewait, 1); + tcp_worker_stats_inc (tc->c_thread_index, to_closewait2, 1); /* Don't delete the connection yet */ break; @@ -1415,21 +1335,21 @@ tcp_timer_waitclose_handler (u32 conn_index, u32 thread_index) tcp_connection_set_state (tc, TCP_STATE_CLOSED); tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); } - tcp_worker_stats_inc (thread_index, to_finwait1, 1); + tcp_worker_stats_inc (tc->c_thread_index, to_finwait1, 1); break; case TCP_STATE_LAST_ACK: tcp_connection_timers_reset (tc); tcp_connection_set_state (tc, TCP_STATE_CLOSED); tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); session_transport_closed_notify (&tc->connection); - tcp_worker_stats_inc (thread_index, to_lastack, 1); + tcp_worker_stats_inc (tc->c_thread_index, to_lastack, 1); break; case TCP_STATE_CLOSING: tcp_connection_timers_reset (tc); tcp_connection_set_state (tc, TCP_STATE_CLOSED); tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); session_transport_closed_notify (&tc->connection); - tcp_worker_stats_inc (thread_index, to_closing, 1); + tcp_worker_stats_inc (tc->c_thread_index, to_closing, 1); break; case TCP_STATE_FIN_WAIT_2: tcp_send_reset (tc); @@ -1437,7 +1357,7 @@ tcp_timer_waitclose_handler (u32 conn_index, u32 thread_index) tcp_connection_set_state (tc, TCP_STATE_CLOSED); session_transport_closed_notify (&tc->connection); tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); - tcp_worker_stats_inc (thread_index, to_finwait2, 1); + tcp_worker_stats_inc (tc->c_thread_index, to_finwait2, 1); break; default: tcp_connection_del (tc); @@ -1456,16 +1376,145 @@ static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] = }; /* *INDENT-ON* */ +static void +tcp_dispatch_pending_timers (tcp_worker_ctx_t * wrk) +{ + u32 n_timers, connection_index, timer_id, thread_index, timer_handle; + tcp_connection_t *tc; + int i; + + if (!(n_timers = clib_fifo_elts (wrk->pending_timers))) + return; + + thread_index = wrk->vm->thread_index; + for (i = 0; i < clib_min (n_timers, 32); i++) + { + clib_fifo_sub1 (wrk->pending_timers, timer_handle); + connection_index = timer_handle & 0x0FFFFFFF; + timer_id = timer_handle >> 28; + + if (PREDICT_TRUE (timer_id != TCP_TIMER_RETRANSMIT_SYN)) + tc = tcp_connection_get (connection_index, thread_index); + else + tc = tcp_half_open_connection_get (connection_index); + + if (PREDICT_FALSE (!tc)) + continue; + + /* Skip timer if it was rearmed while pending dispatch */ + if (PREDICT_FALSE (tc->timers[timer_id] != TCP_TIMER_HANDLE_INVALID)) + continue; + + (*timer_expiration_handlers[timer_id]) (tc); + } +} + +/** + * Flush ip lookup tx frames populated by timer pops + */ +static void +tcp_flush_frames_to_output (tcp_worker_ctx_t * wrk) +{ + if (wrk->ip_lookup_tx_frames[0]) + { + vlib_put_frame_to_node (wrk->vm, ip4_lookup_node.index, + wrk->ip_lookup_tx_frames[0]); + wrk->ip_lookup_tx_frames[0] = 0; + } + if (wrk->ip_lookup_tx_frames[1]) + { + vlib_put_frame_to_node (wrk->vm, ip6_lookup_node.index, + wrk->ip_lookup_tx_frames[1]); + wrk->ip_lookup_tx_frames[1] = 0; + } +} + +static void +tcp_update_time (f64 now, u8 thread_index) +{ + tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index); + + tcp_set_time_now (wrk); + tw_timer_expire_timers_16t_2w_512sl (&wrk->timer_wheel, now); + tcp_dispatch_pending_timers (wrk); + tcp_flush_frames_to_output (wrk); +} + +static void +tcp_session_flush_data (transport_connection_t * tconn) +{ + tcp_connection_t *tc = (tcp_connection_t *) tconn; + if (tc->flags & TCP_CONN_PSH_PENDING) + return; + tc->flags |= TCP_CONN_PSH_PENDING; + tc->psh_seq = tc->snd_una + transport_max_tx_dequeue (tconn) - 1; +} + +/* *INDENT-OFF* */ +const static transport_proto_vft_t tcp_proto = { + .enable = vnet_tcp_enable_disable, + .start_listen = tcp_session_bind, + .stop_listen = tcp_session_unbind, + .push_header = tcp_session_push_header, + .get_connection = tcp_session_get_transport, + .get_listener = tcp_session_get_listener, + .get_half_open = tcp_half_open_session_get_transport, + .connect = tcp_session_open, + .close = tcp_session_close, + .cleanup = tcp_session_cleanup, + .reset = tcp_session_reset, + .send_mss = tcp_session_send_mss, + .send_space = tcp_session_send_space, + .update_time = tcp_update_time, + .tx_fifo_offset = tcp_session_tx_fifo_offset, + .flush_data = tcp_session_flush_data, + .custom_tx = tcp_session_custom_tx, + .format_connection = format_tcp_session, + .format_listener = format_tcp_listener_session, + .format_half_open = format_tcp_half_open_session, + .transport_options = { + .tx_type = TRANSPORT_TX_PEEK, + .service_type = TRANSPORT_SERVICE_VC, + }, +}; +/* *INDENT-ON* */ + +void +tcp_connection_tx_pacer_update (tcp_connection_t * tc) +{ + if (!transport_connection_is_tx_paced (&tc->connection)) + return; + + f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us); + + transport_connection_tx_pacer_update (&tc->connection, + tcp_cc_get_pacing_rate (tc), + srtt * CLIB_US_TIME_FREQ); +} + +void +tcp_connection_tx_pacer_reset (tcp_connection_t * tc, u32 window, + u32 start_bucket) +{ + f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us); + transport_connection_tx_pacer_reset (&tc->connection, + tcp_cc_get_pacing_rate (tc), + start_bucket, + srtt * CLIB_US_TIME_FREQ); +} + static void tcp_expired_timers_dispatch (u32 * expired_timers) { u32 thread_index = vlib_get_thread_index (); u32 connection_index, timer_id, n_expired; + tcp_worker_ctx_t *wrk; tcp_connection_t *tc; int i; + wrk = tcp_get_worker (thread_index); n_expired = vec_len (expired_timers); - tcp_worker_stats_inc (thread_index, timer_expirations, n_expired); + tcp_workerp_stats_inc (wrk, timer_expirations, n_expired); /* * Invalidate all timer handles before dispatching. This avoids dangling @@ -1486,15 +1535,7 @@ tcp_expired_timers_dispatch (u32 * expired_timers) tc->timers[timer_id] = TCP_TIMER_HANDLE_INVALID; } - /* - * Dispatch expired timers - */ - for (i = 0; i < n_expired; i++) - { - connection_index = expired_timers[i] & 0x0FFFFFFF; - timer_id = expired_timers[i] >> 28; - (*timer_expiration_handlers[timer_id]) (connection_index, thread_index); - } + clib_fifo_add (wrk->pending_timers, expired_timers, n_expired); } static void @@ -2297,14 +2338,19 @@ show_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input, for (thread = 0; thread < vec_len (tm->wrk_ctx); thread++) { wrk = tcp_get_worker (thread); - vlib_cli_output (vm, "Thread %d:\n", thread); + vlib_cli_output (vm, "Thread %u:\n", thread); + + if (clib_fifo_elts (wrk->pending_timers)) + vlib_cli_output (vm, " %lu pending timers", + clib_fifo_elts (wrk->pending_timers)); #define _(name,type,str) \ if (wrk->stats.name) \ - vlib_cli_output (vm, " %ld %s", wrk->stats.name, str); + vlib_cli_output (vm, " %lu %s", wrk->stats.name, str); foreach_tcp_wrk_stat #undef _ } + return 0; } diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index a60e105f0d2..e7b53b8ec42 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -82,13 +82,6 @@ typedef enum _tcp_timers TCP_N_TIMERS } tcp_timers_e; -typedef void (timer_expiration_handler) (u32 index, u32 thread_index); - -extern timer_expiration_handler tcp_timer_delack_handler; -extern timer_expiration_handler tcp_timer_retransmit_handler; -extern timer_expiration_handler tcp_timer_persist_handler; -extern timer_expiration_handler tcp_timer_retransmit_syn_handler; - #define TCP_TIMER_HANDLE_INVALID ((u32) ~0) #define TCP_TIMER_TICK 0.1 /**< Timer tick in seconds */ @@ -478,6 +471,13 @@ struct _tcp_cc_algorithm #define tcp_csum_offload(tc) (!((tc)->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD)) +typedef void (timer_expiration_handler) (tcp_connection_t * tc); + +extern timer_expiration_handler tcp_timer_delack_handler; +extern timer_expiration_handler tcp_timer_retransmit_handler; +extern timer_expiration_handler tcp_timer_persist_handler; +extern timer_expiration_handler tcp_timer_retransmit_syn_handler; + always_inline void tcp_cong_recovery_off (tcp_connection_t * tc) { @@ -507,6 +507,7 @@ typedef struct _tcp_lookup_dispatch _(rxt_segs, u64, "segments retransmitted") \ _(tr_events, u32, "timer retransmit events") \ _(to_closewait, u32, "timeout close-wait") \ + _(to_closewait2, u32, "timeout close-wait w/data") \ _(to_finwait1, u32, "timeout fin-wait-1") \ _(to_finwait2, u32, "timeout fin-wait-2") \ _(to_lastack, u32, "timeout last-ack") \ @@ -554,6 +555,9 @@ typedef struct tcp_worker_ctx_ /** tx buffer free list */ u32 *tx_buffers; + /* Fifo of pending timer expirations */ + u32 *pending_timers; + /** worker timer wheel */ tw_timer_wheel_16t_2w_512sl_t timer_wheel; @@ -831,7 +835,6 @@ void tcp_send_fin (tcp_connection_t * tc); void tcp_send_ack (tcp_connection_t * tc); void tcp_update_burst_snd_vars (tcp_connection_t * tc); void tcp_update_rto (tcp_connection_t * tc); -void tcp_flush_frames_to_output (tcp_worker_ctx_t * wrk); void tcp_send_window_update_ack (tcp_connection_t * tc); void tcp_program_ack (tcp_connection_t * tc); diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 69b34cc595f..5eccda60ad2 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -972,32 +972,6 @@ tcp_send_synack (tcp_connection_t * tc) TCP_EVT (TCP_EVT_SYNACK_SENT, tc); } -/** - * Flush ip lookup tx frames populated by timer pops - */ -static void -tcp_flush_frame_to_ip_lookup (tcp_worker_ctx_t * wrk, u8 is_ip4) -{ - if (wrk->ip_lookup_tx_frames[!is_ip4]) - { - u32 next_index; - next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index; - vlib_put_frame_to_node (wrk->vm, next_index, - wrk->ip_lookup_tx_frames[!is_ip4]); - wrk->ip_lookup_tx_frames[!is_ip4] = 0; - } -} - -/** - * Flush v4 and v6 tcp and ip-lookup tx frames for thread index - */ -void -tcp_flush_frames_to_output (tcp_worker_ctx_t * wrk) -{ - tcp_flush_frame_to_ip_lookup (wrk, 1); - tcp_flush_frame_to_ip_lookup (wrk, 0); -} - /** * Send FIN */ @@ -1208,11 +1182,8 @@ tcp_program_retransmit (tcp_connection_t * tc) * Sends delayed ACK when timer expires */ void -tcp_timer_delack_handler (u32 index, u32 thread_index) +tcp_timer_delack_handler (tcp_connection_t * tc) { - tcp_connection_t *tc; - - tc = tcp_connection_get (index, thread_index); tcp_send_ack (tc); } @@ -1443,19 +1414,17 @@ tcp_cc_init_rxt_timeout (tcp_connection_t * tc) } void -tcp_timer_retransmit_handler (u32 tc_index, u32 thread_index) +tcp_timer_retransmit_handler (tcp_connection_t * tc) { - tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index); + tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index); vlib_main_t *vm = wrk->vm; - tcp_connection_t *tc; vlib_buffer_t *b = 0; u32 bi, n_bytes; tcp_workerp_stats_inc (wrk, tr_events, 1); - tc = tcp_connection_get (tc_index, thread_index); - /* Note: the connection may have been closed and pool_put */ - if (PREDICT_FALSE (tc == 0 || tc->state == TCP_STATE_SYN_SENT)) + /* Should be handled by a different handler */ + if (PREDICT_FALSE (tc->state == TCP_STATE_SYN_SENT)) return; /* Wait-close and retransmit could pop at the same time */ @@ -1592,18 +1561,15 @@ tcp_timer_retransmit_handler (u32 tc_index, u32 thread_index) * SYN retransmit timer handler. Active open only. */ void -tcp_timer_retransmit_syn_handler (u32 tc_index, u32 thread_index) +tcp_timer_retransmit_syn_handler (tcp_connection_t * tc) { - tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index); + tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index); vlib_main_t *vm = wrk->vm; - tcp_connection_t *tc; vlib_buffer_t *b = 0; u32 bi; - tc = tcp_half_open_connection_get (tc_index); - /* Note: the connection may have transitioned to ESTABLISHED... */ - if (PREDICT_FALSE (tc == 0 || tc->state != TCP_STATE_SYN_SENT)) + if (PREDICT_FALSE (tc->state != TCP_STATE_SYN_SENT)) return; /* Half-open connection actually moved to established but we were @@ -1658,21 +1624,16 @@ tcp_timer_retransmit_syn_handler (u32 tc_index, u32 thread_index) * */ void -tcp_timer_persist_handler (u32 index, u32 thread_index) +tcp_timer_persist_handler (tcp_connection_t * tc) { - tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index); + tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index); u32 bi, max_snd_bytes, available_bytes, offset; tcp_main_t *tm = vnet_get_tcp_main (); vlib_main_t *vm = wrk->vm; - tcp_connection_t *tc; vlib_buffer_t *b; int n_bytes = 0; u8 *data; - tc = tcp_connection_get_if_valid (index, thread_index); - if (!tc) - return; - /* Problem already solved or worse */ if (tc->state == TCP_STATE_CLOSED || tc->snd_wnd > tc->snd_mss || (tc->flags & TCP_CONN_FINSNT))