X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Ftcp%2Ftcp.c;h=1c8ce34a728a8ceda3ddc98afa1d80757b83bfaf;hb=5f5d50ee9b342964caeeee10612cd002497fb40c;hp=15ac7d37edc38c76b1edffe9c4a6ed3c638c7a20;hpb=ca1c8f3e782dc68a51aa2792771d9b4aac696ddd;p=vpp.git diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 15ac7d37edc..1c8ce34a728 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -103,7 +103,7 @@ tcp_connection_bind (u32 session_index, transport_endpoint_t * lcl) return listener->c_c_index; } -u32 +static u32 tcp_session_bind (u32 session_index, transport_endpoint_t * tep) { return tcp_connection_bind (session_index, tep); @@ -126,14 +126,14 @@ tcp_connection_unbind (u32 listener_index) pool_put_index (tm->listener_pool, listener_index); } -u32 +static u32 tcp_session_unbind (u32 listener_index) { tcp_connection_unbind (listener_index); return 0; } -transport_connection_t * +static transport_connection_t * tcp_session_get_listener (u32 listener_index) { tcp_main_t *tm = vnet_get_tcp_main (); @@ -146,7 +146,7 @@ tcp_session_get_listener (u32 listener_index) * Cleanup half-open connection * */ -void +static void tcp_half_open_connection_del (tcp_connection_t * tc) { tcp_main_t *tm = vnet_get_tcp_main (); @@ -178,7 +178,7 @@ tcp_half_open_connection_cleanup (tcp_connection_t * tc) return 0; } -tcp_connection_t * +static tcp_connection_t * tcp_half_open_connection_new (void) { tcp_main_t *tm = vnet_get_tcp_main (); @@ -277,19 +277,19 @@ tcp_connection_reset (tcp_connection_t * tc) tcp_connection_cleanup (tc); break; case TCP_STATE_ESTABLISHED: + tcp_connection_timers_reset (tc); + /* Set the cleanup timer, in case the session layer/app don't + * cleanly close the connection */ + tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); stream_session_reset_notify (&tc->connection); - /* fall through */ + break; case TCP_STATE_CLOSE_WAIT: case TCP_STATE_FIN_WAIT_1: case TCP_STATE_FIN_WAIT_2: case TCP_STATE_CLOSING: tc->state = TCP_STATE_CLOSED; TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc); - - /* Make sure all timers are cleared */ tcp_connection_timers_reset (tc); - - /* Wait for cleanup from session layer but not forever */ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); break; case TCP_STATE_CLOSED: @@ -326,17 +326,22 @@ tcp_connection_close (tcp_connection_t * tc) tc->state = TCP_STATE_FIN_WAIT_1; break; case TCP_STATE_ESTABLISHED: - if (!stream_session_tx_fifo_max_dequeue (&tc->connection)) + if (!session_tx_fifo_max_dequeue (&tc->connection)) tcp_send_fin (tc); else tc->flags |= TCP_CONN_FINPNDG; tc->state = TCP_STATE_FIN_WAIT_1; break; case TCP_STATE_CLOSE_WAIT: - tcp_send_fin (tc); - tcp_connection_timers_reset (tc); - tc->state = TCP_STATE_LAST_ACK; - tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); + if (!session_tx_fifo_max_dequeue (&tc->connection)) + { + tcp_send_fin (tc); + tcp_connection_timers_reset (tc); + tc->state = TCP_STATE_LAST_ACK; + tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); + } + else + tc->flags |= TCP_CONN_FINPNDG; break; case TCP_STATE_FIN_WAIT_1: tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); @@ -353,7 +358,7 @@ tcp_connection_close (tcp_connection_t * tc) tcp_connection_del (tc); } -void +static void tcp_session_close (u32 conn_index, u32 thread_index) { tcp_connection_t *tc; @@ -361,17 +366,15 @@ tcp_session_close (u32 conn_index, u32 thread_index) tcp_connection_close (tc); } -void +static void tcp_session_cleanup (u32 conn_index, u32 thread_index) { tcp_connection_t *tc; tc = tcp_connection_get (conn_index, thread_index); tcp_connection_timers_reset (tc); - - /* Wait for the session tx events to clear */ tc->state = TCP_STATE_CLOSED; TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc); - tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); + tcp_connection_cleanup (tc); } /** @@ -501,6 +504,31 @@ tcp_connection_fib_attach (tcp_connection_t * tc) } #endif /* 0 */ +static void +tcp_cc_init (tcp_connection_t * tc) +{ + tc->cc_algo = tcp_cc_algo_get (TCP_CC_NEWRENO); + tc->cc_algo->init (tc); +} + +void +tcp_cc_algo_register (tcp_cc_algorithm_type_e type, + const tcp_cc_algorithm_t * vft) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vec_validate (tm->cc_algos, type); + + tm->cc_algos[type] = *vft; +} + +tcp_cc_algorithm_t * +tcp_cc_algo_get (tcp_cc_algorithm_type_e type) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + return &tm->cc_algos[type]; +} + + /** * Initialize connection send variables. */ @@ -574,7 +602,7 @@ tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr, return 0; } -int +static int tcp_connection_open (transport_endpoint_t * rmt) { tcp_main_t *tm = vnet_get_tcp_main (); @@ -621,7 +649,7 @@ tcp_connection_open (transport_endpoint_t * rmt) return tc->c_c_index; } -int +static int tcp_session_open (transport_endpoint_t * tep) { return tcp_connection_open (tep); @@ -657,7 +685,7 @@ const char *tcp_connection_flags_str[] = { #undef _ }; -u8 * +static u8 * format_tcp_connection_flags (u8 * s, va_list * args) { tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); @@ -682,7 +710,7 @@ const char *tcp_conn_timers[] = { #undef _ }; -u8 * +static u8 * format_tcp_timers (u8 * s, va_list * args) { tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); @@ -707,7 +735,7 @@ format_tcp_timers (u8 * s, va_list * args) return s; } -u8 * +static u8 * format_tcp_congestion_status (u8 * s, va_list * args) { tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); @@ -720,7 +748,13 @@ format_tcp_congestion_status (u8 * s, va_list * args) return s; } -u8 * +static i32 +tcp_rcv_wnd_available (tcp_connection_t * tc) +{ + return (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las); +} + +static u8 * format_tcp_vars (u8 * s, va_list * args) { tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); @@ -731,8 +765,9 @@ format_tcp_vars (u8 * s, va_list * args) tc->snd_una_max - tc->iss); s = format (s, " rcv_nxt %u rcv_las %u\n", tc->rcv_nxt - tc->irs, tc->rcv_las - tc->irs); - s = format (s, " snd_wnd %u rcv_wnd %u snd_wl1 %u snd_wl2 %u\n", - tc->snd_wnd, tc->rcv_wnd, tc->snd_wl1 - tc->irs, + s = format (s, " snd_wnd %u rcv_wnd %u rcv_wscale %u ", + tc->snd_wnd, tc->rcv_wnd, tc->rcv_wscale); + s = format (s, "snd_wl1 %u snd_wl2 %u\n", tc->snd_wl1 - tc->irs, tc->snd_wl2 - tc->iss); s = format (s, " flight size %u out space %u cc space %u rcv_wnd_av %u\n", tcp_flight_size (tc), tcp_available_output_snd_space (tc), @@ -760,7 +795,7 @@ format_tcp_vars (u8 * s, va_list * args) return s; } -u8 * +static u8 * format_tcp_connection_id (u8 * s, va_list * args) { tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); @@ -803,7 +838,7 @@ format_tcp_connection (u8 * s, va_list * args) return s; } -u8 * +static u8 * format_tcp_session (u8 * s, va_list * args) { u32 tci = va_arg (*args, u32); @@ -819,7 +854,7 @@ format_tcp_session (u8 * s, va_list * args) return s; } -u8 * +static u8 * format_tcp_listener_session (u8 * s, va_list * args) { u32 tci = va_arg (*args, u32); @@ -827,7 +862,7 @@ format_tcp_listener_session (u8 * s, va_list * args) return format (s, "%U", format_tcp_connection_id, tc); } -u8 * +static u8 * format_tcp_half_open_session (u8 * s, va_list * args) { u32 tci = va_arg (*args, u32); @@ -883,7 +918,7 @@ format_tcp_rcv_sacks (u8 * s, va_list * args) return s; } -u8 * +static u8 * format_tcp_sack_hole (u8 * s, va_list * args) { sack_scoreboard_hole_t *hole = va_arg (*args, sack_scoreboard_hole_t *); @@ -921,14 +956,14 @@ format_tcp_scoreboard (u8 * s, va_list * args) return s; } -transport_connection_t * +static transport_connection_t * tcp_session_get_transport (u32 conn_index, u32 thread_index) { tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index); return &tc->connection; } -transport_connection_t * +static transport_connection_t * tcp_half_open_session_get_transport (u32 conn_index) { tcp_connection_t *tc = tcp_half_open_connection_get (conn_index); @@ -942,7 +977,7 @@ tcp_half_open_session_get_transport (u32 conn_index) * the tcp options to be used in the next burst and subtracts their * length from the connection's snd_mss. */ -u16 +static u16 tcp_session_send_mss (transport_connection_t * trans_conn) { tcp_connection_t *tc = (tcp_connection_t *) trans_conn; @@ -950,7 +985,7 @@ tcp_session_send_mss (transport_connection_t * trans_conn) /* Ensure snd_mss does accurately reflect the amount of data we can push * in a segment. This also makes sure that options are updated according to * the current state of the connection. */ - tcp_update_snd_mss (tc); + tcp_update_burst_snd_vars (tc); return tc->snd_mss; } @@ -982,8 +1017,8 @@ tcp_round_snd_space (tcp_connection_t * tc, u32 snd_space) * @param tc tcp connection * @return number of bytes session is allowed to write */ -u32 -tcp_snd_space (tcp_connection_t * tc) +static inline u32 +tcp_snd_space_inline (tcp_connection_t * tc) { int snd_space, snt_limited; @@ -1032,20 +1067,20 @@ tcp_snd_space (tcp_connection_t * tc) } u32 -tcp_session_send_space (transport_connection_t * trans_conn) +tcp_snd_space (tcp_connection_t * tc) { - tcp_connection_t *tc = (tcp_connection_t *) trans_conn; - return clib_min (tcp_snd_space (tc), - tc->snd_wnd - (tc->snd_nxt - tc->snd_una)); + return tcp_snd_space_inline (tc); } -i32 -tcp_rcv_wnd_available (tcp_connection_t * tc) +static u32 +tcp_session_send_space (transport_connection_t * trans_conn) { - return (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las); + tcp_connection_t *tc = (tcp_connection_t *) trans_conn; + return clib_min (tcp_snd_space_inline (tc), + tc->snd_wnd - (tc->snd_nxt - tc->snd_una)); } -u32 +static u32 tcp_session_tx_fifo_offset (transport_connection_t * trans_conn) { tcp_connection_t *tc = (tcp_connection_t *) trans_conn; @@ -1056,21 +1091,29 @@ tcp_session_tx_fifo_offset (transport_connection_t * trans_conn) return (tc->snd_nxt - tc->snd_una); } -void +static void tcp_update_time (f64 now, u8 thread_index) { tcp_set_time_now (thread_index); - tw_timer_expire_timers_16t_2w_512sl (&tcp_main.timer_wheels[thread_index], + tw_timer_expire_timers_16t_2w_512sl (&tcp_main. + wrk_ctx[thread_index].timer_wheel, now); tcp_flush_frames_to_output (thread_index); } +static u32 +tcp_session_push_header (transport_connection_t * tconn, vlib_buffer_t * b) +{ + tcp_connection_t *tc = (tcp_connection_t *) tconn; + return tcp_push_header (tc, b); +} + /* *INDENT-OFF* */ const static transport_proto_vft_t tcp_proto = { .enable = vnet_tcp_enable_disable, .bind = tcp_session_bind, .unbind = tcp_session_unbind, - .push_header = tcp_push_header, + .push_header = tcp_session_push_header, .get_connection = tcp_session_get_transport, .get_listener = tcp_session_get_listener, .get_half_open = tcp_half_open_session_get_transport, @@ -1089,7 +1132,7 @@ const static transport_proto_vft_t tcp_proto = { }; /* *INDENT-ON* */ -void +static void tcp_timer_keep_handler (u32 conn_index) { u32 thread_index = vlib_get_thread_index (); @@ -1101,7 +1144,7 @@ tcp_timer_keep_handler (u32 conn_index) tcp_connection_close (tc); } -void +static void tcp_timer_establish_handler (u32 conn_index) { tcp_connection_t *tc; @@ -1129,7 +1172,7 @@ tcp_timer_establish_handler (u32 conn_index) tcp_connection_cleanup (tc); } -void +static void tcp_timer_waitclose_handler (u32 conn_index) { u32 thread_index = vlib_get_thread_index (); @@ -1149,6 +1192,10 @@ tcp_timer_waitclose_handler (u32 conn_index) clib_warning ("FIN was sent and still in CLOSE WAIT. Weird!"); } + /* Make sure we don't try to send unsent data */ + tcp_connection_timers_reset (tc); + tcp_cong_recovery_off (tc); + tc->snd_una_max = tc->snd_nxt = tc->snd_una; tcp_send_fin (tc); tc->state = TCP_STATE_LAST_ACK; @@ -1194,13 +1241,13 @@ tcp_expired_timers_dispatch (u32 * expired_timers) } } -void +static void tcp_initialize_timer_wheels (tcp_main_t * tm) { tw_timer_wheel_16t_2w_512sl_t *tw; /* *INDENT-OFF* */ foreach_vlib_main (({ - tw = &tm->timer_wheels[ii]; + tw = &tm->wrk_ctx[ii].timer_wheel; tw_timer_wheel_init_16t_2w_512sl (tw, tcp_expired_timers_dispatch, 100e-3 /* timer period 100ms */ , ~0); tw->last_run_time = vlib_time_now (this_vlib_main); @@ -1208,7 +1255,7 @@ tcp_initialize_timer_wheels (tcp_main_t * tm) /* *INDENT-ON* */ } -clib_error_t * +static clib_error_t * tcp_main_enable (vlib_main_t * vm) { tcp_main_t *tm = vnet_get_tcp_main (); @@ -1269,13 +1316,6 @@ tcp_main_enable (vlib_main_t * vm) pool_init_fixed (tm->half_open_connections, tm->preallocated_half_open_connections); - /* Initialize per worker thread tx buffers (used for control messages) */ - vec_validate (tm->tx_buffers, num_threads - 1); - - /* Initialize timer wheels */ - vec_validate (tm->timer_wheels, num_threads - 1); - tcp_initialize_timer_wheels (tm); - /* Initialize clocks per tick for TCP timestamp. Used to compute * monotonically increasing timestamps. */ tm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock @@ -1286,15 +1326,12 @@ tcp_main_enable (vlib_main_t * vm) clib_spinlock_init (&tm->half_open_lock); } - vec_validate (tm->tx_frames[0], num_threads - 1); - vec_validate (tm->tx_frames[1], num_threads - 1); - vec_validate (tm->ip_lookup_tx_frames[0], num_threads - 1); - vec_validate (tm->ip_lookup_tx_frames[1], num_threads - 1); + vec_validate (tm->wrk_ctx, num_threads - 1); + tcp_initialize_timer_wheels (tm); tm->bytes_per_buffer = vlib_buffer_free_list_buffer_size (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vec_validate (tm->time_now, num_threads - 1); return error; } @@ -1326,7 +1363,7 @@ tcp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add) tm->punt_unknown6 = is_add; } -clib_error_t * +static clib_error_t * tcp_init (vlib_main_t * vm) { tcp_main_t *tm = vnet_get_tcp_main (); @@ -1362,9 +1399,8 @@ tcp_config_fn (vlib_main_t * vm, unformat_input_t * input) while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat - (input, "preallocated-connections %d", - &tm->preallocated_connections)) + if (unformat (input, "preallocated-connections %d", + &tm->preallocated_connections)) ; else if (unformat (input, "preallocated-half-open-connections %d", &tm->preallocated_half_open_connections)) @@ -1372,6 +1408,9 @@ tcp_config_fn (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "buffer-fail-fraction %f", &tm->buffer_fail_fraction)) ; + else if (unformat (input, "max-rx-fifo %U", unformat_memory_size, + &tm->max_rx_fifo)) + ; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input);