X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Ftcp%2Ftcp.c;h=e34f773d7d33ee812e67c4c10c4b1c0d64646124;hb=3f78bbb8e875f3fccbcd167289ba2d307cb86263;hp=1d030c9ce71bf1761591943b3c7fe5e11f65cb37;hpb=db39656d7a85ac6e9e6fbed9ea59b3089acc8cae;p=vpp.git diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 1d030c9ce71..e34f773d7d3 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -23,9 +23,8 @@ #include #include #include -#include +#include #include -#include tcp_main_t tcp_main; @@ -238,13 +237,12 @@ tcp_half_open_connection_new (void) void tcp_connection_cleanup (tcp_connection_t * tc) { - tcp_main_t *tm = &tcp_main; - TCP_EVT (TCP_EVT_DELETE, tc); /* Cleanup local endpoint if this was an active connect */ - transport_endpoint_cleanup (TRANSPORT_PROTO_TCP, &tc->c_lcl_ip, - tc->c_lcl_port); + if (!(tc->cfg_flags & TCP_CFG_F_NO_ENDPOINT)) + transport_endpoint_cleanup (TRANSPORT_PROTO_TCP, &tc->c_lcl_ip, + tc->c_lcl_port); /* Check if connection is not yet fully established */ if (tc->state == TCP_STATE_SYN_SENT) @@ -257,8 +255,6 @@ tcp_connection_cleanup (tcp_connection_t * tc) } else { - int thread_index = tc->c_thread_index; - /* Make sure all timers are cleared */ tcp_connection_timers_reset (tc); @@ -274,10 +270,7 @@ tcp_connection_cleanup (tcp_connection_t * tc) if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE) tcp_bt_cleanup (tc); - /* Poison the entry */ - if (CLIB_DEBUG > 0) - clib_memset (tc, 0xFA, sizeof (*tc)); - pool_put (tm->connections[thread_index], tc); + tcp_connection_free (tc); } } @@ -298,12 +291,12 @@ tcp_connection_del (tcp_connection_t * tc) tcp_connection_t * tcp_connection_alloc (u8 thread_index) { - tcp_main_t *tm = vnet_get_tcp_main (); + tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index); tcp_connection_t *tc; - pool_get (tm->connections[thread_index], tc); + pool_get (wrk->connections, tc); clib_memset (tc, 0, sizeof (*tc)); - tc->c_c_index = tc - tm->connections[thread_index]; + tc->c_c_index = tc - wrk->connections; tc->c_thread_index = thread_index; return tc; } @@ -311,12 +304,12 @@ tcp_connection_alloc (u8 thread_index) tcp_connection_t * tcp_connection_alloc_w_base (u8 thread_index, tcp_connection_t * base) { - tcp_main_t *tm = vnet_get_tcp_main (); + tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index); tcp_connection_t *tc; - pool_get (tm->connections[thread_index], tc); + pool_get (wrk->connections, tc); clib_memcpy_fast (tc, base, sizeof (*tc)); - tc->c_c_index = tc - tm->connections[thread_index]; + tc->c_c_index = tc - wrk->connections; tc->c_thread_index = thread_index; return tc; } @@ -324,63 +317,14 @@ tcp_connection_alloc_w_base (u8 thread_index, tcp_connection_t * base) void tcp_connection_free (tcp_connection_t * tc) { - tcp_main_t *tm = &tcp_main; + tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index); if (CLIB_DEBUG) { - u8 thread_index = tc->c_thread_index; clib_memset (tc, 0xFA, sizeof (*tc)); - pool_put (tm->connections[thread_index], tc); + pool_put (wrk->connections, tc); return; } - pool_put (tm->connections[tc->c_thread_index], tc); -} - -/** Notify session that connection has been reset. - * - * Switch state to closed and wait for session to call cleanup. - */ -void -tcp_connection_reset (tcp_connection_t * tc) -{ - TCP_EVT (TCP_EVT_RST_RCVD, tc); - switch (tc->state) - { - case TCP_STATE_SYN_RCVD: - /* Cleanup everything. 
App wasn't notified yet */ - session_transport_delete_notify (&tc->connection); - tcp_connection_cleanup (tc); - break; - case TCP_STATE_SYN_SENT: - session_stream_connect_notify (&tc->connection, 1 /* fail */ ); - tcp_connection_cleanup (tc); - break; - case TCP_STATE_ESTABLISHED: - tcp_connection_timers_reset (tc); - /* Set the cleanup timer, in case the session layer/app don't - * cleanly close the connection */ - tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time); - session_transport_reset_notify (&tc->connection); - tcp_connection_set_state (tc, TCP_STATE_CLOSED); - session_transport_closed_notify (&tc->connection); - break; - case TCP_STATE_CLOSE_WAIT: - case TCP_STATE_FIN_WAIT_1: - case TCP_STATE_FIN_WAIT_2: - case TCP_STATE_CLOSING: - case TCP_STATE_LAST_ACK: - tcp_connection_timers_reset (tc); - tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time); - /* Make sure we mark the session as closed. In some states we may - * be still trying to send data */ - tcp_connection_set_state (tc, TCP_STATE_CLOSED); - session_transport_closed_notify (&tc->connection); - break; - case TCP_STATE_CLOSED: - case TCP_STATE_TIME_WAIT: - break; - default: - TCP_DBG ("reset state: %u", tc->state); - } + pool_put (wrk->connections, tc); } /** @@ -424,6 +368,7 @@ tcp_connection_close (tcp_connection_t * tc) tcp_connection_set_state (tc, TCP_STATE_CLOSED); tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time); session_transport_closed_notify (&tc->connection); + tcp_worker_stats_inc (tc->c_thread_index, rst_unread, 1); break; } if (!transport_max_tx_dequeue (&tc->connection)) @@ -489,6 +434,7 @@ tcp_session_reset (u32 conn_index, u32 thread_index) session_transport_closed_notify (&tc->connection); tcp_send_reset (tc); tcp_connection_timers_reset (tc); + tcp_cong_recovery_off (tc); tcp_connection_set_state (tc, TCP_STATE_CLOSED); tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); } @@ -702,6 +648,9 @@ tcp_init_snd_vars (tcp_connection_t * tc) tc->snd_nxt = tc->iss + 1; tc->snd_una_max = tc->snd_nxt; tc->srtt = 100; /* 100 ms */ + + if (!tcp_cfg.csum_offload) + tc->cfg_flags |= TCP_CFG_F_NO_CSUM_OFFLOAD; } void @@ -1015,6 +964,8 @@ format_tcp_vars (u8 * s, va_list * args) tc->rto, tc->rto_boff, tc->srtt, tc->mrtt_us * 1000, tc->rttvar, tc->rtt_ts); s = format (s, " rtt_seq %u\n", tc->rtt_seq - tc->iss); + s = format (s, " next_node %u opaque 0x%x\n", tc->next_node_index, + tc->next_node_opaque); s = format (s, " cong: %U", format_tcp_congestion, tc); if (tc->state >= TCP_STATE_ESTABLISHED) @@ -1029,7 +980,7 @@ format_tcp_vars (u8 * s, va_list * args) return s; } -static u8 * +u8 * format_tcp_connection_id (u8 * s, va_list * args) { tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); @@ -1391,33 +1342,33 @@ tcp_connection_tx_pacer_update (tcp_connection_t * tc) if (!transport_connection_is_tx_paced (&tc->connection)) return; + f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us); + transport_connection_tx_pacer_update (&tc->connection, - tcp_cc_get_pacing_rate (tc)); + tcp_cc_get_pacing_rate (tc), + srtt * CLIB_US_TIME_FREQ); } void tcp_connection_tx_pacer_reset (tcp_connection_t * tc, u32 window, u32 start_bucket) { - tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index); f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us); - u64 last_time = wrk->vm->clib_time.last_cpu_time; - transport_connection_tx_pacer_reset (&tc->connection, window / srtt, - start_bucket, last_time); + transport_connection_tx_pacer_reset (&tc->connection, + 
tcp_cc_get_pacing_rate (tc), + start_bucket, + srtt * CLIB_US_TIME_FREQ); } static void -tcp_timer_waitclose_handler (u32 conn_index) +tcp_timer_waitclose_handler (u32 conn_index, u32 thread_index) { - u32 thread_index = vlib_get_thread_index (); tcp_connection_t *tc; tc = tcp_connection_get (conn_index, thread_index); if (!tc) return; - tc->timers[TCP_TIMER_WAITCLOSE] = TCP_TIMER_HANDLE_INVALID; - switch (tc->state) { case TCP_STATE_CLOSE_WAIT: @@ -1426,6 +1377,7 @@ tcp_timer_waitclose_handler (u32 conn_index) if (!(tc->flags & TCP_CONN_FINPNDG)) { + clib_warning ("close-wait with fin sent"); tcp_connection_set_state (tc, TCP_STATE_CLOSED); tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); break; @@ -1441,6 +1393,7 @@ tcp_timer_waitclose_handler (u32 conn_index) /* Make sure we don't wait in LAST ACK forever */ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time); + tcp_worker_stats_inc (thread_index, to_closewait, 1); /* Don't delete the connection yet */ break; @@ -1462,13 +1415,29 @@ tcp_timer_waitclose_handler (u32 conn_index) tcp_connection_set_state (tc, TCP_STATE_CLOSED); tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); } + tcp_worker_stats_inc (thread_index, to_finwait1, 1); break; case TCP_STATE_LAST_ACK: + tcp_connection_timers_reset (tc); + tcp_connection_set_state (tc, TCP_STATE_CLOSED); + tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); + session_transport_closed_notify (&tc->connection); + tcp_worker_stats_inc (thread_index, to_lastack, 1); + break; case TCP_STATE_CLOSING: tcp_connection_timers_reset (tc); tcp_connection_set_state (tc, TCP_STATE_CLOSED); tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); session_transport_closed_notify (&tc->connection); + tcp_worker_stats_inc (thread_index, to_closing, 1); + break; + case TCP_STATE_FIN_WAIT_2: + tcp_send_reset (tc); + tcp_connection_timers_reset (tc); + tcp_connection_set_state (tc, TCP_STATE_CLOSED); + session_transport_closed_notify (&tc->connection); + tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); + tcp_worker_stats_inc (thread_index, to_finwait2, 1); break; default: tcp_connection_del (tc); @@ -1490,19 +1459,41 @@ static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] = static void tcp_expired_timers_dispatch (u32 * expired_timers) { + u32 thread_index = vlib_get_thread_index (); + u32 connection_index, timer_id, n_expired; + tcp_connection_t *tc; int i; - u32 connection_index, timer_id; - for (i = 0; i < vec_len (expired_timers); i++) + n_expired = vec_len (expired_timers); + tcp_worker_stats_inc (thread_index, timer_expirations, n_expired); + + /* + * Invalidate all timer handles before dispatching. This avoids dangling + * index references to timer wheel pool entries that have been freed. 
+ */ + for (i = 0; i < n_expired; i++) { - /* Get session index and timer id */ connection_index = expired_timers[i] & 0x0FFFFFFF; timer_id = expired_timers[i] >> 28; + if (timer_id != TCP_TIMER_RETRANSMIT_SYN) + tc = tcp_connection_get (connection_index, thread_index); + else + tc = tcp_half_open_connection_get (connection_index); + TCP_EVT (TCP_EVT_TIMER_POP, connection_index, timer_id); - /* Handle expiration */ - (*timer_expiration_handlers[timer_id]) (connection_index); + tc->timers[timer_id] = TCP_TIMER_HANDLE_INVALID; + } + + /* + * Dispatch expired timers + */ + for (i = 0; i < n_expired; i++) + { + connection_index = expired_timers[i] & 0x0FFFFFFF; + timer_id = expired_timers[i] >> 28; + (*timer_expiration_handlers[timer_id]) (connection_index, thread_index); } } @@ -1537,6 +1528,7 @@ tcp_main_enable (vlib_main_t * vm) u32 num_threads, n_workers, prealloc_conn_per_wrk; tcp_connection_t *tc __attribute__ ((unused)); tcp_main_t *tm = vnet_get_tcp_main (); + tcp_worker_ctx_t *wrk; clib_error_t *error = 0; int thread; @@ -1559,25 +1551,28 @@ tcp_main_enable (vlib_main_t * vm) */ num_threads = 1 /* main thread */ + vtm->n_threads; - vec_validate (tm->connections, num_threads - 1); vec_validate (tm->wrk_ctx, num_threads - 1); n_workers = num_threads == 1 ? 1 : vtm->n_threads; prealloc_conn_per_wrk = tcp_cfg.preallocated_connections / n_workers; for (thread = 0; thread < num_threads; thread++) { - vec_validate (tm->wrk_ctx[thread].pending_deq_acked, 255); - vec_validate (tm->wrk_ctx[thread].pending_disconnects, 255); - vec_reset_length (tm->wrk_ctx[thread].pending_deq_acked); - vec_reset_length (tm->wrk_ctx[thread].pending_disconnects); - tm->wrk_ctx[thread].vm = vlib_mains[thread]; + wrk = &tm->wrk_ctx[thread]; + + vec_validate (wrk->pending_deq_acked, 255); + vec_validate (wrk->pending_disconnects, 255); + vec_validate (wrk->pending_resets, 255); + vec_reset_length (wrk->pending_deq_acked); + vec_reset_length (wrk->pending_disconnects); + vec_reset_length (wrk->pending_resets); + wrk->vm = vlib_mains[thread]; /* * Preallocate connections. 
Assume that thread 0 won't * use preallocated threads when running multi-core */ if ((thread > 0 || num_threads == 1) && prealloc_conn_per_wrk) - pool_init_fixed (tm->connections[thread], prealloc_conn_per_wrk); + pool_init_fixed (wrk->connections, prealloc_conn_per_wrk); } /* @@ -1649,6 +1644,7 @@ tcp_configuration_init (void) tcp_cfg.initial_cwnd_multiplier = 0; tcp_cfg.enable_tx_pacing = 1; tcp_cfg.allow_tso = 0; + tcp_cfg.csum_offload = 1; tcp_cfg.cc_algo = TCP_CC_NEWRENO; tcp_cfg.rwnd_min_update_ack = 1; @@ -1790,6 +1786,8 @@ tcp_config_fn (vlib_main_t * vm, unformat_input_t * input) tcp_cfg.enable_tx_pacing = 0; else if (unformat (input, "tso")) tcp_cfg.allow_tso = 1; + else if (unformat (input, "no-csum-offload")) + tcp_cfg.csum_offload = 0; else if (unformat (input, "cc-algo %U", unformat_tcp_cc_algo, &tcp_cfg.cc_algo)) ; @@ -1833,7 +1831,6 @@ tcp_configure_v4_source_address_range (vlib_main_t * vm, ip4_address_t * start, ip4_address_t * end, u32 table_id) { - vnet_main_t *vnm = vnet_get_main (); u32 start_host_byte_order, end_host_byte_order; fib_prefix_t prefix; fib_node_index_t fei; @@ -1869,12 +1866,12 @@ tcp_configure_v4_source_address_range (vlib_main_t * vm, sw_if_index = fib_entry_get_resolving_interface (fei); /* Configure proxy arp across the range */ - rv = vnet_proxy_arp_add_del (start, end, fib_index, 0 /* is_del */ ); + rv = ip4_neighbor_proxy_add (fib_index, start, end); if (rv) return rv; - rv = vnet_proxy_arp_enable_disable (vnm, sw_if_index, 1); + rv = ip4_neighbor_proxy_enable (sw_if_index); if (rv) return rv; @@ -1959,7 +1956,7 @@ tcp_configure_v6_source_address_range (vlib_main_t * vm, return VNET_API_ERROR_NO_MATCHING_INTERFACE; /* Add a proxy neighbor discovery entry for this address */ - ip6_neighbor_proxy_add_del (sw_if_index, start, 0 /* is_del */ ); + ip6_neighbor_proxy_add (sw_if_index, start); /* Add a receive adjacency for this address */ receive_dpo_add_or_lock (DPO_PROTO_IP6, ~0 /* sw_if_index */ , @@ -1989,8 +1986,8 @@ tcp_configure_v6_source_address_range (vlib_main_t * vm, } static clib_error_t * -tcp_src_address (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd_arg) +tcp_src_address_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd_arg) { ip4_address_t v4start, v4end; ip6_address_t v6start, v6end; @@ -2072,7 +2069,7 @@ VLIB_CLI_COMMAND (tcp_src_address_command, static) = { .path = "tcp src-address", .short_help = "tcp src-address [- ] add src address range", - .function = tcp_src_address, + .function = tcp_src_address_fn, }; /* *INDENT-ON* */ @@ -2286,6 +2283,70 @@ VLIB_CLI_COMMAND (show_tcp_punt_command, static) = }; /* *INDENT-ON* */ +static clib_error_t * +show_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + tcp_worker_ctx_t *wrk; + u32 thread; + + if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + return clib_error_return (0, "unknown input `%U'", format_unformat_error, + input); + for (thread = 0; thread < vec_len (tm->wrk_ctx); thread++) + { + wrk = tcp_get_worker (thread); + vlib_cli_output (vm, "Thread %d:\n", thread); + +#define _(name,type,str) \ + if (wrk->stats.name) \ + vlib_cli_output (vm, " %ld %s", wrk->stats.name, str); + foreach_tcp_wrk_stat +#undef _ + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_tcp_stats_command, static) = +{ + .path = "show tcp stats", + .short_help = "show tcp stats", + .function = show_tcp_stats_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * 
+clear_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + tcp_worker_ctx_t *wrk; + u32 thread; + + if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + return clib_error_return (0, "unknown input `%U'", format_unformat_error, + input); + + for (thread = 0; thread < vec_len (tm->wrk_ctx); thread++) + { + wrk = tcp_get_worker (thread); + clib_memset (&wrk->stats, 0, sizeof (wrk->stats)); + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (clear_tcp_stats_command, static) = +{ + .path = "clear tcp stats", + .short_help = "clear tcp stats", + .function = clear_tcp_stats_fn, +}; +/* *INDENT-ON* */ + /* * fd.io coding-style-patch-verification: ON *
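
Note on the new per-worker stats shown above: show_tcp_stats_fn and tcp_worker_stats_inc rely on foreach_tcp_wrk_stat and the wrk->stats field, which are defined in tcp.h and are not visible in this diff. The following is a minimal, self-contained sketch of the X-macro counter pattern they appear to use; all names, field lists, and the fixed worker count here are illustrative assumptions, not the actual VPP definitions.

/* Illustrative sketch only -- the real foreach_tcp_wrk_stat, stats struct and
 * tcp_worker_stats_inc live in src/vnet/tcp/tcp.h and may differ. */
#include <stdio.h>
#include <string.h>

typedef unsigned long long u64;

/* X-macro list: one entry per counter -> (field name, type, display string) */
#define foreach_demo_wrk_stat                                   \
  _ (timer_expirations, u64, "timer expirations")               \
  _ (to_finwait2, u64, "timeout fin-wait-2")                    \
  _ (rst_unread, u64, "resets sent for unread data")

/* Expand the list once into a per-worker stats struct */
typedef struct
{
#define _(name, type, str) type name;
  foreach_demo_wrk_stat
#undef _
} demo_wrk_stats_t;

typedef struct
{
  demo_wrk_stats_t stats;
} demo_worker_ctx_t;

#define DEMO_N_WORKERS 4
static demo_worker_ctx_t workers[DEMO_N_WORKERS];

/* Counterpart of tcp_worker_stats_inc: bump a named counter on a worker */
#define demo_worker_stats_inc(thread, name, val) \
  workers[(thread)].stats.name += (val)

int
main (void)
{
  unsigned t;

  demo_worker_stats_inc (0, timer_expirations, 3);
  demo_worker_stats_inc (0, to_finwait2, 1);

  /* Same expansion trick as show_tcp_stats_fn: print only non-zero stats */
  for (t = 0; t < DEMO_N_WORKERS; t++)
    {
      printf ("Thread %u:\n", t);
#define _(name, type, str)                                      \
      if (workers[t].stats.name)                                \
	printf ("  %llu %s\n", workers[t].stats.name, str);
      foreach_demo_wrk_stat
#undef _
    }

  /* Counterpart of clear_tcp_stats_fn: zero every worker's counters */
  for (t = 0; t < DEMO_N_WORKERS; t++)
    clib_memset_equivalent: memset (&workers[t].stats, 0,
				    sizeof (workers[t].stats));

  return 0;
}

The design point is that adding a counter only requires adding one line to the list macro; the struct field, the increment sites, the show output and the clear path all pick it up automatically.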