X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Ftcp%2Ftcp.c;h=45eaf016b1eedf0b947ec82e95d2001ed0f5209a;hb=b26743d093141a2aef19bdf8a7fe06dcaa81329a;hp=b16b2a7dfb23da310d64ff55eea4e1d4244ce171;hpb=f988e696149f42828444c69762c036d9684b6bb0;p=vpp.git diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index b16b2a7dfb2..45eaf016b1e 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -28,6 +28,47 @@ tcp_main_t tcp_main; +typedef struct +{ + fib_protocol_t nh_proto; + vnet_link_t link_type; + ip46_address_t ip; + u32 sw_if_index; + u8 is_add; +} tcp_add_del_adj_args_t; + +static void +tcp_add_del_adj_cb (tcp_add_del_adj_args_t * args) +{ + u32 ai; + if (args->is_add) + { + adj_nbr_add_or_lock (args->nh_proto, args->link_type, &args->ip, + args->sw_if_index); + } + else + { + ai = adj_nbr_find (FIB_PROTOCOL_IP6, VNET_LINK_IP6, &args->ip, + args->sw_if_index); + if (ai != ADJ_INDEX_INVALID) + adj_unlock (ai); + } +} + +static void +tcp_add_del_adjacency (tcp_connection_t * tc, u8 is_add) +{ + tcp_add_del_adj_args_t args = { + .nh_proto = FIB_PROTOCOL_IP6, + .link_type = VNET_LINK_IP6, + .ip = tc->c_rmt_ip, + .sw_if_index = tc->sw_if_index, + .is_add = is_add + }; + vlib_rpc_call_main_thread (tcp_add_del_adj_cb, (u8 *) & args, + sizeof (args)); +} + static u32 tcp_connection_bind (u32 session_index, transport_endpoint_t * lcl) { @@ -62,7 +103,7 @@ tcp_connection_bind (u32 session_index, transport_endpoint_t * lcl) return listener->c_c_index; } -u32 +static u32 tcp_session_bind (u32 session_index, transport_endpoint_t * tep) { return tcp_connection_bind (session_index, tep); @@ -85,14 +126,14 @@ tcp_connection_unbind (u32 listener_index) pool_put_index (tm->listener_pool, listener_index); } -u32 +static u32 tcp_session_unbind (u32 listener_index) { tcp_connection_unbind (listener_index); return 0; } -transport_connection_t * +static transport_connection_t * tcp_session_get_listener (u32 listener_index) { tcp_main_t *tm = vnet_get_tcp_main (); @@ -105,7 +146,7 @@ tcp_session_get_listener (u32 listener_index) * Cleanup half-open connection * */ -void +static void tcp_half_open_connection_del (tcp_connection_t * tc) { tcp_main_t *tm = vnet_get_tcp_main (); @@ -137,7 +178,7 @@ tcp_half_open_connection_cleanup (tcp_connection_t * tc) return 0; } -tcp_connection_t * +static tcp_connection_t * tcp_half_open_connection_new (void) { tcp_main_t *tm = vnet_get_tcp_main (); @@ -178,6 +219,9 @@ tcp_connection_cleanup (tcp_connection_t * tc) /* Make sure all timers are cleared */ tcp_connection_timers_reset (tc); + if (!tc->c_is_ip4 && ip6_address_is_link_local_unicast (&tc->c_rmt_ip6)) + tcp_add_del_adjacency (tc, 0); + /* Poison the entry */ if (CLIB_DEBUG > 0) memset (tc, 0xFA, sizeof (*tc)); @@ -233,19 +277,19 @@ tcp_connection_reset (tcp_connection_t * tc) tcp_connection_cleanup (tc); break; case TCP_STATE_ESTABLISHED: + tcp_connection_timers_reset (tc); + /* Set the cleanup timer, in case the session layer/app don't + * cleanly close the connection */ + tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); stream_session_reset_notify (&tc->connection); - /* fall through */ + break; case TCP_STATE_CLOSE_WAIT: case TCP_STATE_FIN_WAIT_1: case TCP_STATE_FIN_WAIT_2: case TCP_STATE_CLOSING: tc->state = TCP_STATE_CLOSED; TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc); - - /* Make sure all timers are cleared */ tcp_connection_timers_reset (tc); - - /* Wait for cleanup from session layer but not forever */ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); break; case TCP_STATE_CLOSED: @@ -282,17 +326,22 @@ tcp_connection_close (tcp_connection_t * tc) tc->state = TCP_STATE_FIN_WAIT_1; break; case TCP_STATE_ESTABLISHED: - if (!stream_session_tx_fifo_max_dequeue (&tc->connection)) + if (!session_tx_fifo_max_dequeue (&tc->connection)) tcp_send_fin (tc); else tc->flags |= TCP_CONN_FINPNDG; tc->state = TCP_STATE_FIN_WAIT_1; break; case TCP_STATE_CLOSE_WAIT: - tcp_connection_timers_reset (tc); - tcp_send_fin (tc); - tc->state = TCP_STATE_LAST_ACK; - tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); + if (!session_tx_fifo_max_dequeue (&tc->connection)) + { + tcp_send_fin (tc); + tcp_connection_timers_reset (tc); + tc->state = TCP_STATE_LAST_ACK; + tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); + } + else + tc->flags |= TCP_CONN_FINPNDG; break; case TCP_STATE_FIN_WAIT_1: tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); @@ -309,7 +358,7 @@ tcp_connection_close (tcp_connection_t * tc) tcp_connection_del (tc); } -void +static void tcp_session_close (u32 conn_index, u32 thread_index) { tcp_connection_t *tc; @@ -317,17 +366,15 @@ tcp_session_close (u32 conn_index, u32 thread_index) tcp_connection_close (tc); } -void +static void tcp_session_cleanup (u32 conn_index, u32 thread_index) { tcp_connection_t *tc; tc = tcp_connection_get (conn_index, thread_index); tcp_connection_timers_reset (tc); - - /* Wait for the session tx events to clear */ tc->state = TCP_STATE_CLOSED; TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc); - tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); + tcp_connection_cleanup (tc); } /** @@ -457,6 +504,31 @@ tcp_connection_fib_attach (tcp_connection_t * tc) } #endif /* 0 */ +static void +tcp_cc_init (tcp_connection_t * tc) +{ + tc->cc_algo = tcp_cc_algo_get (TCP_CC_NEWRENO); + tc->cc_algo->init (tc); +} + +void +tcp_cc_algo_register (tcp_cc_algorithm_type_e type, + const tcp_cc_algorithm_t * vft) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vec_validate (tm->cc_algos, type); + + tm->cc_algos[type] = *vft; +} + +tcp_cc_algorithm_t * +tcp_cc_algo_get (tcp_cc_algorithm_type_e type) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + return &tm->cc_algos[type]; +} + + /** * Initialize connection send variables. */ @@ -494,6 +566,9 @@ tcp_connection_init_vars (tcp_connection_t * tc) if (tc->state == TCP_STATE_SYN_RCVD) tcp_init_snd_vars (tc); + if (!tc->c_is_ip4 && ip6_address_is_link_local_unicast (&tc->c_rmt_ip6)) + tcp_add_del_adjacency (tc, 1); + // tcp_connection_fib_attach (tc); } @@ -527,7 +602,7 @@ tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr, return 0; } -int +static int tcp_connection_open (transport_endpoint_t * rmt) { tcp_main_t *tm = vnet_get_tcp_main (); @@ -574,7 +649,7 @@ tcp_connection_open (transport_endpoint_t * rmt) return tc->c_c_index; } -int +static int tcp_session_open (transport_endpoint_t * tep) { return tcp_connection_open (tep); @@ -610,7 +685,7 @@ const char *tcp_connection_flags_str[] = { #undef _ }; -u8 * +static u8 * format_tcp_connection_flags (u8 * s, va_list * args) { tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); @@ -635,7 +710,7 @@ const char *tcp_conn_timers[] = { #undef _ }; -u8 * +static u8 * format_tcp_timers (u8 * s, va_list * args) { tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); @@ -660,7 +735,7 @@ format_tcp_timers (u8 * s, va_list * args) return s; } -u8 * +static u8 * format_tcp_congestion_status (u8 * s, va_list * args) { tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); @@ -673,7 +748,13 @@ format_tcp_congestion_status (u8 * s, va_list * args) return s; } -u8 * +static i32 +tcp_rcv_wnd_available (tcp_connection_t * tc) +{ + return (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las); +} + +static u8 * format_tcp_vars (u8 * s, va_list * args) { tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); @@ -687,9 +768,9 @@ format_tcp_vars (u8 * s, va_list * args) s = format (s, " snd_wnd %u rcv_wnd %u snd_wl1 %u snd_wl2 %u\n", tc->snd_wnd, tc->rcv_wnd, tc->snd_wl1 - tc->irs, tc->snd_wl2 - tc->iss); - s = format (s, " flight size %u send space %u rcv_wnd_av %d\n", + s = format (s, " flight size %u out space %u cc space %u rcv_wnd_av %u\n", tcp_flight_size (tc), tcp_available_output_snd_space (tc), - tcp_rcv_wnd_available (tc)); + tcp_available_cc_snd_space (tc), tcp_rcv_wnd_available (tc)); s = format (s, " cong %U ", format_tcp_congestion_status, tc); s = format (s, "cwnd %u ssthresh %u rtx_bytes %u bytes_acked %u\n", tc->cwnd, tc->ssthresh, tc->snd_rxt_bytes, tc->bytes_acked); @@ -713,7 +794,7 @@ format_tcp_vars (u8 * s, va_list * args) return s; } -u8 * +static u8 * format_tcp_connection_id (u8 * s, va_list * args) { tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); @@ -756,7 +837,7 @@ format_tcp_connection (u8 * s, va_list * args) return s; } -u8 * +static u8 * format_tcp_session (u8 * s, va_list * args) { u32 tci = va_arg (*args, u32); @@ -772,7 +853,7 @@ format_tcp_session (u8 * s, va_list * args) return s; } -u8 * +static u8 * format_tcp_listener_session (u8 * s, va_list * args) { u32 tci = va_arg (*args, u32); @@ -780,7 +861,7 @@ format_tcp_listener_session (u8 * s, va_list * args) return format (s, "%U", format_tcp_connection_id, tc); } -u8 * +static u8 * format_tcp_half_open_session (u8 * s, va_list * args) { u32 tci = va_arg (*args, u32); @@ -836,7 +917,7 @@ format_tcp_rcv_sacks (u8 * s, va_list * args) return s; } -u8 * +static u8 * format_tcp_sack_hole (u8 * s, va_list * args) { sack_scoreboard_hole_t *hole = va_arg (*args, sack_scoreboard_hole_t *); @@ -874,14 +955,14 @@ format_tcp_scoreboard (u8 * s, va_list * args) return s; } -transport_connection_t * +static transport_connection_t * tcp_session_get_transport (u32 conn_index, u32 thread_index) { tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index); return &tc->connection; } -transport_connection_t * +static transport_connection_t * tcp_half_open_session_get_transport (u32 conn_index) { tcp_connection_t *tc = tcp_half_open_connection_get (conn_index); @@ -895,7 +976,7 @@ tcp_half_open_session_get_transport (u32 conn_index) * the tcp options to be used in the next burst and subtracts their * length from the connection's snd_mss. */ -u16 +static u16 tcp_session_send_mss (transport_connection_t * trans_conn) { tcp_connection_t *tc = (tcp_connection_t *) trans_conn; @@ -903,7 +984,7 @@ tcp_session_send_mss (transport_connection_t * trans_conn) /* Ensure snd_mss does accurately reflect the amount of data we can push * in a segment. This also makes sure that options are updated according to * the current state of the connection. */ - tcp_update_snd_mss (tc); + tcp_update_burst_snd_vars (tc); return tc->snd_mss; } @@ -935,7 +1016,7 @@ tcp_round_snd_space (tcp_connection_t * tc, u32 snd_space) * @param tc tcp connection * @return number of bytes session is allowed to write */ -u32 +static u32 tcp_snd_space (tcp_connection_t * tc) { int snd_space, snt_limited; @@ -975,7 +1056,7 @@ tcp_snd_space (tcp_connection_t * tc) * bytes of previously unsent data. */ if (tcp_in_fastrecovery (tc) && !tcp_fastrecovery_sent_1_smss (tc)) { - if (tcp_available_output_snd_space (tc) < tc->snd_mss) + if (tcp_available_cc_snd_space (tc) < tc->snd_mss) return 0; tcp_fastrecovery_1_smss_on (tc); return tc->snd_mss; @@ -984,7 +1065,7 @@ tcp_snd_space (tcp_connection_t * tc) return 0; } -u32 +static u32 tcp_session_send_space (transport_connection_t * trans_conn) { tcp_connection_t *tc = (tcp_connection_t *) trans_conn; @@ -992,13 +1073,7 @@ tcp_session_send_space (transport_connection_t * trans_conn) tc->snd_wnd - (tc->snd_nxt - tc->snd_una)); } -i32 -tcp_rcv_wnd_available (tcp_connection_t * tc) -{ - return (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las); -} - -u32 +static u32 tcp_session_tx_fifo_offset (transport_connection_t * trans_conn) { tcp_connection_t *tc = (tcp_connection_t *) trans_conn; @@ -1009,11 +1084,29 @@ tcp_session_tx_fifo_offset (transport_connection_t * trans_conn) return (tc->snd_nxt - tc->snd_una); } +static void +tcp_update_time (f64 now, u8 thread_index) +{ + tcp_set_time_now (thread_index); + tw_timer_expire_timers_16t_2w_512sl (&tcp_main. + wrk_ctx[thread_index].timer_wheel, + now); + tcp_flush_frames_to_output (thread_index); +} + +static u32 +tcp_session_push_header (transport_connection_t * tconn, vlib_buffer_t * b) +{ + tcp_connection_t *tc = (tcp_connection_t *) tconn; + return tcp_push_header (tc, b); +} + /* *INDENT-OFF* */ const static transport_proto_vft_t tcp_proto = { + .enable = vnet_tcp_enable_disable, .bind = tcp_session_bind, .unbind = tcp_session_unbind, - .push_header = tcp_push_header, + .push_header = tcp_session_push_header, .get_connection = tcp_session_get_transport, .get_listener = tcp_session_get_listener, .get_half_open = tcp_half_open_session_get_transport, @@ -1022,14 +1115,17 @@ const static transport_proto_vft_t tcp_proto = { .cleanup = tcp_session_cleanup, .send_mss = tcp_session_send_mss, .send_space = tcp_session_send_space, + .update_time = tcp_update_time, .tx_fifo_offset = tcp_session_tx_fifo_offset, .format_connection = format_tcp_session, .format_listener = format_tcp_listener_session, .format_half_open = format_tcp_half_open_session, + .tx_type = TRANSPORT_TX_PEEK, + .service_type = TRANSPORT_SERVICE_VC, }; /* *INDENT-ON* */ -void +static void tcp_timer_keep_handler (u32 conn_index) { u32 thread_index = vlib_get_thread_index (); @@ -1041,7 +1137,7 @@ tcp_timer_keep_handler (u32 conn_index) tcp_connection_close (tc); } -void +static void tcp_timer_establish_handler (u32 conn_index) { tcp_connection_t *tc; @@ -1069,7 +1165,7 @@ tcp_timer_establish_handler (u32 conn_index) tcp_connection_cleanup (tc); } -void +static void tcp_timer_waitclose_handler (u32 conn_index) { u32 thread_index = vlib_get_thread_index (); @@ -1134,13 +1230,13 @@ tcp_expired_timers_dispatch (u32 * expired_timers) } } -void +static void tcp_initialize_timer_wheels (tcp_main_t * tm) { tw_timer_wheel_16t_2w_512sl_t *tw; /* *INDENT-OFF* */ foreach_vlib_main (({ - tw = &tm->timer_wheels[ii]; + tw = &tm->wrk_ctx[ii].timer_wheel; tw_timer_wheel_init_16t_2w_512sl (tw, tcp_expired_timers_dispatch, 100e-3 /* timer period 100ms */ , ~0); tw->last_run_time = vlib_time_now (this_vlib_main); @@ -1148,7 +1244,7 @@ tcp_initialize_timer_wheels (tcp_main_t * tm) /* *INDENT-ON* */ } -clib_error_t * +static clib_error_t * tcp_main_enable (vlib_main_t * vm) { tcp_main_t *tm = vnet_get_tcp_main (); @@ -1173,10 +1269,6 @@ tcp_main_enable (vlib_main_t * vm) ip4_register_protocol (IP_PROTOCOL_TCP, tcp4_input_node.index); ip6_register_protocol (IP_PROTOCOL_TCP, tcp6_input_node.index); - /* Register as transport with session layer */ - transport_register_protocol (TRANSPORT_PROTO_TCP, 1, &tcp_proto); - transport_register_protocol (TRANSPORT_PROTO_TCP, 0, &tcp_proto); - /* * Initialize data structures */ @@ -1213,13 +1305,6 @@ tcp_main_enable (vlib_main_t * vm) pool_init_fixed (tm->half_open_connections, tm->preallocated_half_open_connections); - /* Initialize per worker thread tx buffers (used for control messages) */ - vec_validate (tm->tx_buffers, num_threads - 1); - - /* Initialize timer wheels */ - vec_validate (tm->timer_wheels, num_threads - 1); - tcp_initialize_timer_wheels (tm); - /* Initialize clocks per tick for TCP timestamp. Used to compute * monotonically increasing timestamps. */ tm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock @@ -1230,15 +1315,12 @@ tcp_main_enable (vlib_main_t * vm) clib_spinlock_init (&tm->half_open_lock); } - vec_validate (tm->tx_frames[0], num_threads - 1); - vec_validate (tm->tx_frames[1], num_threads - 1); - vec_validate (tm->ip_lookup_tx_frames[0], num_threads - 1); - vec_validate (tm->ip_lookup_tx_frames[1], num_threads - 1); + vec_validate (tm->wrk_ctx, num_threads - 1); + tcp_initialize_timer_wheels (tm); tm->bytes_per_buffer = vlib_buffer_free_list_buffer_size (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - vec_validate (tm->time_now, num_threads - 1); return error; } @@ -1270,7 +1352,7 @@ tcp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add) tm->punt_unknown6 = is_add; } -clib_error_t * +static clib_error_t * tcp_init (vlib_main_t * vm) { tcp_main_t *tm = vnet_get_tcp_main (); @@ -1287,6 +1369,12 @@ tcp_init (vlib_main_t * vm) pi->format_header = format_tcp_header; pi->unformat_pg_edit = unformat_pg_tcp_header; + /* Register as transport with session layer */ + transport_register_protocol (TRANSPORT_PROTO_TCP, &tcp_proto, + FIB_PROTOCOL_IP4, tcp4_output_node.index); + transport_register_protocol (TRANSPORT_PROTO_TCP, &tcp_proto, + FIB_PROTOCOL_IP6, tcp6_output_node.index); + tcp_api_reference (); return 0; } @@ -1297,7 +1385,6 @@ static clib_error_t * tcp_config_fn (vlib_main_t * vm, unformat_input_t * input) { tcp_main_t *tm = vnet_get_tcp_main (); - u64 tmp; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -1308,18 +1395,6 @@ tcp_config_fn (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "preallocated-half-open-connections %d", &tm->preallocated_half_open_connections)) ; - else if (unformat (input, "local-endpoints-table-memory %U", - unformat_memory_size, &tmp)) - { - if (tmp >= 0x100000000) - return clib_error_return (0, "memory size %llx (%lld) too large", - tmp, tmp); - tm->local_endpoints_table_memory = tmp; - } - else if (unformat (input, "local-endpoints-table-buckets %d", - &tm->local_endpoints_table_buckets)) - ; - else if (unformat (input, "buffer-fail-fraction %f", &tm->buffer_fail_fraction)) ;