X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Ftcp%2Ftcp.c;h=09913fa1242c7d1f28fecf3328547558fffcd44e;hb=0499d8839dccaeb176eb868dfe7bc4bd412506c9;hp=90b832cd73dc6df64e7c82e71e02eace6b6b5294;hpb=ea72764204dac0a1ab6412bc27b758faead2ca10;p=vpp.git diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 90b832cd73d..09913fa1242 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -71,6 +71,10 @@ tcp_add_del_adjacency (tcp_connection_t * tc, u8 is_add) static void tcp_cc_init (tcp_connection_t * tc) { + /* As per RFC 6582 initialize "recover" to iss */ + if (tcp_opts_sack_permitted (&tc->rcv_opts)) + tc->snd_congestion = tc->iss; + tc->cc_algo->init (tc); } @@ -108,7 +112,7 @@ tcp_cc_algo_new_type (const tcp_cc_algorithm_t * vft) } static u32 -tcp_connection_bind (u32 session_index, transport_endpoint_t * lcl) +tcp_connection_bind (u32 session_index, transport_endpoint_cfg_t *lcl) { tcp_main_t *tm = &tcp_main; tcp_connection_t *listener; @@ -143,7 +147,7 @@ tcp_connection_bind (u32 session_index, transport_endpoint_t * lcl) } static u32 -tcp_session_bind (u32 session_index, transport_endpoint_t * tep) +tcp_session_bind (u32 session_index, transport_endpoint_cfg_t *tep) { return tcp_connection_bind (session_index, tep); } @@ -181,6 +185,13 @@ tcp_session_get_listener (u32 listener_index) return &tc->connection; } +static tcp_connection_t * +tcp_half_open_connection_alloc (void) +{ + ASSERT (vlib_get_thread_index () == 0); + return tcp_connection_alloc (0); +} + /** * Cleanup half-open connection * @@ -188,12 +199,8 @@ tcp_session_get_listener (u32 listener_index) static void tcp_half_open_connection_free (tcp_connection_t * tc) { - tcp_main_t *tm = vnet_get_tcp_main (); - clib_spinlock_lock_if_init (&tm->half_open_lock); - if (CLIB_DEBUG) - clib_memset (tc, 0xFA, sizeof (*tc)); - pool_put (tm->half_open_connections, tc); - clib_spinlock_unlock_if_init (&tm->half_open_lock); + ASSERT (vlib_get_thread_index () == 0); + return tcp_connection_free (tc); } /** @@ -221,18 +228,6 @@ tcp_half_open_connection_cleanup (tcp_connection_t * tc) return 0; } -static tcp_connection_t * -tcp_half_open_connection_new (void) -{ - tcp_main_t *tm = vnet_get_tcp_main (); - tcp_connection_t *tc = 0; - ASSERT (vlib_get_thread_index () == 0); - pool_get (tm->half_open_connections, tc); - clib_memset (tc, 0, sizeof (*tc)); - tc->c_c_index = tc - tm->half_open_connections; - return tc; -} - /** * Cleans up connection state. * @@ -298,7 +293,7 @@ tcp_connection_alloc (u8 thread_index) tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index); tcp_connection_t *tc; - pool_get (wrk->connections, tc); + pool_get_aligned_safe (wrk->connections, tc, CLIB_CACHE_LINE_BYTES); clib_memset (tc, 0, sizeof (*tc)); tc->c_c_index = tc - wrk->connections; tc->c_thread_index = thread_index; @@ -306,13 +301,23 @@ tcp_connection_alloc (u8 thread_index) } tcp_connection_t * -tcp_connection_alloc_w_base (u8 thread_index, tcp_connection_t * base) +tcp_connection_alloc_w_base (u8 thread_index, tcp_connection_t **base) { tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index); tcp_connection_t *tc; - pool_get (wrk->connections, tc); - clib_memcpy_fast (tc, base, sizeof (*tc)); + /* Make sure connection is still valid if pool moves */ + if ((*base)->c_thread_index == thread_index) + { + u32 base_index = (*base)->c_c_index; + pool_get_aligned_safe (wrk->connections, tc, CLIB_CACHE_LINE_BYTES); + *base = tcp_connection_get (base_index, thread_index); + } + else + { + pool_get_aligned_safe (wrk->connections, tc, CLIB_CACHE_LINE_BYTES); + } + clib_memcpy_fast (tc, *base, sizeof (*tc)); tc->c_c_index = tc - wrk->connections; tc->c_thread_index = thread_index; return tc; @@ -354,7 +359,6 @@ tcp_program_cleanup (tcp_worker_ctx_t * wrk, tcp_connection_t * tc) * 2) TIME_WAIT (active close) whereby after 2MSL the 2MSL timer triggers * and cleanup is called. * - * N.B. Half-close connections are not supported */ void tcp_connection_close (tcp_connection_t * tc) @@ -425,6 +429,30 @@ tcp_connection_close (tcp_connection_t * tc) } } +static void +tcp_session_half_close (u32 conn_index, u32 thread_index) +{ + tcp_worker_ctx_t *wrk; + tcp_connection_t *tc; + + tc = tcp_connection_get (conn_index, thread_index); + wrk = tcp_get_worker (tc->c_thread_index); + + /* If the connection is not in ESTABLISHED state, ignore it */ + if (tc->state != TCP_STATE_ESTABLISHED) + return; + if (!transport_max_tx_dequeue (&tc->connection)) + tcp_send_fin (tc); + else + tc->flags |= TCP_CONN_FINPNDG; + tcp_connection_set_state (tc, TCP_STATE_FIN_WAIT_1); + /* Set a timer in case the peer stops responding. Otherwise the + * connection will be stuck here forever. */ + ASSERT (tc->timers[TCP_TIMER_WAITCLOSE] == TCP_TIMER_HANDLE_INVALID); + tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE, + tcp_cfg.finwait1_time); +} + static void tcp_session_close (u32 conn_index, u32 thread_index) { @@ -678,7 +706,13 @@ tcp_init_snd_vars (tcp_connection_t * tc) tcp_update_time_now (tcp_get_worker (vlib_get_thread_index ())); tcp_init_rcv_mss (tc); - tc->iss = tcp_generate_random_iss (tc); + /* + * In special case of early-kill of timewait socket, the iss will already + * be initialized to ensure it is greater than the last incarnation of the + * connection. see syn_during_timewait() for more details. + */ + if (!tc->iss) + tc->iss = tcp_generate_random_iss (tc); tc->snd_una = tc->iss; tc->snd_nxt = tc->iss + 1; tc->srtt = 0.1 * THZ; /* 100 ms */ @@ -739,6 +773,7 @@ tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr, index = tm->last_v4_addr_rotor++; if (tm->last_v4_addr_rotor >= vec_len (tcp_cfg.ip4_src_addrs)) tm->last_v4_addr_rotor = 0; + clib_memset (lcl_addr, 0, sizeof (*lcl_addr)); lcl_addr->ip4.as_u32 = tcp_cfg.ip4_src_addrs[index].as_u32; } else @@ -782,21 +817,20 @@ tcp_session_open (transport_endpoint_cfg_t * rmt) return rv; if (session_lookup_connection (rmt->fib_index, &lcl_addr, &rmt->ip, - lcl_port, rmt->port, TRANSPORT_PROTO_UDP, + lcl_port, rmt->port, TRANSPORT_PROTO_TCP, rmt->is_ip4)) return SESSION_E_PORTINUSE; /* 5-tuple is available so increase lcl endpoint refcount and proceed * with connection allocation */ - transport_share_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr, + transport_share_local_endpoint (TRANSPORT_PROTO_TCP, &lcl_addr, lcl_port); } /* * Create connection and send SYN */ - clib_spinlock_lock_if_init (&tm->half_open_lock); - tc = tcp_half_open_connection_new (); + tc = tcp_half_open_connection_alloc (); ip_copy (&tc->c_rmt_ip, &rmt->ip, rmt->is_ip4); ip_copy (&tc->c_lcl_ip, &lcl_addr, rmt->is_ip4); tc->c_rmt_port = rmt->port; @@ -808,12 +842,15 @@ tcp_session_open (transport_endpoint_cfg_t * rmt) /* The other connection vars will be initialized after SYN ACK */ tcp_connection_timers_init (tc); tc->mss = rmt->mss; + if (rmt->peer.sw_if_index != ENDPOINT_INVALID_INDEX) + tc->sw_if_index = rmt->peer.sw_if_index; + tc->next_node_index = rmt->next_node_index; + tc->next_node_opaque = rmt->next_node_opaque; TCP_EVT (TCP_EVT_OPEN, tc); tc->state = TCP_STATE_SYN_SENT; tcp_init_snd_vars (tc); tcp_send_syn (tc); - clib_spinlock_unlock_if_init (&tm->half_open_lock); return tc->c_c_index; } @@ -1316,6 +1353,7 @@ const static transport_proto_vft_t tcp_proto = { .get_half_open = tcp_half_open_session_get_transport, .attribute = tcp_session_attribute, .connect = tcp_session_open, + .half_close = tcp_session_half_close, .close = tcp_session_close, .cleanup = tcp_session_cleanup, .cleanup_ho = tcp_session_cleanup_ho, @@ -1404,7 +1442,8 @@ tcp_expired_timers_dispatch (u32 * expired_timers) clib_fifo_add (wrk->pending_timers, expired_timers, n_expired); - max_loops = clib_max (1, 0.5 * TCP_TIMER_TICK * wrk->vm->loops_per_second); + max_loops = + clib_max ((u32) 0.5 * TCP_TIMER_TICK * wrk->vm->loops_per_second, 1); max_per_loop = clib_max ((n_left + n_expired) / max_loops, 10); max_per_loop = clib_min (max_per_loop, VLIB_FRAME_SIZE); wrk->max_timers_per_loop = clib_max (n_left ? wrk->max_timers_per_loop : 0, @@ -1495,18 +1534,6 @@ tcp_main_enable (vlib_main_t * vm) vlib_time_now (vm)); } - /* - * Use a preallocated half-open connection pool? - */ - if (tcp_cfg.preallocated_half_open_connections) - pool_init_fixed (tm->half_open_connections, - tcp_cfg.preallocated_half_open_connections); - - if (num_threads > 1) - { - clib_spinlock_init (&tm->half_open_lock); - } - tcp_initialize_iss_seed (tm); tm->bytes_per_buffer = vlib_buffer_get_default_data_size (vm); @@ -1575,6 +1602,7 @@ tcp_configuration_init (void) tcp_cfg.lastack_time = 300000; /* 30s */ tcp_cfg.finwait2_time = 300000; /* 30s */ tcp_cfg.closing_time = 300000; /* 30s */ + tcp_cfg.alloc_err_timeout = 1000; /* 100ms */ /* This value is seconds */ tcp_cfg.cleanup_time = 0.1; /* 100ms */