static void
tcp_cc_init (tcp_connection_t * tc)
{
- tc->cc_algo = tcp_cc_algo_get (tcp_main.cc_algo);
tc->cc_algo->init (tc);
}
listener->c_s_index = session_index;
listener->c_fib_index = lcl->fib_index;
listener->state = TCP_STATE_LISTEN;
+ listener->cc_algo = tcp_cc_algo_get (tcp_cfg.cc_algo);
tcp_connection_timers_init (listener);
- TCP_EVT_DBG (TCP_EVT_BIND, listener);
+ TCP_EVT (TCP_EVT_BIND, listener);
return listener->c_c_index;
}
tc = pool_elt_at_index (tm->listener_pool, listener_index);
- TCP_EVT_DBG (TCP_EVT_UNBIND, tc);
+ TCP_EVT (TCP_EVT_UNBIND, tc);
/* Poison the entry */
if (CLIB_DEBUG > 0)
/* Make sure this is the owning thread */
if (tc->c_thread_index != vlib_get_thread_index ())
return 1;
- tcp_timer_reset (tc, TCP_TIMER_ESTABLISH_AO);
tcp_timer_reset (tc, TCP_TIMER_RETRANSMIT_SYN);
tcp_half_open_connection_del (tc);
return 0;
{
tcp_main_t *tm = &tcp_main;
- TCP_EVT_DBG (TCP_EVT_DELETE, tc);
+ TCP_EVT (TCP_EVT_DELETE, tc);
/* Cleanup local endpoint if this was an active connect */
transport_endpoint_cleanup (TRANSPORT_PROTO_TCP, &tc->c_lcl_ip,
return tc;
}
+tcp_connection_t *
+tcp_connection_alloc_w_base (u8 thread_index, tcp_connection_t * base)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ tcp_connection_t *tc;
+
+ pool_get (tm->connections[thread_index], tc);
+ clib_memcpy_fast (tc, base, sizeof (*tc));
+ tc->c_c_index = tc - tm->connections[thread_index];
+ tc->c_thread_index = thread_index;
+ return tc;
+}
+
void
tcp_connection_free (tcp_connection_t * tc)
{
void
tcp_connection_reset (tcp_connection_t * tc)
{
- TCP_EVT_DBG (TCP_EVT_RST_RCVD, tc);
+ TCP_EVT (TCP_EVT_RST_RCVD, tc);
switch (tc->state)
{
case TCP_STATE_SYN_RCVD:
tcp_connection_timers_reset (tc);
/* Set the cleanup timer, in case the session layer/app don't
* cleanly close the connection */
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
session_transport_reset_notify (&tc->connection);
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
session_transport_closed_notify (&tc->connection);
case TCP_STATE_CLOSING:
case TCP_STATE_LAST_ACK:
tcp_connection_timers_reset (tc);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
/* Make sure we mark the session as closed. In some states we may
* be still trying to send data */
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
void
tcp_connection_close (tcp_connection_t * tc)
{
- TCP_EVT_DBG (TCP_EVT_CLOSE, tc);
+ TCP_EVT (TCP_EVT_CLOSE, tc);
/* Send/Program FIN if needed and switch state */
switch (tc->state)
tcp_connection_timers_reset (tc);
tcp_send_fin (tc);
tcp_connection_set_state (tc, TCP_STATE_FIN_WAIT_1);
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_FINWAIT1_TIME);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.finwait1_time);
break;
case TCP_STATE_ESTABLISHED:
/* If closing with unread data, reset the connection */
tcp_send_reset (tc);
tcp_connection_timers_reset (tc);
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
session_transport_closed_notify (&tc->connection);
break;
}
/* Set a timer in case the peer stops responding. Otherwise the
* connection will be stuck here forever. */
ASSERT (tc->timers[TCP_TIMER_WAITCLOSE] == TCP_TIMER_HANDLE_INVALID);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_FINWAIT1_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.finwait1_time);
break;
case TCP_STATE_CLOSE_WAIT:
if (!transport_max_tx_dequeue (&tc->connection))
tcp_send_fin (tc);
tcp_connection_timers_reset (tc);
tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time);
}
else
tc->flags |= TCP_CONN_FINPNDG;
break;
case TCP_STATE_FIN_WAIT_1:
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.finwait1_time);
break;
case TCP_STATE_CLOSED:
tcp_connection_timers_reset (tc);
/* Delete connection but instead of doing it now wait until next
* dispatch cycle to give the session layer a chance to clear
* unhandled events */
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
break;
default:
TCP_DBG ("state: %u", tc->state);
tcp_connection_cleanup (tc);
}
+static void
+tcp_session_reset (u32 conn_index, u32 thread_index)
+{
+ tcp_connection_t *tc;
+ tc = tcp_connection_get (conn_index, thread_index);
+ session_transport_closed_notify (&tc->connection);
+ tcp_send_reset (tc);
+ tcp_connection_timers_reset (tc);
+ tcp_connection_set_state (tc, TCP_STATE_CLOSED);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+}
+
/**
* Initialize all connection timers as invalid
*/
/* tcp_connection_fib_attach (tc); */
if (transport_connection_is_tx_paced (&tc->connection)
- || tcp_main.tx_pacing)
+ || tcp_cfg.enable_tx_pacing)
tcp_enable_pacing (tc);
if (tc->flags & TCP_CONN_RATE_SAMPLE)
int index, port;
if (is_ip4)
{
- index = tm->last_v4_address_rotor++;
- if (tm->last_v4_address_rotor >= vec_len (tm->ip4_src_addresses))
- tm->last_v4_address_rotor = 0;
- lcl_addr->ip4.as_u32 = tm->ip4_src_addresses[index].as_u32;
+ index = tm->last_v4_addr_rotor++;
+ if (tm->last_v4_addr_rotor >= vec_len (tcp_cfg.ip4_src_addrs))
+ tm->last_v4_addr_rotor = 0;
+ lcl_addr->ip4.as_u32 = tcp_cfg.ip4_src_addrs[index].as_u32;
}
else
{
- index = tm->last_v6_address_rotor++;
- if (tm->last_v6_address_rotor >= vec_len (tm->ip6_src_addresses))
- tm->last_v6_address_rotor = 0;
- clib_memcpy_fast (&lcl_addr->ip6, &tm->ip6_src_addresses[index],
+ index = tm->last_v6_addr_rotor++;
+ if (tm->last_v6_addr_rotor >= vec_len (tcp_cfg.ip6_src_addrs))
+ tm->last_v6_addr_rotor = 0;
+ clib_memcpy_fast (&lcl_addr->ip6, &tcp_cfg.ip6_src_addrs[index],
sizeof (ip6_address_t));
}
port = transport_alloc_local_port (TRANSPORT_PROTO_TCP, lcl_addr);
/*
* Allocate local endpoint
*/
- if ((rmt->is_ip4 && vec_len (tm->ip4_src_addresses))
- || (!rmt->is_ip4 && vec_len (tm->ip6_src_addresses)))
+ if ((rmt->is_ip4 && vec_len (tcp_cfg.ip4_src_addrs))
+ || (!rmt->is_ip4 && vec_len (tcp_cfg.ip6_src_addrs)))
rv = tcp_alloc_custom_local_endpoint (tm, &lcl_addr, &lcl_port,
rmt->is_ip4);
else
tc->c_is_ip4 = rmt->is_ip4;
tc->c_proto = TRANSPORT_PROTO_TCP;
tc->c_fib_index = rmt->fib_index;
+ tc->cc_algo = tcp_cc_algo_get (tcp_cfg.cc_algo);
/* The other connection vars will be initialized after SYN ACK */
tcp_connection_timers_init (tc);
- TCP_EVT_DBG (TCP_EVT_OPEN, tc);
+ TCP_EVT (TCP_EVT_OPEN, tc);
tc->state = TCP_STATE_SYN_SENT;
tcp_init_snd_vars (tc);
tcp_send_syn (tc);
return tc->c_c_index;
}
-const char *tcp_dbg_evt_str[] = {
-#define _(sym, str) str,
- foreach_tcp_dbg_evt
-#undef _
-};
-
const char *tcp_fsm_states[] = {
#define _(sym, str) str,
foreach_tcp_fsm_state
.connect = tcp_session_open,
.close = tcp_session_close,
.cleanup = tcp_session_cleanup,
+ .reset = tcp_session_reset,
.send_mss = tcp_session_send_mss,
.send_space = tcp_session_send_space,
.update_time = tcp_update_time,
void
tcp_connection_tx_pacer_update (tcp_connection_t * tc)
{
- f64 srtt;
- u64 rate;
-
if (!transport_connection_is_tx_paced (&tc->connection))
return;
- srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us);
- /* TODO should constrain to interface's max throughput but
- * we don't have link speeds for sw ifs ..*/
- rate = tc->cwnd / srtt;
- transport_connection_tx_pacer_update (&tc->connection, rate);
+ transport_connection_tx_pacer_update (&tc->connection,
+ tcp_cc_get_pacing_rate (tc));
}
void
start_bucket, last_time);
}
-static void
-tcp_timer_keep_handler (u32 conn_index)
-{
- u32 thread_index = vlib_get_thread_index ();
- tcp_connection_t *tc;
-
- tc = tcp_connection_get (conn_index, thread_index);
- tc->timers[TCP_TIMER_KEEP] = TCP_TIMER_HANDLE_INVALID;
-
- tcp_connection_close (tc);
-}
-
-static void
-tcp_timer_establish_handler (u32 conn_index)
-{
- tcp_connection_t *tc;
-
- tc = tcp_connection_get (conn_index, vlib_get_thread_index ());
- /* note: the connection may have already disappeared */
- if (PREDICT_FALSE (tc == 0))
- return;
- ASSERT (tc->state == TCP_STATE_SYN_RCVD);
- tc->timers[TCP_TIMER_ESTABLISH] = TCP_TIMER_HANDLE_INVALID;
- tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_connection_timers_reset (tc);
- /* Start cleanup. Do NOT delete the session until we do the connection
- * cleanup. Otherwise, we end up with a dangling session index in the
- * tcp connection. */
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
-}
-
-static void
-tcp_timer_establish_ao_handler (u32 conn_index)
-{
- tcp_connection_t *tc;
-
- tc = tcp_half_open_connection_get (conn_index);
- if (!tc)
- return;
-
- ASSERT (tc->state == TCP_STATE_SYN_SENT);
- /* Notify app if we haven't tried to clean this up already */
- if (!(tc->flags & TCP_CONN_HALF_OPEN_DONE))
- session_stream_connect_notify (&tc->connection, 1 /* fail */ );
-
- tc->timers[TCP_TIMER_ESTABLISH_AO] = TCP_TIMER_HANDLE_INVALID;
- tcp_connection_cleanup (tc);
-}
-
static void
tcp_timer_waitclose_handler (u32 conn_index)
{
tc = tcp_connection_get (conn_index, thread_index);
if (!tc)
return;
+
tc->timers[TCP_TIMER_WAITCLOSE] = TCP_TIMER_HANDLE_INVALID;
switch (tc->state)
if (!(tc->flags & TCP_CONN_FINPNDG))
{
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
break;
}
tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
/* Make sure we don't wait in LAST ACK forever */
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time);
/* Don't delete the connection yet */
break;
* Notify session layer that transport is closed. */
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
tcp_send_reset (tc);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
}
else
{
/* We've sent the fin but no progress. Close the connection and
* to make sure everything is flushed, setup a cleanup timer */
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
}
break;
case TCP_STATE_LAST_ACK:
case TCP_STATE_CLOSING:
tcp_connection_timers_reset (tc);
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
session_transport_closed_notify (&tc->connection);
break;
default:
tcp_timer_retransmit_handler,
tcp_timer_delack_handler,
tcp_timer_persist_handler,
- tcp_timer_keep_handler,
tcp_timer_waitclose_handler,
tcp_timer_retransmit_syn_handler,
- tcp_timer_establish_handler,
- tcp_timer_establish_ao_handler,
};
/* *INDENT-ON* */
connection_index = expired_timers[i] & 0x0FFFFFFF;
timer_id = expired_timers[i] >> 28;
- TCP_EVT_DBG (TCP_EVT_TIMER_POP, connection_index, timer_id);
+ TCP_EVT (TCP_EVT_TIMER_POP, connection_index, timer_id);
/* Handle expiration */
(*timer_expiration_handlers[timer_id]) (connection_index);
foreach_vlib_main (({
tw = &tm->wrk_ctx[ii].timer_wheel;
tw_timer_wheel_init_16t_2w_512sl (tw, tcp_expired_timers_dispatch,
- 100e-3 /* timer period 100ms */ , ~0);
+ TCP_TIMER_TICK, ~0);
tw->last_run_time = vlib_time_now (this_vlib_main);
}));
/* *INDENT-ON* */
vec_validate (tm->connections, num_threads - 1);
vec_validate (tm->wrk_ctx, num_threads - 1);
n_workers = num_threads == 1 ? 1 : vtm->n_threads;
- prealloc_conn_per_wrk = tm->preallocated_connections / n_workers;
+ prealloc_conn_per_wrk = tcp_cfg.preallocated_connections / n_workers;
for (thread = 0; thread < num_threads; thread++)
{
/*
* Use a preallocated half-open connection pool?
*/
- if (tm->preallocated_half_open_connections)
+ if (tcp_cfg.preallocated_half_open_connections)
pool_init_fixed (tm->half_open_connections,
- tm->preallocated_half_open_connections);
+ tcp_cfg.preallocated_half_open_connections);
/* Initialize clocks per tick for TCP timestamp. Used to compute
* monotonically increasing timestamps. */
tm->punt_unknown6 = is_add;
}
+/**
+ * Initialize default values for tcp parameters
+ */
+static void
+tcp_configuration_init (void)
+{
+ /* Initial wnd for SYN. Fifos are not allocated at that point so use some
+ * predefined value. For SYN-ACK we still want the scale to be computed in
+ * the same way */
+ tcp_cfg.max_rx_fifo = 32 << 20;
+ tcp_cfg.min_rx_fifo = 4 << 10;
+
+ tcp_cfg.default_mtu = 1460;
+ tcp_cfg.initial_cwnd_multiplier = 0;
+ tcp_cfg.enable_tx_pacing = 1;
+ tcp_cfg.cc_algo = TCP_CC_NEWRENO;
+
+ /* Time constants defined as timer tick (100ms) multiples */
+ tcp_cfg.delack_time = 1; /* 0.1s */
+ tcp_cfg.closewait_time = 20; /* 2s */
+ tcp_cfg.timewait_time = 100; /* 10s */
+ tcp_cfg.finwait1_time = 600; /* 60s */
+ tcp_cfg.lastack_time = 300; /* 30s */
+ tcp_cfg.finwait2_time = 300; /* 30s */
+ tcp_cfg.closing_time = 300; /* 30s */
+ tcp_cfg.cleanup_time = 1; /* 0.1s */
+}
+
static clib_error_t *
tcp_init (vlib_main_t * vm)
{
FIB_PROTOCOL_IP6, tcp6_output_node.index);
tcp_api_reference ();
+ tcp_configuration_init ();
+
tm->cc_algo_by_name = hash_create_string (0, sizeof (uword));
- tm->tx_pacing = 1;
- tm->cc_algo = TCP_CC_NEWRENO;
- tm->default_mtu = 1460;
- tm->initial_cwnd_multiplier = 0;
+
return 0;
}
uword
unformat_tcp_cc_algo (unformat_input_t * input, va_list * va)
{
- uword *result = va_arg (*va, uword *);
+ tcp_cc_algorithm_type_e *result = va_arg (*va, tcp_cc_algorithm_type_e *);
tcp_main_t *tm = &tcp_main;
char *cc_algo_name;
u8 found = 0;
static clib_error_t *
tcp_config_fn (vlib_main_t * vm, unformat_input_t * input)
{
- tcp_main_t *tm = vnet_get_tcp_main ();
-
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (input, "preallocated-connections %d",
- &tm->preallocated_connections))
+ &tcp_cfg.preallocated_connections))
;
else if (unformat (input, "preallocated-half-open-connections %d",
- &tm->preallocated_half_open_connections))
+ &tcp_cfg.preallocated_half_open_connections))
;
else if (unformat (input, "buffer-fail-fraction %f",
- &tm->buffer_fail_fraction))
+ &tcp_cfg.buffer_fail_fraction))
;
else if (unformat (input, "max-rx-fifo %U", unformat_memory_size,
- &tm->max_rx_fifo))
+ &tcp_cfg.max_rx_fifo))
+ ;
+ else if (unformat (input, "min-rx-fifo %U", unformat_memory_size,
+ &tcp_cfg.min_rx_fifo))
;
- else if (unformat (input, "mtu %d", &tm->default_mtu))
+ else if (unformat (input, "mtu %d", &tcp_cfg.default_mtu))
;
else if (unformat (input, "initial-cwnd-multiplier %d",
- &tm->initial_cwnd_multiplier))
+ &tcp_cfg.initial_cwnd_multiplier))
;
else if (unformat (input, "no-tx-pacing"))
- tm->tx_pacing = 0;
+ tcp_cfg.enable_tx_pacing = 0;
else if (unformat (input, "cc-algo %U", unformat_tcp_cc_algo,
- &tm->cc_algo))
+ &tcp_cfg.cc_algo))
;
else if (unformat (input, "%U", unformat_tcp_cc_algo_cfg))
;
+ else if (unformat (input, "closewait-time %d", &tcp_cfg.closewait_time))
+ tcp_cfg.closewait_time /= TCP_TIMER_TICK;
+ else if (unformat (input, "timewait-time %d", &tcp_cfg.timewait_time))
+ tcp_cfg.timewait_time /= TCP_TIMER_TICK;
+ else if (unformat (input, "finwait1-time %d", &tcp_cfg.finwait1_time))
+ tcp_cfg.finwait1_time /= TCP_TIMER_TICK;
+ else if (unformat (input, "finwait2-time %d", &tcp_cfg.finwait2_time))
+ tcp_cfg.finwait2_time /= TCP_TIMER_TICK;
+ else if (unformat (input, "lastack-time %d", &tcp_cfg.lastack_time))
+ tcp_cfg.lastack_time /= TCP_TIMER_TICK;
+ else if (unformat (input, "closing-time %d", &tcp_cfg.closing_time))
+ tcp_cfg.closing_time /= TCP_TIMER_TICK;
+ else if (unformat (input, "cleanup-time %d", &tcp_cfg.cleanup_time))
+ tcp_cfg.cleanup_time /= TCP_TIMER_TICK;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
ip4_address_t * start,
ip4_address_t * end, u32 table_id)
{
- tcp_main_t *tm = vnet_get_tcp_main ();
vnet_main_t *vnm = vnet_get_main ();
u32 start_host_byte_order, end_host_byte_order;
fib_prefix_t prefix;
{
dpo_id_t dpo = DPO_INVALID;
- vec_add1 (tm->ip4_src_addresses, start[0]);
+ vec_add1 (tcp_cfg.ip4_src_addrs, start[0]);
/* Add local adjacencies for the range */
ip6_address_t * start,
ip6_address_t * end, u32 table_id)
{
- tcp_main_t *tm = vnet_get_tcp_main ();
fib_prefix_t prefix;
u32 fib_index = 0;
fib_node_index_t fei;
dpo_id_t dpo = DPO_INVALID;
/* Remember this address */
- vec_add1 (tm->ip6_src_addresses, start[0]);
+ vec_add1 (tcp_cfg.ip6_src_addrs, start[0]);
/* Lookup the prefix, to identify the interface involved */
prefix.fp_len = 128;