+/**
+ * Run the init hook of the congestion control algorithm attached to tc.
+ *
+ * This function does not select the algorithm: tc->cc_algo is dereferenced
+ * directly, so it must already point to a valid algorithm when this is
+ * called.
+ */
static void
tcp_cc_init (tcp_connection_t * tc)
{
- tc->cc_algo = tcp_cc_algo_get (tcp_cfg.cc_algo);
tc->cc_algo->init (tc);
}
listener->c_s_index = session_index;
listener->c_fib_index = lcl->fib_index;
listener->state = TCP_STATE_LISTEN;
+ listener->cc_algo = tcp_cc_algo_get (tcp_cfg.cc_algo);
tcp_connection_timers_init (listener);
- TCP_EVT_DBG (TCP_EVT_BIND, listener);
+ TCP_EVT (TCP_EVT_BIND, listener);
return listener->c_c_index;
}
tc = pool_elt_at_index (tm->listener_pool, listener_index);
- TCP_EVT_DBG (TCP_EVT_UNBIND, tc);
+ TCP_EVT (TCP_EVT_UNBIND, tc);
/* Poison the entry */
if (CLIB_DEBUG > 0)
{
tcp_main_t *tm = &tcp_main;
- TCP_EVT_DBG (TCP_EVT_DELETE, tc);
+ TCP_EVT (TCP_EVT_DELETE, tc);
/* Cleanup local endpoint if this was an active connect */
transport_endpoint_cleanup (TRANSPORT_PROTO_TCP, &tc->c_lcl_ip,
return tc;
}
+/**
+ * Allocate a connection on a given thread as a copy of an existing one.
+ *
+ * A new element is taken from tm->connections[thread_index], the entire
+ * contents of *base are copied over it, and the connection index and
+ * thread index are then rewritten to describe the new element.
+ *
+ * NOTE(review): base is read after pool_get(). If base can live in the same
+ * pool and pool_get() can relocate that pool, base would be stale at the
+ * time of the copy -- confirm callers only pass connections from a
+ * different pool.
+ *
+ * @param thread_index index of the per-thread connection pool to use
+ * @param base connection whose contents seed the new one
+ * @return pointer to the newly allocated connection
+ */
+tcp_connection_t *
+tcp_connection_alloc_w_base (u8 thread_index, tcp_connection_t * base)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ tcp_connection_t *tc;
+
+ pool_get (tm->connections[thread_index], tc);
+ clib_memcpy_fast (tc, base, sizeof (*tc));
+ /* The copy brought over base's indices; replace them with our own */
+ tc->c_c_index = tc - tm->connections[thread_index];
+ tc->c_thread_index = thread_index;
+ return tc;
+}
+
void
tcp_connection_free (tcp_connection_t * tc)
{
void
tcp_connection_reset (tcp_connection_t * tc)
{
- TCP_EVT_DBG (TCP_EVT_RST_RCVD, tc);
+ TCP_EVT (TCP_EVT_RST_RCVD, tc);
switch (tc->state)
{
case TCP_STATE_SYN_RCVD:
void
tcp_connection_close (tcp_connection_t * tc)
{
- TCP_EVT_DBG (TCP_EVT_CLOSE, tc);
+ TCP_EVT (TCP_EVT_CLOSE, tc);
/* Send/Program FIN if needed and switch state */
switch (tc->state)
tcp_connection_cleanup (tc);
}
+/**
+ * Reset handler registered as the transport's .reset callback.
+ *
+ * Looks up the connection for (conn_index, thread_index), notifies the
+ * session layer that the transport is closed, sends a reset, resets the
+ * connection timers, moves the connection to TCP_STATE_CLOSED and finally
+ * arms the WAITCLOSE timer with the configured cleanup time.
+ *
+ * @param conn_index index of the connection to reset
+ * @param thread_index thread that owns the connection
+ */
+static void
+tcp_session_reset (u32 conn_index, u32 thread_index)
+{
+ tcp_connection_t *tc;
+ tc = tcp_connection_get (conn_index, thread_index);
+ /* NOTE(review): tc is used without a NULL check, while
+ * tcp_session_get_transport guards the same lookup -- confirm this
+ * callback is only invoked with a live connection index. */
+ session_transport_closed_notify (&tc->connection);
+ tcp_send_reset (tc);
+ tcp_connection_timers_reset (tc);
+ tcp_connection_set_state (tc, TCP_STATE_CLOSED);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+}
+
/**
* Initialize all connection timers as invalid
*/
return ((tmp >> 32) ^ (tmp & 0xffffffff));
}
+/**
+ * Set the largest segment size we are able to receive on a connection.
+ *
+ * Starting from the configured default MTU, subtract the fixed TCP header
+ * and the IPv4 or IPv6 header, depending on the connection's address
+ * family (see RFC6691). The result is stored in tc->mss; it is also the
+ * value we advertise to our peer.
+ */
+static void
+tcp_init_rcv_mss (tcp_connection_t * tc)
+{
+  u8 l3_hdr_sz;
+
+  /* Network header size depends on the address family in use */
+  if (tc->c_is_ip4)
+    l3_hdr_sz = sizeof (ip4_header_t);
+  else
+    l3_hdr_sz = sizeof (ip6_header_t);
+
+  tc->mss = tcp_cfg.default_mtu - sizeof (tcp_header_t) - l3_hdr_sz;
+}
+
+/**
+ * Initialize the receive and send MSS of a connection.
+ *
+ * The local receive MSS is computed first via tcp_init_rcv_mss(). The send
+ * MSS is then the smaller of the MSS found in the peer's options
+ * (rcv_opts.mss) and the local value. If that leaves no room for payload
+ * next to 40 bytes of options, both snd_mss and rcv_opts.mss fall back to
+ * the 536 byte default minimum. When the timestamp option is in use its
+ * length is finally deducted from snd_mss.
+ */
+static void
+tcp_init_mss (tcp_connection_t * tc)
+{
+  u16 default_min_mss = 536;
+
+  tcp_init_rcv_mss (tc);
+
+  /* TODO consider PMTU discovery */
+  tc->snd_mss = clib_min (tc->rcv_opts.mss, tc->mss);
+
+  /* Inclusive bound: a value of exactly 45 must also take the fallback,
+   * otherwise it would trip the assertion below, which demands strictly
+   * more than 45. */
+  if (tc->snd_mss <= 45)
+    {
+      /* Assume that at least the min default mss works */
+      tc->snd_mss = default_min_mss;
+      tc->rcv_opts.mss = default_min_mss;
+    }
+
+  /* We should have enough space for 40 bytes of options */
+  ASSERT (tc->snd_mss > 45);
+
+  /* If we use timestamp option, account for it */
+  if (tcp_opts_tstamp (&tc->rcv_opts))
+    tc->snd_mss -= TCP_OPTION_LEN_TIMESTAMP;
+}
+
/**
* Initialize connection send variables.
*/
*/
tcp_set_time_now (tcp_get_worker (vlib_get_thread_index ()));
+ tcp_init_rcv_mss (tc);
tc->iss = tcp_generate_random_iss (tc);
tc->snd_una = tc->iss;
tc->snd_nxt = tc->iss + 1;
tc->c_is_ip4 = rmt->is_ip4;
tc->c_proto = TRANSPORT_PROTO_TCP;
tc->c_fib_index = rmt->fib_index;
+ tc->cc_algo = tcp_cc_algo_get (tcp_cfg.cc_algo);
/* The other connection vars will be initialized after SYN ACK */
tcp_connection_timers_init (tc);
- TCP_EVT_DBG (TCP_EVT_OPEN, tc);
+ TCP_EVT (TCP_EVT_OPEN, tc);
tc->state = TCP_STATE_SYN_SENT;
tcp_init_snd_vars (tc);
tcp_send_syn (tc);
return tc->c_c_index;
}
-const char *tcp_dbg_evt_str[] = {
-#define _(sym, str) str,
- foreach_tcp_dbg_evt
-#undef _
-};
-
const char *tcp_fsm_states[] = {
#define _(sym, str) str,
foreach_tcp_fsm_state
tcp_session_get_transport (u32 conn_index, u32 thread_index)
{
tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index);
+ if (PREDICT_FALSE (!tc))
+ return 0;
return &tc->connection;
}
return &tc->connection;
}
+/**
+ * Compute the segment size goal used by the send-mss path when the
+ * connection has is_tso set.
+ *
+ * The goal is TCP_MAX_GSO_SZ minus (snd_mss modulo TCP_MAX_GSO_SZ),
+ * truncated to 16 bits, and then capped at half of the send window.
+ *
+ * @param tc connection to compute the goal for
+ * @return segment size goal
+ */
+static u16
+tcp_session_cal_goal_size (tcp_connection_t * tc)
+{
+  /* Keep this in a u16 so the truncation happens before the comparison */
+  u16 gso_room = TCP_MAX_GSO_SZ - tc->snd_mss % TCP_MAX_GSO_SZ;
+  u16 capped = clib_min (gso_room, tc->snd_wnd / 2);
+
+  return capped;
+}
+
/**
* Compute maximum segment size for session layer.
*
* the current state of the connection. */
tcp_update_burst_snd_vars (tc);
+ if (PREDICT_FALSE (tc->is_tso))
+ {
+ return tcp_session_cal_goal_size (tc);
+ }
+
return tc->snd_mss;
}
.connect = tcp_session_open,
.close = tcp_session_close,
.cleanup = tcp_session_cleanup,
+ .reset = tcp_session_reset,
.send_mss = tcp_session_send_mss,
.send_space = tcp_session_send_space,
.update_time = tcp_update_time,
+/**
+ * Refresh the tx pacer of a connection.
+ *
+ * Returns immediately unless the underlying transport connection is tx
+ * paced. Otherwise the pacer is updated with the rate reported by
+ * tcp_cc_get_pacing_rate() for this connection.
+ */
void
tcp_connection_tx_pacer_update (tcp_connection_t * tc)
{
- f64 srtt;
- u64 rate;
-
if (!transport_connection_is_tx_paced (&tc->connection))
return;
- srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us);
- /* TODO should constrain to interface's max throughput but
- * we don't have link speeds for sw ifs ..*/
- rate = tc->cwnd / srtt;
- transport_connection_tx_pacer_update (&tc->connection, rate);
+ transport_connection_tx_pacer_update (&tc->connection,
+ tcp_cc_get_pacing_rate (tc));
}
void
connection_index = expired_timers[i] & 0x0FFFFFFF;
timer_id = expired_timers[i] >> 28;
- TCP_EVT_DBG (TCP_EVT_TIMER_POP, connection_index, timer_id);
+ TCP_EVT (TCP_EVT_TIMER_POP, connection_index, timer_id);
/* Handle expiration */
(*timer_expiration_handlers[timer_id]) (connection_index);
tcp_cfg.max_rx_fifo = 32 << 20;
tcp_cfg.min_rx_fifo = 4 << 10;
- tcp_cfg.default_mtu = 1460;
+ tcp_cfg.default_mtu = 1500;
tcp_cfg.initial_cwnd_multiplier = 0;
tcp_cfg.enable_tx_pacing = 1;
tcp_cfg.cc_algo = TCP_CC_NEWRENO;
+ tcp_cfg.rwnd_min_update_ack = 1;
/* Time constants defined as timer tick (100ms) multiples */
tcp_cfg.delack_time = 1; /* 0.1s */
static clib_error_t *
tcp_config_fn (vlib_main_t * vm, unformat_input_t * input)
{
+ uword memory_size;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (input, "preallocated-connections %d",
&tcp_cfg.buffer_fail_fraction))
;
else if (unformat (input, "max-rx-fifo %U", unformat_memory_size,
- &tcp_cfg.max_rx_fifo))
- ;
+ &memory_size))
+ tcp_cfg.max_rx_fifo = memory_size;
else if (unformat (input, "min-rx-fifo %U", unformat_memory_size,
- &tcp_cfg.min_rx_fifo))
+ &memory_size))
+ tcp_cfg.min_rx_fifo = memory_size;
+ else if (unformat (input, "mtu %u", &tcp_cfg.default_mtu))
;
- else if (unformat (input, "mtu %d", &tcp_cfg.default_mtu))
+ else if (unformat (input, "rwnd-min-update-ack %d",
+ &tcp_cfg.rwnd_min_update_ack))
;
- else if (unformat (input, "initial-cwnd-multiplier %d",
+ else if (unformat (input, "initial-cwnd-multiplier %u",
&tcp_cfg.initial_cwnd_multiplier))
;
else if (unformat (input, "no-tx-pacing"))
;
else if (unformat (input, "%U", unformat_tcp_cc_algo_cfg))
;
- else if (unformat (input, "closewait-time %d", &tcp_cfg.closewait_time))
+ else if (unformat (input, "closewait-time %u", &tcp_cfg.closewait_time))
tcp_cfg.closewait_time /= TCP_TIMER_TICK;
- else if (unformat (input, "timewait-time %d", &tcp_cfg.timewait_time))
+ else if (unformat (input, "timewait-time %u", &tcp_cfg.timewait_time))
tcp_cfg.timewait_time /= TCP_TIMER_TICK;
- else if (unformat (input, "finwait1-time %d", &tcp_cfg.finwait1_time))
+ else if (unformat (input, "finwait1-time %u", &tcp_cfg.finwait1_time))
tcp_cfg.finwait1_time /= TCP_TIMER_TICK;
- else if (unformat (input, "finwait2-time %d", &tcp_cfg.finwait2_time))
+ else if (unformat (input, "finwait2-time %u", &tcp_cfg.finwait2_time))
tcp_cfg.finwait2_time /= TCP_TIMER_TICK;
- else if (unformat (input, "lastack-time %d", &tcp_cfg.lastack_time))
+ else if (unformat (input, "lastack-time %u", &tcp_cfg.lastack_time))
tcp_cfg.lastack_time /= TCP_TIMER_TICK;
- else if (unformat (input, "closing-time %d", &tcp_cfg.closing_time))
+ else if (unformat (input, "closing-time %u", &tcp_cfg.closing_time))
tcp_cfg.closing_time /= TCP_TIMER_TICK;
- else if (unformat (input, "cleanup-time %d", &tcp_cfg.cleanup_time))
+ else if (unformat (input, "cleanup-time %u", &tcp_cfg.cleanup_time))
tcp_cfg.cleanup_time /= TCP_TIMER_TICK;
else
return clib_error_return (0, "unknown input `%U'",