}
tcp_connection_t *
-tcp_connection_new (u8 thread_index)
+tcp_connection_alloc (u8 thread_index)
{
tcp_main_t *tm = vnet_get_tcp_main ();
tcp_connection_t *tc;
return tc;
}
+void
+tcp_connection_free (tcp_connection_t * tc)
+{
+ tcp_main_t *tm = &tcp_main;
+ pool_put (tm->connections[tc->c_thread_index], tc);
+ if (CLIB_DEBUG > 0)
+ clib_memset (tc, 0xFA, sizeof (*tc));
+}
+
/** Notify session that connection has been reset.
*
* Switch state to closed and wait for session to call cleanup.
tcp_connection_timers_reset (tc);
/* Set the cleanup timer, in case the session layer/app don't
* cleanly close the connection */
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
stream_session_reset_notify (&tc->connection);
+ tc->state = TCP_STATE_CLOSED;
+ TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc);
break;
case TCP_STATE_CLOSE_WAIT:
case TCP_STATE_FIN_WAIT_1:
case TCP_STATE_FIN_WAIT_2:
case TCP_STATE_CLOSING:
+ tcp_connection_timers_reset (tc);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
tc->state = TCP_STATE_CLOSED;
TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc);
- tcp_connection_timers_reset (tc);
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
break;
case TCP_STATE_CLOSED:
- return;
+ break;
}
}
switch (tc->state)
{
case TCP_STATE_SYN_SENT:
- tc->state = TCP_STATE_CLOSED;
+ /* Do nothing. Establish timer will pop and cleanup the connection */
break;
case TCP_STATE_SYN_RCVD:
tcp_connection_timers_reset (tc);
tcp_send_fin (tc);
tc->state = TCP_STATE_FIN_WAIT_1;
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_FINWAIT1_TIME);
break;
case TCP_STATE_ESTABLISHED:
if (!session_tx_fifo_max_dequeue (&tc->connection))
else
tc->flags |= TCP_CONN_FINPNDG;
tc->state = TCP_STATE_FIN_WAIT_1;
+ /* Set a timer in case the peer stops responding. Otherwise the
+ * connection will be stuck here forever. */
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_FINWAIT1_TIME);
break;
case TCP_STATE_CLOSE_WAIT:
if (!session_tx_fifo_max_dequeue (&tc->connection))
case TCP_STATE_FIN_WAIT_1:
tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
break;
+ case TCP_STATE_CLOSED:
+ tcp_connection_timers_reset (tc);
+ break;
default:
TCP_DBG ("state: %u", tc->state);
}
TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc);
- /* If in CLOSED and WAITCLOSE timer is not set, delete connection now */
+ /* If in CLOSED and WAITCLOSE timer is not set, delete connection.
+ * But instead of doing it now wait until next dispatch cycle to give
+ * the session layer a chance to clear unhandled events */
if (!tcp_timer_is_active (tc, TCP_TIMER_WAITCLOSE)
&& tc->state == TCP_STATE_CLOSED)
- tcp_connection_del (tc);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
}
static void
ip6_tcp_hdr_t hdr;
clib_memset (&hdr, 0, sizeof (hdr));
hdr.ip.protocol = IP_PROTOCOL_TCP;
- clib_memcpy (&hdr.ip.src_address, &tc->c_lcl_ip.ip6,
- sizeof (ip6_address_t));
- clib_memcpy (&hdr.ip.dst_address, &tc->c_rmt_ip.ip6,
- sizeof (ip6_address_t));
+ clib_memcpy_fast (&hdr.ip.src_address, &tc->c_lcl_ip.ip6,
+ sizeof (ip6_address_t));
+ clib_memcpy_fast (&hdr.ip.dst_address, &tc->c_rmt_ip.ip6,
+ sizeof (ip6_address_t));
hdr.tcp.src_port = tc->c_lcl_port;
hdr.tcp.dst_port = tc->c_rmt_port;
hash = ip6_compute_flow_hash (&hdr.ip, lb->lb_hash_config);
fib_prefix_t prefix;
u32 fib_index;
- clib_memcpy (&prefix.fp_addr, &tc->c_rmt_ip, sizeof (prefix.fp_addr));
+ clib_memcpy_fast (&prefix.fp_addr, &tc->c_rmt_ip, sizeof (prefix.fp_addr));
prefix.fp_proto = tc->c_is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
prefix.fp_len = tc->c_is_ip4 ? 32 : 128;
fib_index = fib_table_find (prefix.fp_proto, tc->c_fib_index);
static void
tcp_cc_init (tcp_connection_t * tc)
{
- tc->cc_algo = tcp_cc_algo_get (TCP_CC_NEWRENO);
+ tc->cc_algo = tcp_cc_algo_get (tcp_main.cc_algo);
tc->cc_algo->init (tc);
}
tc->snd_una = tc->iss;
tc->snd_nxt = tc->iss + 1;
tc->snd_una_max = tc->snd_nxt;
+ tc->srtt = 0;
}
void
index = tm->last_v6_address_rotor++;
if (tm->last_v6_address_rotor >= vec_len (tm->ip6_src_addresses))
tm->last_v6_address_rotor = 0;
- clib_memcpy (&lcl_addr->ip6, &tm->ip6_src_addresses[index],
- sizeof (ip6_address_t));
+ clib_memcpy_fast (&lcl_addr->ip6, &tm->ip6_src_addresses[index],
+ sizeof (ip6_address_t));
}
port = transport_alloc_local_port (TRANSPORT_PROTO_TCP, lcl_addr);
if (port < 1)
tcp_rcv_wnd_available (tc));
s = format (s, " tsval_recent %u tsval_recent_age %u\n", tc->tsval_recent,
tcp_time_now () - tc->tsval_recent_age);
- s = format (s, " rto %u rto_boff %u srtt %u rttvar %u rtt_ts %2.5f ",
- tc->rto, tc->rto_boff, tc->srtt, tc->rttvar, tc->rtt_ts);
- s = format (s, "rtt_seq %u\n", tc->rtt_seq - tc->iss);
+ s = format (s, " rto %u rto_boff %u srtt %u us %.3f rttvar %u rtt_ts %x",
+ tc->rto, tc->rto_boff, tc->srtt, tc->mrtt_us * 1000, tc->rttvar,
+ tc->rtt_ts);
+ s = format (s, " rtt_seq %u\n", tc->rtt_seq - tc->iss);
s = format (s, " cong: %U", format_tcp_congestion, tc);
if (tc->state >= TCP_STATE_ESTABLISHED)
{
int snd_space, snt_limited;
- if (PREDICT_FALSE (tcp_in_fastrecovery (tc)))
+ if (PREDICT_FALSE (tcp_in_fastrecovery (tc)
+ || tc->state == TCP_STATE_CLOSED))
return 0;
snd_space = tcp_available_output_snd_space (tc);
tcp_set_time_now (wrk);
tw_timer_expire_timers_16t_2w_512sl (&wrk->timer_wheel, now);
tcp_do_fastretransmits (wrk);
+ tcp_send_acks (wrk);
tcp_flush_frames_to_output (wrk);
}
return tcp_push_header (tc, b);
}
+static void
+tcp_session_flush_data (transport_connection_t * tconn)
+{
+ tcp_connection_t *tc = (tcp_connection_t *) tconn;
+ if (tc->flags & TCP_CONN_PSH_PENDING)
+ return;
+ tc->flags |= TCP_CONN_PSH_PENDING;
+ tc->psh_seq = tc->snd_una_max + transport_max_tx_dequeue (tconn) - 1;
+}
+
/* *INDENT-OFF* */
const static transport_proto_vft_t tcp_proto = {
.enable = vnet_tcp_enable_disable,
.send_space = tcp_session_send_space,
.update_time = tcp_update_time,
.tx_fifo_offset = tcp_session_tx_fifo_offset,
+ .flush_data = tcp_session_flush_data,
.format_connection = format_tcp_session,
.format_listener = format_tcp_listener_session,
.format_half_open = format_tcp_half_open_session,
tcp_connection_tx_pacer_update (tcp_connection_t * tc)
{
f64 srtt;
+ u64 rate;
if (!transport_connection_is_tx_paced (&tc->connection))
return;
srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us);
- transport_connection_tx_pacer_update (&tc->connection,
- ((f64) tc->cwnd) / srtt);
+ /* TODO should constrain to interface's max throughput but
+ * we don't have link speeds for sw ifs ..*/
+ rate = tc->cwnd / srtt;
+ transport_connection_tx_pacer_update (&tc->connection, rate);
}
void
if (tc)
{
ASSERT (tc->state == TCP_STATE_SYN_SENT);
- session_stream_connect_notify (&tc->connection, 1 /* fail */ );
- TCP_DBG ("establish pop: %U", format_tcp_connection, tc, 2);
+ /* Notify app if we haven't tried to clean this up already */
+ if (!(tc->flags & TCP_CONN_HALF_OPEN_DONE))
+ session_stream_connect_notify (&tc->connection, 1 /* fail */ );
}
else
{
/* note: the connection may have already disappeared */
if (PREDICT_FALSE (tc == 0))
return;
- TCP_DBG ("establish pop: %U", format_tcp_connection, tc, 2);
ASSERT (tc->state == TCP_STATE_SYN_RCVD);
/* Start cleanup. App wasn't notified yet so use delete notify as
* opposed to delete to cleanup session layer state. */
/* Session didn't come back with a close(). Send FIN either way
* and switch to LAST_ACK. */
- if (tc->state == TCP_STATE_CLOSE_WAIT)
+ if (tc->state == TCP_STATE_CLOSE_WAIT && (tc->flags & TCP_CONN_FINPNDG))
{
- if (tc->flags & TCP_CONN_FINSNT)
- {
- clib_warning ("FIN was sent and still in CLOSE WAIT. Weird!");
- }
-
/* Make sure we don't try to send unsent data */
tcp_connection_timers_reset (tc);
tcp_cong_recovery_off (tc);
/* Don't delete the connection yet */
return;
}
+ else if (tc->state == TCP_STATE_FIN_WAIT_1)
+ {
+ tcp_connection_timers_reset (tc);
+ tc->state = TCP_STATE_CLOSED;
+ /* Wait for session layer to clean up tx events */
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ return;
+ }
tcp_connection_del (tc);
}
vec_validate (tm->wrk_ctx[thread].ongoing_fast_rxt, 255);
vec_validate (tm->wrk_ctx[thread].postponed_fast_rxt, 255);
vec_validate (tm->wrk_ctx[thread].pending_deq_acked, 255);
+ vec_validate (tm->wrk_ctx[thread].pending_acks, 255);
+ vec_validate (tm->wrk_ctx[thread].pending_disconnects, 255);
vec_reset_length (tm->wrk_ctx[thread].pending_fast_rxt);
vec_reset_length (tm->wrk_ctx[thread].ongoing_fast_rxt);
vec_reset_length (tm->wrk_ctx[thread].postponed_fast_rxt);
vec_reset_length (tm->wrk_ctx[thread].pending_deq_acked);
+ vec_reset_length (tm->wrk_ctx[thread].pending_acks);
+ vec_reset_length (tm->wrk_ctx[thread].pending_disconnects);
tm->wrk_ctx[thread].vm = vlib_mains[thread];
/*
FIB_PROTOCOL_IP6, tcp6_output_node.index);
tcp_api_reference ();
+ tm->tx_pacing = 1;
+ tm->cc_algo = TCP_CC_NEWRENO;
return 0;
}
VLIB_INIT_FUNCTION (tcp_init);
+uword
+unformat_tcp_cc_algo (unformat_input_t * input, va_list * va)
+{
+ uword *result = va_arg (*va, uword *);
+
+ if (unformat (input, "newreno"))
+ *result = TCP_CC_NEWRENO;
+ else if (unformat (input, "cubic"))
+ *result = TCP_CC_CUBIC;
+ else
+ return 0;
+
+ return 1;
+}
+
+uword
+unformat_tcp_cc_algo_cfg (unformat_input_t * input, va_list * va)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ tcp_cc_algorithm_t *cc_alg;
+ unformat_input_t sub_input;
+ int found = 0;
+
+ vec_foreach (cc_alg, tm->cc_algos)
+ {
+ if (!unformat (input, cc_alg->name))
+ continue;
+
+ if (cc_alg->unformat_cfg
+ && unformat (input, "%U", unformat_vlib_cli_sub_input, &sub_input))
+ {
+ if (cc_alg->unformat_cfg (&sub_input))
+ found = 1;
+ }
+ }
+ return found;
+}
+
static clib_error_t *
tcp_config_fn (vlib_main_t * vm, unformat_input_t * input)
{
else if (unformat (input, "max-rx-fifo %U", unformat_memory_size,
&tm->max_rx_fifo))
;
- else if (unformat (input, "tx-pacing"))
- tm->tx_pacing = 1;
+ else if (unformat (input, "no-tx-pacing"))
+ tm->tx_pacing = 0;
+ else if (unformat (input, "cc-algo %U", unformat_tcp_cc_algo,
+ &tm->cc_algo))
+ ;
+ else if (unformat (input, "%U", unformat_tcp_cc_algo_cfg))
+ ;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);