*
*/
static void
-tcp_half_open_connection_del (tcp_connection_t * tc)
+tcp_half_open_connection_free (tcp_connection_t * tc)
{
tcp_main_t *tm = vnet_get_tcp_main ();
clib_spinlock_lock_if_init (&tm->half_open_lock);
if (tc->c_thread_index != vlib_get_thread_index ())
return 1;
+ session_half_open_delete_notify (TRANSPORT_PROTO_TCP, tc->c_s_ho_handle);
wrk = tcp_get_worker (tc->c_thread_index);
tcp_timer_reset (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN);
- tcp_half_open_connection_del (tc);
+ tcp_half_open_connection_free (tc);
return 0;
}
tcp_connection_cleanup (tc);
}
+static void
+tcp_session_cleanup_ho (u32 conn_index)
+{
+ tcp_worker_ctx_t *wrk;
+ tcp_connection_t *tc;
+
+ tc = tcp_half_open_connection_get (conn_index);
+ wrk = tcp_get_worker (tc->c_thread_index);
+ tcp_timer_reset (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN);
+ tcp_half_open_connection_free (tc);
+}
+
static void
tcp_session_reset (u32 conn_index, u32 thread_index)
{
/* We should have enough space for 40 bytes of options */
ASSERT (tc->snd_mss > 45);
- /* If we use timestamp option, account for it */
+ /* If we use timestamp option, account for it and make sure
+ * the options are 4-byte aligned */
if (tcp_opts_tstamp (&tc->rcv_opts))
- tc->snd_mss -= TCP_OPTION_LEN_TIMESTAMP;
+ tc->snd_mss -= TCP_OPTION_LEN_TIMESTAMP + 2 /* alignment */;
}
/**
tc->iss = tcp_generate_random_iss (tc);
tc->snd_una = tc->iss;
tc->snd_nxt = tc->iss + 1;
- tc->snd_una_max = tc->snd_nxt;
- tc->srtt = 100; /* 100 ms */
+ tc->srtt = 0.1 * THZ; /* 100 ms */
if (!tcp_cfg.csum_offload)
tc->cfg_flags |= TCP_CFG_F_NO_CSUM_OFFLOAD;
rmt, &lcl_addr, &lcl_port);
if (rv)
- return rv;
+ {
+ if (rv != SESSION_E_PORTINUSE)
+ return rv;
+
+ if (session_lookup_connection (rmt->fib_index, &lcl_addr, &rmt->ip,
+ lcl_port, rmt->port, TRANSPORT_PROTO_UDP,
+ rmt->is_ip4))
+ return SESSION_E_PORTINUSE;
+
+ /* 5-tuple is available so increase lcl endpoint refcount and proceed
+ * with connection allocation */
+ transport_share_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr,
+ lcl_port);
+ }
/*
* Create connection and send SYN
u32 __clib_unused thread_index = va_arg (*args, u32);
u32 verbose = va_arg (*args, u32);
tcp_connection_t *tc = tcp_listener_get (tci);
- s = format (s, "%-50U", format_tcp_connection_id, tc);
+ s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_tcp_connection_id, tc);
if (verbose)
- s = format (s, "%-15U", format_tcp_state, tc->state);
+ s = format (s, "%-" SESSION_CLI_STATE_LEN "U", format_tcp_state,
+ tc->state);
return s;
}
{
u16 goal_size = tc->snd_mss;
- goal_size = TCP_MAX_GSO_SZ - tc->snd_mss % TCP_MAX_GSO_SZ;
+ goal_size = tcp_cfg.max_gso_size - tc->snd_mss % tcp_cfg.max_gso_size;
goal_size = clib_min (goal_size, tc->snd_wnd / 2);
return goal_size > tc->snd_mss ? goal_size : tc->snd_mss;
{
int snd_space;
- if (PREDICT_FALSE (tcp_in_fastrecovery (tc)
+ /* Fast path is disabled when recovery is on. @ref tcp_session_custom_tx
+ * controls both retransmits and the sending of new data while congested
+ */
+ if (PREDICT_FALSE (tcp_in_cong_recovery (tc)
|| tc->state == TCP_STATE_CLOSED))
return 0;
* to force the peer to send enough dupacks to start retransmitting as
* per Limited Transmit (RFC3042)
*/
- if (PREDICT_FALSE (tc->rcv_dupacks != 0 || tc->sack_sb.sacked_bytes))
+ if (PREDICT_FALSE (tc->rcv_dupacks || tc->sack_sb.sacked_bytes))
{
- if (tc->limited_transmit != tc->snd_nxt
- && (seq_lt (tc->limited_transmit, tc->snd_nxt - 2 * tc->snd_mss)
- || seq_gt (tc->limited_transmit, tc->snd_nxt)))
+ int snt_limited, n_pkts;
+
+ n_pkts = tcp_opts_sack_permitted (&tc->rcv_opts)
+ ? tc->sack_sb.reorder - 1 : 2;
+
+ if ((seq_lt (tc->limited_transmit, tc->snd_nxt - n_pkts * tc->snd_mss)
+ || seq_gt (tc->limited_transmit, tc->snd_nxt)))
tc->limited_transmit = tc->snd_nxt;
ASSERT (seq_leq (tc->limited_transmit, tc->snd_nxt));
- int snt_limited = tc->snd_nxt - tc->limited_transmit;
- snd_space = clib_max ((int) 2 * tc->snd_mss - snt_limited, 0);
+ snt_limited = tc->snd_nxt - tc->limited_transmit;
+ snd_space = clib_max (n_pkts * tc->snd_mss - snt_limited, 0);
}
return tcp_round_snd_space (tc, snd_space);
}
static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] =
{
tcp_timer_retransmit_handler,
- tcp_timer_delack_handler,
tcp_timer_persist_handler,
tcp_timer_waitclose_handler,
tcp_timer_retransmit_syn_handler,
if (PREDICT_FALSE (!tc))
continue;
+ /* Skip if the timer is not pending. Probably it was reset while
+ * waiting for dispatch */
+ if (PREDICT_FALSE (!(tc->pending_timers & (1 << timer_id))))
+ continue;
+
+ tc->pending_timers &= ~(1 << timer_id);
+
/* Skip timer if it was rearmed while pending dispatch */
if (PREDICT_FALSE (tc->timers[timer_id] != TCP_TIMER_HANDLE_INVALID))
continue;
tcp_set_time_now (wrk);
tcp_handle_cleanups (wrk, now);
- tw_timer_expire_timers_16t_2w_512sl (&wrk->timer_wheel, now);
+ tcp_timer_expire_timers (&wrk->timer_wheel, now);
tcp_dispatch_pending_timers (wrk);
}
.connect = tcp_session_open,
.close = tcp_session_close,
.cleanup = tcp_session_cleanup,
+ .cleanup_ho = tcp_session_cleanup_ho,
.reset = tcp_session_reset,
.send_params = tcp_session_send_params,
.update_time = tcp_update_time,
TCP_EVT (TCP_EVT_TIMER_POP, connection_index, timer_id);
tc->timers[timer_id] = TCP_TIMER_HANDLE_INVALID;
+ tc->pending_timers |= (1 << timer_id);
}
clib_fifo_add (wrk->pending_timers, expired_timers, n_expired);
session_queue_run_on_main_thread (wrk->vm);
}
-static void
-tcp_initialize_timer_wheels (tcp_main_t * tm)
-{
- tw_timer_wheel_16t_2w_512sl_t *tw;
- /* *INDENT-OFF* */
- foreach_vlib_main (({
- tw = &tm->wrk_ctx[ii].timer_wheel;
- tw_timer_wheel_init_16t_2w_512sl (tw, tcp_expired_timers_dispatch,
- TCP_TIMER_TICK, ~0);
- tw->last_run_time = vlib_time_now (this_vlib_main);
- }));
- /* *INDENT-ON* */
-}
-
static void
tcp_initialize_iss_seed (tcp_main_t * tm)
{
vec_reset_length (wrk->pending_deq_acked);
vec_reset_length (wrk->pending_disconnects);
vec_reset_length (wrk->pending_resets);
- wrk->vm = vlib_mains[thread];
+ wrk->vm = vlib_get_main_by_index (thread);
wrk->max_timers_per_loop = 10;
if (thread > 0)
*/
if ((thread > 0 || num_threads == 1) && prealloc_conn_per_wrk)
pool_init_fixed (wrk->connections, prealloc_conn_per_wrk);
+
+ tcp_timer_initialize_wheel (&wrk->timer_wheel,
+ tcp_expired_timers_dispatch,
+ vlib_time_now (vm));
}
/*
pool_init_fixed (tm->half_open_connections,
tcp_cfg.preallocated_half_open_connections);
- /* Initialize clocks per tick for TCP timestamp. Used to compute
- * monotonically increasing timestamps. */
- tm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock
- / TCP_TSTAMP_RESOLUTION;
-
if (num_threads > 1)
{
clib_spinlock_init (&tm->half_open_lock);
}
- tcp_initialize_timer_wheels (tm);
tcp_initialize_iss_seed (tm);
tm->bytes_per_buffer = vlib_buffer_get_default_data_size (vm);
tcp_cfg.enable_tx_pacing = 1;
tcp_cfg.allow_tso = 0;
tcp_cfg.csum_offload = 1;
- tcp_cfg.cc_algo = TCP_CC_NEWRENO;
+ tcp_cfg.cc_algo = TCP_CC_CUBIC;
tcp_cfg.rwnd_min_update_ack = 1;
+ tcp_cfg.max_gso_size = TCP_MAX_GSO_SZ;
+
+ /* Time constants defined as timer tick (100us) multiples */
+ tcp_cfg.closewait_time = 20000; /* 2s */
+ tcp_cfg.timewait_time = 100000; /* 10s */
+ tcp_cfg.finwait1_time = 600000; /* 60s */
+ tcp_cfg.lastack_time = 300000; /* 30s */
+ tcp_cfg.finwait2_time = 300000; /* 30s */
+ tcp_cfg.closing_time = 300000; /* 30s */
- /* Time constants defined as timer tick (100ms) multiples */
- tcp_cfg.delack_time = 1; /* 0.1s */
- tcp_cfg.closewait_time = 20; /* 2s */
- tcp_cfg.timewait_time = 100; /* 10s */
- tcp_cfg.finwait1_time = 600; /* 60s */
- tcp_cfg.lastack_time = 300; /* 30s */
- tcp_cfg.finwait2_time = 300; /* 30s */
- tcp_cfg.closing_time = 300; /* 30s */
+ /* This value is seconds */
tcp_cfg.cleanup_time = 0.1; /* 100ms */
}