u32
tcp_initial_window_to_advertise (tcp_connection_t * tc)
{
+ tcp_main_t *tm = &tcp_main;
u32 max_fifo;
/* Initial wnd for SYN. Fifos are not allocated yet.
* Use some predefined value. For SYN-ACK we still want the
* scale to be computed in the same way */
- max_fifo = TCP_MAX_RX_FIFO_SIZE;
+ max_fifo = tm->max_rx_fifo ? tm->max_rx_fifo : TCP_MAX_RX_FIFO_SIZE;
tc->rcv_wscale = tcp_window_compute_scale (max_fifo);
tc->rcv_wnd = tcp_initial_wnd_unscaled (tc);
initial_wnd = tcp_initial_window_to_advertise (tc);
/* Make and write options */
- memset (&snd_opts, 0, sizeof (snd_opts));
+ clib_memset (&snd_opts, 0, sizeof (snd_opts));
tcp_opts_len = tcp_make_syn_options (&snd_opts, tc->rcv_wscale);
tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
tcp_header_t *th;
u16 initial_wnd;
- memset (snd_opts, 0, sizeof (*snd_opts));
+ clib_memset (snd_opts, 0, sizeof (*snd_opts));
tcp_reuse_buffer (vm, b);
initial_wnd = tcp_initial_window_to_advertise (tc);
u32 bi;
u8 fin_snt = 0;
- tcp_retransmit_timer_force_update (tc);
- if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
- return;
- b = vlib_get_buffer (vm, bi);
- tcp_init_buffer (vm, b);
fin_snt = tc->flags & TCP_CONN_FINSNT;
if (fin_snt)
tc->snd_nxt = tc->snd_una;
+
+ if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
+ {
+ /* Out of buffers so program fin retransmit ASAP */
+ tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, 1);
+ goto post_enqueue;
+ }
+
+ tcp_retransmit_timer_force_update (tc);
+ b = vlib_get_buffer (vm, bi);
+ tcp_init_buffer (vm, b);
tcp_make_fin (tc, b);
tcp_enqueue_to_output_now (vm, b, bi, tc->c_is_ip4);
+ TCP_EVT_DBG (TCP_EVT_FIN_SENT, tc);
+
+post_enqueue:
if (!fin_snt)
{
tc->flags |= TCP_CONN_FINSNT;
{
tc->snd_nxt = tc->snd_una_max;
}
- TCP_EVT_DBG (TCP_EVT_FIN_SENT, tc);
}
always_inline u8
tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, /* compute opts */ 0,
/* burst */ 1);
tc->snd_una_max = tc->snd_nxt;
- ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd));
+ ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd
+ + tcp_fastrecovery_sent_1_smss (tc) * tc->snd_mss));
tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
/* If not tracking an ACK, start tracking */
if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))
{
- tc->rtt_ts = tcp_time_now ();
+ tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
tc->rtt_seq = tc->snd_nxt;
}
if (PREDICT_FALSE (!tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)))
ASSERT (n_bytes == max_deq_bytes);
b[0]->current_length = n_bytes;
tcp_push_hdr_i (tc, *b, tc->state, /* compute opts */ 0, /* burst */ 0);
+ if (seq_gt (tc->snd_nxt, tc->snd_una_max))
+ tc->snd_una_max = tc->snd_nxt;
}
/* Split mss into multiple buffers */
else
}
tcp_push_hdr_i (tc, *b, tc->state, /* compute opts */ 0, /* burst */ 0);
+ if (seq_gt (tc->snd_nxt, tc->snd_una_max))
+ tc->snd_una_max = tc->snd_nxt;
}
ASSERT (n_bytes > 0);
/* Cleanly recover cc (also clears up fast retransmit) */
if (tcp_in_fastrecovery (tc))
- tcp_cc_fastrecovery_exit (tc);
+ {
+ /* TODO be less aggressive about this */
+ scoreboard_clear (&tc->sack_sb);
+ tcp_cc_fastrecovery_exit (tc);
+ }
/* Start again from the beginning */
tc->cc_algo->congestion (tc);
return;
}
- /* Shouldn't be here */
+ /* Shouldn't be here. This condition is tricky because it has to take
+ * into account boff > 0 due to persist timeout. */
if ((tc->rto_boff == 0 && tc->snd_una == tc->snd_una_max)
- || (tc->rto_boff > 0 && seq_geq (tc->snd_una, tc->snd_congestion)))
+ || (tc->rto_boff > 0 && seq_geq (tc->snd_una, tc->snd_congestion)
+ && !tcp_flight_size (tc)))
{
- tcp_recovery_off (tc);
+ ASSERT (!tcp_in_recovery (tc));
+ tc->rto_boff = 0;
return;
}
- /* We're not in recovery so make sure rto_boff is 0 */
+ /* We're not in recovery so make sure rto_boff is 0. Can be non 0 due
+ * to persist timer timeout */
if (!tcp_in_recovery (tc) && tc->rto_boff > 0)
{
tc->rto_boff = 0;
/* First retransmit timeout */
if (tc->rto_boff == 1)
tcp_rxt_timeout_cc (tc);
+ else
+ scoreboard_clear (&tc->sack_sb);
+
+ /* If we've sent beyond snd_congestion, update it */
+ if (seq_gt (tc->snd_una_max, tc->snd_congestion))
+ tc->snd_congestion = tc->snd_una_max;
tc->snd_una_max = tc->snd_nxt = tc->snd_una;
tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
- /* Send one segment. Note that n_bytes may be zero due to buffer shortfall */
+ /* Send one segment. Note that n_bytes may be zero due to buffer
+ * shortfall */
n_bytes = tcp_prepare_retransmit_segment (tc, 0, tc->snd_mss, &b);
- /* TODO be less aggressive about this */
- scoreboard_clear (&tc->sack_sb);
-
if (n_bytes == 0)
{
- tcp_retransmit_timer_set (tc);
+ tcp_retransmit_timer_force_update (tc);
return;
}
tc->snd_rxt_ts = tcp_time_now ();
tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
- tcp_retransmit_timer_update (tc);
+ tcp_retransmit_timer_force_update (tc);
}
/* Retransmit for SYN */
else if (tc->state == TCP_STATE_SYN_SENT)
* Try to force the first unsent segment (or buffer)
*/
if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
- return;
+ {
+ tcp_persist_timer_set (tc);
+ return;
+ }
b = vlib_get_buffer (vm, bi);
data = tcp_init_buffer (vm, b);
/**
* Retransmit first unacked segment
*/
-void
+int
tcp_retransmit_first_unacked (tcp_connection_t * tc)
{
vlib_main_t *vm = vlib_get_main ();
tc->snd_nxt = tc->snd_una;
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 2);
+
n_bytes = tcp_prepare_retransmit_segment (tc, 0, tc->snd_mss, &b);
if (!n_bytes)
- return;
+ return -1;
+
bi = vlib_get_buffer_index (vm, b);
tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
-
tc->snd_nxt = old_snd_nxt;
+
+ return 0;
}
/**
* Do fast retransmit with SACKs
*/
-void
-tcp_fast_retransmit_sack (tcp_connection_t * tc)
+int
+tcp_fast_retransmit_sack (tcp_connection_t * tc, u32 burst_size)
{
vlib_main_t *vm = vlib_get_main ();
u32 n_written = 0, offset, max_bytes, n_segs = 0;
old_snd_nxt = tc->snd_nxt;
sb = &tc->sack_sb;
snd_space = tcp_available_cc_snd_space (tc);
+ hole = scoreboard_get_hole (sb, sb->cur_rxt_hole);
if (snd_space < tc->snd_mss)
- goto done;
+ {
+ tcp_program_fastretransmit (tc);
+ goto done;
+ }
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 0);
- hole = scoreboard_get_hole (sb, sb->cur_rxt_hole);
- while (hole && snd_space > 0 && n_segs++ < VLIB_FRAME_SIZE)
+ while (snd_space > 0 && n_segs < burst_size)
{
hole = scoreboard_next_rxt_hole (sb, hole,
tcp_fastrecovery_sent_1_smss (tc),
if (!can_rescue || !(seq_lt (sb->rescue_rxt, tc->snd_una)
|| seq_gt (sb->rescue_rxt,
tc->snd_congestion)))
- break;
+ {
+ if (tcp_fastrecovery_first (tc))
+ break;
+
+ /* We tend to lose the first segment. Try retransmitting it
+ * again, but only once and only after we've tried everything else */
+ hole = scoreboard_first_hole (sb);
+ if (hole && hole->start == tc->snd_una)
+ {
+ tcp_retransmit_first_unacked (tc);
+ tcp_fastrecovery_first_on (tc);
+ n_segs += 1;
+ }
+ break;
+ }
/* If rescue rxt undefined or less than snd_una then one segment of
* up to SMSS octets that MUST include the highest outstanding
bi = vlib_get_buffer_index (vm, b);
tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
+ n_segs += 1;
break;
}
tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
ASSERT (n_written <= snd_space);
snd_space -= n_written;
+ n_segs += 1;
}
+ if (hole)
+ tcp_program_fastretransmit (tc);
+
done:
/* If window allows, send 1 SMSS of new data */
tc->snd_nxt = old_snd_nxt;
+ return n_segs;
}
/**
* Fast retransmit without SACK info
*/
-void
-tcp_fast_retransmit_no_sack (tcp_connection_t * tc)
+int
+tcp_fast_retransmit_no_sack (tcp_connection_t * tc, u32 burst_size)
{
vlib_main_t *vm = vlib_get_main ();
u32 n_written = 0, offset = 0, bi, old_snd_nxt;
- int snd_space;
+ int snd_space, n_segs = 0;
vlib_buffer_t *b;
ASSERT (tcp_in_fastrecovery (tc));
tc->snd_nxt = tc->snd_una;
snd_space = tcp_available_cc_snd_space (tc);
- while (snd_space > 0)
+ while (snd_space > 0 && n_segs < burst_size)
{
offset += n_written;
n_written = tcp_prepare_retransmit_segment (tc, offset, snd_space, &b);
bi = vlib_get_buffer_index (vm, b);
tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
snd_space -= n_written;
+ n_segs += 1;
}
+ /* More data to resend */
+ if (seq_lt (tc->snd_nxt, tc->snd_congestion))
+ tcp_program_fastretransmit (tc);
+
/* Restore snd_nxt. If window allows, send 1 SMSS of new data */
tc->snd_nxt = old_snd_nxt;
+
+ return n_segs;
}
/**
* Do fast retransmit
+ *
+ * Selects the retransmit variant based on tc->rcv_opts: the SACK-based
+ * variant when tcp_opts_sack_permitted() holds, the non-SACK variant
+ * otherwise.
+ *
+ * @param tc connection to retransmit for
+ * @param burst_size forwarded unchanged to the selected variant; both
+ * variants stop looping once their n_segs counter reaches it
+ * @return the selected variant's return value (both return their
+ * n_segs counter)
*/
-void
-tcp_fast_retransmit (tcp_connection_t * tc)
+int
+tcp_fast_retransmit (tcp_connection_t * tc, u32 burst_size)
{
if (tcp_opts_sack_permitted (&tc->rcv_opts))
- tcp_fast_retransmit_sack (tc);
+ return tcp_fast_retransmit_sack (tc, burst_size);
else
- tcp_fast_retransmit_no_sack (tc);
+ return tcp_fast_retransmit_no_sack (tc, burst_size);
}
static u32
if (n_left_from > 1)
{
- vlib_prefetch_buffer_header (b[0], STORE);
- CLIB_PREFETCH (b[0]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (b[1], STORE);
+ CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
}
next[0] = TCP_OUTPUT_NEXT_IP_LOOKUP;