/* 4th: check the SYN bit (in window) */
if (PREDICT_FALSE (tcp_syn (th0)))
{
+ /* As per RFC5961 send challenge ack instead of reset */
+ tcp_program_ack (wrk, tc0);
*error0 = TCP_ERROR_SPURIOUS_SYN;
- tcp_send_reset (tc0);
goto error;
}
if (!(seq_leq (tc->snd_una, vnet_buffer (b)->tcp.ack_number)
&& seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)))
{
- if (seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_una_max))
+ if (seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_una_max)
+ && seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_una))
{
tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
goto acceptable;
* seq_lt (tc->snd_una, ack). This is a condition for calling update_rtt */
else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr)
{
- u32 now = tcp_time_now_w_thread (tc->c_thread_index);
+ u32 now = tcp_tstamp (tc);
mrtt = clib_max (now - tc->rcv_opts.tsecr, 1);
}
#ifndef CLIB_MARCH_VARIANT
void
-tcp_cc_fastrecovery_exit (tcp_connection_t * tc)
+tcp_cc_fastrecovery_clear (tcp_connection_t * tc)
{
- tc->cc_algo->recovered (tc);
tc->snd_rxt_bytes = 0;
tc->rcv_dupacks = 0;
- tc->snd_rxt_bytes = 0;
tc->rtt_ts = 0;
tcp_fastrecovery_off (tc);
}
else if (tcp_in_fastrecovery (tc))
{
- tcp_cc_fastrecovery_exit (tc);
+ tcp_cc_fastrecovery_clear (tc);
}
+ tcp_cc_undo_recovery (tc);
ASSERT (tc->rto_boff == 0);
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 5);
}
if (tcp_in_recovery (tc))
tcp_cc_recovery_exit (tc);
else if (tcp_in_fastrecovery (tc))
- tcp_cc_fastrecovery_exit (tc);
+ {
+ tcp_cc_recovered (tc);
+ tcp_cc_fastrecovery_clear (tc);
+ }
ASSERT (tc->rto_boff == 0);
ASSERT (!tcp_in_cong_recovery (tc));
}
static void
-tcp_cc_update (tcp_connection_t * tc, vlib_buffer_t * b)
+tcp_cc_update (tcp_connection_t * tc, tcp_rate_sample_t * rs)
{
ASSERT (!tcp_in_cong_recovery (tc) || tcp_is_lost_fin (tc));
/* Congestion avoidance */
- tcp_cc_rcv_ack (tc);
+ tcp_cc_rcv_ack (tc, rs);
/* If a cumulative ack, make sure dupacks is 0 */
tc->rcv_dupacks = 0;
for (i = 0; i < vec_len (ongoing_fast_rxt); i++)
{
tc = tcp_connection_get (ongoing_fast_rxt[i], thread_index);
+ if (!tc)
+ continue;
if (!tcp_in_fastrecovery (tc))
{
tc->flags &= ~TCP_CONN_FRXT_PENDING;
* One function to rule them all ... and in the darkness bind them
*/
static void
-tcp_cc_handle_event (tcp_connection_t * tc, u32 is_dack)
+tcp_cc_handle_event (tcp_connection_t * tc, tcp_rate_sample_t * rs,
+ u32 is_dack)
{
u32 rxt_delivered;
if (tc->rcv_dupacks > TCP_DUPACK_THRESHOLD && !tc->bytes_acked)
{
ASSERT (tcp_in_fastrecovery (tc));
- tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK);
+ tcp_cc_rcv_cong_ack (tc, TCP_CC_DUPACK, rs);
return;
}
else if (tcp_should_fastrecover (tc))
}
tcp_cc_init_congestion (tc);
- tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK);
+ tcp_cc_rcv_cong_ack (tc, TCP_CC_DUPACK, rs);
if (tcp_opts_sack_permitted (&tc->rcv_opts))
{
else if (!tc->bytes_acked
|| (tc->bytes_acked && !tcp_in_cong_recovery (tc)))
{
- tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK);
+ tcp_cc_rcv_cong_ack (tc, TCP_CC_DUPACK, rs);
return;
}
else
}
/* Treat as congestion avoidance ack */
- tcp_cc_rcv_ack (tc);
+ tcp_cc_rcv_ack (tc, rs);
return;
}
/* Post RTO timeout don't try anything fancy */
if (tcp_in_recovery (tc))
{
- tcp_cc_rcv_ack (tc);
+ tcp_cc_rcv_ack (tc, rs);
transport_add_tx_event (&tc->connection);
return;
}
else
{
tcp_fastrecovery_first_on (tc);
- /* Reuse last bytes delivered to track total bytes acked */
- tc->sack_sb.last_bytes_delivered += tc->bytes_acked;
if (tc->snd_rxt_bytes > tc->bytes_acked)
tc->snd_rxt_bytes -= tc->bytes_acked;
else
tc->snd_rxt_bytes = 0;
}
- tc->cc_algo->rcv_cong_ack (tc, TCP_CC_PARTIALACK);
+ tcp_cc_rcv_cong_ack (tc, TCP_CC_PARTIALACK, rs);
/*
* Since this was a partial ack, try to retransmit some more data
tcp_header_t * th, u32 * error)
{
u32 prev_snd_wnd, prev_snd_una;
+ tcp_rate_sample_t rs = { 0 };
u8 is_dack;
TCP_EVT_DBG (TCP_EVT_CC_STAT, tc);
{
/* We've probably entered recovery and the peer still has some
* of the data we've sent. Update snd_nxt and accept the ack */
- if (seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_una_max))
+ if (seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_una_max)
+ && seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_una))
{
tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
goto process_ack;
TCP_EVT_DBG (TCP_EVT_ACK_RCV_ERR, tc, 1,
vnet_buffer (b)->tcp.ack_number);
if (tcp_in_fastrecovery (tc) && tc->rcv_dupacks == TCP_DUPACK_THRESHOLD)
- tcp_cc_handle_event (tc, 1);
+ tcp_cc_handle_event (tc, 0, 1);
/* Don't drop yet */
return 0;
}
tcp_update_rtt (tc, vnet_buffer (b)->tcp.ack_number);
}
+ if (tc->flags & TCP_CONN_RATE_SAMPLE)
+ tcp_bt_sample_delivery_rate (tc, &rs);
+
TCP_EVT_DBG (TCP_EVT_ACK_RCVD, tc);
/*
if (tcp_ack_is_cc_event (tc, b, prev_snd_wnd, prev_snd_una, &is_dack))
{
- tcp_cc_handle_event (tc, is_dack);
+ tcp_cc_handle_event (tc, &rs, is_dack);
if (!tcp_in_cong_recovery (tc))
{
*error = TCP_ERROR_ACK_OK;
/*
* Update congestion control (slow start/congestion avoidance)
*/
- tcp_cc_update (tc, b);
+ tcp_cc_update (tc, &rs);
*error = TCP_ERROR_ACK_OK;
return 0;
}
tcp_rcv_fin (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b,
u32 * error)
{
+ /* Reject out-of-order fins */
+ if (vnet_buffer (b)->tcp.seq_end != tc->rcv_nxt)
+ return;
+
/* Account for the FIN and send ack */
tc->rcv_nxt += 1;
tcp_program_ack (wrk, tc);
newest = svm_fifo_newest_ooo_segment (s0->rx_fifo);
if (newest)
{
- offset = ooo_segment_offset (s0->rx_fifo, newest);
+ offset = ooo_segment_offset_prod (s0->rx_fifo, newest);
ASSERT (offset <= vnet_buffer (b)->tcp.seq_number - tc->rcv_nxt);
start = tc->rcv_nxt + offset;
end = start + ooo_segment_length (s0->rx_fifo, newest);
* allocate session send reset */
if (session_stream_connect_notify (&new_tc0->connection, 0))
{
- clib_warning ("connect notify fail");
tcp_send_reset_w_pkt (new_tc0, b0, my_thread_index, is_ip4);
tcp_connection_cleanup (new_tc0);
+ error0 = TCP_ERROR_CREATE_SESSION_FAIL;
goto drop;
}
tcp_connection_cleanup (new_tc0);
tcp_send_reset_w_pkt (tc0, b0, my_thread_index, is_ip4);
TCP_EVT_DBG (TCP_EVT_RST_SENT, tc0);
+ error0 = TCP_ERROR_CREATE_SESSION_FAIL;
goto drop;
}
max_dequeue = transport_max_tx_dequeue (&tc0->connection);
if (max_dequeue <= tc0->burst_acked)
tcp_send_fin (tc0);
+ /* If a fin was received and data was acked extend wait */
+ else if ((tc0->flags & TCP_CONN_FINRCVD) && tc0->bytes_acked)
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE,
+ TCP_CLOSEWAIT_TIME);
}
/* If FIN is ACKed */
else if (tc0->snd_una == tc0->snd_nxt)
break;
/* Still have outstanding tx data */
- if (transport_max_tx_dequeue (&tc0->connection))
+ max_dequeue = transport_max_tx_dequeue (&tc0->connection);
+ if (max_dequeue > tc0->burst_acked)
break;
tcp_send_fin (tc0);
break;
case TCP_STATE_FIN_WAIT_1:
tc0->rcv_nxt += 1;
- /* If data is outstanding stay in FIN_WAIT_1 and try to finish
- * sending it. */
+
if (tc0->flags & TCP_CONN_FINPNDG)
{
+ /* If data is outstanding, stay in FIN_WAIT_1 and try to finish
+ * sending it. Since we already received a fin, do not wait
+ * for too long. */
tc0->flags |= TCP_CONN_FINRCVD;
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
}
else
{
tcp_connection_set_state (tc0, TCP_STATE_CLOSING);
tcp_program_ack (wrk, tc0);
+ /* Wait for ACK for our FIN but not forever */
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
}
- /* Wait for ACK for our FIN but not forever */
- tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
break;
case TCP_STATE_FIN_WAIT_2:
/* Got FIN, send ACK! Be more aggressive with resource cleanup */
thread_index);
tcp_inc_counter (rcv_process, TCP_ERROR_MSG_QUEUE_FULL, errors);
tcp_handle_postponed_dequeues (wrk);
+ tcp_handle_disconnects (wrk);
vlib_buffer_free (vm, first_buffer, from_frame->n_vectors);
return from_frame->n_vectors;
tcp_connection_init_vars (child0);
child0->rto = TCP_RTO_MIN;
- TCP_EVT_DBG (TCP_EVT_SYN_RCVD, child0, 1);
if (session_stream_accept (&child0->connection, lc0->c_s_index,
- 0 /* notify */ ))
+ lc0->c_thread_index, 0 /* notify */ ))
{
tcp_connection_cleanup (child0);
error0 = TCP_ERROR_CREATE_SESSION_FAIL;
goto drop;
}
+ TCP_EVT_DBG (TCP_EVT_SYN_RCVD, child0, 1);
child0->tx_fifo_size = transport_tx_fifo_size (&child0->connection);
tcp_send_synack (child0);
tcp_timer_set (child0, TCP_TIMER_ESTABLISH, TCP_SYN_RCVD_TIME);