{
ASSERT (timestamp_leq (tc->tsval_recent, tc->rcv_opts.tsval));
tc->tsval_recent = tc->rcv_opts.tsval;
{
ASSERT (timestamp_leq (tc->tsval_recent, tc->rcv_opts.tsval));
tc->tsval_recent = tc->rcv_opts.tsval;
/* If it just so happens that a segment updates tsval_recent for a
* segment over 24 days old, invalidate tsval_recent. */
if (timestamp_lt (tc0->tsval_recent_age + TCP_PAWS_IDLE,
/* If it just so happens that a segment updates tsval_recent for a
* segment over 24 days old, invalidate tsval_recent. */
if (timestamp_lt (tc0->tsval_recent_age + TCP_PAWS_IDLE,
if (!(seq_leq (tc->snd_una, vnet_buffer (b)->tcp.ack_number)
&& seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)))
{
if (!(seq_leq (tc->snd_una, vnet_buffer (b)->tcp.ack_number)
&& seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)))
{
&& seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_una))
{
tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
&& seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_una))
{
tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
* to minimize round-off errors, here we don't. Instead, we rely on
* better precision time measurements.
* to minimize round-off errors, here we don't. Instead, we rely on
* better precision time measurements.
+ *
+ * A known limitation of the algorithm is that a drop in rtt results in a
+ * rttvar increase and bigger RTO.
+ *
+ * mrtt must be provided in @ref TCP_TICK multiples, i.e., in us. Note that
+ * timestamps are measured as ms ticks so they must be converted before
+ * calling this function.
- if (tc->srtt != 0)
- {
- err = mrtt - tc->srtt;
-
- /* XXX Drop in RTT results in RTTVAR increase and bigger RTO.
- * The increase should be bound */
- tc->srtt = clib_max ((int) tc->srtt + (err >> 3), 1);
- diff = (clib_abs (err) - (int) tc->rttvar) >> 2;
- tc->rttvar = clib_max ((int) tc->rttvar + diff, 1);
- }
- else
- {
- /* First measurement. */
- tc->srtt = mrtt;
- tc->rttvar = mrtt >> 1;
- }
+ err = mrtt - tc->srtt;
+ tc->srtt = clib_max ((int) tc->srtt + (err >> 3), 1);
+ diff = (clib_abs (err) - (int) tc->rttvar) >> 2;
+ tc->rttvar = clib_max ((int) tc->rttvar + diff, 1);
* seq_lt (tc->snd_una, ack). This is a condition for calling update_rtt */
else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr)
{
* seq_lt (tc->snd_una, ack). This is a condition for calling update_rtt */
else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr)
{
- u32 now = tcp_tstamp (tc);
- mrtt = clib_max (now - tc->rcv_opts.tsecr, 1);
+ mrtt = clib_max (tcp_tstamp (tc) - tc->rcv_opts.tsecr, 1);
+ mrtt *= TCP_TSTP_TO_HZ;
- mrtt = tcp_time_now_w_thread (thread_index) - tc->rcv_opts.tsecr;
- mrtt = clib_max (mrtt, 1);
+ mrtt = tcp_tstamp (tc) - tc->rcv_opts.tsecr;
+ mrtt = clib_max (mrtt, 1) * TCP_TSTP_TO_HZ;
/* Dequeue the newly ACKed bytes */
session_tx_fifo_dequeue_drop (&tc->connection, tc->burst_acked);
/* Dequeue the newly ACKed bytes */
session_tx_fifo_dequeue_drop (&tc->connection, tc->burst_acked);
- tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
-
- if (PREDICT_FALSE (tc->flags & TCP_CONN_PSH_PENDING))
- {
- if (seq_leq (tc->psh_seq, tc->snd_una))
- tc->flags &= ~TCP_CONN_PSH_PENDING;
- }
+ tcp_validate_txf_size (tc, tc->snd_nxt - tc->snd_una);
* three segments that have left the network and should've been
* buffered at the receiver XXX */
if (!tcp_opts_sack_permitted (&tc->rcv_opts))
* three segments that have left the network and should've been
* buffered at the receiver XXX */
if (!tcp_opts_sack_permitted (&tc->rcv_opts))
{
/* We've probably entered recovery and the peer still has some
* of the data we've sent. Update snd_nxt and accept the ack */
{
/* We've probably entered recovery and the peer still has some
* of the data we've sent. Update snd_nxt and accept the ack */
&& seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_una))
{
tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
&& seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_una))
{
tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
{
/* Remove SACK blocks that have been delivered */
tcp_update_sack_list (tc, tc->rcv_nxt, tc->rcv_nxt);
{
/* Remove SACK blocks that have been delivered */
tcp_update_sack_list (tc, tc->rcv_nxt, tc->rcv_nxt);
- /* Half-open completed recently but the connection was't removed
- * yet by the owning thread */
+ /* Half-open completed or cancelled recently but the connection
+ * was't removed yet by the owning thread */
- /* Make sure the connection actually exists */
- ASSERT (tcp_lookup_connection (tc0->c_fib_index, b0,
- my_thread_index, is_ip4));
case TCP_STATE_FIN_WAIT_2:
if (vnet_buffer (b0)->tcp.data_len)
error0 = tcp_segment_rcv (wrk, tc0, b0);
case TCP_STATE_FIN_WAIT_2:
if (vnet_buffer (b0)->tcp.data_len)
error0 = tcp_segment_rcv (wrk, tc0, b0);
- lc = tcp_listener_get (vnet_buffer (b)->tcp.connection_index);
- if (PREDICT_FALSE (lc == 0))
+ /* Flags initialized with connection state after lookup */
+ if (vnet_buffer (b)->tcp.flags == TCP_STATE_LISTEN)
+ {
+ lc = tcp_listener_get (vnet_buffer (b)->tcp.connection_index);
+ }
+ else
{
tcp_connection_t *tc;
tc = tcp_connection_get (vnet_buffer (b)->tcp.connection_index,
thread_index);
if (tc->state != TCP_STATE_TIME_WAIT)
{
{
tcp_connection_t *tc;
tc = tcp_connection_get (vnet_buffer (b)->tcp.connection_index,
thread_index);
if (tc->state != TCP_STATE_TIME_WAIT)
{
error = TCP_ERROR_CREATE_EXISTS;
goto done;
}
lc = tcp_lookup_listener (b, tc->c_fib_index, is_ip4);
/* clean up the old session */
tcp_connection_del (tc);
error = TCP_ERROR_CREATE_EXISTS;
goto done;
}
lc = tcp_lookup_listener (b, tc->c_fib_index, is_ip4);
/* clean up the old session */
tcp_connection_del (tc);
+ /*
+ * This initializes elog track, must be done before synack.
+ * We also do it before possible tcp_connection_cleanup() as it
+ * generates TCP_EVT_DELETE event.
+ */
+ TCP_EVT (TCP_EVT_SYN_RCVD, child, 1);
+
if (session_stream_accept (&child->connection, lc->c_s_index,
lc->c_thread_index, 0 /* notify */ ))
{
if (session_stream_accept (&child->connection, lc->c_s_index,
lc->c_thread_index, 0 /* notify */ ))
{
- tcp_rx_trace_t *t;
- t = vlib_add_trace (vm, node, b, sizeof (*t));
- clib_memcpy_fast (&t->tcp_header, tcp_buffer_hdr (b),
- sizeof (t->tcp_header));
- clib_memcpy_fast (&t->tcp_connection, lc,
- sizeof (t->tcp_connection));
+ tcp_rx_trace_t *t = vlib_add_trace (vm, node, b, sizeof (*t));
+ tcp_set_rx_trace_data (t, lc, tcp_buffer_hdr (b), b, is_ip4);
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 nexts[VLIB_FRAME_SIZE], *next;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 nexts[VLIB_FRAME_SIZE], *next;
_(FIN_WAIT_2, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(FIN_WAIT_2, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
TCP_ERROR_NONE);
_(FIN_WAIT_2, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(FIN_WAIT_2, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
TCP_ERROR_NONE);
_(CLOSE_WAIT, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(CLOSE_WAIT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
TCP_ERROR_NONE);
_(CLOSE_WAIT, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(CLOSE_WAIT, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
TCP_ERROR_NONE);
_(CLOSE_WAIT, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(CLOSE_WAIT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
TCP_ERROR_NONE);
_(CLOSE_WAIT, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(CLOSE_WAIT, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
TCP_ERROR_NONE);
_(LAST_ACK, 0, TCP_INPUT_NEXT_DROP, TCP_ERROR_SEGMENT_INVALID);
_(LAST_ACK, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(LAST_ACK, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(LAST_ACK, 0, TCP_INPUT_NEXT_DROP, TCP_ERROR_SEGMENT_INVALID);
_(LAST_ACK, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(LAST_ACK, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);