Type: feature
To avoid excessive bursts, pacer must be provided with an estimated rtt
for the connection. That's used to compute an idle timeout, i.e., time
after which the bucket is reset to 1 mtu due to inactivity. For now,
idle timeout is computed as 5% of the rtt.
Change-Id: Ia0b752fe7b4ad0ce97b477fb886b0133a2321541
Signed-off-by: Florin Coras <fcoras@cisco.com>
/* Init data structures */
memset (tc, 0, sizeof (*tc));
session_main.wrk[thread_index].last_vlib_time = 1;
/* Init data structures */
memset (tc, 0, sizeof (*tc));
session_main.wrk[thread_index].last_vlib_time = 1;
- transport_connection_tx_pacer_update (&tc->connection, rate);
+ transport_connection_tx_pacer_update (&tc->connection, rate, 1e6);
tcp_bt_init (tc);
bt = tc->bt;
tcp_bt_init (tc);
bt = tc->bt;
return SESSION_TX_NO_DATA;
}
return SESSION_TX_NO_DATA;
}
- ctx->snd_space = transport_connection_snd_space (ctx->tc, ctx->snd_mss);
+ ctx->snd_space = transport_connection_snd_space (ctx->tc);
/* This flow queue is "empty" so it should be re-evaluated before
* the ones that have data to send. */
/* This flow queue is "empty" so it should be re-evaluated before
* the ones that have data to send. */
- if (ctx->snd_space == 0)
{
session_evt_add_head_old (wrk, elt);
return SESSION_TX_NO_DATA;
}
{
session_evt_add_head_old (wrk, elt);
return SESSION_TX_NO_DATA;
}
+ if (transport_connection_is_tx_paced (ctx->tc))
+ {
+ u32 snd_space = transport_connection_tx_pacer_burst (ctx->tc);
+ if (snd_space < TRANSPORT_PACER_MIN_BURST)
+ {
+ session_evt_add_head_old (wrk, elt);
+ return SESSION_TX_NO_DATA;
+ }
+ snd_space = clib_min (ctx->snd_space, snd_space);
+ ctx->snd_space = snd_space >= ctx->snd_mss ?
+ snd_space - snd_space % ctx->snd_mss : snd_space;
+ }
+
/* Allow enqueuing of a new event */
svm_fifo_unset_event (ctx->s->tx_fifo);
/* Allow enqueuing of a new event */
svm_fifo_unset_event (ctx->s->tx_fifo);
if (PREDICT_FALSE (!ctx->max_len_to_snd))
{
if (PREDICT_FALSE (!ctx->max_len_to_snd))
{
- transport_connection_tx_pacer_reset_bucket (ctx->tc);
+ transport_connection_tx_pacer_reset_bucket (ctx->tc, 0);
return SESSION_TX_NO_DATA;
}
return SESSION_TX_NO_DATA;
}
now = transport_us_time_now (thread_index);
diff = now - pacer->last_update;
now = transport_us_time_now (thread_index);
diff = now - pacer->last_update;
- s = format (s, "rate %lu bucket %lu t/p %.3f last_update %U",
+ s = format (s, "rate %lu bucket %lu t/p %.3f last_update %U idle %u",
pacer->bytes_per_sec, pacer->bucket, pacer->tokens_per_period,
pacer->bytes_per_sec, pacer->bucket, pacer->tokens_per_period,
- format_clib_us_time, diff);
+ format_clib_us_time, diff, pacer->idle_timeout_us);
u64 n_periods = (time_now - pacer->last_update);
u64 inc;
u64 n_periods = (time_now - pacer->last_update);
u64 inc;
- if (PREDICT_FALSE (n_periods > 5e4))
+ if (PREDICT_FALSE (n_periods > pacer->idle_timeout_us))
{
pacer->last_update = time_now;
pacer->bucket = TRANSPORT_PACER_MIN_BURST;
return TRANSPORT_PACER_MIN_BURST;
}
{
pacer->last_update = time_now;
pacer->bucket = TRANSPORT_PACER_MIN_BURST;
return TRANSPORT_PACER_MIN_BURST;
}
- if (n_periods > 0
- && (inc = (f32) n_periods * pacer->tokens_per_period) > 10)
+ if ((inc = (f32) n_periods * pacer->tokens_per_period) > 10)
{
pacer->last_update = time_now;
pacer->bucket = clib_min (pacer->bucket + inc, pacer->bytes_per_sec);
{
pacer->last_update = time_now;
pacer->bucket = clib_min (pacer->bucket + inc, pacer->bytes_per_sec);
-spacer_set_pace_rate (spacer_t * pacer, u64 rate_bytes_per_sec)
+spacer_set_pace_rate (spacer_t * pacer, u64 rate_bytes_per_sec,
+ clib_us_time_t rtt)
{
ASSERT (rate_bytes_per_sec != 0);
pacer->bytes_per_sec = rate_bytes_per_sec;
pacer->tokens_per_period = rate_bytes_per_sec * CLIB_US_TIME_PERIOD;
{
ASSERT (rate_bytes_per_sec != 0);
pacer->bytes_per_sec = rate_bytes_per_sec;
pacer->tokens_per_period = rate_bytes_per_sec * CLIB_US_TIME_PERIOD;
+ pacer->idle_timeout_us = clib_max (rtt * TRANSPORT_PACER_IDLE_FACTOR,
+ TRANSPORT_PACER_MIN_IDLE);
void
transport_connection_tx_pacer_reset (transport_connection_t * tc,
void
transport_connection_tx_pacer_reset (transport_connection_t * tc,
- u64 rate_bytes_per_sec, u32 start_bucket)
+ u64 rate_bytes_per_sec, u32 start_bucket,
+ clib_us_time_t rtt)
- spacer_set_pace_rate (&tc->pacer, rate_bytes_per_sec);
+ spacer_set_pace_rate (&tc->pacer, rate_bytes_per_sec, rtt);
spacer_reset (&tc->pacer, transport_us_time_now (tc->thread_index),
start_bucket);
}
void
spacer_reset (&tc->pacer, transport_us_time_now (tc->thread_index),
start_bucket);
}
void
-transport_connection_tx_pacer_reset_bucket (transport_connection_t * tc)
+transport_connection_tx_pacer_reset_bucket (transport_connection_t * tc,
+ u32 bucket)
- spacer_reset (&tc->pacer, transport_us_time_now (tc->thread_index), 0);
+ spacer_reset (&tc->pacer, transport_us_time_now (tc->thread_index), bucket);
{
tc->flags |= TRANSPORT_CONNECTION_F_IS_TX_PACED;
transport_connection_tx_pacer_reset (tc, rate_bytes_per_sec,
{
tc->flags |= TRANSPORT_CONNECTION_F_IS_TX_PACED;
transport_connection_tx_pacer_reset (tc, rate_bytes_per_sec,
}
void
transport_connection_tx_pacer_update (transport_connection_t * tc,
}
void
transport_connection_tx_pacer_update (transport_connection_t * tc,
+ u64 bytes_per_sec, clib_us_time_t rtt)
- spacer_set_pace_rate (&tc->pacer, bytes_per_sec);
+ spacer_set_pace_rate (&tc->pacer, bytes_per_sec, rtt);
transport_us_time_now (tc->thread_index));
}
transport_us_time_now (tc->thread_index));
}
-u32
-transport_connection_snd_space (transport_connection_t * tc, u16 mss)
-{
- u32 snd_space, max_paced_burst;
-
- snd_space = tp_vfts[tc->proto].send_space (tc);
- if (snd_space && transport_connection_is_tx_paced (tc))
- {
- clib_us_time_t now = transport_us_time_now (tc->thread_index);
- max_paced_burst = spacer_max_burst (&tc->pacer, now);
- max_paced_burst =
- (max_paced_burst < TRANSPORT_PACER_MIN_BURST) ? 0 : max_paced_burst;
- snd_space = clib_min (snd_space, max_paced_burst);
- return snd_space >= mss ? snd_space - snd_space % mss : snd_space;
- }
- return snd_space;
-}
-
u64
transport_connection_tx_pacer_rate (transport_connection_t * tc)
{
u64
transport_connection_tx_pacer_rate (transport_connection_t * tc)
{
#define TRANSPORT_PACER_MIN_MSS 1460
#define TRANSPORT_PACER_MIN_BURST TRANSPORT_PACER_MIN_MSS
#define TRANSPORT_PACER_MAX_BURST (43 * TRANSPORT_PACER_MIN_MSS)
#define TRANSPORT_PACER_MIN_MSS 1460
#define TRANSPORT_PACER_MIN_BURST TRANSPORT_PACER_MIN_MSS
#define TRANSPORT_PACER_MAX_BURST (43 * TRANSPORT_PACER_MIN_MSS)
+#define TRANSPORT_PACER_MIN_IDLE 100
+#define TRANSPORT_PACER_IDLE_FACTOR 0.05
typedef struct _transport_options_t
{
typedef struct _transport_options_t
{
return tp_vfts[tp].app_rx_evt (tc);
}
return tp_vfts[tp].app_rx_evt (tc);
}
+/**
+ * Get maximum tx burst allowed for transport connection
+ *
+ * @param tc transport connection
+ */
+static inline u32
+transport_connection_snd_space (transport_connection_t * tc)
+{
+ return tp_vfts[tc->proto].send_space (tc);
+}
+
void transport_register_protocol (transport_proto_t transport_proto,
const transport_proto_vft_t * vft,
fib_protocol_t fib_proto, u32 output_node);
void transport_register_protocol (transport_proto_t transport_proto,
const transport_proto_vft_t * vft,
fib_protocol_t fib_proto, u32 output_node);
void transport_connection_tx_pacer_reset (transport_connection_t * tc,
u64 rate_bytes_per_sec,
void transport_connection_tx_pacer_reset (transport_connection_t * tc,
u64 rate_bytes_per_sec,
+ u32 initial_bucket,
+ clib_us_time_t rtt);
/**
* Initialize tx pacer for connection
*
/**
* Initialize tx pacer for connection
*
*
* @param tc transport connection
* @param bytes_per_sec new pacing rate
*
* @param tc transport connection
* @param bytes_per_sec new pacing rate
+ * @param rtt connection rtt that is used to compute
+ * inactivity time after which pacer bucket is
+ * reset to 1 mtu
*/
void transport_connection_tx_pacer_update (transport_connection_t * tc,
*/
void transport_connection_tx_pacer_update (transport_connection_t * tc,
- u64 bytes_per_sec);
-
-/**
- * Get maximum tx burst allowed for transport connection
- *
- * @param tc transport connection
- * @param time_now current cpu time as returned by @ref clib_cpu_time_now
- * @param mss transport's mss
- */
-u32 transport_connection_snd_space (transport_connection_t * tc, u16 mss);
+ u64 bytes_per_sec,
+ clib_us_time_t rtt);
/**
* Get tx pacer max burst
/**
* Get tx pacer max burst
* Reset tx pacer bucket
*
* @param tc transport connection
* Reset tx pacer bucket
*
* @param tc transport connection
- * @param time_now current cpu time
+ * @param bucket value the bucket will be reset to
-void transport_connection_tx_pacer_reset_bucket (transport_connection_t * tc);
+void transport_connection_tx_pacer_reset_bucket (transport_connection_t * tc,
+ u32 bucket);
/**
* Check if transport connection is paced
/**
* Check if transport connection is paced
u64 bucket;
clib_us_time_t last_update;
f32 tokens_per_period;
u64 bucket;
clib_us_time_t last_update;
f32 tokens_per_period;
} spacer_t;
#define TRANSPORT_CONN_ID_LEN 44
} spacer_t;
#define TRANSPORT_CONN_ID_LEN 44
if (!transport_connection_is_tx_paced (&tc->connection))
return;
if (!transport_connection_is_tx_paced (&tc->connection))
return;
+ f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us);
+
transport_connection_tx_pacer_update (&tc->connection,
transport_connection_tx_pacer_update (&tc->connection,
- tcp_cc_get_pacing_rate (tc));
+ tcp_cc_get_pacing_rate (tc),
+ srtt * CLIB_US_TIME_FREQ);
{
f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us);
u64 rate = (u64) window / srtt;
{
f64 srtt = clib_min ((f64) tc->srtt * TCP_TICK, tc->mrtt_us);
u64 rate = (u64) window / srtt;
- transport_connection_tx_pacer_reset (&tc->connection, rate, start_bucket);
+ transport_connection_tx_pacer_reset (&tc->connection, rate, start_bucket,
+ srtt * CLIB_US_TIME_FREQ);
u32 prr_start; /**< snd_una when prr starts */
u32 rxt_delivered; /**< Rxt bytes delivered during current cc event */
u32 rxt_head; /**< snd_una last time we re rxted the head */
u32 prr_start; /**< snd_una when prr starts */
u32 rxt_delivered; /**< Rxt bytes delivered during current cc event */
u32 rxt_head; /**< snd_una last time we re rxted the head */
- u32 prev_dsegs_out; /**< Number of dsegs after last ack */
u32 tsecr_last_ack; /**< Timestamp echoed to us in last healthy ACK */
u32 snd_congestion; /**< snd_una_max when congestion is detected */
u32 tx_fifo_size; /**< Tx fifo size. Used to constrain cwnd */
u32 tsecr_last_ack; /**< Timestamp echoed to us in last healthy ACK */
u32 snd_congestion; /**< snd_una_max when congestion is detected */
u32 tx_fifo_size; /**< Tx fifo size. Used to constrain cwnd */
-always_inline u8
-tcp_recovery_no_snd_space (tcp_connection_t * tc)
-{
- u32 space;
-
- ASSERT (tcp_in_cong_recovery (tc));
-
- if (tcp_in_recovery (tc))
- space = tcp_available_output_snd_space (tc);
- else
- space = tcp_fastrecovery_prr_snd_space (tc);
-
- return (space < tc->snd_mss + tc->burst_acked);
-}
-
/**
* Dequeue bytes for connections that have received acks in last burst
*/
/**
* Dequeue bytes for connections that have received acks in last burst
*/
tc = tcp_connection_get (pending_deq_acked[i], thread_index);
tc->flags &= ~TCP_CONN_DEQ_PENDING;
tc = tcp_connection_get (pending_deq_acked[i], thread_index);
tc->flags &= ~TCP_CONN_DEQ_PENDING;
- if (tc->burst_acked)
- {
- /* Dequeue the newly ACKed bytes */
- session_tx_fifo_dequeue_drop (&tc->connection, tc->burst_acked);
- tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
-
- if (PREDICT_FALSE (tc->flags & TCP_CONN_PSH_PENDING))
- {
- if (seq_leq (tc->psh_seq, tc->snd_una))
- tc->flags &= ~TCP_CONN_PSH_PENDING;
- }
+ if (PREDICT_FALSE (!tc->burst_acked))
+ continue;
- /* If everything has been acked, stop retransmit timer
- * otherwise update. */
- tcp_retransmit_timer_update (tc);
+ /* Dequeue the newly ACKed bytes */
+ session_tx_fifo_dequeue_drop (&tc->connection, tc->burst_acked);
+ tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
- /* Update pacer based on our new cwnd estimate */
- tcp_connection_tx_pacer_update (tc);
+ if (PREDICT_FALSE (tc->flags & TCP_CONN_PSH_PENDING))
+ {
+ if (seq_leq (tc->psh_seq, tc->snd_una))
+ tc->flags &= ~TCP_CONN_PSH_PENDING;
- /* Reset the pacer if we've been idle, i.e., no data sent or if
- * we're in recovery and snd space constrained */
- if (tc->data_segs_out == tc->prev_dsegs_out
- || (tcp_in_cong_recovery (tc) && tcp_recovery_no_snd_space (tc)))
- transport_connection_tx_pacer_reset_bucket (&tc->connection);
+ /* If everything has been acked, stop retransmit timer
+ * otherwise update. */
+ tcp_retransmit_timer_update (tc);
+
+ /* Update pacer based on our new cwnd estimate */
+ tcp_connection_tx_pacer_update (tc);
- tc->prev_dsegs_out = tc->data_segs_out;
tc->burst_acked = 0;
}
_vec_len (wrk->pending_deq_acked) = 0;
tc->burst_acked = 0;
}
_vec_len (wrk->pending_deq_acked) = 0;
if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
tcp_bt_sample_delivery_rate (tc, &rs);
if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
tcp_bt_sample_delivery_rate (tc, &rs);
- tcp_program_dequeue (wrk, tc);
-
- tcp_update_rtt (tc, &rs, vnet_buffer (b)->tcp.ack_number);
+ {
+ tcp_program_dequeue (wrk, tc);
+ tcp_update_rtt (tc, &rs, vnet_buffer (b)->tcp.ack_number);
+ }
TCP_EVT (TCP_EVT_ACK_RCVD, tc);
TCP_EVT (TCP_EVT_ACK_RCVD, tc);
if (tc->snd_una == tc->snd_nxt)
{
tcp_cc_event (tc, TCP_CC_EVT_START_TX);
if (tc->snd_una == tc->snd_nxt)
{
tcp_cc_event (tc, TCP_CC_EVT_START_TX);
- tcp_connection_tx_pacer_reset (tc, tc->cwnd, TRANSPORT_PACER_MIN_MSS);
+ tcp_connection_tx_pacer_reset (tc, tc->cwnd, TRANSPORT_PACER_MIN_BURST);
tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
u32 burst_size)
{
tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
u32 burst_size)
{
- u8 snd_limited = 0, can_rescue = 0, reset_pacer = 0;
u32 n_written = 0, offset, max_bytes, n_segs = 0;
u32 n_written = 0, offset, max_bytes, n_segs = 0;
- u32 bi, max_deq, burst_bytes, sent_bytes;
+ u8 snd_limited = 0, can_rescue = 0;
+ u32 bi, max_deq, burst_bytes;
sack_scoreboard_hole_t *hole;
vlib_main_t *vm = wrk->vm;
vlib_buffer_t *b = 0;
sack_scoreboard_hole_t *hole;
vlib_main_t *vm = wrk->vm;
vlib_buffer_t *b = 0;
snd_space = tcp_fastrecovery_prr_snd_space (tc);
if (snd_space < tc->snd_mss)
snd_space = tcp_fastrecovery_prr_snd_space (tc);
if (snd_space < tc->snd_mss)
- {
- reset_pacer = burst_bytes > tc->snd_mss;
- goto done;
- }
-
- reset_pacer = snd_space < burst_bytes;
- if (reset_pacer)
- {
- transport_connection_tx_pacer_reset_bucket (&tc->connection);
- }
- else
- {
- sent_bytes = clib_min (n_segs * tc->snd_mss, burst_bytes);
- transport_connection_tx_pacer_update_bytes (&tc->connection,
- sent_bytes);
- }
-
+ transport_connection_tx_pacer_reset_bucket (&tc->connection, 0);