- if (seq_gt (sb->high_sacked, left->end))
- {
- bytes = sb->high_sacked - left->end;
- blks = 1;
- }
-
- while ((right = left)
- && bytes < (TCP_DUPACK_THRESHOLD - 1) * tc->snd_mss
- && blks < TCP_DUPACK_THRESHOLD
- /* left not updated if above conditions fail */
- && (left = scoreboard_prev_hole (sb, right)))
- {
- bytes += right->start - left->end;
- blks++;
- }
-
- /* left is first lost */
- if (left)
- {
- do
- {
- sb->lost_bytes += scoreboard_hole_bytes (right);
- left->is_lost = 1;
- left = scoreboard_prev_hole (sb, right);
- if (left)
- bytes += right->start - left->end;
- }
- while ((right = left));
- }
-
- sb->sacked_bytes = bytes;
-}
-
-/**
- * Figure out the next hole to retransmit
- *
- * Follows logic proposed in RFC6675 Sec. 4, NextSeg()
- */
-sack_scoreboard_hole_t *
-scoreboard_next_rxt_hole (sack_scoreboard_t * sb,
- sack_scoreboard_hole_t * start,
- u8 have_unsent, u8 * can_rescue, u8 * snd_limited)
-{
- sack_scoreboard_hole_t *hole = 0;
-
- hole = start ? start : scoreboard_first_hole (sb);
- while (hole && seq_leq (hole->end, sb->high_rxt) && hole->is_lost)
- hole = scoreboard_next_hole (sb, hole);
-
- /* Nothing, return */
- if (!hole)
- {
- sb->cur_rxt_hole = TCP_INVALID_SACK_HOLE_INDEX;
- return 0;
- }
-
- /* Rule (1): if higher than rxt, less than high_sacked and lost */
- if (hole->is_lost && seq_lt (hole->start, sb->high_sacked))
- {
- sb->cur_rxt_hole = scoreboard_hole_index (sb, hole);
- }
- else
- {
- /* Rule (2): available unsent data */
- if (have_unsent)
- {
- sb->cur_rxt_hole = TCP_INVALID_SACK_HOLE_INDEX;
- return 0;
- }
- /* Rule (3): if hole not lost */
- else if (seq_lt (hole->start, sb->high_sacked))
- {
- *snd_limited = 0;
- sb->cur_rxt_hole = scoreboard_hole_index (sb, hole);
- }
- /* Rule (4): if hole beyond high_sacked */
- else
- {
- ASSERT (seq_geq (hole->start, sb->high_sacked));
- *snd_limited = 1;
- *can_rescue = 1;
- /* HighRxt MUST NOT be updated */
- return 0;
- }
- }
-
- if (hole && seq_lt (sb->high_rxt, hole->start))
- sb->high_rxt = hole->start;
-
- return hole;
-}
-
-static void
-scoreboard_init_high_rxt (sack_scoreboard_t * sb, u32 snd_una)
-{
- sack_scoreboard_hole_t *hole;
- hole = scoreboard_first_hole (sb);
- if (hole)
- {
- snd_una = seq_gt (snd_una, hole->start) ? snd_una : hole->start;
- sb->cur_rxt_hole = sb->head;
- }
- sb->high_rxt = snd_una;
- sb->rescue_rxt = snd_una - 1;
-}
-
-void
-scoreboard_init (sack_scoreboard_t * sb)
-{
- sb->head = TCP_INVALID_SACK_HOLE_INDEX;
- sb->tail = TCP_INVALID_SACK_HOLE_INDEX;
- sb->cur_rxt_hole = TCP_INVALID_SACK_HOLE_INDEX;
-}
-
-void
-scoreboard_clear (sack_scoreboard_t * sb)
-{
- sack_scoreboard_hole_t *hole;
- while ((hole = scoreboard_first_hole (sb)))
- {
- scoreboard_remove_hole (sb, hole);
- }
- ASSERT (sb->head == sb->tail && sb->head == TCP_INVALID_SACK_HOLE_INDEX);
- ASSERT (pool_elts (sb->holes) == 0);
- sb->sacked_bytes = 0;
- sb->last_sacked_bytes = 0;
- sb->last_bytes_delivered = 0;
- sb->snd_una_adv = 0;
- sb->high_sacked = 0;
- sb->high_rxt = 0;
- sb->lost_bytes = 0;
- sb->cur_rxt_hole = TCP_INVALID_SACK_HOLE_INDEX;
-}
-
-/**
- * Test that scoreboard is sane after recovery
- *
- * Returns 1 if scoreboard is empty or if first hole beyond
- * snd_una.
- */
-static u8
-tcp_scoreboard_is_sane_post_recovery (tcp_connection_t * tc)
-{
- sack_scoreboard_hole_t *hole;
- hole = scoreboard_first_hole (&tc->sack_sb);
- return (!hole || (seq_geq (hole->start, tc->snd_una)
- && seq_lt (hole->end, tc->snd_una_max)));
-}
-
/**
 * Process the SACK blocks of an incoming ack and update the scoreboard.
 *
 * Validates and sorts the received SACK blocks, synthesizes a block for
 * the cumulative ack, walks the hole list removing/trimming/splitting
 * holes covered by sacked ranges, and finally recomputes the
 * sacked/lost byte accounting.
 *
 * @param tc  connection
 * @param ack cumulative ack number from the incoming segment
 */
void
tcp_rcv_sacks (tcp_connection_t * tc, u32 ack)
{
  sack_scoreboard_t *sb = &tc->sack_sb;
  sack_block_t *blk, tmp;
  sack_scoreboard_hole_t *hole, *next_hole, *last_hole;
  u32 blk_index = 0, old_sacked_bytes, hole_index;
  int i, j;

  /* Reset per-ack accounting */
  sb->last_sacked_bytes = 0;
  sb->last_bytes_delivered = 0;
  sb->snd_una_adv = 0;

  /* No sack options in this segment and scoreboard empty: nothing to do */
  if (!tcp_opts_sack (&tc->rcv_opts)
      && sb->head == TCP_INVALID_SACK_HOLE_INDEX)
    return;

  old_sacked_bytes = sb->sacked_bytes;

  /* Remove invalid blocks: keep only non-empty blocks that start above
   * both snd_una and the cumulative ack and fit below snd_una_max.
   * On deletion, blk is not advanced so the element moved into this
   * slot is re-examined on the next iteration. */
  blk = tc->rcv_opts.sacks;
  while (blk < vec_end (tc->rcv_opts.sacks))
    {
      if (seq_lt (blk->start, blk->end)
	  && seq_gt (blk->start, tc->snd_una)
	  && seq_gt (blk->start, ack)
	  && seq_lt (blk->start, tc->snd_una_max)
	  && seq_leq (blk->end, tc->snd_una_max))
	{
	  blk++;
	  continue;
	}
      vec_del1 (tc->rcv_opts.sacks, blk - tc->rcv_opts.sacks);
    }

  /* Add block for cumulative ack */
  if (seq_gt (ack, tc->snd_una))
    {
      tmp.start = tc->snd_una;
      tmp.end = ack;
      vec_add1 (tc->rcv_opts.sacks, tmp);
    }

  if (vec_len (tc->rcv_opts.sacks) == 0)
    return;

  tcp_scoreboard_trace_add (tc, ack);

  /* Make sure blocks are ordered by start sequence (selection-style
   * swap sort; the sack vector is tiny) */
  for (i = 0; i < vec_len (tc->rcv_opts.sacks); i++)
    for (j = i + 1; j < vec_len (tc->rcv_opts.sacks); j++)
      if (seq_lt (tc->rcv_opts.sacks[j].start, tc->rcv_opts.sacks[i].start))
	{
	  tmp = tc->rcv_opts.sacks[i];
	  tc->rcv_opts.sacks[i] = tc->rcv_opts.sacks[j];
	  tc->rcv_opts.sacks[j] = tmp;
	}

  if (sb->head == TCP_INVALID_SACK_HOLE_INDEX)
    {
      /* If no holes, insert the first that covers all outstanding bytes */
      last_hole = scoreboard_insert_hole (sb, TCP_INVALID_SACK_HOLE_INDEX,
					  tc->snd_una, tc->snd_una_max);
      sb->tail = scoreboard_hole_index (sb, last_hole);
      tmp = tc->rcv_opts.sacks[vec_len (tc->rcv_opts.sacks) - 1];
      sb->high_sacked = tmp.end;
    }
  else
    {
      /* If we have holes but snd_una_max is beyond the last hole, update
       * last hole end */
      tmp = tc->rcv_opts.sacks[vec_len (tc->rcv_opts.sacks) - 1];
      last_hole = scoreboard_last_hole (sb);
      if (seq_gt (tc->snd_una_max, last_hole->end))
	{
	  if (seq_geq (last_hole->start, sb->high_sacked))
	    {
	      last_hole->end = tc->snd_una_max;
	    }
	  /* New hole after high sacked block */
	  else if (seq_lt (sb->high_sacked, tc->snd_una_max))
	    {
	      scoreboard_insert_hole (sb, sb->tail, sb->high_sacked,
				      tc->snd_una_max);
	    }
	}
      /* Keep track of max byte sacked for when the last hole
       * is acked */
      if (seq_gt (tmp.end, sb->high_sacked))
	sb->high_sacked = tmp.end;
    }

  /* Walk the holes with the SACK blocks. Both are ordered by sequence
   * number, so each pass either consumes a hole or a block. */
  hole = pool_elt_at_index (sb->holes, sb->head);
  while (hole && blk_index < vec_len (tc->rcv_opts.sacks))
    {
      blk = &tc->rcv_opts.sacks[blk_index];
      if (seq_leq (blk->start, hole->start))
	{
	  /* Block covers hole. Remove hole */
	  if (seq_geq (blk->end, hole->end))
	    {
	      next_hole = scoreboard_next_hole (sb, hole);

	      /* Byte accounting: snd_una needs to be advanced */
	      if (blk->end == ack)
		{
		  if (next_hole)
		    {
		      if (seq_lt (ack, next_hole->start))
			sb->snd_una_adv = next_hole->start - ack;
		      sb->last_bytes_delivered +=
			next_hole->start - hole->end;
		    }
		  else
		    {
		      ASSERT (seq_geq (sb->high_sacked, ack));
		      sb->snd_una_adv = sb->high_sacked - ack;
		      sb->last_bytes_delivered += sb->high_sacked - hole->end;
		    }
		}

	      scoreboard_remove_hole (sb, hole);
	      hole = next_hole;
	    }
	  /* Partial 'head' overlap: trim the hole's start */
	  else
	    {
	      if (seq_gt (blk->end, hole->start))
		{
		  hole->start = blk->end;
		}
	      blk_index++;
	    }
	}
      else
	{
	  /* Hole must be split: block lands strictly inside it */
	  if (seq_lt (blk->end, hole->end))
	    {
	      hole_index = scoreboard_hole_index (sb, hole);
	      next_hole = scoreboard_insert_hole (sb, hole_index, blk->end,
						  hole->end);

	      /* Pool might've moved, so re-fetch the hole by index */
	      hole = scoreboard_get_hole (sb, hole_index);
	      hole->end = blk->start;
	      blk_index++;
	      ASSERT (hole->next == scoreboard_hole_index (sb, next_hole));
	    }
	  else if (seq_lt (blk->start, hole->end))
	    {
	      /* Partial 'tail' overlap: trim the hole's end */
	      hole->end = blk->start;
	    }
	  hole = scoreboard_next_hole (sb, hole);
	}
    }

  /* If a single hole remains that spans exactly the outstanding data,
   * everything was delivered: drop it */
  if (pool_elts (sb->holes) == 1)
    {
      hole = scoreboard_first_hole (sb);
      if (hole->start == ack + sb->snd_una_adv
	  && hole->end == tc->snd_una_max)
	scoreboard_remove_hole (sb, hole);
    }

  /* Recompute sacked/lost bytes and derive what this ack newly sacked */
  scoreboard_update_bytes (tc, sb);
  sb->last_sacked_bytes = sb->sacked_bytes
    - (old_sacked_bytes - sb->last_bytes_delivered);
  ASSERT (sb->last_sacked_bytes <= sb->sacked_bytes || tcp_in_recovery (tc));
  ASSERT (sb->sacked_bytes == 0 || tcp_in_recovery (tc)
	  || sb->sacked_bytes < tc->snd_una_max - seq_max (tc->snd_una, ack));
  ASSERT (sb->last_sacked_bytes + sb->lost_bytes <= tc->snd_una_max
	  - seq_max (tc->snd_una, ack) || tcp_in_recovery (tc));
  ASSERT (sb->head == TCP_INVALID_SACK_HOLE_INDEX || tcp_in_recovery (tc)
	  || sb->holes[sb->head].start == ack + sb->snd_una_adv);
  TCP_EVT_DBG (TCP_EVT_CC_SCOREBOARD, tc);
}
-
-/**
- * Try to update snd_wnd based on feedback received from peer.
- *
- * If successful, and new window is 'effectively' 0, activate persist
- * timer.
- */
-static void
-tcp_update_snd_wnd (tcp_connection_t * tc, u32 seq, u32 ack, u32 snd_wnd)
-{
- /* If (SND.WL1 < SEG.SEQ or (SND.WL1 = SEG.SEQ and SND.WL2 =< SEG.ACK)), set
- * SND.WND <- SEG.WND, set SND.WL1 <- SEG.SEQ, and set SND.WL2 <- SEG.ACK */
- if (seq_lt (tc->snd_wl1, seq)
- || (tc->snd_wl1 == seq && seq_leq (tc->snd_wl2, ack)))
- {
- tc->snd_wnd = snd_wnd;
- tc->snd_wl1 = seq;
- tc->snd_wl2 = ack;
- TCP_EVT_DBG (TCP_EVT_SND_WND, tc);
-
- if (PREDICT_FALSE (tc->snd_wnd < tc->snd_mss))
- {
- /* Set persist timer if not set and we just got 0 wnd */
- if (!tcp_timer_is_active (tc, TCP_TIMER_PERSIST)
- && !tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT))
- tcp_persist_timer_set (tc);
- }
- else
- {
- tcp_persist_timer_reset (tc);
- if (PREDICT_FALSE (!tcp_in_recovery (tc) && tc->rto_boff > 0))
- {
- tc->rto_boff = 0;
- tcp_update_rto (tc);
- }
- }
- }
-}
-
-/**
- * Init loss recovery/fast recovery.
- *
- * Triggered by dup acks as opposed to timer timeout. Note that cwnd is
- * updated in @ref tcp_cc_handle_event after fast retransmit
- */
-void
-tcp_cc_init_congestion (tcp_connection_t * tc)
-{
- tcp_fastrecovery_on (tc);
- tc->snd_congestion = tc->snd_una_max;
- tc->cwnd_acc_bytes = 0;
- tc->snd_rxt_bytes = 0;
- tc->prev_ssthresh = tc->ssthresh;
- tc->prev_cwnd = tc->cwnd;
- tc->cc_algo->congestion (tc);
- TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 4);
-}
-
/* Leave rto-timeout triggered recovery. Drops the backoff before
 * recomputing the rto (order matters), clears retransmit bookkeeping
 * and resumes sending from the highest byte ever sent. */
static void
tcp_cc_recovery_exit (tcp_connection_t * tc)
{
  tc->rto_boff = 0;
  tcp_update_rto (tc);
  tc->snd_rxt_ts = 0;
  tc->snd_nxt = tc->snd_una_max;
  /* Clear rtt sample timestamp */
  tc->rtt_ts = 0;
  tcp_recovery_off (tc);
  TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
}
-
-void
-tcp_cc_fastrecovery_exit (tcp_connection_t * tc)
-{
- tc->cc_algo->recovered (tc);
- tc->snd_rxt_bytes = 0;
- tc->rcv_dupacks = 0;
- tc->snd_nxt = tc->snd_una_max;
- tc->snd_rxt_bytes = 0;
- tc->rtt_ts = 0;
-
- tcp_fastrecovery_off (tc);
- tcp_fastrecovery_first_off (tc);
-
- TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
-}
-
-static void
-tcp_cc_congestion_undo (tcp_connection_t * tc)
-{
- tc->cwnd = tc->prev_cwnd;
- tc->ssthresh = tc->prev_ssthresh;
- tc->snd_nxt = tc->snd_una_max;
- tc->rcv_dupacks = 0;
- if (tcp_in_recovery (tc))
- tcp_cc_recovery_exit (tc);
- else if (tcp_in_fastrecovery (tc))
- tcp_cc_fastrecovery_exit (tc);
- ASSERT (tc->rto_boff == 0);
- TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 5);
-}
-
-static inline u8
-tcp_cc_is_spurious_timeout_rxt (tcp_connection_t * tc)
-{
- return (tcp_in_recovery (tc) && tc->rto_boff == 1
- && tc->snd_rxt_ts
- && tcp_opts_tstamp (&tc->rcv_opts)
- && timestamp_lt (tc->rcv_opts.tsecr, tc->snd_rxt_ts));
-}
-
-static inline u8
-tcp_cc_is_spurious_fast_rxt (tcp_connection_t * tc)
-{
- return (tcp_in_fastrecovery (tc)
- && tc->cwnd > tc->ssthresh + 3 * tc->snd_mss);
-}
-
-static u8
-tcp_cc_is_spurious_retransmit (tcp_connection_t * tc)
-{
- return (tcp_cc_is_spurious_timeout_rxt (tc)
- || tcp_cc_is_spurious_fast_rxt (tc));
-}
-
-static int
-tcp_cc_recover (tcp_connection_t * tc)
-{
- ASSERT (tcp_in_cong_recovery (tc));
- if (tcp_cc_is_spurious_retransmit (tc))
- {
- tcp_cc_congestion_undo (tc);
- return 1;
- }
-
- if (tcp_in_recovery (tc))
- tcp_cc_recovery_exit (tc);
- else if (tcp_in_fastrecovery (tc))
- tcp_cc_fastrecovery_exit (tc);
-
- ASSERT (tc->rto_boff == 0);
- ASSERT (!tcp_in_cong_recovery (tc));
- ASSERT (tcp_scoreboard_is_sane_post_recovery (tc));
- return 0;
-}
-
-static void
-tcp_cc_update (tcp_connection_t * tc, vlib_buffer_t * b)
-{
- ASSERT (!tcp_in_cong_recovery (tc) || tcp_is_lost_fin (tc));
-
- /* Congestion avoidance */
- tcp_cc_rcv_ack (tc);
-
- /* If a cumulative ack, make sure dupacks is 0 */
- tc->rcv_dupacks = 0;
-
- /* When dupacks hits the threshold we only enter fast retransmit if
- * cumulative ack covers more than snd_congestion. Should snd_una
- * wrap this test may fail under otherwise valid circumstances.
- * Therefore, proactively update snd_congestion when wrap detected. */
- if (PREDICT_FALSE
- (seq_leq (tc->snd_congestion, tc->snd_una - tc->bytes_acked)
- && seq_gt (tc->snd_congestion, tc->snd_una)))
- tc->snd_congestion = tc->snd_una - 1;
-}
-
-static u8
-tcp_should_fastrecover_sack (tcp_connection_t * tc)
-{
- return (TCP_DUPACK_THRESHOLD - 1) * tc->snd_mss < tc->sack_sb.sacked_bytes;
-}
-
-static u8
-tcp_should_fastrecover (tcp_connection_t * tc)
-{
- return (tc->rcv_dupacks == TCP_DUPACK_THRESHOLD
- || tcp_should_fastrecover_sack (tc));
-}
-
-void
-tcp_program_fastretransmit (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
-{
- if (!(tc->flags & TCP_CONN_FRXT_PENDING))