X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Ftcp%2Ftcp_output.c;h=0d5feb976f865ab1e4db1f1a39147f9e6651a896;hb=0db9b04cf0f9c892a00988e7a61ae703aa83b721;hp=4b7915828df3b90c8bc0cdfe41eac99f7008ae3c;hpb=b26743d093141a2aef19bdf8a7fe06dcaa81329a;p=vpp.git diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 4b7915828df..0d5feb976f8 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -118,12 +118,13 @@ tcp_initial_wnd_unscaled (tcp_connection_t * tc) u32 tcp_initial_window_to_advertise (tcp_connection_t * tc) { + tcp_main_t *tm = &tcp_main; u32 max_fifo; /* Initial wnd for SYN. Fifos are not allocated yet. * Use some predefined value. For SYN-ACK we still want the * scale to be computed in the same way */ - max_fifo = TCP_MAX_RX_FIFO_SIZE; + max_fifo = tm->max_rx_fifo ? tm->max_rx_fifo : TCP_MAX_RX_FIFO_SIZE; tc->rcv_wscale = tcp_window_compute_scale (max_fifo); tc->rcv_wnd = tcp_initial_wnd_unscaled (tc); @@ -1187,7 +1188,8 @@ tcp_push_header (tcp_connection_t * tc, vlib_buffer_t * b) tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, /* compute opts */ 0, /* burst */ 1); tc->snd_una_max = tc->snd_nxt; - ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd)); + ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd + + tcp_fastrecovery_sent_1_smss (tc) * tc->snd_mss)); tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una); /* If not tracking an ACK, start tracking */ if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc)) @@ -1306,6 +1308,8 @@ tcp_prepare_retransmit_segment (tcp_connection_t * tc, u32 offset, ASSERT (n_bytes == max_deq_bytes); b[0]->current_length = n_bytes; tcp_push_hdr_i (tc, *b, tc->state, /* compute opts */ 0, /* burst */ 0); + if (seq_gt (tc->snd_nxt, tc->snd_una_max)) + tc->snd_una_max = tc->snd_nxt; } /* Split mss into multiple buffers */ else @@ -1369,6 +1373,8 @@ tcp_prepare_retransmit_segment (tcp_connection_t * tc, u32 offset, } tcp_push_hdr_i (tc, *b, tc->state, /* compute opts */ 0, /* burst */ 0); + if (seq_gt (tc->snd_nxt, tc->snd_una_max)) + tc->snd_una_max = tc->snd_nxt; } ASSERT (n_bytes > 0); @@ -1447,15 +1453,19 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) return; } - /* Shouldn't be here */ + /* Shouldn't be here. This condition is tricky because it has to take + * into account boff > 0 due to persist timeout. */ if ((tc->rto_boff == 0 && tc->snd_una == tc->snd_una_max) - || (tc->rto_boff > 0 && seq_geq (tc->snd_una, tc->snd_congestion))) + || (tc->rto_boff > 0 && seq_geq (tc->snd_una, tc->snd_congestion) + && !tcp_flight_size (tc))) { - tcp_recovery_off (tc); + ASSERT (!tcp_in_recovery (tc)); + tc->rto_boff = 0; return; } - /* We're not in recovery so make sure rto_boff is 0 */ + /* We're not in recovery so make sure rto_boff is 0. Can be non 0 due + * to persist timer timeout */ if (!tcp_in_recovery (tc) && tc->rto_boff > 0) { tc->rto_boff = 0; @@ -1470,10 +1480,15 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) if (tc->rto_boff == 1) tcp_rxt_timeout_cc (tc); + /* If we've sent beyond snd_congestion, update it */ + if (seq_gt (tc->snd_una_max, tc->snd_congestion)) + tc->snd_congestion = tc->snd_una_max; + tc->snd_una_max = tc->snd_nxt = tc->snd_una; tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); - /* Send one segment. Note that n_bytes may be zero due to buffer shortfall */ + /* Send one segment. Note that n_bytes may be zero due to buffer + * shortfall */ n_bytes = tcp_prepare_retransmit_segment (tc, 0, tc->snd_mss, &b); /* TODO be less aggressive about this */ @@ -1481,7 +1496,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) if (n_bytes == 0) { - tcp_retransmit_timer_set (tc); + tcp_retransmit_timer_force_update (tc); return; } @@ -1492,7 +1507,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) tc->snd_rxt_ts = tcp_time_now (); tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); - tcp_retransmit_timer_update (tc); + tcp_retransmit_timer_force_update (tc); } /* Retransmit for SYN */ else if (tc->state == TCP_STATE_SYN_SENT) @@ -1628,7 +1643,10 @@ tcp_timer_persist_handler (u32 index) * Try to force the first unsent segment (or buffer) */ if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi))) - return; + { + tcp_persist_timer_set (tc); + return; + } b = vlib_get_buffer (vm, bi); data = tcp_init_buffer (vm, b); @@ -1996,8 +2014,8 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (n_left_from > 1) { - vlib_prefetch_buffer_header (b[0], STORE); - CLIB_PREFETCH (b[0]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE); + vlib_prefetch_buffer_header (b[1], STORE); + CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE); } next[0] = TCP_OUTPUT_NEXT_IP_LOOKUP;