X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Ftcp%2Ftcp_input.c;h=87bacc2435468033484db6be5b396016fc4a574e;hb=9f9e969f149e40044fee9d2e47b7dd96f3ae4dfa;hp=c99d6cca6fe622568d733372b438b151a87e87db;hpb=0dbd5171344bf89d1519dae7e8ddbc056df6132e;p=vpp.git diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index c99d6cca6fe..87bacc24354 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -135,7 +135,8 @@ tcp_options_parse (tcp_header_t * th, tcp_options_t * to) data = (const u8 *) (th + 1); /* Zero out all flags but those set in SYN */ - to->flags &= (TCP_OPTS_FLAG_SACK_PERMITTED | TCP_OPTS_FLAG_WSCALE); + to->flags &= (TCP_OPTS_FLAG_SACK_PERMITTED | TCP_OPTS_FLAG_WSCALE + | TCP_OPTS_FLAG_SACK); for (; opts_len > 0; opts_len -= opt_len, data += opt_len) { @@ -453,7 +454,11 @@ tcp_update_rtt (tcp_connection_t * tc, u32 ack) /* Karn's rule, part 1. Don't use retransmitted segments to estimate * RTT because they're ambiguous. */ if (tcp_in_cong_recovery (tc) || tc->sack_sb.sacked_bytes) - goto done; + { + if (tcp_in_recovery (tc)) + return 0; + goto done; + } if (tc->rtt_ts && seq_geq (ack, tc->rtt_seq)) { @@ -479,7 +484,7 @@ done: tc->rtt_ts = 0; /* If we got here something must've been ACKed so make sure boff is 0, - * even if mrrt is not valid since we update the rto lower */ + * even if mrtt is not valid since we update the rto lower */ tc->rto_boff = 0; tcp_update_rto (tc); @@ -662,39 +667,45 @@ scoreboard_insert_hole (sack_scoreboard_t * sb, u32 prev_index, static void scoreboard_update_bytes (tcp_connection_t * tc, sack_scoreboard_t * sb) { - sack_scoreboard_hole_t *hole, *prev; + sack_scoreboard_hole_t *left, *right; u32 bytes = 0, blks = 0; sb->lost_bytes = 0; sb->sacked_bytes = 0; - hole = scoreboard_last_hole (sb); - if (!hole) + left = scoreboard_last_hole (sb); + if (!left) return; - if (seq_gt (sb->high_sacked, hole->end)) + if (seq_gt (sb->high_sacked, left->end)) { - bytes = sb->high_sacked - hole->end; + bytes = sb->high_sacked - left->end; blks = 1; } - while ((prev = scoreboard_prev_hole (sb, hole)) - && (bytes < (TCP_DUPACK_THRESHOLD - 1) * tc->snd_mss - && blks < TCP_DUPACK_THRESHOLD)) + while ((right = left) + && bytes < (TCP_DUPACK_THRESHOLD - 1) * tc->snd_mss + && blks < TCP_DUPACK_THRESHOLD + /* left not updated if above conditions fail */ + && (left = scoreboard_prev_hole (sb, right))) { - bytes += hole->start - prev->end; + bytes += right->start - left->end; blks++; - hole = prev; } - while (hole) + /* left is first lost */ + if (left) { - sb->lost_bytes += scoreboard_hole_bytes (hole); - hole->is_lost = 1; - prev = hole; - hole = scoreboard_prev_hole (sb, hole); - if (hole) - bytes += prev->start - hole->end; + do + { + sb->lost_bytes += scoreboard_hole_bytes (right); + left->is_lost = 1; + left = scoreboard_prev_hole (sb, right); + if (left) + bytes += right->start - left->end; + } + while ((right = left)); } + sb->sacked_bytes = bytes; } @@ -810,7 +821,8 @@ tcp_scoreboard_is_sane_post_recovery (tcp_connection_t * tc) { sack_scoreboard_hole_t *hole; hole = scoreboard_first_hole (&tc->sack_sb); - return (!hole || seq_geq (hole->start, tc->snd_una)); + return (!hole || (seq_geq (hole->start, tc->snd_una) + && seq_lt (hole->end, tc->snd_una_max))); } void @@ -823,14 +835,15 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) int i, j; sb->last_sacked_bytes = 0; - sb->snd_una_adv = 0; - old_sacked_bytes = sb->sacked_bytes; sb->last_bytes_delivered = 0; + sb->snd_una_adv = 0; if (!tcp_opts_sack (&tc->rcv_opts) && sb->head == TCP_INVALID_SACK_HOLE_INDEX) return; + old_sacked_bytes = sb->sacked_bytes; + /* Remove invalid blocks */ blk = tc->rcv_opts.sacks; while (blk < vec_end (tc->rcv_opts.sacks)) @@ -968,6 +981,14 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) } } + if (pool_elts (sb->holes) == 1) + { + hole = scoreboard_first_hole (sb); + if (hole->start == ack + sb->snd_una_adv + && hole->end == tc->snd_una_max) + scoreboard_remove_hole (sb, hole); + } + scoreboard_update_bytes (tc, sb); sb->last_sacked_bytes = sb->sacked_bytes - (old_sacked_bytes - sb->last_bytes_delivered); @@ -1239,7 +1260,10 @@ partial_ack: * Legitimate ACK. 1) See if we can exit recovery */ /* XXX limit this only to first partial ack? */ - tcp_retransmit_timer_update (tc); + if (seq_lt (tc->snd_una, tc->snd_congestion)) + tcp_retransmit_timer_force_update (tc); + else + tcp_retransmit_timer_update (tc); if (seq_geq (tc->snd_una, tc->snd_congestion)) { @@ -1271,6 +1295,7 @@ partial_ack: { tc->cc_algo->rcv_ack (tc); tc->tsecr_last_ack = tc->rcv_opts.tsecr; + transport_add_tx_event (&tc->connection); return; } @@ -1321,10 +1346,11 @@ tcp_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b, /* When we entered recovery, we reset snd_nxt to snd_una. Seems peer * still has the data so accept the ack */ if (tcp_in_recovery (tc) - && seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_congestion) - && seq_geq (vnet_buffer (b)->tcp.ack_number, tc->snd_una)) + && seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_congestion)) { - tc->snd_una_max = tc->snd_nxt = vnet_buffer (b)->tcp.ack_number; + tc->snd_nxt = vnet_buffer (b)->tcp.ack_number; + if (seq_gt (tc->snd_nxt, tc->snd_una_max)) + tc->snd_una_max = tc->snd_nxt; goto process_ack; } @@ -1391,7 +1417,9 @@ process_ack: if (!tcp_in_cong_recovery (tc)) return 0; *error = TCP_ERROR_ACK_DUP; - return vnet_buffer (b)->tcp.data_len ? 0 : -1; + if (vnet_buffer (b)->tcp.data_len || tcp_is_fin (th)) + return 0; + return -1; } /* @@ -2272,8 +2300,8 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (tcp_opts_wscale (&new_tc0->rcv_opts)) new_tc0->snd_wscale = new_tc0->rcv_opts.wscale; - /* RFC1323: SYN and SYN-ACK wnd not scaled */ - new_tc0->snd_wnd = clib_net_to_host_u16 (tcp0->window); + new_tc0->snd_wnd = clib_net_to_host_u16 (tcp0->window) + << new_tc0->snd_wscale; new_tc0->snd_wl1 = seq0; new_tc0->snd_wl2 = ack0; @@ -2662,9 +2690,12 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tc0->state = TCP_STATE_CLOSED; TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0); - /* Delete the connection/session since the pipes should be - * clear by now */ - tcp_connection_del (tc0); + + /* Don't free the connection from the data path since + * we can't ensure that we have no packets already enqueued + * to output. Rely instead on the waitclose timer */ + tcp_connection_timers_reset (tc0); + tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, 1); goto drop; @@ -2725,10 +2756,10 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, case TCP_STATE_SYN_RCVD: /* Send FIN-ACK notify app and enter CLOSE-WAIT */ tcp_connection_timers_reset (tc0); - tcp_retransmit_timer_set (tc0); tcp_make_fin (tc0, b0); tc0->snd_nxt += 1; tc0->snd_una_max = tc0->snd_nxt; + tcp_retransmit_timer_set (tc0); next0 = tcp_next_output (tc0->c_is_ip4); stream_session_disconnect_notify (&tc0->connection); tc0->state = TCP_STATE_CLOSE_WAIT; @@ -3431,7 +3462,11 @@ do { \ /* ACK for for a SYN-ACK -> tcp-rcv-process. */ _(SYN_RCVD, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); _(SYN_RCVD, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(SYN_RCVD, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, + TCP_ERROR_NONE); _(SYN_RCVD, TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(SYN_RCVD, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, + TCP_ERROR_NONE); /* SYN-ACK for a SYN */ _(SYN_SENT, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE); @@ -3472,6 +3507,7 @@ do { \ _(LAST_ACK, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); _(LAST_ACK, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(LAST_ACK, TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); _(TIME_WAIT, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); _(TIME_WAIT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);