X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Ftcp%2Ftcp_output.c;h=78148cd569511abac2919cb8c8c99b64d4072936;hb=e8ea6be8dfb626b5bb4ff3355ce8037724ce1d83;hp=dfcb4ee394ee90cfd9a10ef253ae5b8b2038d88e;hpb=273968cf2d0343b2f4e3217f25c0752f20cf03c5;p=vpp.git diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index dfcb4ee394e..78148cd5695 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -321,7 +321,6 @@ tcp_update_burst_snd_vars (tcp_connection_t * tc) if (tc->snd_una == tc->snd_nxt) { tcp_cc_event (tc, TCP_CC_EVT_START_TX); - tcp_connection_tx_pacer_reset (tc, tc->cwnd, TRANSPORT_PACER_MIN_BURST); } if (tc->flags & TCP_CONN_PSH_PENDING) @@ -421,7 +420,7 @@ static inline void tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state, u8 flags) { - tcp_options_t _snd_opts, *snd_opts = &_snd_opts; + tcp_options_t _snd_opts = {}, *snd_opts = &_snd_opts; u8 tcp_opts_len, tcp_hdr_opts_len; tcp_header_t *th; u16 wnd; @@ -657,8 +656,8 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt, u8 tcp_hdr_len, flags = 0; tcp_header_t *th, *pkt_th; u32 seq, ack, bi; - ip4_header_t *ih4, *pkt_ih4; - ip6_header_t *ih6, *pkt_ih6; + ip4_header_t *pkt_ih4; + ip6_header_t *pkt_ih6; if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) { @@ -668,6 +667,7 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt, b = vlib_get_buffer (vm, bi); tcp_init_buffer (vm, b); + vnet_buffer (b)->tcp.connection_index = tc->c_c_index; /* Make and write options */ tcp_hdr_len = sizeof (tcp_header_t); @@ -699,28 +699,7 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt, th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port, seq, ack, tcp_hdr_len, flags, 0); - - /* Swap src and dst ip */ - if (is_ip4) - { - ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40); - ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address, - &pkt_ih4->src_address, IP_PROTOCOL_TCP, - tcp_csum_offload (tc)); - th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4); - } - else - { - int bogus = ~0; - ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) == - 0x60); - ih6 = vlib_buffer_push_ip6_custom (vm, b, &pkt_ih6->dst_address, - &pkt_ih6->src_address, - IP_PROTOCOL_TCP, - tc->ipv6_flow_label); - th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus); - ASSERT (!bogus); - } + th->checksum = tcp_compute_checksum (tc, b); tcp_enqueue_half_open (wrk, tc, b, bi); TCP_EVT (TCP_EVT_RST_SENT, tc); @@ -859,10 +838,9 @@ tcp_send_fin (tcp_connection_t * tc) /* Out of buffers so program fin retransmit ASAP */ tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT, tcp_cfg.alloc_err_timeout); - if (fin_snt) - tc->snd_nxt += 1; - else - /* Make sure retransmit retries a fin not data */ + tc->snd_nxt += 1; + /* Make sure retransmit retries a fin not data with right snd_nxt */ + if (!fin_snt) tc->flags |= TCP_CONN_FINSNT; tcp_worker_stats_inc (wrk, no_buffer, 1); return; @@ -964,11 +942,9 @@ tcp_buffer_len (vlib_buffer_t * b) return data_len; } -u32 -tcp_session_push_header (transport_connection_t * tconn, vlib_buffer_t * b) +always_inline u32 +tcp_push_one_header (tcp_connection_t *tc, vlib_buffer_t *b) { - tcp_connection_t *tc = (tcp_connection_t *) tconn; - if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE) tcp_bt_track_tx (tc, tcp_buffer_len (b)); @@ -976,6 +952,37 @@ tcp_session_push_header (transport_connection_t * tconn, vlib_buffer_t * b) /* update_snd_nxt */ 1); tcp_validate_txf_size (tc, tc->snd_nxt - tc->snd_una); + return 0; +} + +u32 +tcp_session_push_header (transport_connection_t *tconn, vlib_buffer_t **bs, + u32 n_bufs) +{ + tcp_connection_t *tc = (tcp_connection_t *) tconn; + + while (n_bufs >= 4) + { + vlib_prefetch_buffer_header (bs[2], STORE); + vlib_prefetch_buffer_header (bs[3], STORE); + + tcp_push_one_header (tc, bs[0]); + tcp_push_one_header (tc, bs[1]); + + n_bufs -= 2; + bs += 2; + } + while (n_bufs) + { + if (n_bufs > 1) + vlib_prefetch_buffer_header (bs[1], STORE); + + tcp_push_one_header (tc, bs[0]); + + n_bufs -= 1; + bs += 1; + } + /* If not tracking an ACK, start tracking */ if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc)) { @@ -1109,7 +1116,7 @@ tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, data = tcp_init_buffer (vm, *b); n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset, max_deq_bytes); - ASSERT (n_bytes == max_deq_bytes); + ASSERT (n_bytes > 0); b[0]->current_length = n_bytes; tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0, /* burst */ 0, /* update_snd_nxt */ 0); @@ -1271,6 +1278,7 @@ tcp_cc_init_rxt_timeout (tcp_connection_t * tc) tc->cwnd_acc_bytes = 0; tc->tr_occurences += 1; tc->sack_sb.reorder = TCP_DUPACK_THRESHOLD; + tc->sack_sb.rescue_rxt = tc->snd_una - 1; tcp_recovery_on (tc); } @@ -1337,7 +1345,10 @@ tcp_timer_retransmit_handler (tcp_connection_t * tc) } if (tcp_opts_sack_permitted (&tc->rcv_opts)) - tcp_check_sack_reneging (tc); + { + tcp_check_sack_reneging (tc); + scoreboard_rxt_mark_lost (&tc->sack_sb, tc->snd_una, tc->snd_nxt); + } /* Update send congestion to make sure that rxt has data to send */ tc->snd_congestion = tc->snd_nxt; @@ -1534,8 +1545,10 @@ tcp_timer_persist_handler (tcp_connection_t * tc) tcp_validate_txf_size (tc, offset); tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state); - max_snd_bytes = clib_min (tc->snd_mss, + max_snd_bytes = clib_min (clib_min (tc->snd_mss, available_bytes), tm->bytes_per_buffer - TRANSPORT_MAX_HDRS_LEN); + if (tc->snd_wnd > 0) + max_snd_bytes = clib_min (tc->snd_wnd, max_snd_bytes); n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset, max_snd_bytes); b->current_length = n_bytes; @@ -1716,7 +1729,7 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, && tc->rxt_head != tc->snd_una && tcp_retransmit_should_retry_head (tc, sb)) { - max_bytes = clib_min (tc->snd_mss, tc->snd_congestion - tc->snd_una); + max_bytes = clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una); n_written = tcp_prepare_retransmit_segment (wrk, tc, 0, max_bytes, &b); if (!n_written) { @@ -1748,7 +1761,7 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, if (!hole) { /* We are out of lost holes to retransmit so send some new data. */ - if (max_deq > tc->snd_mss) + if (max_deq) { u32 n_segs_new; int av_wnd; @@ -1758,7 +1771,10 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una); av_wnd = clib_max (av_wnd - tc->snd_mss, 0); snd_space = clib_min (snd_space, av_wnd); - snd_space = clib_min (max_deq, snd_space); + /* Low bound max_deq to mss to be able to send a segment even + * when it is less than mss */ + snd_space = + clib_min (clib_max (max_deq, tc->snd_mss), snd_space); burst_size = clib_min (burst_size - n_segs, snd_space / tc->snd_mss); burst_size = clib_min (burst_size, TCP_RXT_MAX_BURST); @@ -1770,8 +1786,7 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, goto done; } - if (tcp_in_recovery (tc) || !can_rescue - || scoreboard_rescue_rxt_valid (sb, tc)) + if (!can_rescue || scoreboard_rescue_rxt_valid (sb, tc)) break; /* If rescue rxt undefined or less than snd_una then one segment of @@ -1795,7 +1810,11 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, break; } - max_bytes = clib_min (hole->end - sb->high_rxt, snd_space); + max_bytes = hole->end - sb->high_rxt; + /* Avoid retransmitting segment less than mss if possible */ + if (snd_space < tc->snd_mss && max_bytes > snd_space) + break; + max_bytes = clib_min (max_bytes, snd_space); max_bytes = snd_limited ? clib_min (max_bytes, tc->snd_mss) : max_bytes; if (max_bytes == 0) break; @@ -2158,6 +2177,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u32 n_left_from, *from, thread_index = vm->thread_index; vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; u16 nexts[VLIB_FRAME_SIZE], *next; + u16 err_counters[TCP_N_ERROR] = { 0 }; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -2208,7 +2228,8 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } else { - b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION]; + tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION, + 1); next[0] = TCP_OUTPUT_NEXT_DROP; } if (tc1 != 0) @@ -2219,7 +2240,8 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } else { - b[1]->error = node->errors[TCP_ERROR_INVALID_CONNECTION]; + tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION, + 1); next[1] = TCP_OUTPUT_NEXT_DROP; } } @@ -2249,7 +2271,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } else { - b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION]; + tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION, 1); next[0] = TCP_OUTPUT_NEXT_DROP; } @@ -2258,6 +2280,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, n_left_from -= 1; } + tcp_store_err_counters (output, err_counters); vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); vlib_node_increment_counter (vm, tcp_node_index (output, is_ip4), TCP_ERROR_PKTS_SENT, frame->n_vectors); @@ -2276,7 +2299,6 @@ VLIB_NODE_FN (tcp6_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node, return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ ); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (tcp4_output_node) = { .name = "tcp4-output", @@ -2294,9 +2316,7 @@ VLIB_REGISTER_NODE (tcp4_output_node) = .format_buffer = format_tcp_header, .format_trace = format_tcp_tx_trace, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (tcp6_output_node) = { .name = "tcp6-output", @@ -2314,7 +2334,6 @@ VLIB_REGISTER_NODE (tcp6_output_node) = .format_buffer = format_tcp_header, .format_trace = format_tcp_tx_trace, }; -/* *INDENT-ON* */ typedef enum _tcp_reset_next { @@ -2425,7 +2444,6 @@ VLIB_NODE_FN (tcp6_reset_node) (vlib_main_t * vm, vlib_node_runtime_t * node, return tcp46_reset_inline (vm, node, from_frame, 0); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (tcp4_reset_node) = { .name = "tcp4-reset", .vector_size = sizeof (u32), @@ -2439,9 +2457,7 @@ VLIB_REGISTER_NODE (tcp4_reset_node) = { }, .format_trace = format_tcp_tx_trace, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (tcp6_reset_node) = { .name = "tcp6-reset", .vector_size = sizeof (u32), @@ -2455,7 +2471,6 @@ VLIB_REGISTER_NODE (tcp6_reset_node) = { }, .format_trace = format_tcp_tx_trace, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON