X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Ftcp%2Ftcp_output.c;h=96ffd2c7efe9edf28457d207427238bedd919657;hb=1ee7830e9ee8a62800822b6f5224d66243b916d4;hp=29a919bd160bd89bf4820ec628788c4f0a53c9d5;hpb=9ece3c03133309dda1f7f7f292bd071fa1ccb0f1;p=vpp.git diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 29a919bd160..96ffd2c7efe 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -14,7 +14,6 @@ */ #include -#include #include vlib_node_registration_t tcp4_output_node; @@ -126,7 +125,10 @@ tcp_initial_window_to_advertise (tcp_connection_t * tc) * scale to be computed in the same way */ max_fifo = tm->max_rx_fifo ? tm->max_rx_fifo : TCP_MAX_RX_FIFO_SIZE; - tc->rcv_wscale = tcp_window_compute_scale (max_fifo); + /* Compute rcv wscale only if peer advertised support for it */ + if (tc->state != TCP_STATE_SYN_RCVD || tcp_opts_wscale (&tc->rcv_opts)) + tc->rcv_wscale = tcp_window_compute_scale (max_fifo); + tc->rcv_wnd = tcp_initial_wnd_unscaled (tc); return clib_min (tc->rcv_wnd, TCP_WND_MAX); @@ -215,7 +217,7 @@ tcp_options_write (u8 * data, tcp_options_t * opts) *data++ = TCP_OPTION_MSS; *data++ = TCP_OPTION_LEN_MSS; buf = clib_host_to_net_u16 (opts->mss); - clib_memcpy (data, &buf, sizeof (opts->mss)); + clib_memcpy_fast (data, &buf, sizeof (opts->mss)); data += sizeof (opts->mss); opts_len += TCP_OPTION_LEN_MSS; } @@ -240,10 +242,10 @@ tcp_options_write (u8 * data, tcp_options_t * opts) *data++ = TCP_OPTION_TIMESTAMP; *data++ = TCP_OPTION_LEN_TIMESTAMP; buf = clib_host_to_net_u32 (opts->tsval); - clib_memcpy (data, &buf, sizeof (opts->tsval)); + clib_memcpy_fast (data, &buf, sizeof (opts->tsval)); data += sizeof (opts->tsval); buf = clib_host_to_net_u32 (opts->tsecr); - clib_memcpy (data, &buf, sizeof (opts->tsecr)); + clib_memcpy_fast (data, &buf, sizeof (opts->tsecr)); data += sizeof (opts->tsecr); opts_len += TCP_OPTION_LEN_TIMESTAMP; } @@ -261,10 +263,10 @@ tcp_options_write (u8 * data, tcp_options_t * opts) for (i = 0; i < n_sack_blocks; i++) { buf = clib_host_to_net_u32 (opts->sacks[i].start); - clib_memcpy (data, &buf, seq_len); + clib_memcpy_fast (data, &buf, seq_len); data += seq_len; buf = clib_host_to_net_u32 (opts->sacks[i].end); - clib_memcpy (data, &buf, seq_len); + clib_memcpy_fast (data, &buf, seq_len); data += seq_len; } opts_len += 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK; @@ -389,9 +391,13 @@ tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts, switch (state) { case TCP_STATE_ESTABLISHED: + case TCP_STATE_CLOSE_WAIT: case TCP_STATE_FIN_WAIT_1: + case TCP_STATE_LAST_ACK: + case TCP_STATE_CLOSING: + case TCP_STATE_FIN_WAIT_2: + case TCP_STATE_TIME_WAIT: case TCP_STATE_CLOSED: - case TCP_STATE_CLOSE_WAIT: return tcp_make_established_options (tc, opts); case TCP_STATE_SYN_RCVD: return tcp_make_synack_options (tc, opts); @@ -444,7 +450,6 @@ tcp_init_mss (tcp_connection_t * tc) if (tc->snd_mss < 45) { - clib_warning ("snd mss is 0"); /* Assume that at least the min default mss works */ tc->snd_mss = default_min_mss; tc->rcv_opts.mss = default_min_mss; @@ -458,41 +463,6 @@ tcp_init_mss (tcp_connection_t * tc) tc->snd_mss -= TCP_OPTION_LEN_TIMESTAMP; } -static int -tcp_alloc_tx_buffers (tcp_worker_ctx_t * wrk, u16 * n_bufs, u32 wanted) -{ - vlib_main_t *vm = vlib_get_main (); - u32 n_alloc; - - ASSERT (wanted > *n_bufs); - vec_validate_aligned (wrk->tx_buffers, wanted - 1, CLIB_CACHE_LINE_BYTES); - n_alloc = vlib_buffer_alloc (vm, &wrk->tx_buffers[*n_bufs], - wanted - *n_bufs); - *n_bufs += n_alloc; - _vec_len (wrk->tx_buffers) = *n_bufs; - return n_alloc; -} - -always_inline int -tcp_get_free_buffer_index (tcp_worker_ctx_t * wrk, u32 * bidx) -{ - u16 n_bufs = vec_len (wrk->tx_buffers); - - TCP_DBG_BUFFER_ALLOC_MAYBE_FAIL (wrk->vm->thread_index); - - if (PREDICT_FALSE (!n_bufs)) - { - if (!tcp_alloc_tx_buffers (wrk, &n_bufs, VLIB_FRAME_SIZE)) - { - *bidx = ~0; - return -1; - } - } - *bidx = wrk->tx_buffers[--n_bufs]; - _vec_len (wrk->tx_buffers) = n_bufs; - return 0; -} - static void * tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b) { @@ -506,27 +476,26 @@ tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b) vnet_buffer (b)->tcp.flags = 0; /* Leave enough space for headers */ - return vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN); } static void * tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b) { ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); - b->flags &= VLIB_BUFFER_NON_DEFAULT_FREELIST; b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; b->total_length_not_including_first_buffer = 0; b->current_data = 0; vnet_buffer (b)->tcp.flags = 0; VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); /* Leave enough space for headers */ - return vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN); } /** * Prepare ACK */ -static void +static inline void tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state, u8 flags) { @@ -551,15 +520,11 @@ tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state, /** * Convert buffer to ACK */ -void +static inline void tcp_make_ack (tcp_connection_t * tc, vlib_buffer_t * b) { - vlib_main_t *vm = vlib_get_main (); - - tcp_reuse_buffer (vm, b); tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK); TCP_EVT_DBG (TCP_EVT_ACK_SENT, tc); - vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK; tc->rcv_las = tc->rcv_nxt; } @@ -569,13 +534,7 @@ tcp_make_ack (tcp_connection_t * tc, vlib_buffer_t * b) void tcp_make_fin (tcp_connection_t * tc, vlib_buffer_t * b) { - vlib_main_t *vm = vlib_get_main (); - u8 flags = 0; - - tcp_reuse_buffer (vm, b); - - flags = TCP_FLAG_FIN | TCP_FLAG_ACK; - tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, flags); + tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK); /* Reset flags, make sure ack is sent */ vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK; @@ -612,15 +571,12 @@ tcp_make_syn (tcp_connection_t * tc, vlib_buffer_t * b) void tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b) { - vlib_main_t *vm = vlib_get_main (); tcp_options_t _snd_opts, *snd_opts = &_snd_opts; u8 tcp_opts_len, tcp_hdr_opts_len; tcp_header_t *th; u16 initial_wnd; clib_memset (snd_opts, 0, sizeof (*snd_opts)); - tcp_reuse_buffer (vm, b); - initial_wnd = tcp_initial_window_to_advertise (tc); tcp_opts_len = tcp_make_synack_options (tc, snd_opts); tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); @@ -631,7 +587,6 @@ tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b) tcp_options_write ((u8 *) (th + 1), snd_opts); vnet_buffer (b)->tcp.connection_index = tc->c_c_index; - vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK; /* Init retransmit timer. Use update instead of set because of * retransmissions */ @@ -766,8 +721,8 @@ tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b0, { ih6 = vlib_buffer_get_current (b0); ASSERT ((ih6->ip_version_traffic_class_and_flow_label & 0xF0) == 0x60); - clib_memcpy (&src_ip60, &ih6->src_address, sizeof (ip6_address_t)); - clib_memcpy (&dst_ip60, &ih6->dst_address, sizeof (ip6_address_t)); + clib_memcpy_fast (&src_ip60, &ih6->src_address, sizeof (ip6_address_t)); + clib_memcpy_fast (&dst_ip60, &ih6->dst_address, sizeof (ip6_address_t)); } src_port = th0->src_port; @@ -835,7 +790,7 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt, u8 is_ip4) ip6_header_t *ih6, *pkt_ih6; fib_protocol_t fib_proto; - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) return; b = vlib_get_buffer (vm, bi); @@ -911,7 +866,7 @@ tcp_send_reset (tcp_connection_t * tc) u16 tcp_hdr_opts_len, advertise_wnd, opts_write_len; u8 flags; - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) return; b = vlib_get_buffer (vm, bi); tcp_init_buffer (vm, b); @@ -990,11 +945,11 @@ tcp_send_syn (tcp_connection_t * tc) * Setup retransmit and establish timers before requesting buffer * such that we can return if we've ran out. */ - tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME); + tcp_timer_set (tc, TCP_TIMER_ESTABLISH_AO, TCP_ESTABLISH_TIME); tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN, tc->rto * TCP_TO_TIMER_TICK); - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) return; b = vlib_get_buffer (vm, bi); @@ -1002,7 +957,7 @@ tcp_send_syn (tcp_connection_t * tc) tcp_make_syn (tc, b); /* Measure RTT with this */ - tc->rtt_ts = tcp_time_now (); + tc->rtt_ts = tcp_time_now_us (vlib_num_workers ()? 1 : 0); tc->rtt_seq = tc->snd_nxt; tc->rto_boff = 0; @@ -1011,6 +966,24 @@ tcp_send_syn (tcp_connection_t * tc) TCP_EVT_DBG (TCP_EVT_SYN_SENT, tc); } +void +tcp_send_synack (tcp_connection_t * tc) +{ + tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index); + vlib_main_t *vm = wrk->vm; + vlib_buffer_t *b; + u32 bi; + + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) + return; + + tc->rtt_ts = tcp_time_now_us (tc->c_thread_index); + b = vlib_get_buffer (vm, bi); + tcp_init_buffer (vm, b); + tcp_make_synack (tc, b); + tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4); +} + /** * Flush tx frame populated by retransmits and timer pops */ @@ -1070,11 +1043,13 @@ tcp_send_fin (tcp_connection_t * tc) if (fin_snt) tc->snd_nxt = tc->snd_una; - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) { /* Out of buffers so program fin retransmit ASAP */ tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, 1); - goto post_enqueue; + if (fin_snt) + tc->snd_nxt = tc->snd_una_max; + return; } tcp_retransmit_timer_force_update (tc); @@ -1084,7 +1059,6 @@ tcp_send_fin (tcp_connection_t * tc) tcp_enqueue_to_output_now (wrk, b, bi, tc->c_is_ip4); TCP_EVT_DBG (TCP_EVT_FIN_SENT, tc); -post_enqueue: if (!fin_snt) { tc->flags |= TCP_CONN_FINSNT; @@ -1106,6 +1080,8 @@ tcp_make_state_flags (tcp_connection_t * tc, tcp_state_t next_state) { case TCP_STATE_ESTABLISHED: case TCP_STATE_CLOSE_WAIT: + case TCP_STATE_TIME_WAIT: + case TCP_STATE_FIN_WAIT_2: return TCP_FLAG_ACK; case TCP_STATE_SYN_RCVD: return TCP_FLAG_SYN | TCP_FLAG_ACK; @@ -1113,6 +1089,7 @@ tcp_make_state_flags (tcp_connection_t * tc, tcp_state_t next_state) return TCP_FLAG_SYN; case TCP_STATE_LAST_ACK: case TCP_STATE_FIN_WAIT_1: + case TCP_STATE_CLOSING: if (tc->snd_nxt + 1 < tc->snd_una_max) return TCP_FLAG_ACK; else @@ -1153,16 +1130,21 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, advertise_wnd = tcp_window_to_advertise (tc, next_state); flags = tcp_make_state_flags (tc, next_state); - + if (PREDICT_FALSE (tc->flags & TCP_CONN_PSH_PENDING)) + { + if (seq_geq (tc->psh_seq, tc->snd_nxt) + && seq_lt (tc->psh_seq, tc->snd_nxt + data_len)) + flags |= TCP_FLAG_PSH; + } th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt, tc->rcv_nxt, tcp_hdr_opts_len, flags, advertise_wnd); if (maybe_burst) { - clib_memcpy ((u8 *) (th + 1), - tm->wrk_ctx[tc->c_thread_index].cached_opts, - tc->snd_opts_len); + clib_memcpy_fast ((u8 *) (th + 1), + tm->wrk_ctx[tc->c_thread_index].cached_opts, + tc->snd_opts_len); } else { @@ -1186,8 +1168,7 @@ tcp_push_header (tcp_connection_t * tc, vlib_buffer_t * b) tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, /* compute opts */ 0, /* burst */ 1); tc->snd_una_max = tc->snd_nxt; - ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd - + tcp_fastrecovery_sent_1_smss (tc) * tc->snd_mss)); + ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd)); tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una); /* If not tracking an ACK, start tracking */ if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc)) @@ -1212,17 +1193,64 @@ tcp_send_ack (tcp_connection_t * tc) vlib_buffer_t *b; u32 bi; - /* Get buffer */ - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) return; b = vlib_get_buffer (vm, bi); tcp_init_buffer (vm, b); - - /* Fill in the ACK */ tcp_make_ack (tc, b); tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4); } +void +tcp_program_ack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc) +{ + if (!(tc->flags & TCP_CONN_SNDACK)) + { + vec_add1 (wrk->pending_acks, tc->c_c_index); + tc->flags |= TCP_CONN_SNDACK; + } +} + +void +tcp_program_dupack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc) +{ + if (!(tc->flags & TCP_CONN_SNDACK)) + { + vec_add1 (wrk->pending_acks, tc->c_c_index); + tc->flags |= TCP_CONN_SNDACK; + } + if (tc->pending_dupacks < 255) + tc->pending_dupacks += 1; +} + +void +tcp_send_acks (tcp_worker_ctx_t * wrk) +{ + u32 thread_index, *pending_acks; + tcp_connection_t *tc; + int i, j, n_acks; + + if (!vec_len (wrk->pending_acks)) + return; + + thread_index = wrk->vm->thread_index; + pending_acks = wrk->pending_acks; + for (i = 0; i < vec_len (pending_acks); i++) + { + tc = tcp_connection_get (pending_acks[i], thread_index); + tc->flags &= ~TCP_CONN_SNDACK; + n_acks = clib_max (1, tc->pending_dupacks); + /* If we're supposed to send dupacks but have no ooo data + * send only one ack */ + if (tc->pending_dupacks && !vec_len (tc->snd_sacks)) + n_acks = 1; + for (j = 0; j < n_acks; j++) + tcp_send_ack (tc); + tc->pending_dupacks = 0; + } + _vec_len (wrk->pending_acks) = 0; +} + /** * Delayed ack timer handler * @@ -1256,12 +1284,12 @@ tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t ** b) { u32 bytes_per_buffer = vnet_get_tcp_main ()->bytes_per_buffer; - u32 bi, seg_size; vlib_main_t *vm = wrk->vm; + u32 bi, seg_size; int n_bytes = 0; u8 *data; - seg_size = max_deq_bytes + MAX_HDRS_LEN; + seg_size = max_deq_bytes + TRANSPORT_MAX_HDRS_LEN; /* * Prepare options @@ -1275,7 +1303,7 @@ tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, /* Easy case, buffer size greater than mss */ if (PREDICT_TRUE (seg_size <= bytes_per_buffer)) { - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) return 0; *b = vlib_get_buffer (vm, bi); data = tcp_init_buffer (vm, *b); @@ -1290,30 +1318,28 @@ tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, /* Split mss into multiple buffers */ else { - u32 chain_bi = ~0, n_bufs_per_seg; - u16 n_peeked, len_to_deq, available_bufs; + u32 chain_bi = ~0, n_bufs_per_seg, n_bufs; + u16 n_peeked, len_to_deq; vlib_buffer_t *chain_b, *prev_b; int i; /* Make sure we have enough buffers */ n_bufs_per_seg = ceil ((double) seg_size / bytes_per_buffer); - available_bufs = vec_len (wrk->tx_buffers); - if (n_bufs_per_seg > available_bufs) + vec_validate_aligned (wrk->tx_buffers, n_bufs_per_seg - 1, + CLIB_CACHE_LINE_BYTES); + n_bufs = vlib_buffer_alloc (vm, wrk->tx_buffers, n_bufs_per_seg); + if (PREDICT_FALSE (n_bufs != n_bufs_per_seg)) { - tcp_alloc_tx_buffers (wrk, &available_bufs, VLIB_FRAME_SIZE); - if (n_bufs_per_seg > available_bufs) - { - *b = 0; - return 0; - } + if (n_bufs) + vlib_buffer_free (vm, wrk->tx_buffers, n_bufs); + return 0; } - (void) tcp_get_free_buffer_index (wrk, &bi); - ASSERT (bi != (u32) ~ 0); - *b = vlib_get_buffer (vm, bi); + *b = vlib_get_buffer (vm, wrk->tx_buffers[--n_bufs]); data = tcp_init_buffer (vm, *b); n_bytes = stream_session_peek_bytes (&tc->connection, data, offset, - bytes_per_buffer - MAX_HDRS_LEN); + bytes_per_buffer - + TRANSPORT_MAX_HDRS_LEN); b[0]->current_length = n_bytes; b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; b[0]->total_length_not_including_first_buffer = 0; @@ -1324,8 +1350,7 @@ tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, { prev_b = chain_b; len_to_deq = clib_min (max_deq_bytes, bytes_per_buffer); - tcp_get_free_buffer_index (wrk, &chain_bi); - ASSERT (chain_bi != (u32) ~ 0); + chain_bi = wrk->tx_buffers[--n_bufs]; chain_b = vlib_get_buffer (vm, chain_bi); chain_b->current_data = 0; data = vlib_buffer_get_current (chain_b); @@ -1347,6 +1372,12 @@ tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, tcp_push_hdr_i (tc, *b, tc->state, /* compute opts */ 0, /* burst */ 0); if (seq_gt (tc->snd_nxt, tc->snd_una_max)) tc->snd_una_max = tc->snd_nxt; + + if (PREDICT_FALSE (n_bufs)) + { + clib_warning ("not all buffers consumed"); + vlib_buffer_free (vm, wrk->tx_buffers, n_bufs); + } } ASSERT (n_bytes > 0); @@ -1451,7 +1482,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) { tc = tcp_half_open_connection_get (index); /* Note: the connection may have transitioned to ESTABLISHED... */ - if (PREDICT_FALSE (tc == 0)) + if (PREDICT_FALSE (tc == 0 || tc->state != TCP_STATE_SYN_SENT)) return; tc->timers[TCP_TIMER_RETRANSMIT_SYN] = TCP_TIMER_HANDLE_INVALID; } @@ -1459,9 +1490,12 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) { tc = tcp_connection_get (index, thread_index); /* Note: the connection may have been closed and pool_put */ - if (PREDICT_FALSE (tc == 0)) + if (PREDICT_FALSE (tc == 0 || tc->state == TCP_STATE_SYN_SENT)) return; tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID; + /* Wait-close and retransmit could pop at the same time */ + if (tc->state == TCP_STATE_CLOSED) + return; } if (tc->state >= TCP_STATE_ESTABLISHED) @@ -1516,8 +1550,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) /* Send one segment. Note that n_bytes may be zero due to buffer * shortfall */ n_bytes = tcp_prepare_retransmit_segment (wrk, tc, 0, tc->snd_mss, &b); - - if (n_bytes == 0) + if (!n_bytes) { tcp_retransmit_timer_force_update (tc); return; @@ -1541,10 +1574,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) if (tc->flags & TCP_CONN_HALF_OPEN_DONE) { if (tcp_half_open_connection_cleanup (tc)) - { - clib_warning ("could not remove half-open connection"); - ASSERT (0); - } + TCP_DBG ("could not remove half-open connection"); return; } @@ -1559,7 +1589,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN, tc->rto * TCP_TO_TIMER_TICK); - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) return; b = vlib_get_buffer (vm, bi); @@ -1583,7 +1613,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); tc->rtt_ts = 0; - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) { tcp_retransmit_timer_force_update (tc); return; @@ -1643,7 +1673,7 @@ tcp_timer_persist_handler (u32 index) /* Problem already solved or worse */ if (tc->state == TCP_STATE_CLOSED || tc->state > TCP_STATE_ESTABLISHED - || tc->snd_wnd > tc->snd_mss || tcp_in_recovery (tc)) + || tc->snd_wnd > tc->snd_mss) return; available_bytes = session_tx_fifo_max_dequeue (&tc->connection); @@ -1670,7 +1700,7 @@ tcp_timer_persist_handler (u32 index) /* * Try to force the first unsent segment (or buffer) */ - if (PREDICT_FALSE (tcp_get_free_buffer_index (wrk, &bi))) + if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1))) { tcp_persist_timer_set (tc); return; @@ -1680,9 +1710,10 @@ tcp_timer_persist_handler (u32 index) tcp_validate_txf_size (tc, offset); tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state); - max_snd_bytes = clib_min (tc->snd_mss, tm->bytes_per_buffer - MAX_HDRS_LEN); - n_bytes = stream_session_peek_bytes (&tc->connection, data, offset, - max_snd_bytes); + max_snd_bytes = + clib_min (tc->snd_mss, tm->bytes_per_buffer - TRANSPORT_MAX_HDRS_LEN); + n_bytes = + stream_session_peek_bytes (&tc->connection, data, offset, max_snd_bytes); b->current_length = n_bytes; ASSERT (n_bytes != 0 && (tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT) || tc->snd_nxt == tc->snd_una_max @@ -1944,13 +1975,6 @@ tcp_fast_retransmit (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, return tcp_fast_retransmit_no_sack (wrk, tc, burst_size); } -static u32 -tcp_session_has_ooo_data (tcp_connection_t * tc) -{ - stream_session_t *s = session_get (tc->c_s_index, tc->c_thread_index); - return svm_fifo_has_ooo_data (s->server_rx_fifo); -} - static void tcp_output_handle_link_local (tcp_connection_t * tc0, vlib_buffer_t * b0, u16 * next0, u32 * error0) @@ -2001,8 +2025,8 @@ tcp46_output_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node, tc = tcp_connection_get (vnet_buffer (b)->tcp.connection_index, vm->thread_index); t = vlib_add_trace (vm, node, b, sizeof (*t)); - clib_memcpy (&t->tcp_header, th, sizeof (t->tcp_header)); - clib_memcpy (&t->tcp_connection, tc, sizeof (t->tcp_connection)); + clib_memcpy_fast (&t->tcp_header, th, sizeof (t->tcp_header)); + clib_memcpy_fast (&t->tcp_connection, tc, sizeof (t->tcp_connection)); } } @@ -2055,25 +2079,6 @@ tcp_output_handle_packet (tcp_connection_t * tc0, vlib_buffer_t * b0, tcp_output_handle_link_local (tc0, b0, next0, error0); } - /* Filter out DUPACKs if there are no OOO segments left */ - if (PREDICT_FALSE (vnet_buffer (b0)->tcp.flags & TCP_BUF_FLAG_DUPACK)) - { - /* N.B. Should not filter burst of dupacks. Two issues: - * 1) dupacks open cwnd on remote peer when congested - * 2) acks leaving should have the latest rcv_wnd since the - * burst may have eaten up all of it, so only the old ones - * could be filtered. - */ - if (!tcp_session_has_ooo_data (tc0)) - { - *error0 = TCP_ERROR_FILTERED_DUPACKS; - *next0 = TCP_OUTPUT_NEXT_DROP; - return; - } - } - - /* Stop DELACK timer and fix flags */ - tc0->flags &= ~(TCP_CONN_SNDACK); if (!TCP_ALWAYS_ACK) tcp_timer_reset (tc0, TCP_TIMER_DELACK); } @@ -2171,19 +2176,21 @@ tcp6_output (vlib_main_t * vm, vlib_node_runtime_t * node, /* *INDENT-OFF* */ VLIB_REGISTER_NODE (tcp4_output_node) = { - .function = tcp4_output,.name = "tcp4-output", - /* Takes a vector of packets. */ - .vector_size = sizeof (u32), - .n_errors = TCP_N_ERROR, - .error_strings = tcp_error_strings, - .n_next_nodes = TCP_OUTPUT_N_NEXT, - .next_nodes = { + .function = tcp4_output, + .name = "tcp4-output", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .protocol_hint = VLIB_NODE_PROTO_HINT_TCP, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_OUTPUT_N_NEXT, + .next_nodes = { #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n, foreach_tcp4_output_next #undef _ - }, - .format_buffer = format_tcp_header, - .format_trace = format_tcp_tx_trace, + }, + .format_buffer = format_tcp_header, + .format_trace = format_tcp_tx_trace, }; /* *INDENT-ON* */ @@ -2197,6 +2204,7 @@ VLIB_REGISTER_NODE (tcp6_output_node) = /* Takes a vector of packets. */ .vector_size = sizeof (u32), .n_errors = TCP_N_ERROR, + .protocol_hint = VLIB_NODE_PROTO_HINT_TCP, .error_strings = tcp_error_strings, .n_next_nodes = TCP_OUTPUT_N_NEXT, .next_nodes = { @@ -2284,7 +2292,8 @@ tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node, else th0 = ip6_next_header ((ip6_header_t *) th0); t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); - clib_memcpy (&t0->tcp_header, th0, sizeof (t0->tcp_header)); + clib_memcpy_fast (&t0->tcp_header, th0, + sizeof (t0->tcp_header)); } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,