X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Ftcp%2Ftcp_output.c;h=bbcbc9121753fec498a5c028d0fd7b1c0f89b0cc;hb=371ca50a74a9c4f1b74c4c1b65c6fdec610fcfc3;hp=be29f05f65c39b879936aeb4511a9410f061072c;hpb=9d063047eb1a3738cb0fc9ebebb55793d155bb20;p=vpp.git diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index be29f05f65c..bbcbc912175 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -55,7 +55,7 @@ format_tcp_tx_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); tcp_tx_trace_t *t = va_arg (*args, tcp_tx_trace_t *); - uword indent = format_get_indent (s); + u32 indent = format_get_indent (s); s = format (s, "%U\n%U%U", format_tcp_header, &t->tcp_header, 128, @@ -389,7 +389,7 @@ tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts, case TCP_STATE_SYN_SENT: return tcp_make_syn_options (opts, tc->rcv_wscale); default: - clib_warning ("Not handled!"); + clib_warning ("State not handled! %d", state); return 0; } } @@ -463,6 +463,9 @@ tcp_get_free_buffer_index (tcp_main_t * tm, u32 * bidx) { u32 *my_tx_buffers; u32 thread_index = vlib_get_thread_index (); + + TCP_DBG_BUFFER_ALLOC_MAYBE_FAIL (thread_index); + if (PREDICT_FALSE (vec_len (tm->tx_buffers[thread_index]) == 0)) { if (tcp_alloc_tx_buffers (tm, thread_index, VLIB_FRAME_SIZE)) @@ -500,11 +503,11 @@ always_inline void * tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b) { ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); - b->flags &= VLIB_BUFFER_FREE_LIST_INDEX_MASK; + b->flags &= VLIB_BUFFER_NON_DEFAULT_FREELIST; b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; b->total_length_not_including_first_buffer = 0; vnet_buffer (b)->tcp.flags = 0; - + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); /* Leave enough space for headers */ return vlib_buffer_make_headroom (b, MAX_HDRS_LEN); } @@ -590,9 +593,6 @@ tcp_make_syn (tcp_connection_t * tc, vlib_buffer_t * b) initial_wnd); vnet_buffer (b)->tcp.connection_index = tc->c_c_index; tcp_options_write ((u8 *) (th + 1), &snd_opts); - - tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN, - tc->rto * TCP_TO_TIMER_TICK); } /** @@ -641,15 +641,11 @@ tcp_enqueue_to_ip_lookup_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, b->error = 0; /* Default FIB for now */ - vnet_buffer (b)->sw_if_index[VLIB_TX] = 0; + vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0; /* Send to IP lookup */ next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index; - if (VLIB_BUFFER_TRACE_TRAJECTORY > 0) - { - b->pre_data[0] = 2; - b->pre_data[1] = next_index; - } + tcp_trajectory_add_start (b, 1); f = tm->ip_lookup_tx_frames[!is_ip4][thread_index]; if (!f) @@ -697,11 +693,7 @@ tcp_enqueue_to_output_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, /* Decide where to send the packet */ next_index = is_ip4 ? tcp4_output_node.index : tcp6_output_node.index; - if (VLIB_BUFFER_TRACE_TRAJECTORY > 0) - { - b->pre_data[0] = 1; - b->pre_data[1] = next_index; - } + tcp_trajectory_add_start (b, 2); /* Get frame to v4/6 output node */ f = tm->tx_frames[!is_ip4][thread_index]; @@ -791,6 +783,7 @@ tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b0, } tcp_reuse_buffer (vm, b0); + tcp_trajectory_add_start (b0, 4); th0 = vlib_buffer_push_tcp_net_order (b0, dst_port, src_port, seq, ack, sizeof (tcp_header_t), flags, 0); @@ -918,7 +911,24 @@ tcp_send_reset (tcp_connection_t * tc) opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts); ASSERT (opts_write_len == tc->snd_opts_len); vnet_buffer (b)->tcp.connection_index = tc->c_c_index; - tcp_enqueue_to_output_now (vm, b, bi, tc->c_is_ip4); + if (tc->c_is_ip4) + { + ip4_header_t *ih4; + ih4 = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip.ip4, + &tc->c_rmt_ip.ip4, IP_PROTOCOL_TCP, 0); + th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4); + } + else + { + int bogus = ~0; + ip6_header_t *ih6; + ih6 = vlib_buffer_push_ip6 (vm, b, &tc->c_lcl_ip.ip6, + &tc->c_rmt_ip.ip6, IP_PROTOCOL_TCP); + th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus); + ASSERT (!bogus); + } + tcp_enqueue_to_ip_lookup_now (vm, b, bi, tc->c_is_ip4); + TCP_EVT_DBG (TCP_EVT_RST_SENT, tc); } void @@ -960,6 +970,14 @@ tcp_send_syn (tcp_connection_t * tc) tcp_main_t *tm = vnet_get_tcp_main (); vlib_main_t *vm = vlib_get_main (); + /* + * Setup retransmit and establish timers before requesting buffer + * such that we can return if we've ran out. + */ + tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME); + tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN, + tc->rto * TCP_TO_TIMER_TICK); + if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi))) return; @@ -972,9 +990,6 @@ tcp_send_syn (tcp_connection_t * tc) tc->rtt_seq = tc->snd_nxt; tc->rto_boff = 0; - /* Set the connection establishment timer */ - tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME); - tcp_push_ip_hdr (tm, tc, b); tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4); TCP_EVT_DBG (TCP_EVT_SYN_SENT, tc); @@ -1038,7 +1053,7 @@ tcp_send_fin (tcp_connection_t * tc) u32 bi; u8 fin_snt = 0; - + tcp_retransmit_timer_force_update (tc); if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi))) return; b = vlib_get_buffer (vm, bi); @@ -1055,7 +1070,10 @@ tcp_send_fin (tcp_connection_t * tc) tc->snd_una_max += 1; tc->snd_nxt = tc->snd_una_max; } - tcp_retransmit_timer_force_update (tc); + else + { + tc->snd_nxt = tc->snd_una_max; + } TCP_EVT_DBG (TCP_EVT_FIN_SENT, tc); } @@ -1281,7 +1299,7 @@ tcp_prepare_retransmit_segment (tcp_connection_t * tc, u32 offset, ASSERT (n_peeked == len_to_deq); n_bytes += n_peeked; chain_b->current_length = n_peeked; - chain_b->flags &= VLIB_BUFFER_FREE_LIST_INDEX_MASK; + chain_b->flags &= VLIB_BUFFER_NON_DEFAULT_FREELIST; chain_b->next_buffer = 0; /* update previous buffer */ @@ -1324,7 +1342,7 @@ tcp_rtx_timeout_cc (tcp_connection_t * tc) tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss); tc->cwnd = tcp_loss_wnd (tc); tc->snd_congestion = tc->snd_una_max; - + tc->rtt_ts = 0; tcp_recovery_on (tc); } @@ -1361,6 +1379,15 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) if (tcp_is_lost_fin (tc)) { tcp_send_fin (tc); + tc->rto_boff += 1; + tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); + return; + } + + /* Shouldn't be here */ + if (tc->snd_una == tc->snd_una_max) + { + tcp_recovery_off (tc); return; } @@ -1374,13 +1401,12 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) /* Increment RTO backoff (also equal to number of retries) and go back * to first un-acked byte */ tc->rto_boff += 1; - tc->snd_nxt = tc->snd_una; /* First retransmit timeout */ if (tc->rto_boff == 1) tcp_rtx_timeout_cc (tc); - /* Exponential backoff */ + tc->snd_nxt = tc->snd_una; tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 1); @@ -1393,12 +1419,6 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) if (n_bytes == 0) { - ASSERT (!b); - if (tc->snd_una == tc->snd_una_max) - return; - ASSERT (tc->rto_boff > 1 && tc->snd_una == tc->snd_congestion); - clib_warning ("retransmit fail: %U", format_tcp_connection, tc, 2); - /* Try again eventually */ tcp_retransmit_timer_set (tc); return; } @@ -1434,6 +1454,9 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) if (tc->rto_boff > TCP_RTO_SYN_RETRIES) tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); + tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN, + tc->rto * TCP_TO_TIMER_TICK); + if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi))) return; @@ -1457,7 +1480,10 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) tc->rtt_ts = 0; if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi))) - return; + { + tcp_retransmit_timer_force_update (tc); + return; + } b = vlib_get_buffer (vm, bi); tcp_make_synack (tc, b); @@ -1469,7 +1495,6 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) else { ASSERT (tc->state == TCP_STATE_CLOSED); - TCP_DBG ("connection state: %d", tc->state); return; } } @@ -1498,7 +1523,7 @@ tcp_timer_persist_handler (u32 index) u32 thread_index = vlib_get_thread_index (); tcp_connection_t *tc; vlib_buffer_t *b; - u32 bi, old_snd_nxt, max_snd_bytes, available_bytes, offset; + u32 bi, max_snd_bytes, available_bytes, offset; int n_bytes = 0; u8 *data; @@ -1550,14 +1575,11 @@ tcp_timer_persist_handler (u32 index) n_bytes = stream_session_peek_bytes (&tc->connection, data, offset, max_snd_bytes); b->current_length = n_bytes; - ASSERT (n_bytes != 0 && (tc->snd_nxt == tc->snd_una_max || tc->rto_boff > 1 - || tcp_timer_is_active (tc, - TCP_TIMER_RETRANSMIT))); + ASSERT (n_bytes != 0 && (tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT) + || tc->snd_nxt == tc->snd_una_max + || tc->rto_boff > 1)); - /* Allow updating of snd_una_max but don't update snd_nxt */ - old_snd_nxt = tc->snd_nxt; tcp_push_hdr_i (tc, b, tc->state, 0); - tc->snd_nxt = old_snd_nxt; tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); /* Just sent new data, enable retransmit */ @@ -1717,8 +1739,7 @@ tcp_fast_retransmit (tcp_connection_t * tc) always_inline u32 tcp_session_has_ooo_data (tcp_connection_t * tc) { - stream_session_t *s = - stream_session_get (tc->c_s_index, tc->c_thread_index); + stream_session_t *s = session_get (tc->c_s_index, tc->c_thread_index); return svm_fifo_has_ooo_data (s->server_rx_fifo); } @@ -1952,6 +1973,7 @@ tcp_push_header (transport_connection_t * tconn, vlib_buffer_t * b) tc->rtt_ts = tcp_time_now (); tc->rtt_seq = tc->snd_nxt; } + tcp_trajectory_add_start (b, 3); return 0; } @@ -2014,7 +2036,7 @@ tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } /* Prepare to send to IP lookup */ - vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0; next0 = TCP_RESET_NEXT_IP_LOOKUP; done: