X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Ftcp%2Ftcp_input.c;h=0757741fa8f0a503d2746dae7f6ed3d21d0dad92;hb=27bb99ec804afc5977897fe97043735a508c4a78;hp=164a1b3431fe6c5035f333af0bd80771dcf7782e;hpb=6939d5efdf6541936076dce8c0a436b10e809bee;p=vpp.git diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 164a1b3431f..0757741fa8f 100755 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -327,17 +327,15 @@ tcp_rcv_rst (tcp_worker_ctx_t * wrk, tcp_connection_t * tc) tcp_connection_set_state (tc, TCP_STATE_CLOSED); break; case TCP_STATE_SYN_SENT: - tcp_program_reset_ntf (wrk, tc); - tcp_connection_set_state (tc, TCP_STATE_CLOSED); + /* Do not program ntf because the connection is half-open */ + tcp_handle_rst (tc); break; case TCP_STATE_ESTABLISHED: tcp_connection_timers_reset (tc); - /* Set the cleanup timer, in case the session layer/app don't - * cleanly close the connection */ - tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time); tcp_cong_recovery_off (tc); tcp_program_reset_ntf (wrk, tc); tcp_connection_set_state (tc, TCP_STATE_CLOSED); + tcp_program_cleanup (wrk, tc); break; case TCP_STATE_CLOSE_WAIT: case TCP_STATE_FIN_WAIT_1: @@ -345,12 +343,12 @@ tcp_rcv_rst (tcp_worker_ctx_t * wrk, tcp_connection_t * tc) case TCP_STATE_CLOSING: case TCP_STATE_LAST_ACK: tcp_connection_timers_reset (tc); - tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time); tcp_cong_recovery_off (tc); tcp_program_reset_ntf (wrk, tc); /* Make sure we mark the session as closed. In some states we may * be still trying to send data */ tcp_connection_set_state (tc, TCP_STATE_CLOSED); + tcp_program_cleanup (wrk, tc); break; case TCP_STATE_CLOSED: case TCP_STATE_TIME_WAIT: @@ -702,6 +700,9 @@ tcp_handle_postponed_dequeues (tcp_worker_ctx_t * wrk) tc->flags &= ~TCP_CONN_PSH_PENDING; } + if (tcp_is_descheduled (tc)) + tcp_reschedule (tc); + /* If everything has been acked, stop retransmit timer * otherwise update. */ tcp_retransmit_timer_update (tc); @@ -1096,9 +1097,7 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) { if (seq_lt (blk->start, blk->end) && seq_gt (blk->start, tc->snd_una) - && seq_gt (blk->start, ack) - && seq_lt (blk->start, tc->snd_nxt) - && seq_leq (blk->end, tc->snd_nxt)) + && seq_gt (blk->start, ack) && seq_leq (blk->end, tc->snd_nxt)) { blk++; continue; @@ -1316,7 +1315,12 @@ tcp_update_snd_wnd (tcp_connection_t * tc, u32 seq, u32 ack, u32 snd_wnd) } else { - tcp_persist_timer_reset (tc); + if (PREDICT_FALSE (tcp_timer_is_active (tc, TCP_TIMER_PERSIST))) + tcp_persist_timer_reset (tc); + + if (PREDICT_FALSE (tcp_is_descheduled (tc))) + tcp_reschedule (tc); + if (PREDICT_FALSE (!tcp_in_recovery (tc) && tc->rto_boff > 0)) { tc->rto_boff = 0; @@ -1467,6 +1471,7 @@ tcp_cc_recover (tcp_connection_t * tc) ASSERT (tc->rto_boff == 0); ASSERT (!tcp_in_cong_recovery (tc)); ASSERT (tcp_scoreboard_is_sane_post_recovery (tc)); + return is_spurious; } @@ -2079,7 +2084,8 @@ tcp_segment_rcv (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, * retransmissions since we may not have any data to send */ if (seq_leq (vnet_buffer (b)->tcp.seq_end, tc->rcv_nxt)) { - tcp_program_ack (tc); + tcp_program_dupack (tc); + tc->errors.below_data_wnd++; error = TCP_ERROR_SEGMENT_OLD; goto done; } @@ -2945,7 +2951,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* Make sure the segment is exactly right */ if (tc0->rcv_nxt != vnet_buffer (b0)->tcp.seq_number || is_fin0) { - tcp_rcv_rst (wrk, tc0); + tcp_send_reset_w_pkt (tc0, b0, thread_index, is_ip4); error0 = TCP_ERROR_SEGMENT_INVALID; goto drop; } @@ -2958,7 +2964,8 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, */ if (tcp_rcv_ack_no_cc (tc0, b0, &error0)) { - tcp_rcv_rst (wrk, tc0); + tcp_send_reset_w_pkt (tc0, b0, thread_index, is_ip4); + error0 = TCP_ERROR_SEGMENT_INVALID; goto drop; } @@ -2985,7 +2992,9 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (session_stream_accept_notify (&tc0->connection)) { error0 = TCP_ERROR_MSG_QUEUE_FULL; - tcp_rcv_rst (wrk, tc0); + tcp_send_reset (tc0); + session_transport_delete_notify (&tc0->connection); + tcp_connection_cleanup (tc0); goto drop; } error0 = TCP_ERROR_ACK_OK; @@ -3028,9 +3037,8 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (tc0->flags & TCP_CONN_FINRCVD) { tcp_connection_set_state (tc0, TCP_STATE_CLOSED); - tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, - tcp_cfg.cleanup_time); session_transport_closed_notify (&tc0->connection); + tcp_program_cleanup (wrk, tc0); goto drop; } @@ -3111,7 +3119,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, * we can't ensure that we have no packets already enqueued * to output. Rely instead on the waitclose timer */ tcp_connection_timers_reset (tc0); - tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); + tcp_program_cleanup (tcp_get_worker (tc0->c_thread_index), tc0); goto drop; @@ -3306,54 +3314,46 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, int is_ip4) { u32 n_left_from, *from, n_syns = 0, *first_buffer; - u32 my_thread_index = vm->thread_index; - tcp_connection_t *tc0; + u32 thread_index = vm->thread_index; from = first_buffer = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; while (n_left_from > 0) { - u32 bi0; - vlib_buffer_t *b0; - tcp_rx_trace_t *t0; - tcp_header_t *th0 = 0; - tcp_connection_t *lc0; - ip4_header_t *ip40; - ip6_header_t *ip60; - tcp_connection_t *child0; - u32 error0 = TCP_ERROR_NONE; + u32 bi, error = TCP_ERROR_NONE; + tcp_connection_t *lc, *child; + vlib_buffer_t *b; - bi0 = from[0]; + bi = from[0]; from += 1; n_left_from -= 1; - b0 = vlib_get_buffer (vm, bi0); - - if (is_ip4) - { - ip40 = vlib_buffer_get_current (b0); - th0 = tcp_buffer_hdr (b0); - } - else - { - ip60 = vlib_buffer_get_current (b0); - th0 = tcp_buffer_hdr (b0); - } + b = vlib_get_buffer (vm, bi); - lc0 = tcp_listener_get (vnet_buffer (b0)->tcp.connection_index); - if (PREDICT_FALSE (lc0 == 0)) + lc = tcp_listener_get (vnet_buffer (b)->tcp.connection_index); + if (PREDICT_FALSE (lc == 0)) { - tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, - my_thread_index); - if (tc0->state != TCP_STATE_TIME_WAIT) + tcp_connection_t *tc; + tc = tcp_connection_get (vnet_buffer (b)->tcp.connection_index, + thread_index); + if (tc->state != TCP_STATE_TIME_WAIT) { - error0 = TCP_ERROR_CREATE_EXISTS; - goto drop; + error = TCP_ERROR_CREATE_EXISTS; + goto done; } - lc0 = tcp_lookup_listener (b0, tc0->c_fib_index, is_ip4); + lc = tcp_lookup_listener (b, tc->c_fib_index, is_ip4); /* clean up the old session */ - tcp_connection_del (tc0); + tcp_connection_del (tc); + } + + /* Make sure connection wasn't just created */ + child = tcp_lookup_connection (lc->c_fib_index, b, thread_index, + is_ip4); + if (PREDICT_FALSE (child->state != TCP_STATE_LISTEN)) + { + error = TCP_ERROR_CREATE_EXISTS; + goto done; } /* Create child session. For syn-flood protection use filter */ @@ -3373,91 +3373,51 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* 3. check for a SYN (did that already) */ - /* Make sure connection wasn't just created */ - child0 = tcp_lookup_connection (lc0->c_fib_index, b0, my_thread_index, - is_ip4); - if (PREDICT_FALSE (child0->state != TCP_STATE_LISTEN)) - { - error0 = TCP_ERROR_CREATE_EXISTS; - goto drop; - } - /* Create child session and send SYN-ACK */ - child0 = tcp_connection_alloc (my_thread_index); - child0->c_lcl_port = th0->dst_port; - child0->c_rmt_port = th0->src_port; - child0->c_is_ip4 = is_ip4; - child0->state = TCP_STATE_SYN_RCVD; - child0->c_fib_index = lc0->c_fib_index; - child0->cc_algo = lc0->cc_algo; - - if (is_ip4) - { - child0->c_lcl_ip4.as_u32 = ip40->dst_address.as_u32; - child0->c_rmt_ip4.as_u32 = ip40->src_address.as_u32; - } - else - { - clib_memcpy_fast (&child0->c_lcl_ip6, &ip60->dst_address, - sizeof (ip6_address_t)); - clib_memcpy_fast (&child0->c_rmt_ip6, &ip60->src_address, - sizeof (ip6_address_t)); - } + child = tcp_connection_alloc (thread_index); - if (tcp_options_parse (th0, &child0->rcv_opts, 1)) + if (tcp_options_parse (tcp_buffer_hdr (b), &child->rcv_opts, 1)) { - error0 = TCP_ERROR_OPTIONS; - tcp_connection_free (child0); - goto drop; + error = TCP_ERROR_OPTIONS; + tcp_connection_free (child); + goto done; } - child0->irs = vnet_buffer (b0)->tcp.seq_number; - child0->rcv_nxt = vnet_buffer (b0)->tcp.seq_number + 1; - child0->rcv_las = child0->rcv_nxt; - child0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + tcp_init_w_buffer (child, b, is_ip4); - /* RFC1323: TSval timestamps sent on {SYN} and {SYN,ACK} - * segments are used to initialize PAWS. */ - if (tcp_opts_tstamp (&child0->rcv_opts)) + child->state = TCP_STATE_SYN_RCVD; + child->c_fib_index = lc->c_fib_index; + child->cc_algo = lc->cc_algo; + tcp_connection_init_vars (child); + child->rto = TCP_RTO_MIN; + + if (session_stream_accept (&child->connection, lc->c_s_index, + lc->c_thread_index, 0 /* notify */ )) { - child0->tsval_recent = child0->rcv_opts.tsval; - child0->tsval_recent_age = tcp_time_now (); + tcp_connection_cleanup (child); + error = TCP_ERROR_CREATE_SESSION_FAIL; + goto done; } - if (tcp_opts_wscale (&child0->rcv_opts)) - child0->snd_wscale = child0->rcv_opts.wscale; - - child0->snd_wnd = clib_net_to_host_u16 (th0->window) - << child0->snd_wscale; - child0->snd_wl1 = vnet_buffer (b0)->tcp.seq_number; - child0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number; - - tcp_connection_init_vars (child0); - child0->rto = TCP_RTO_MIN; + child->tx_fifo_size = transport_tx_fifo_size (&child->connection); - if (session_stream_accept (&child0->connection, lc0->c_s_index, - lc0->c_thread_index, 0 /* notify */ )) - { - tcp_connection_cleanup (child0); - error0 = TCP_ERROR_CREATE_SESSION_FAIL; - goto drop; - } + tcp_send_synack (child); - TCP_EVT (TCP_EVT_SYN_RCVD, child0, 1); - child0->tx_fifo_size = transport_tx_fifo_size (&child0->connection); - tcp_send_synack (child0); + TCP_EVT (TCP_EVT_SYN_RCVD, child, 1); - drop: + done: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) { - t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); - clib_memcpy_fast (&t0->tcp_header, th0, sizeof (t0->tcp_header)); - clib_memcpy_fast (&t0->tcp_connection, lc0, - sizeof (t0->tcp_connection)); + tcp_rx_trace_t *t; + t = vlib_add_trace (vm, node, b, sizeof (*t)); + clib_memcpy_fast (&t->tcp_header, tcp_buffer_hdr (b), + sizeof (t->tcp_header)); + clib_memcpy_fast (&t->tcp_connection, lc, + sizeof (t->tcp_connection)); } - n_syns += (error0 == TCP_ERROR_NONE); + n_syns += (error == TCP_ERROR_NONE); } tcp_inc_counter (listen, TCP_ERROR_SYNS_RCVD, n_syns); @@ -3702,14 +3662,10 @@ tcp_input_dispatch_buffer (tcp_main_t * tm, tcp_connection_t * tc, if (PREDICT_FALSE (error != TCP_ERROR_NONE)) { - /* Overload tcp flags to store state */ - tcp_state_t state = tc->state; - vnet_buffer (b)->tcp.flags = tc->state; - b->error = error_node->errors[error]; if (error == TCP_ERROR_DISPATCH) clib_warning ("tcp conn %u disp error state %U flags %U", - tc->c_c_index, format_tcp_state, state, + tc->c_c_index, format_tcp_state, tc->state, format_tcp_flags, (int) flags); } }