&& (prev_snd_wnd == tc->snd_wnd));
}
-static u8
-tcp_is_lost_fin (tcp_connection_t * tc)
-{
- if ((tc->flags & TCP_CONN_FINSNT) && tc->snd_una_max - tc->snd_una == 1)
- return 1;
- return 0;
-}
-
/**
* Checks if ack is a congestion control event.
*/
/* Remove retransmitted bytes that have been delivered */
ASSERT (tc->bytes_acked + tc->sack_sb.snd_una_adv
- >= tc->sack_sb.last_bytes_delivered);
+ >= tc->sack_sb.last_bytes_delivered
+ || (tc->flags & TCP_CONN_FINSNT));
if (seq_lt (tc->snd_una, tc->sack_sb.high_rxt))
{
if (tcp_ack_is_cc_event (tc, b, prev_snd_wnd, prev_snd_una, &is_dack))
{
tcp_cc_handle_event (tc, is_dack);
+ if (!tcp_in_cong_recovery (tc))
+ return 0;
*error = TCP_ERROR_ACK_DUP;
TCP_EVT_DBG (TCP_EVT_DUPACK_RCVD, tc, 1);
return vnet_buffer (b)->tcp.data_len ? 0 : -1;
tcp_session_enqueue_data (tcp_connection_t * tc, vlib_buffer_t * b,
u16 data_len)
{
- int written;
+ int written, error = TCP_ERROR_ENQUEUED;
ASSERT (seq_geq (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt));
/* Update rcv_nxt */
if (PREDICT_TRUE (written == data_len))
{
- tc->rcv_nxt = vnet_buffer (b)->tcp.seq_end;
+ tc->rcv_nxt += written;
}
/* If more data written than expected, account for out-of-order bytes. */
else if (written > data_len)
{
- tc->rcv_nxt = vnet_buffer (b)->tcp.seq_end + written - data_len;
+ tc->rcv_nxt += written;
/* Send ACK confirming the update */
tc->flags |= TCP_CONN_SNDACK;
* not be enqueued. Inform peer */
tc->flags |= TCP_CONN_SNDACK;
- return TCP_ERROR_PARTIALLY_ENQUEUED;
+ error = TCP_ERROR_PARTIALLY_ENQUEUED;
}
else
{
tcp_update_sack_list (tc, tc->rcv_nxt, tc->rcv_nxt);
}
- return TCP_ERROR_ENQUEUED;
+ return error;
}
/** Enqueue out-of-order data */
return 1;
}
+/**
+ * Discard bytes from the head of a (possibly chained) segment.
+ *
+ * Advances the current-data pointer of the head buffer and, when the
+ * requested amount exceeds the head buffer's length, of as many chained
+ * buffers as needed. The segment metadata kept in the head buffer
+ * (tcp.data_len and total_length_not_including_first_buffer) is updated
+ * to reflect the bytes removed.
+ *
+ * @param b                head buffer of the segment
+ * @param n_bytes_to_drop  number of leading bytes to discard
+ * @return 0 on success, -1 if the buffer chain holds fewer than
+ *         n_bytes_to_drop bytes. On failure some buffers may already
+ *         have been advanced, so the segment should not be used further.
+ */
+static int
+tcp_buffer_discard_bytes (vlib_buffer_t * b, u32 n_bytes_to_drop)
+{
+  u32 discard, first = b->current_length, n_left = n_bytes_to_drop;
+  vlib_main_t *vm = vlib_get_main ();
+  vlib_buffer_t *cb = b;
+
+  /* Handle multi-buffer segments */
+  if (n_bytes_to_drop > first)
+    {
+      if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))
+	return -1;
+      /* Walk the chain with a separate cursor and a separate counter so
+       * that both the head buffer and the requested byte count are still
+       * available for the metadata updates below. */
+      while (1)
+	{
+	  discard = clib_min (n_left, cb->current_length);
+	  vlib_buffer_advance (cb, discard);
+	  n_left -= discard;
+	  if (!n_left)
+	    break;
+	  /* Chain ran out before all bytes were dropped */
+	  if (!(cb->flags & VLIB_BUFFER_NEXT_PRESENT))
+	    return -1;
+	  cb = vlib_get_buffer (vm, cb->next_buffer);
+	}
+      /* Everything beyond the head buffer's original length was taken
+       * from chained buffers. */
+      b->total_length_not_including_first_buffer -= n_bytes_to_drop - first;
+    }
+  else
+    vlib_buffer_advance (b, n_bytes_to_drop);
+  vnet_buffer (b)->tcp.data_len -= n_bytes_to_drop;
+  return 0;
+}
+
static int
tcp_segment_rcv (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b,
u32 * next0)
n_bytes_to_drop = tc->rcv_nxt - vnet_buffer (b)->tcp.seq_number;
n_data_bytes -= n_bytes_to_drop;
vnet_buffer (b)->tcp.seq_number = tc->rcv_nxt;
- vlib_buffer_advance (b, n_bytes_to_drop);
+ if (tcp_buffer_discard_bytes (b, n_bytes_to_drop))
+ goto done;
goto in_order;
}
* in CLOSE-WAIT, set timer (reuse WAITCLOSE). */
tc0->state = TCP_STATE_CLOSE_WAIT;
TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc0);
- tc0->rcv_nxt += (vnet_buffer (b0)->tcp.data_len == 0);
+ if (vnet_buffer (b0)->tcp.data_len == 0)
+ {
+ tc0->rcv_nxt += 1;
+ next0 = TCP_ESTABLISHED_NEXT_DROP;
+ }
stream_session_disconnect_notify (&tc0->connection);
- tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
}
done:
errors = session_manager_flush_enqueue_events (my_thread_index);
tcp_established_inc_counter (vm, is_ip4, TCP_ERROR_EVENT_FIFO_FULL, errors);
+ tcp_flush_frame_to_output (vm, my_thread_index, is_ip4);
+
return from_frame->n_vectors;
}
tcp_main_t *tm = vnet_get_tcp_main ();
u32 n_left_from, next_index, *from, *to_next;
u32 my_thread_index = vm->thread_index, errors = 0;
- u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
tc0 =
tcp_half_open_connection_get (vnet_buffer (b0)->
tcp.connection_index);
+ ASSERT (tc0);
ack0 = vnet_buffer (b0)->tcp.ack_number;
seq0 = vnet_buffer (b0)->tcp.seq_number;
tcp0 = tcp_buffer_hdr (b0);
- if (!tc0)
- {
- ip4_header_t *ip40 = vlib_buffer_get_current (b0);
- tcp0 = ip4_next_header (ip40);
- tc0 =
- (tcp_connection_t *)
- stream_session_lookup_transport_wt4 (&ip40->dst_address,
- &ip40->src_address,
- tcp0->dst_port,
- tcp0->src_port,
- SESSION_TYPE_IP4_TCP,
- my_thread_index);
- ASSERT (0);
- goto drop;
- }
if (PREDICT_FALSE
(!tcp_ack (tcp0) && !tcp_rst (tcp0) && !tcp_syn (tcp0)))
goto drop;
if (ack0 <= tc0->iss || ack0 > tc0->snd_nxt)
{
if (!tcp_rst (tcp0))
- tcp_send_reset (tc0, b0, is_ip4);
+ tcp_send_reset_w_pkt (tc0, b0, is_ip4);
goto drop;
}
if (tcp_options_parse (tcp0, &tc0->rcv_opts))
goto drop;
- /* Stop connection establishment and retransmit timers */
- tcp_timer_reset (tc0, TCP_TIMER_ESTABLISH);
- tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT_SYN);
-
/* Valid SYN or SYN-ACK. Move connection from half-open pool to
* current thread pool. */
pool_get (tm->connections[my_thread_index], new_tc0);
new_tc0->c_thread_index = my_thread_index;
new_tc0->rcv_nxt = vnet_buffer (b0)->tcp.seq_end;
new_tc0->irs = seq0;
- tcp_half_open_connection_del (tc0);
+ new_tc0->timers[TCP_TIMER_ESTABLISH] = TCP_TIMER_HANDLE_INVALID;
+ new_tc0->timers[TCP_TIMER_RETRANSMIT_SYN] =
+ TCP_TIMER_HANDLE_INVALID;
+
+ /* If this is not the owning thread, wait for syn retransmit to
+ * expire and cleanup then */
+ if (tcp_half_open_connection_cleanup (tc0))
+ tc0->flags |= TCP_CONN_HALF_OPEN_DONE;
if (tcp_opts_tstamp (&new_tc0->rcv_opts))
{
/* Notify app that we have connection. If session layer can't
* allocate session send reset */
- if (stream_session_connect_notify (&new_tc0->connection, sst,
- 0))
+ if (stream_session_connect_notify (&new_tc0->connection, 0))
{
+ tcp_send_reset_w_pkt (new_tc0, b0, is_ip4);
tcp_connection_cleanup (new_tc0);
- tcp_send_reset (tc0, b0, is_ip4);
goto drop;
}
new_tc0->state = TCP_STATE_SYN_RCVD;
/* Notify app that we have connection */
- if (stream_session_connect_notify
- (&new_tc0->connection, sst, 0))
+ if (stream_session_connect_notify (&new_tc0->connection, 0))
{
tcp_connection_cleanup (new_tc0);
- tcp_send_reset (tc0, b0, is_ip4);
+ tcp_send_reset_w_pkt (tc0, b0, is_ip4);
TCP_EVT_DBG (TCP_EVT_RST_SENT, tc0);
goto drop;
}
*/
if (!tcp_rcv_ack_is_acceptable (tc0, b0))
{
- tcp_send_reset (tc0, b0, is_ip4);
+ tcp_send_reset_w_pkt (tc0, b0, is_ip4);
goto drop;
}
tc0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number;
stream_session_accept_notify (&tc0->connection);
- /* Reset SYN-ACK retransmit timer */
+ /* Reset SYN-ACK retransmit and SYN_RCV establish timers */
tcp_retransmit_timer_reset (tc0);
+ tcp_timer_reset (tc0, TCP_TIMER_ESTABLISH);
break;
case TCP_STATE_ESTABLISHED:
/* We can get packets in established state here because they
if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0))
goto drop;
+ /* Still have to send the FIN */
+ if (tc0->flags & TCP_CONN_FINPNDG)
+ {
+ /* TX fifo finally drained */
+ if (!stream_session_tx_fifo_max_dequeue (&tc0->connection))
+ tcp_send_fin (tc0);
+ }
/* If FIN is ACKed */
- if (tc0->snd_una == tc0->snd_una_max)
+ else if (tc0->snd_una == tc0->snd_una_max)
{
- ASSERT (tcp_fin (tcp0));
+ tc0->rcv_nxt += 1;
tc0->state = TCP_STATE_FIN_WAIT_2;
TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
- /* Stop all timers, 2MSL will be set lower */
- tcp_connection_timers_reset (tc0);
+ if (tcp_fin (tcp0))
+ {
+ /* Stop all timers, 2MSL will be set lower */
+ tcp_connection_timers_reset (tc0);
+ }
+ else
+ {
+ /* Wait for peer to finish sending its data */
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE,
+ TCP_2MSL_TIME);
+ }
}
break;
case TCP_STATE_FIN_WAIT_2:
* acknowledged ("ok") but do not delete the TCB. */
if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0))
goto drop;
- /* check if rtx queue is empty and ack CLOSE TODO */
break;
case TCP_STATE_CLOSE_WAIT:
/* Do the same processing as for the ESTABLISHED state. */
if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0))
goto drop;
- /* XXX test that send queue empty */
tc0->state = TCP_STATE_TIME_WAIT;
TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
goto drop;
break;
/* move along .. */
break;
case TCP_STATE_FIN_WAIT_1:
- tc0->state = TCP_STATE_TIME_WAIT;
- tcp_connection_timers_reset (tc0);
- tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ tc0->state = TCP_STATE_CLOSING;
+ tcp_make_ack (tc0, b0);
+ next0 = tcp_next_output (is_ip4);
TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
+ /* Wait for ACK but not forever */
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
break;
case TCP_STATE_FIN_WAIT_2:
/* Got FIN, send ACK! */
tc0->state = TCP_STATE_TIME_WAIT;
tcp_connection_timers_reset (tc0);
- tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
tcp_make_ack (tc0, b0);
next0 = tcp_next_output (is_ip4);
TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
/* Reuse buffer to make syn-ack and send */
tcp_make_synack (child0, b0);
next0 = tcp_next_output (is_ip4);
+ tcp_timer_set (child0, TCP_TIMER_ESTABLISH, TCP_SYN_RCVD_TIME);
drop:
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
if ((tmp =
stream_session_half_open_lookup (&tc->c_lcl_ip, &tc->c_rmt_ip,
tc->c_lcl_port, tc->c_rmt_port,
- tc->c_proto)))
+ tc->c_transport_proto)))
{
if (tmp->lcl_port == hdr->dst_port
&& tmp->rmt_port == hdr->src_port)
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
-
next_index = node->cached_next_index;
+ tcp_set_time_now (my_thread_index);
while (n_left_from > 0)
{