}
}
+static void
+tcp_handle_rst (tcp_connection_t * tc)
+{
+ switch (tc->rst_state)
+ {
+ case TCP_STATE_SYN_RCVD:
+ /* Cleanup everything. App wasn't notified yet */
+ session_transport_delete_notify (&tc->connection);
+ tcp_connection_cleanup (tc);
+ break;
+ case TCP_STATE_SYN_SENT:
+ session_stream_connect_notify (&tc->connection, 1 /* fail */ );
+ tcp_connection_cleanup (tc);
+ break;
+ case TCP_STATE_ESTABLISHED:
+ session_transport_reset_notify (&tc->connection);
+ session_transport_closed_notify (&tc->connection);
+ break;
+ case TCP_STATE_CLOSE_WAIT:
+ case TCP_STATE_FIN_WAIT_1:
+ case TCP_STATE_FIN_WAIT_2:
+ case TCP_STATE_CLOSING:
+ case TCP_STATE_LAST_ACK:
+ session_transport_closed_notify (&tc->connection);
+ break;
+ case TCP_STATE_CLOSED:
+ case TCP_STATE_TIME_WAIT:
+ break;
+ default:
+ TCP_DBG ("reset state: %u", tc->state);
+ }
+}
+
+static void
+tcp_program_reset_ntf (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
+{
+ if (!tcp_disconnect_pending (tc))
+ {
+ tc->rst_state = tc->state;
+ vec_add1 (wrk->pending_resets, tc->c_c_index);
+ tcp_disconnect_pending_on (tc);
+ }
+}
+
+/**
+ * Handle reset packet
+ *
+ * Programs disconnect/reset notification that should be sent
+ * later by calling @ref tcp_handle_disconnects
+ */
+static void
+tcp_rcv_rst (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
+{
+ TCP_EVT (TCP_EVT_RST_RCVD, tc);
+ switch (tc->state)
+ {
+ case TCP_STATE_SYN_RCVD:
+ tcp_program_reset_ntf (wrk, tc);
+ tcp_connection_set_state (tc, TCP_STATE_CLOSED);
+ break;
+ case TCP_STATE_SYN_SENT:
+ /* Do not program ntf because the connection is half-open */
+ tcp_handle_rst (tc);
+ break;
+ case TCP_STATE_ESTABLISHED:
+ tcp_connection_timers_reset (tc);
+ tcp_cong_recovery_off (tc);
+ tcp_program_reset_ntf (wrk, tc);
+ tcp_connection_set_state (tc, TCP_STATE_CLOSED);
+ tcp_program_cleanup (wrk, tc);
+ break;
+ case TCP_STATE_CLOSE_WAIT:
+ case TCP_STATE_FIN_WAIT_1:
+ case TCP_STATE_FIN_WAIT_2:
+ case TCP_STATE_CLOSING:
+ case TCP_STATE_LAST_ACK:
+ tcp_connection_timers_reset (tc);
+ tcp_cong_recovery_off (tc);
+ tcp_program_reset_ntf (wrk, tc);
+ /* Make sure we mark the session as closed. In some states we may
+ * be still trying to send data */
+ tcp_connection_set_state (tc, TCP_STATE_CLOSED);
+ tcp_program_cleanup (wrk, tc);
+ break;
+ case TCP_STATE_CLOSED:
+ case TCP_STATE_TIME_WAIT:
+ break;
+ default:
+ TCP_DBG ("reset state: %u", tc->state);
+ }
+}
+
/**
* Validate incoming segment as per RFC793 p. 69 and RFC1323 p. 19
*
/* 2nd: check the RST bit */
if (PREDICT_FALSE (tcp_rst (th0)))
{
- tcp_connection_reset (tc0);
+ tcp_rcv_rst (wrk, tc0);
*error0 = TCP_ERROR_RST_RCVD;
goto error;
}
tcp_update_rto (tc);
}
-always_inline u8
-tcp_recovery_no_snd_space (tcp_connection_t * tc)
-{
- u32 space;
-
- ASSERT (tcp_in_cong_recovery (tc));
-
- if (tcp_in_recovery (tc))
- space = tcp_available_output_snd_space (tc);
- else
- space = tcp_fastrecovery_prr_snd_space (tc);
-
- return (space < tc->snd_mss + tc->burst_acked);
-}
-
/**
* Dequeue bytes for connections that have received acks in last burst
*/
tc = tcp_connection_get (pending_deq_acked[i], thread_index);
tc->flags &= ~TCP_CONN_DEQ_PENDING;
- if (tc->burst_acked)
- {
- /* Dequeue the newly ACKed bytes */
- session_tx_fifo_dequeue_drop (&tc->connection, tc->burst_acked);
- tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
-
- if (PREDICT_FALSE (tc->flags & TCP_CONN_PSH_PENDING))
- {
- if (seq_leq (tc->psh_seq, tc->snd_una))
- tc->flags &= ~TCP_CONN_PSH_PENDING;
- }
+ if (PREDICT_FALSE (!tc->burst_acked))
+ continue;
- /* If everything has been acked, stop retransmit timer
- * otherwise update. */
- tcp_retransmit_timer_update (tc);
+ /* Dequeue the newly ACKed bytes */
+ session_tx_fifo_dequeue_drop (&tc->connection, tc->burst_acked);
+ tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
- /* Update pacer based on our new cwnd estimate */
- tcp_connection_tx_pacer_update (tc);
+ if (PREDICT_FALSE (tc->flags & TCP_CONN_PSH_PENDING))
+ {
+ if (seq_leq (tc->psh_seq, tc->snd_una))
+ tc->flags &= ~TCP_CONN_PSH_PENDING;
}
- /* Reset the pacer if we've been idle, i.e., no data sent or if
- * we're in recovery and snd space constrained */
- if (tc->data_segs_out == tc->prev_dsegs_out
- || (tcp_in_cong_recovery (tc) && tcp_recovery_no_snd_space (tc)))
- transport_connection_tx_pacer_reset_bucket (&tc->connection);
+ if (tcp_is_descheduled (tc))
+ tcp_reschedule (tc);
+
+ /* If everything has been acked, stop retransmit timer
+ * otherwise update. */
+ tcp_retransmit_timer_update (tc);
+
+ /* Update pacer based on our new cwnd estimate */
+ tcp_connection_tx_pacer_update (tc);
- tc->prev_dsegs_out = tc->data_segs_out;
tc->burst_acked = 0;
}
_vec_len (wrk->pending_deq_acked) = 0;
if (!right)
{
sb->sacked_bytes = sb->high_sacked - ack;
+ sb->last_sacked_bytes = sb->sacked_bytes
+ - (old_sacked - sb->last_bytes_delivered);
return;
}
sb->last_bytes_delivered = 0;
sb->rxt_sacked = 0;
- if (!tcp_opts_sack (&tc->rcv_opts)
+ if (!tcp_opts_sack (&tc->rcv_opts) && !sb->sacked_bytes
&& sb->head == TCP_INVALID_SACK_HOLE_INDEX)
return;
{
if (seq_lt (blk->start, blk->end)
&& seq_gt (blk->start, tc->snd_una)
- && seq_gt (blk->start, ack)
- && seq_lt (blk->start, tc->snd_nxt)
- && seq_leq (blk->end, tc->snd_nxt))
+ && seq_gt (blk->start, ack) && seq_leq (blk->end, tc->snd_nxt))
{
blk++;
continue;
hole = pool_elt_at_index (sb->holes, sb->head);
if (PREDICT_FALSE (sb->is_reneging))
- sb->last_bytes_delivered += hole->start - tc->snd_una;
+ {
+ sb->last_bytes_delivered += clib_min (hole->start - tc->snd_una,
+ ack - tc->snd_una);
+ sb->is_reneging = seq_lt (ack, hole->start);
+ }
while (hole && blk_index < vec_len (rcv_sacks))
{
}
else
{
- tcp_persist_timer_reset (tc);
+ if (PREDICT_FALSE (tcp_timer_is_active (tc, TCP_TIMER_PERSIST)))
+ tcp_persist_timer_reset (tc);
+
+ if (PREDICT_FALSE (tcp_is_descheduled (tc)))
+ tcp_reschedule (tc);
+
if (PREDICT_FALSE (!tcp_in_recovery (tc) && tc->rto_boff > 0))
{
tc->rto_boff = 0;
ASSERT (tc->rto_boff == 0);
ASSERT (!tcp_in_cong_recovery (tc));
ASSERT (tcp_scoreboard_is_sane_post_recovery (tc));
+
return is_spurious;
}
{
u8 has_sack = tcp_opts_sack_permitted (&tc->rcv_opts);
+ /* If reneging, wait for timer based retransmits */
+ if (PREDICT_FALSE (tcp_is_lost_fin (tc) || tc->sack_sb.is_reneging))
+ return;
+
/*
* If not in recovery, figure out if we should enter
*/
tc->rxt_delivered += tc->sack_sb.rxt_sacked;
tc->prr_delivered += tc->bytes_acked + tc->sack_sb.last_sacked_bytes
- tc->sack_sb.last_bytes_delivered;
-
- tcp_program_retransmit (tc);
}
else
{
tc->rcv_dupacks += 1;
TCP_EVT (TCP_EVT_DUPACK_RCVD, tc, 1);
}
- tc->rxt_delivered = clib_max (tc->rxt_delivered + tc->bytes_acked,
+ tc->rxt_delivered = clib_min (tc->rxt_delivered + tc->bytes_acked,
tc->snd_rxt_bytes);
if (is_dack)
tc->prr_delivered += clib_min (tc->snd_mss,
/* If partial ack, assume that the first un-acked segment was lost */
if (tc->bytes_acked || tc->rcv_dupacks == TCP_DUPACK_THRESHOLD)
tcp_fastrecovery_first_on (tc);
-
- tcp_program_retransmit (tc);
}
/*
return;
}
+ tcp_program_retransmit (tc);
+
/*
* Notify cc of the event
*/
tcp_cc_rcv_cong_ack (tc, TCP_CC_PARTIALACK, rs);
}
+static void
+tcp_handle_old_ack (tcp_connection_t * tc, tcp_rate_sample_t * rs)
+{
+ if (!tcp_in_cong_recovery (tc))
+ return;
+
+ if (tcp_opts_sack_permitted (&tc->rcv_opts))
+ tcp_rcv_sacks (tc, tc->snd_una);
+
+ tc->bytes_acked = 0;
+
+ if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
+ tcp_bt_sample_delivery_rate (tc, rs);
+
+ tcp_cc_handle_event (tc, rs, 1);
+}
+
/**
* Check if duplicate ack as per RFC5681 Sec. 2
*/
*is_dack = tc->sack_sb.last_sacked_bytes
|| tcp_ack_is_dupack (tc, b, prev_snd_wnd, prev_snd_una);
- /* If reneging, wait for timer based retransmits */
- if (PREDICT_FALSE (tcp_is_lost_fin (tc) || tc->sack_sb.is_reneging))
- return 0;
-
return (*is_dack || tcp_in_cong_recovery (tc));
}
tc->errors.below_ack_wnd += 1;
*error = TCP_ERROR_ACK_OLD;
TCP_EVT (TCP_EVT_ACK_RCV_ERR, tc, 1, vnet_buffer (b)->tcp.ack_number);
- if (tcp_in_fastrecovery (tc) && tc->rcv_dupacks == TCP_DUPACK_THRESHOLD)
- tcp_cc_handle_event (tc, 0, 1);
+
+ if (seq_lt (vnet_buffer (b)->tcp.ack_number, tc->snd_una - tc->rcv_wnd))
+ return -1;
+
+ tcp_handle_old_ack (tc, &rs);
+
/* Don't drop yet */
return 0;
}
if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
tcp_bt_sample_delivery_rate (tc, &rs);
- tcp_program_dequeue (wrk, tc);
-
if (tc->bytes_acked)
- tcp_update_rtt (tc, &rs, vnet_buffer (b)->tcp.ack_number);
+ {
+ tcp_program_dequeue (wrk, tc);
+ tcp_update_rtt (tc, &rs, vnet_buffer (b)->tcp.ack_number);
+ }
TCP_EVT (TCP_EVT_ACK_RCVD, tc);
static void
tcp_handle_disconnects (tcp_worker_ctx_t * wrk)
{
- u32 thread_index, *pending_disconnects;
+ u32 thread_index, *pending_disconnects, *pending_resets;
tcp_connection_t *tc;
int i;
- if (!vec_len (wrk->pending_disconnects))
- return;
+ if (vec_len (wrk->pending_disconnects))
+ {
+ thread_index = wrk->vm->thread_index;
+ pending_disconnects = wrk->pending_disconnects;
+ for (i = 0; i < vec_len (pending_disconnects); i++)
+ {
+ tc = tcp_connection_get (pending_disconnects[i], thread_index);
+ tcp_disconnect_pending_off (tc);
+ session_transport_closing_notify (&tc->connection);
+ }
+ _vec_len (wrk->pending_disconnects) = 0;
+ }
- thread_index = wrk->vm->thread_index;
- pending_disconnects = wrk->pending_disconnects;
- for (i = 0; i < vec_len (pending_disconnects); i++)
+ if (vec_len (wrk->pending_resets))
{
- tc = tcp_connection_get (pending_disconnects[i], thread_index);
- tcp_disconnect_pending_off (tc);
- session_transport_closing_notify (&tc->connection);
+ thread_index = wrk->vm->thread_index;
+ pending_resets = wrk->pending_resets;
+ for (i = 0; i < vec_len (pending_resets); i++)
+ {
+ tc = tcp_connection_get (pending_resets[i], thread_index);
+ tcp_disconnect_pending_off (tc);
+ tcp_handle_rst (tc);
+ }
+ _vec_len (wrk->pending_resets) = 0;
}
- _vec_len (wrk->pending_disconnects) = 0;
}
static void
* retransmissions since we may not have any data to send */
if (seq_leq (vnet_buffer (b)->tcp.seq_end, tc->rcv_nxt))
{
- tcp_program_ack (tc);
+ tcp_program_dupack (tc);
+ tc->errors.below_data_wnd++;
error = TCP_ERROR_SEGMENT_OLD;
goto done;
}
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
tcp_rx_trace_t *t = va_arg (*args, tcp_rx_trace_t *);
+ tcp_connection_t *tc = &t->tcp_connection;
u32 indent = format_get_indent (s);
- s = format (s, "%U\n%U%U",
- format_tcp_header, &t->tcp_header, 128,
- format_white_space, indent,
- format_tcp_connection, &t->tcp_connection, 1);
+ s = format (s, "%U state %U\n%U%U", format_tcp_connection_id, tc,
+ format_tcp_state, tc->state, format_white_space, indent,
+ format_tcp_header, &t->tcp_header, 128);
return s;
}
return tc;
}
+static tcp_connection_t *
+tcp_lookup_listener (vlib_buffer_t * b, u32 fib_index, int is_ip4)
+{
+ session_t *s;
+
+ if (is_ip4)
+ {
+ ip4_header_t *ip4 = vlib_buffer_get_current (b);
+ tcp_header_t *tcp = tcp_buffer_hdr (b);
+ s = session_lookup_listener4 (fib_index,
+ &ip4->dst_address,
+ tcp->dst_port, TRANSPORT_PROTO_TCP, 1);
+ }
+ else
+ {
+ ip6_header_t *ip6 = vlib_buffer_get_current (b);
+ tcp_header_t *tcp = tcp_buffer_hdr (b);
+ s = session_lookup_listener6 (fib_index,
+ &ip6->dst_address,
+ tcp->dst_port, TRANSPORT_PROTO_TCP, 1);
+
+ }
+ if (PREDICT_TRUE (s != 0))
+ return tcp_get_connection_from_transport (transport_get_listener
+ (TRANSPORT_PROTO_TCP,
+ s->connection_index));
+ else
+ return 0;
+}
+
always_inline void
tcp_check_tx_offload (tcp_connection_t * tc, int is_ipv4)
{
}
lb = load_balance_get (lb_idx);
+ if (PREDICT_FALSE (lb->lb_n_buckets > 1))
+ return;
dpo = load_balance_get_bucket_i (lb, 0);
- sw_if_idx = dpo->dpoi_index;
- hw_if = vnet_get_sup_hw_interface (vnm, sw_if_idx);
+ sw_if_idx = dpo_get_urpf (dpo);
+ if (PREDICT_FALSE (sw_if_idx == ~0))
+ return;
+ hw_if = vnet_get_sup_hw_interface (vnm, sw_if_idx);
if (hw_if->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO)
tc->cfg_flags |= TCP_CFG_F_TSO;
}
/* If ACK is acceptable, signal client that peer is not
* willing to accept connection and drop connection*/
if (tcp_ack (tcp0))
- tcp_connection_reset (tc0);
+ tcp_rcv_rst (wrk, tc0);
error0 = TCP_ERROR_RST_RCVD;
goto drop;
}
my_thread_index);
tcp_inc_counter (syn_sent, TCP_ERROR_MSG_QUEUE_FULL, errors);
vlib_buffer_free (vm, first_buffer, from_frame->n_vectors);
+ tcp_handle_disconnects (wrk);
return from_frame->n_vectors;
}
/* Make sure the segment is exactly right */
if (tc0->rcv_nxt != vnet_buffer (b0)->tcp.seq_number || is_fin0)
{
- tcp_connection_reset (tc0);
+ tcp_send_reset_w_pkt (tc0, b0, thread_index, is_ip4);
error0 = TCP_ERROR_SEGMENT_INVALID;
goto drop;
}
*/
if (tcp_rcv_ack_no_cc (tc0, b0, &error0))
{
- tcp_connection_reset (tc0);
+ tcp_send_reset_w_pkt (tc0, b0, thread_index, is_ip4);
+ error0 = TCP_ERROR_SEGMENT_INVALID;
goto drop;
}
if (session_stream_accept_notify (&tc0->connection))
{
error0 = TCP_ERROR_MSG_QUEUE_FULL;
- tcp_connection_reset (tc0);
+ tcp_send_reset (tc0);
+ session_transport_delete_notify (&tc0->connection);
+ tcp_connection_cleanup (tc0);
goto drop;
}
error0 = TCP_ERROR_ACK_OK;
if (tc0->flags & TCP_CONN_FINRCVD)
{
tcp_connection_set_state (tc0, TCP_STATE_CLOSED);
- tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE,
- tcp_cfg.cleanup_time);
session_transport_closed_notify (&tc0->connection);
+ tcp_program_cleanup (wrk, tc0);
goto drop;
}
* we can't ensure that we have no packets already enqueued
* to output. Rely instead on the waitclose timer */
tcp_connection_timers_reset (tc0);
- tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+ tcp_program_cleanup (tcp_get_worker (tc0->c_thread_index), tc0);
goto drop;
vlib_frame_t * from_frame, int is_ip4)
{
u32 n_left_from, *from, n_syns = 0, *first_buffer;
- u32 my_thread_index = vm->thread_index;
+ u32 thread_index = vm->thread_index;
from = first_buffer = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
while (n_left_from > 0)
{
- u32 bi0;
- vlib_buffer_t *b0;
- tcp_rx_trace_t *t0;
- tcp_header_t *th0 = 0;
- tcp_connection_t *lc0;
- ip4_header_t *ip40;
- ip6_header_t *ip60;
- tcp_connection_t *child0;
- u32 error0 = TCP_ERROR_NONE;
+ u32 bi, error = TCP_ERROR_NONE;
+ tcp_connection_t *lc, *child;
+ vlib_buffer_t *b;
- bi0 = from[0];
+ bi = from[0];
from += 1;
n_left_from -= 1;
- b0 = vlib_get_buffer (vm, bi0);
- lc0 = tcp_listener_get (vnet_buffer (b0)->tcp.connection_index);
+ b = vlib_get_buffer (vm, bi);
- if (is_ip4)
+ lc = tcp_listener_get (vnet_buffer (b)->tcp.connection_index);
+ if (PREDICT_FALSE (lc == 0))
{
- ip40 = vlib_buffer_get_current (b0);
- th0 = ip4_next_header (ip40);
+ tcp_connection_t *tc;
+ tc = tcp_connection_get (vnet_buffer (b)->tcp.connection_index,
+ thread_index);
+ if (tc->state != TCP_STATE_TIME_WAIT)
+ {
+ error = TCP_ERROR_CREATE_EXISTS;
+ goto done;
+ }
+ lc = tcp_lookup_listener (b, tc->c_fib_index, is_ip4);
+ /* clean up the old session */
+ tcp_connection_del (tc);
}
- else
+
+ /* Make sure connection wasn't just created */
+ child = tcp_lookup_connection (lc->c_fib_index, b, thread_index,
+ is_ip4);
+ if (PREDICT_FALSE (child->state != TCP_STATE_LISTEN))
{
- ip60 = vlib_buffer_get_current (b0);
- th0 = ip6_next_header (ip60);
+ error = TCP_ERROR_CREATE_EXISTS;
+ goto done;
}
/* Create child session. For syn-flood protection use filter */
/* 3. check for a SYN (did that already) */
- /* Make sure connection wasn't just created */
- child0 = tcp_lookup_connection (lc0->c_fib_index, b0, my_thread_index,
- is_ip4);
- if (PREDICT_FALSE (child0->state != TCP_STATE_LISTEN))
- {
- error0 = TCP_ERROR_CREATE_EXISTS;
- goto drop;
- }
-
/* Create child session and send SYN-ACK */
- child0 = tcp_connection_alloc (my_thread_index);
- child0->c_lcl_port = th0->dst_port;
- child0->c_rmt_port = th0->src_port;
- child0->c_is_ip4 = is_ip4;
- child0->state = TCP_STATE_SYN_RCVD;
- child0->c_fib_index = lc0->c_fib_index;
- child0->cc_algo = lc0->cc_algo;
+ child = tcp_connection_alloc (thread_index);
- if (is_ip4)
+ if (tcp_options_parse (tcp_buffer_hdr (b), &child->rcv_opts, 1))
{
- child0->c_lcl_ip4.as_u32 = ip40->dst_address.as_u32;
- child0->c_rmt_ip4.as_u32 = ip40->src_address.as_u32;
- }
- else
- {
- clib_memcpy_fast (&child0->c_lcl_ip6, &ip60->dst_address,
- sizeof (ip6_address_t));
- clib_memcpy_fast (&child0->c_rmt_ip6, &ip60->src_address,
- sizeof (ip6_address_t));
+ error = TCP_ERROR_OPTIONS;
+ tcp_connection_free (child);
+ goto done;
}
- if (tcp_options_parse (th0, &child0->rcv_opts, 1))
- {
- error0 = TCP_ERROR_OPTIONS;
- tcp_connection_free (child0);
- goto drop;
- }
+ tcp_init_w_buffer (child, b, is_ip4);
- child0->irs = vnet_buffer (b0)->tcp.seq_number;
- child0->rcv_nxt = vnet_buffer (b0)->tcp.seq_number + 1;
- child0->rcv_las = child0->rcv_nxt;
- child0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ child->state = TCP_STATE_SYN_RCVD;
+ child->c_fib_index = lc->c_fib_index;
+ child->cc_algo = lc->cc_algo;
+ tcp_connection_init_vars (child);
+ child->rto = TCP_RTO_MIN;
- /* RFC1323: TSval timestamps sent on {SYN} and {SYN,ACK}
- * segments are used to initialize PAWS. */
- if (tcp_opts_tstamp (&child0->rcv_opts))
+ if (session_stream_accept (&child->connection, lc->c_s_index,
+ lc->c_thread_index, 0 /* notify */ ))
{
- child0->tsval_recent = child0->rcv_opts.tsval;
- child0->tsval_recent_age = tcp_time_now ();
+ tcp_connection_cleanup (child);
+ error = TCP_ERROR_CREATE_SESSION_FAIL;
+ goto done;
}
- if (tcp_opts_wscale (&child0->rcv_opts))
- child0->snd_wscale = child0->rcv_opts.wscale;
-
- child0->snd_wnd = clib_net_to_host_u16 (th0->window)
- << child0->snd_wscale;
- child0->snd_wl1 = vnet_buffer (b0)->tcp.seq_number;
- child0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number;
-
- tcp_connection_init_vars (child0);
- child0->rto = TCP_RTO_MIN;
+ child->tx_fifo_size = transport_tx_fifo_size (&child->connection);
- if (session_stream_accept (&child0->connection, lc0->c_s_index,
- lc0->c_thread_index, 0 /* notify */ ))
- {
- tcp_connection_cleanup (child0);
- error0 = TCP_ERROR_CREATE_SESSION_FAIL;
- goto drop;
- }
+ tcp_send_synack (child);
- TCP_EVT (TCP_EVT_SYN_RCVD, child0, 1);
- child0->tx_fifo_size = transport_tx_fifo_size (&child0->connection);
- tcp_send_synack (child0);
+ TCP_EVT (TCP_EVT_SYN_RCVD, child, 1);
- drop:
+ done:
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
{
- t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
- clib_memcpy_fast (&t0->tcp_header, th0, sizeof (t0->tcp_header));
- clib_memcpy_fast (&t0->tcp_connection, lc0,
- sizeof (t0->tcp_connection));
+ tcp_rx_trace_t *t;
+ t = vlib_add_trace (vm, node, b, sizeof (*t));
+ clib_memcpy_fast (&t->tcp_header, tcp_buffer_hdr (b),
+ sizeof (t->tcp_header));
+ clib_memcpy_fast (&t->tcp_connection, lc,
+ sizeof (t->tcp_connection));
}
- n_syns += (error0 == TCP_ERROR_NONE);
+ n_syns += (error == TCP_ERROR_NONE);
}
tcp_inc_counter (listen, TCP_ERROR_SYNS_RCVD, n_syns);
static inline void
tcp_input_dispatch_buffer (tcp_main_t * tm, tcp_connection_t * tc,
- vlib_buffer_t * b, u16 * next, u32 * error)
+ vlib_buffer_t * b, u16 * next,
+ vlib_node_runtime_t * error_node)
{
tcp_header_t *tcp;
+ u32 error;
u8 flags;
tcp = tcp_buffer_hdr (b);
flags = tcp->flags & filter_flags;
*next = tm->dispatch_table[tc->state][flags].next;
- *error = tm->dispatch_table[tc->state][flags].error;
+ error = tm->dispatch_table[tc->state][flags].error;
tc->segs_in += 1;
- if (PREDICT_FALSE (*error == TCP_ERROR_DISPATCH
- || *next == TCP_INPUT_NEXT_RESET))
+ if (PREDICT_FALSE (error != TCP_ERROR_NONE))
{
- /* Overload tcp flags to store state */
- tcp_state_t state = tc->state;
- vnet_buffer (b)->tcp.flags = tc->state;
-
- if (*error == TCP_ERROR_DISPATCH)
+ b->error = error_node->errors[error];
+ if (error == TCP_ERROR_DISPATCH)
clib_warning ("tcp conn %u disp error state %U flags %U",
- tc->c_c_index, format_tcp_state, state,
+ tc->c_c_index, format_tcp_state, tc->state,
format_tcp_flags, (int) flags);
}
}
tcp_main_t *tm = vnet_get_tcp_main ();
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 nexts[VLIB_FRAME_SIZE], *next;
+ vlib_node_runtime_t *error_node;
tcp_set_time_now (tcp_get_worker (thread_index));
+ error_node = vlib_node_get_runtime (vm, tcp_node_index (input, is_ip4));
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
vlib_get_buffers (vm, from, bufs, n_left_from);
vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index;
vnet_buffer (b[1])->tcp.connection_index = tc1->c_c_index;
- tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], &error0);
- tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], &error1);
+ tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], error_node);
+ tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], error_node);
}
else
{
{
ASSERT (tcp_lookup_is_valid (tc0, b[0], tcp_buffer_hdr (b[0])));
vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index;
- tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], &error0);
+ tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], error_node);
}
else
- tcp_input_set_error_next (tm, &next[0], &error0, is_ip4);
+ {
+ tcp_input_set_error_next (tm, &next[0], &error0, is_ip4);
+ b[0]->error = error_node->errors[error0];
+ }
if (PREDICT_TRUE (tc1 != 0))
{
ASSERT (tcp_lookup_is_valid (tc1, b[1], tcp_buffer_hdr (b[1])));
vnet_buffer (b[1])->tcp.connection_index = tc1->c_c_index;
- tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], &error1);
+ tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], error_node);
}
else
- tcp_input_set_error_next (tm, &next[1], &error1, is_ip4);
+ {
+ tcp_input_set_error_next (tm, &next[1], &error1, is_ip4);
+ b[1]->error = error_node->errors[error1];
+ }
}
b += 2;
{
ASSERT (tcp_lookup_is_valid (tc0, b[0], tcp_buffer_hdr (b[0])));
vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index;
- tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], &error0);
+ tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], error_node);
}
else
- tcp_input_set_error_next (tm, &next[0], &error0, is_ip4);
+ {
+ tcp_input_set_error_next (tm, &next[0], &error0, is_ip4);
+ b[0]->error = error_node->errors[error0];
+ }
b += 1;
next += 1;
_(LISTEN, TCP_FLAG_FIN | TCP_FLAG_RST, TCP_INPUT_NEXT_DROP,
TCP_ERROR_SEGMENT_INVALID);
_(LISTEN, TCP_FLAG_FIN | TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_DROP,
- TCP_ERROR_NONE);
+ TCP_ERROR_SEGMENT_INVALID);
_(LISTEN, TCP_FLAG_FIN | TCP_FLAG_SYN, TCP_INPUT_NEXT_DROP,
TCP_ERROR_SEGMENT_INVALID);
_(LISTEN, TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_DROP,
TCP_ERROR_NONE);
_(LAST_ACK, TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK,
TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
- _(TIME_WAIT, TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _(TIME_WAIT, TCP_FLAG_SYN, TCP_INPUT_NEXT_LISTEN, TCP_ERROR_NONE);
_(TIME_WAIT, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(TIME_WAIT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
TCP_ERROR_NONE);
_(CLOSED, TCP_FLAG_RST, TCP_INPUT_NEXT_DROP, TCP_ERROR_CONNECTION_CLOSED);
_(CLOSED, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_DROP,
TCP_ERROR_CONNECTION_CLOSED);
- _(CLOSED, TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET, TCP_ERROR_NONE);
- _(CLOSED, TCP_FLAG_SYN, TCP_INPUT_NEXT_RESET, TCP_ERROR_NONE);
+ _(CLOSED, TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET, TCP_ERROR_CONNECTION_CLOSED);
+ _(CLOSED, TCP_FLAG_SYN, TCP_INPUT_NEXT_RESET, TCP_ERROR_CONNECTION_CLOSED);
_(CLOSED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET,
- TCP_ERROR_NONE);
+ TCP_ERROR_CONNECTION_CLOSED);
#undef _
}