CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
tcp_tx_trace_t *t = va_arg (*args, tcp_tx_trace_t *);
+ tcp_connection_t *tc = &t->tcp_connection;
u32 indent = format_get_indent (s);
- s = format (s, "%U\n%U%U",
- format_tcp_header, &t->tcp_header, 128,
- format_white_space, indent,
- format_tcp_connection, &t->tcp_connection, 1);
+ s = format (s, "%U state %U\n%U%U", format_tcp_connection_id, tc,
+ format_tcp_state, tc->state, format_white_space, indent,
+ format_tcp_header, &t->tcp_header, 128);
return s;
}
#endif /* CLIB_MARCH_VARIANT */
static int
-tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b0,
- tcp_state_t state, u8 thread_index, u8 is_ip4)
+tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b, u8 is_ip4)
{
ip4_header_t *ih4;
ip6_header_t *ih6;
- tcp_header_t *th0;
- ip4_address_t src_ip40, dst_ip40;
- ip6_address_t src_ip60, dst_ip60;
+ tcp_header_t *th;
+ ip4_address_t src_ip4, dst_ip4;
+ ip6_address_t src_ip6, dst_ip6;
u16 src_port, dst_port;
- u32 tmp;
- u32 seq, ack;
+ u32 tmp, len, seq, ack;
u8 flags;
/* Find IP and TCP headers */
- th0 = tcp_buffer_hdr (b0);
+ th = tcp_buffer_hdr (b);
/* Save src and dst ip */
if (is_ip4)
{
- ih4 = vlib_buffer_get_current (b0);
+ ih4 = vlib_buffer_get_current (b);
ASSERT ((ih4->ip_version_and_header_length & 0xF0) == 0x40);
- src_ip40.as_u32 = ih4->src_address.as_u32;
- dst_ip40.as_u32 = ih4->dst_address.as_u32;
+ src_ip4.as_u32 = ih4->src_address.as_u32;
+ dst_ip4.as_u32 = ih4->dst_address.as_u32;
}
else
{
- ih6 = vlib_buffer_get_current (b0);
+ ih6 = vlib_buffer_get_current (b);
ASSERT ((ih6->ip_version_traffic_class_and_flow_label & 0xF0) == 0x60);
- clib_memcpy_fast (&src_ip60, &ih6->src_address, sizeof (ip6_address_t));
- clib_memcpy_fast (&dst_ip60, &ih6->dst_address, sizeof (ip6_address_t));
+ clib_memcpy_fast (&src_ip6, &ih6->src_address, sizeof (ip6_address_t));
+ clib_memcpy_fast (&dst_ip6, &ih6->dst_address, sizeof (ip6_address_t));
}
- src_port = th0->src_port;
- dst_port = th0->dst_port;
+ src_port = th->src_port;
+ dst_port = th->dst_port;
+ flags = TCP_FLAG_RST;
- /* Try to determine what/why we're actually resetting */
- if (state == TCP_STATE_CLOSED)
+ /*
+ * RFC 793. If the ACK bit is off, sequence number zero is used,
+ * <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK>
+ * If the ACK bit is on,
+ * <SEQ=SEG.ACK><CTL=RST>
+ */
+ if (tcp_ack (th))
{
- if (!tcp_syn (th0))
- return -1;
-
- tmp = clib_net_to_host_u32 (th0->seq_number);
-
- /* Got a SYN for no listener. */
- flags = TCP_FLAG_RST | TCP_FLAG_ACK;
- ack = clib_host_to_net_u32 (tmp + 1);
- seq = 0;
+ seq = th->ack_number;
+ ack = 0;
}
else
{
- flags = TCP_FLAG_RST;
- seq = th0->ack_number;
- ack = 0;
+ flags |= TCP_FLAG_ACK;
+ tmp = clib_net_to_host_u32 (th->seq_number);
+ len = vnet_buffer (b)->tcp.data_len + tcp_is_syn (th) + tcp_is_fin (th);
+ ack = clib_host_to_net_u32 (tmp + len);
+ seq = 0;
}
- tcp_reuse_buffer (vm, b0);
- tcp_trajectory_add_start (b0, 4);
- th0 = vlib_buffer_push_tcp_net_order (b0, dst_port, src_port, seq, ack,
- sizeof (tcp_header_t), flags, 0);
+ tcp_reuse_buffer (vm, b);
+ tcp_trajectory_add_start (b, 4);
+ th = vlib_buffer_push_tcp_net_order (b, dst_port, src_port, seq, ack,
+ sizeof (tcp_header_t), flags, 0);
if (is_ip4)
{
- ih4 = vlib_buffer_push_ip4 (vm, b0, &dst_ip40, &src_ip40,
+ ih4 = vlib_buffer_push_ip4 (vm, b, &dst_ip4, &src_ip4,
IP_PROTOCOL_TCP, 1);
- th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih4);
+ th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
}
else
{
int bogus = ~0;
- ih6 = vlib_buffer_push_ip6 (vm, b0, &dst_ip60, &src_ip60,
- IP_PROTOCOL_TCP);
- th0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ih6, &bogus);
+ ih6 = vlib_buffer_push_ip6 (vm, b, &dst_ip6, &src_ip6, IP_PROTOCOL_TCP);
+ th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
ASSERT (!bogus);
}
TCP_EVT (TCP_EVT_SYNACK_SENT, tc);
}
-/**
- * Flush ip lookup tx frames populated by timer pops
- */
-static void
-tcp_flush_frame_to_ip_lookup (tcp_worker_ctx_t * wrk, u8 is_ip4)
-{
- if (wrk->ip_lookup_tx_frames[!is_ip4])
- {
- u32 next_index;
- next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
- vlib_put_frame_to_node (wrk->vm, next_index,
- wrk->ip_lookup_tx_frames[!is_ip4]);
- wrk->ip_lookup_tx_frames[!is_ip4] = 0;
- }
-}
-
-/**
- * Flush v4 and v6 tcp and ip-lookup tx frames for thread index
- */
-void
-tcp_flush_frames_to_output (tcp_worker_ctx_t * wrk)
-{
- tcp_flush_frame_to_ip_lookup (wrk, 1);
- tcp_flush_frame_to_ip_lookup (wrk, 0);
-}
-
/**
* Send FIN
*/
* Sends delayed ACK when timer expires
*/
void
-tcp_timer_delack_handler (u32 index)
+tcp_timer_delack_handler (tcp_connection_t * tc)
{
- u32 thread_index = vlib_get_thread_index ();
- tcp_connection_t *tc;
-
- tc = tcp_connection_get (index, thread_index);
- tc->timers[TCP_TIMER_DELACK] = TCP_TIMER_HANDLE_INVALID;
/* tc is supplied by the caller: this handler performs no connection
 * lookup and no timer-handle bookkeeping of its own, it only sends the
 * ACK.
 * NOTE(review): presumably the timer dispatch code resolves the
 * connection and invalidates the DELACK handle before calling in -
 * confirm against the caller. */
tcp_send_ack (tc);
}
tc->bytes_retrans += n_bytes;
tc->segs_retrans += 1;
+ tcp_workerp_stats_inc (wrk, rxt_segs, 1);
TCP_EVT (TCP_EVT_CC_RTX, tc, offset, n_bytes);
return n_bytes;
}
void
-tcp_timer_retransmit_handler (u32 tc_index)
+tcp_timer_retransmit_handler (tcp_connection_t * tc)
{
- u32 thread_index = vlib_get_thread_index ();
- tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
+ tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
vlib_main_t *vm = wrk->vm;
- tcp_connection_t *tc;
vlib_buffer_t *b = 0;
u32 bi, n_bytes;
- tc = tcp_connection_get (tc_index, thread_index);
+ tcp_workerp_stats_inc (wrk, tr_events, 1);
- /* Note: the connection may have been closed and pool_put */
- if (PREDICT_FALSE (tc == 0 || tc->state == TCP_STATE_SYN_SENT))
+ /* Should be handled by a different handler */
+ if (PREDICT_FALSE (tc->state == TCP_STATE_SYN_SENT))
return;
- tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID;
-
/* Wait-close and retransmit could pop at the same time */
if (tc->state == TCP_STATE_CLOSED)
return;
session_transport_closing_notify (&tc->connection);
session_transport_closed_notify (&tc->connection);
tcp_connection_timers_reset (tc);
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
+ tcp_program_cleanup (wrk, tc);
+ tcp_workerp_stats_inc (wrk, tr_abort, 1);
return;
}
{
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
tcp_connection_timers_reset (tc);
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
+ tcp_program_cleanup (wrk, tc);
+ tcp_workerp_stats_inc (wrk, tr_abort, 1);
return;
}
* SYN retransmit timer handler. Active open only.
*/
void
-tcp_timer_retransmit_syn_handler (u32 tc_index)
+tcp_timer_retransmit_syn_handler (tcp_connection_t * tc)
{
- u32 thread_index = vlib_get_thread_index ();
- tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
+ tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
vlib_main_t *vm = wrk->vm;
- tcp_connection_t *tc;
vlib_buffer_t *b = 0;
u32 bi;
- tc = tcp_half_open_connection_get (tc_index);
-
/* Note: the connection may have transitioned to ESTABLISHED... */
- if (PREDICT_FALSE (tc == 0 || tc->state != TCP_STATE_SYN_SENT))
+ if (PREDICT_FALSE (tc->state != TCP_STATE_SYN_SENT))
return;
- tc->timers[TCP_TIMER_RETRANSMIT_SYN] = TCP_TIMER_HANDLE_INVALID;
-
/* Half-open connection actually moved to established but we were
* waiting for syn retransmit to pop to call cleanup from the right
* thread. */
*
*/
void
-tcp_timer_persist_handler (u32 index)
+tcp_timer_persist_handler (tcp_connection_t * tc)
{
- u32 thread_index = vlib_get_thread_index ();
- tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
+ tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
u32 bi, max_snd_bytes, available_bytes, offset;
tcp_main_t *tm = vnet_get_tcp_main ();
vlib_main_t *vm = wrk->vm;
- tcp_connection_t *tc;
vlib_buffer_t *b;
int n_bytes = 0;
u8 *data;
- tc = tcp_connection_get_if_valid (index, thread_index);
- if (!tc)
- return;
-
- /* Make sure timer handle is set to invalid */
- tc->timers[TCP_TIMER_PERSIST] = TCP_TIMER_HANDLE_INVALID;
-
/* Problem already solved or worse */
if (tc->state == TCP_STATE_CLOSED || tc->snd_wnd > tc->snd_mss
|| (tc->flags & TCP_CONN_FINSNT))
- return;
+ goto update_scheduler;
available_bytes = transport_max_tx_dequeue (&tc->connection);
offset = tc->snd_nxt - tc->snd_una;
}
if (available_bytes <= offset)
- {
- ASSERT (tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT));
- return;
- }
+ goto update_scheduler;
/* Increment RTO backoff */
tc->rto_boff += 1;
tcp_persist_timer_set (tc);
return;
}
+
b = vlib_get_buffer (vm, bi);
data = tcp_init_buffer (vm, b);
/* Just sent new data, enable retransmit */
tcp_retransmit_timer_update (tc);
+
+ return;
+
+update_scheduler:
+
+ if (tcp_is_descheduled (tc))
+ transport_connection_reschedule (&tc->connection);
}
/**
u32 n_segs_new;
int av_wnd;
+ /* Make sure we don't exceed available window and leave space
+ * for one more packet, to avoid zero window acks */
av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una);
- av_wnd = clib_max (av_wnd, 0);
+ av_wnd = clib_max (av_wnd - tc->snd_mss, 0);
snd_space = clib_min (snd_space, av_wnd);
snd_space = clib_min (max_deq, snd_space);
burst_size = clib_min (burst_size - n_segs,
* unSACKed sequence number SHOULD be returned, and RescueRxt set to
* RecoveryPoint. HighRxt MUST NOT be updated.
*/
- max_bytes = clib_min (tc->snd_mss,
- tc->snd_congestion - tc->snd_una);
+ hole = scoreboard_last_hole (sb);
+ max_bytes = clib_min (tc->snd_mss, hole->end - hole->start);
max_bytes = clib_min (max_bytes, snd_space);
- offset = tc->snd_congestion - tc->snd_una - max_bytes;
- sb->rescue_rxt = tc->snd_congestion;
+ offset = hole->end - tc->snd_una - max_bytes;
n_written = tcp_prepare_retransmit_segment (wrk, tc, offset,
max_bytes, &b);
if (!n_written)
goto done;
+ sb->rescue_rxt = tc->snd_congestion;
bi = vlib_get_buffer_index (vm, b);
tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
n_segs += 1;
if (!vec_len (tc->snd_sacks))
{
tcp_send_ack (tc);
+ tc->dupacks_out += 1;
tc->pending_dupacks = 0;
return 1;
}
tcp46_output_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 * to_next, u32 n_bufs)
{
- u32 n_trace = vlib_get_trace_count (vm, node);
tcp_connection_t *tc;
tcp_tx_trace_t *t;
vlib_buffer_t *b;
tcp_header_t *th;
int i;
- for (i = 0; i < clib_min (n_trace, n_bufs); i++)
+ for (i = 0; i < n_bufs; i++)
{
b = vlib_get_buffer (vm, to_next[i]);
+ if (!(b->flags & VLIB_BUFFER_IS_TRACED))
+ continue;
th = vlib_buffer_get_current (b);
tc = tcp_connection_get (vnet_buffer (b)->tcp.connection_index,
vm->thread_index);
tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * from_frame, u8 is_ip4)
{
+ u32 error0 = TCP_ERROR_RST_SENT, next0 = TCP_RESET_NEXT_IP_LOOKUP;
u32 n_left_from, next_index, *from, *to_next;
- u32 my_thread_index = vm->thread_index;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
while (n_left_from > 0 && n_left_to_next > 0)
{
- u32 bi0;
vlib_buffer_t *b0;
tcp_tx_trace_t *t0;
tcp_header_t *th0;
- u32 error0 = TCP_ERROR_RST_SENT, next0 = TCP_RESET_NEXT_IP_LOOKUP;
+ u32 bi0;
bi0 = from[0];
to_next[0] = bi0;
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
-
- if (tcp_make_reset_in_place (vm, b0, vnet_buffer (b0)->tcp.flags,
- my_thread_index, is_ip4))
- {
- error0 = TCP_ERROR_LOOKUP_DROPS;
- next0 = TCP_RESET_NEXT_DROP;
- goto done;
- }
+ tcp_make_reset_in_place (vm, b0, is_ip4);
/* Prepare to send to IP lookup */
vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
- next0 = TCP_RESET_NEXT_IP_LOOKUP;
- done:
b0->error = node->errors[error0];
b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))