CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
tcp_tx_trace_t *t = va_arg (*args, tcp_tx_trace_t *);
- uword indent = format_get_indent (s);
+ u32 indent = format_get_indent (s);
s = format (s, "%U\n%U%U",
format_tcp_header, &t->tcp_header, 128,
case TCP_STATE_SYN_SENT:
return tcp_make_syn_options (opts, tc->rcv_wscale);
default:
- clib_warning ("Not handled!");
+ clib_warning ("State not handled! %d", state);
return 0;
}
}
always_inline int
tcp_alloc_tx_buffers (tcp_main_t * tm, u8 thread_index, u32 n_free_buffers)
{
+ vlib_main_t *vm = vlib_get_main ();
u32 current_length = vec_len (tm->tx_buffers[thread_index]);
+ u32 n_allocated;
vec_validate (tm->tx_buffers[thread_index],
current_length + n_free_buffers - 1);
- _vec_len (tm->tx_buffers[thread_index]) = current_length
- + vlib_buffer_alloc (vlib_get_main (),
- &tm->tx_buffers[thread_index][current_length],
- n_free_buffers);
+ n_allocated =
+ vlib_buffer_alloc (vm, &tm->tx_buffers[thread_index][current_length],
+ n_free_buffers);
+ _vec_len (tm->tx_buffers[thread_index]) = current_length + n_allocated;
/* buffer shortage, report failure */
if (vec_len (tm->tx_buffers[thread_index]) == 0)
{
{
u32 *my_tx_buffers;
u32 thread_index = vlib_get_thread_index ();
+
+ TCP_DBG_BUFFER_ALLOC_MAYBE_FAIL (thread_index);
+
if (PREDICT_FALSE (vec_len (tm->tx_buffers[thread_index]) == 0))
{
if (tcp_alloc_tx_buffers (tm, thread_index, VLIB_FRAME_SIZE))
tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b)
{
ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
- b->flags &= VLIB_BUFFER_FREE_LIST_INDEX_MASK;
+ b->flags &= VLIB_BUFFER_NON_DEFAULT_FREELIST;
b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
b->total_length_not_including_first_buffer = 0;
vnet_buffer (b)->tcp.flags = 0;
-
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
/* Leave enough space for headers */
return vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
}
initial_wnd);
vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
tcp_options_write ((u8 *) (th + 1), &snd_opts);
-
- tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN,
- tc->rto * TCP_TO_TIMER_TICK);
}
/**
}
/*
 * Patch view of the ip-lookup enqueue helper ('-' lines are removed,
 * '+' lines are added by the change).
 *
 * New behavior: instead of allocating a one-packet frame per call, the
 * buffer index is appended to a frame cached per address family and per
 * thread in tm->ip_lookup_tx_frames. The frame is handed to the lookup
 * node only when the caller asks for a flush or the frame is full.
 *
 * vm      - vlib main used for frame get/put
 * b       - buffer being sent; its error and VLIB_TX sw_if_index are reset
 * bi      - buffer index stored in the frame
 * is_ip4  - non-zero selects ip4_lookup_node, zero selects ip6_lookup_node
 * flush   - non-zero hands the frame to the node right after appending
 */
always_inline void
-tcp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
- u8 is_ip4)
+tcp_enqueue_to_ip_lookup_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
+ u8 is_ip4, u8 flush)
{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ u32 thread_index = vlib_get_thread_index ();
u32 *to_next, next_index;
vlib_frame_t *f;
b->error = 0;
/* Default FIB for now */
- vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
/* Send to IP lookup */
next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
- f = vlib_get_frame_to_node (vm, next_index);
+ tcp_trajectory_add_start (b, 1);
+
/* Reuse the pending frame for this family/thread; slot 0 is v4, 1 is v6 */
+ f = tm->ip_lookup_tx_frames[!is_ip4][thread_index];
+ if (!f)
+ {
/* No pending frame: get a fresh one and remember it for later calls */
+ f = vlib_get_frame_to_node (vm, next_index);
+ ASSERT (f);
+ tm->ip_lookup_tx_frames[!is_ip4][thread_index] = f;
+ }
- /* Enqueue the packet */
to_next = vlib_frame_vector_args (f);
- to_next[0] = bi;
- f->n_vectors = 1;
- vlib_put_frame_to_node (vm, next_index, f);
/* Append after the entries already queued in this frame */
+ to_next[f->n_vectors] = bi;
+ f->n_vectors += 1;
/* Dispatch on explicit flush or when the frame is full, then drop the
 * cached pointer so the next call starts a new frame */
+ if (flush || f->n_vectors == VLIB_FRAME_SIZE)
+ {
+ vlib_put_frame_to_node (vm, next_index, f);
+ tm->ip_lookup_tx_frames[!is_ip4][thread_index] = 0;
+ }
+}
+
/*
 * Enqueue buffer b (index bi) to the v4/v6 ip lookup node and flush the
 * frame immediately: calls tcp_enqueue_to_ip_lookup_i with flush = 1.
 */
+always_inline void
+tcp_enqueue_to_ip_lookup_now (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
+ u8 is_ip4)
+{
+ tcp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, 1);
+}
+
/*
 * Enqueue buffer b (index bi) to the v4/v6 ip lookup node without forcing
 * a flush: calls tcp_enqueue_to_ip_lookup_i with flush = 0, so the frame is
 * dispatched only once it is full or something else flushes it.
 */
+always_inline void
+tcp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
+ u8 is_ip4)
+{
+ tcp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, 0);
}
always_inline void
/* Decide where to send the packet */
next_index = is_ip4 ? tcp4_output_node.index : tcp6_output_node.index;
-
- /* Initialize the trajectory trace, if configured */
- if (VLIB_BUFFER_TRACE_TRAJECTORY > 0)
- {
- b->pre_data[0] = 1;
- b->pre_data[1] = next_index;
- }
+ tcp_trajectory_add_start (b, 2);
/* Get frame to v4/6 output node */
f = tm->tx_frames[!is_ip4][thread_index];
}
tcp_reuse_buffer (vm, b0);
+ tcp_trajectory_add_start (b0, 4);
th0 = vlib_buffer_push_tcp_net_order (b0, dst_port, src_port, seq, ack,
sizeof (tcp_header_t), flags, 0);
ASSERT (!bogus);
}
- tcp_enqueue_to_ip_lookup (vm, b, bi, is_ip4);
+ tcp_enqueue_to_ip_lookup_now (vm, b, bi, is_ip4);
TCP_EVT_DBG (TCP_EVT_RST_SENT, tc);
}
opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
ASSERT (opts_write_len == tc->snd_opts_len);
vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
- tcp_enqueue_to_output_now (vm, b, bi, tc->c_is_ip4);
+ if (tc->c_is_ip4)
+ {
+ ip4_header_t *ih4;
+ ih4 = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip.ip4,
+ &tc->c_rmt_ip.ip4, IP_PROTOCOL_TCP, 0);
+ th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
+ }
+ else
+ {
+ int bogus = ~0;
+ ip6_header_t *ih6;
+ ih6 = vlib_buffer_push_ip6 (vm, b, &tc->c_lcl_ip.ip6,
+ &tc->c_rmt_ip.ip6, IP_PROTOCOL_TCP);
+ th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
+ ASSERT (!bogus);
+ }
+ tcp_enqueue_to_ip_lookup_now (vm, b, bi, tc->c_is_ip4);
+ TCP_EVT_DBG (TCP_EVT_RST_SENT, tc);
}
void
tcp_main_t *tm = vnet_get_tcp_main ();
vlib_main_t *vm = vlib_get_main ();
+ /*
+ * Setup retransmit and establish timers before requesting buffer
+ * such that we can return if we've run out.
+ */
+ tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME);
+ tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN,
+ tc->rto * TCP_TO_TIMER_TICK);
+
if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
return;
tc->rtt_seq = tc->snd_nxt;
tc->rto_boff = 0;
- /* Set the connection establishment timer */
- tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME);
-
tcp_push_ip_hdr (tm, tc, b);
tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4);
TCP_EVT_DBG (TCP_EVT_SYN_SENT, tc);
}
/**
- * Flush both v4 and v6 tx frames for thread index
+ * Flush ip lookup tx frames populated by timer pops
+ */
/*
 * If a pending ip-lookup frame exists for the given thread and address
 * family, hand it to the matching lookup node and clear the cached slot.
 * Does nothing when no frame is pending.
 */
+always_inline void
+tcp_flush_frame_to_ip_lookup (vlib_main_t * vm, u8 thread_index, u8 is_ip4)
+{
/* Slot index is !is_ip4: 0 holds the v4 frame, 1 holds the v6 frame */
+ if (tcp_main.ip_lookup_tx_frames[!is_ip4][thread_index])
+ {
+ u32 next_index;
+ next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
+ vlib_put_frame_to_node (vm, next_index,
+ tcp_main.ip_lookup_tx_frames[!is_ip4]
+ [thread_index]);
/* Frame handed off; forget it so a later enqueue gets a new one */
+ tcp_main.ip_lookup_tx_frames[!is_ip4][thread_index] = 0;
+ }
+}
+
+/**
+ * Flush v4 and v6 tcp and ip-lookup tx frames for thread index
*/
void
tcp_flush_frames_to_output (u8 thread_index)
vlib_main_t *vm = vlib_get_main ();
tcp_flush_frame_to_output (vm, thread_index, 1);
tcp_flush_frame_to_output (vm, thread_index, 0);
+ tcp_flush_frame_to_ip_lookup (vm, thread_index, 1);
+ tcp_flush_frame_to_ip_lookup (vm, thread_index, 0);
}
/**
/*
 * Patch view of tcp_send_fin ('-' lines removed, '+' lines added).
 *
 * Builds a FIN for connection tc and enqueues it to the output node right
 * away. In the new version the retransmit timer is force-updated before a
 * buffer is requested, so the early return on buffer shortage still leaves
 * the timer updated. When the FIN was already sent once, snd_nxt is moved
 * back to snd_una while the segment is built and restored to snd_una_max
 * afterwards; on the first FIN, snd_una_max is advanced by one for it.
 */
void
tcp_send_fin (tcp_connection_t * tc)
{
- vlib_buffer_t *b;
- u32 bi;
tcp_main_t *tm = vnet_get_tcp_main ();
vlib_main_t *vm = vlib_get_main ();
+ vlib_buffer_t *b;
+ u32 bi;
+ u8 fin_snt = 0;
/* Update the timer first; the buffer request below may bail out */
+ tcp_retransmit_timer_force_update (tc);
if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
return;
b = vlib_get_buffer (vm, bi);
- /* buffer will be initialized by in tcp_make_fin */
/* NOTE(review): fin_snt is a u8 holding the masked flag value; this
 * relies on TCP_CONN_FINSNT living in the low 8 bits - confirm */
+ fin_snt = tc->flags & TCP_CONN_FINSNT;
/* FIN retransmit: build the segment from the first un-acked byte */
+ if (fin_snt)
+ tc->snd_nxt = tc->snd_una;
tcp_make_fin (tc, b);
tcp_enqueue_to_output_now (vm, b, bi, tc->c_is_ip4);
- if (!(tc->flags & TCP_CONN_FINSNT))
+ if (!fin_snt)
{
/* First FIN: record it as sent and clear the pending-FIN flag */
tc->flags |= TCP_CONN_FINSNT;
tc->flags &= ~TCP_CONN_FINPNDG;
- tc->snd_nxt += 1;
+ /* Account for the FIN */
+ tc->snd_una_max += 1;
+ tc->snd_nxt = tc->snd_una_max;
+ }
+ else
+ {
/* Retransmit: undo the temporary rewind done before tcp_make_fin */
+ tc->snd_nxt = tc->snd_una_max;
}
- tcp_retransmit_timer_force_update (tc);
TCP_EVT_DBG (TCP_EVT_FIN_SENT, tc);
}
ASSERT (n_peeked == len_to_deq);
n_bytes += n_peeked;
chain_b->current_length = n_peeked;
- chain_b->flags &= VLIB_BUFFER_FREE_LIST_INDEX_MASK;
+ chain_b->flags &= VLIB_BUFFER_NON_DEFAULT_FREELIST;
chain_b->next_buffer = 0;
/* update previous buffer */
tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss);
tc->cwnd = tcp_loss_wnd (tc);
tc->snd_congestion = tc->snd_una_max;
-
+ tc->rtt_ts = 0;
tcp_recovery_on (tc);
}
if (tcp_is_lost_fin (tc))
{
tcp_send_fin (tc);
+ tc->rto_boff += 1;
+ tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
+ return;
+ }
+
+ /* Shouldn't be here */
+ if (tc->snd_una == tc->snd_una_max)
+ {
+ tcp_recovery_off (tc);
return;
}
/* Increment RTO backoff (also equal to number of retries) and go back
* to first un-acked byte */
tc->rto_boff += 1;
- tc->snd_nxt = tc->snd_una;
/* First retransmit timeout */
if (tc->rto_boff == 1)
tcp_rtx_timeout_cc (tc);
- /* Exponential backoff */
+ tc->snd_nxt = tc->snd_una;
tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 1);
if (n_bytes == 0)
{
- ASSERT (!b);
- if (tc->snd_una == tc->snd_una_max)
- return;
- ASSERT (tc->rto_boff > 1 && tc->snd_una == tc->snd_congestion);
- clib_warning ("retransmit fail: %U", format_tcp_connection, tc, 2);
- /* Try again eventually */
tcp_retransmit_timer_set (tc);
return;
}
if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
+ tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN,
+ tc->rto * TCP_TO_TIMER_TICK);
+
if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
return;
else if (tc->state == TCP_STATE_SYN_RCVD)
{
tc->rto_boff += 1;
- tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
+ if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
+ tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
tc->rtt_ts = 0;
if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
- return;
+ {
+ tcp_retransmit_timer_force_update (tc);
+ return;
+ }
b = vlib_get_buffer (vm, bi);
tcp_make_synack (tc, b);
else
{
ASSERT (tc->state == TCP_STATE_CLOSED);
- clib_warning ("connection closed ...");
return;
}
}
u32 thread_index = vlib_get_thread_index ();
tcp_connection_t *tc;
vlib_buffer_t *b;
- u32 bi, old_snd_nxt, max_snd_bytes, available_bytes, offset;
+ u32 bi, max_snd_bytes, available_bytes, offset;
int n_bytes = 0;
u8 *data;
n_bytes = stream_session_peek_bytes (&tc->connection, data, offset,
max_snd_bytes);
b->current_length = n_bytes;
- ASSERT (n_bytes != 0 && (tc->snd_nxt == tc->snd_una_max || tc->rto_boff > 1
- || tcp_timer_is_active (tc,
- TCP_TIMER_RETRANSMIT)));
+ ASSERT (n_bytes != 0 && (tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)
+ || tc->snd_nxt == tc->snd_una_max
+ || tc->rto_boff > 1));
- /* Allow updating of snd_una_max but don't update snd_nxt */
- old_snd_nxt = tc->snd_nxt;
tcp_push_hdr_i (tc, b, tc->state, 0);
- tc->snd_nxt = old_snd_nxt;
tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
/* Just sent new data, enable retransmit */
/*
 * Patch view ('-' lines removed, '+' lines added): the session lookup
 * switches from stream_session_get to session_get.
 *
 * Looks up the session for tc by its session index and thread index and
 * returns the result of svm_fifo_has_ooo_data on that session's rx fifo.
 */
always_inline u32
tcp_session_has_ooo_data (tcp_connection_t * tc)
{
- stream_session_t *s =
- stream_session_get (tc->c_s_index, tc->c_thread_index);
+ stream_session_t *s = session_get (tc->c_s_index, tc->c_thread_index);
return svm_fifo_has_ooo_data (s->server_rx_fifo);
}
tc->rtt_ts = tcp_time_now ();
tc->rtt_seq = tc->snd_nxt;
}
+ tcp_trajectory_add_start (b, 3);
return 0;
}
}
/* Prepare to send to IP lookup */
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
next0 = TCP_RESET_NEXT_IP_LOOKUP;
done: