return clib_min (tc->rcv_wnd, TCP_WND_MAX);
}
-/**
- * Compute and return window to advertise, scaled as per RFC1323
- */
-u32
-tcp_window_to_advertise (tcp_connection_t * tc, tcp_state_t state)
-{
- if (state < TCP_STATE_ESTABLISHED)
- return tcp_initial_window_to_advertise (tc);
-
- tcp_update_rcv_wnd (tc);
-
- if (tc->rcv_wnd == 0)
- {
- tc->flags |= TCP_CONN_SENT_RCV_WND0;
- }
- else
- {
- tc->flags &= ~TCP_CONN_SENT_RCV_WND0;
- }
-
- return tc->rcv_wnd >> tc->rcv_wscale;
-}
-
-void
+static void
tcp_update_rcv_wnd (tcp_connection_t * tc)
{
i32 observed_wnd;
tc->rcv_wnd = clib_min (wnd, TCP_WND_MAX << tc->rcv_wscale);
}
+/**
+ * Compute and return window to advertise, scaled as per RFC1323
+ *
+ * @param tc	connection for which the window is computed
+ * @param state	state the segment will be sent in; states before
+ *		ESTABLISHED fall back to the initial window
+ * @return	window value to place in the TCP header, already
+ *		right-shifted by the negotiated window scale
+ */
+static u32
+tcp_window_to_advertise (tcp_connection_t * tc, tcp_state_t state)
+{
+ if (state < TCP_STATE_ESTABLISHED)
+ return tcp_initial_window_to_advertise (tc);
+
+ /* Refresh tc->rcv_wnd before advertising it */
+ tcp_update_rcv_wnd (tc);
+
+ /* Record whether a zero window was advertised so other code can
+ * detect the zero-window condition via the connection flags */
+ if (tc->rcv_wnd == 0)
+ {
+ tc->flags |= TCP_CONN_SENT_RCV_WND0;
+ }
+ else
+ {
+ tc->flags &= ~TCP_CONN_SENT_RCV_WND0;
+ }
+
+ /* Header carries the window scaled down per RFC1323 */
+ return tc->rcv_wnd >> tc->rcv_wscale;
+}
+
/**
* Write TCP options to segment.
*/
-u32
+static u32
tcp_options_write (u8 * data, tcp_options_t * opts)
{
u32 opts_len = 0;
return opts_len;
}
-always_inline int
+static int
tcp_make_syn_options (tcp_options_t * opts, u8 wnd_scale)
{
u8 len = 0;
return len;
}
-always_inline int
+static int
tcp_make_synack_options (tcp_connection_t * tc, tcp_options_t * opts)
{
u8 len = 0;
return len;
}
-always_inline int
+static int
tcp_make_established_options (tcp_connection_t * tc, tcp_options_t * opts)
{
u8 len = 0;
}
/**
- * Update snd_mss to reflect the effective segment size that we can send
- * by taking into account all TCP options, including SACKs
+ * Update burst send vars
+ *
+ * - Updates snd_mss to reflect the effective segment size that we can send
+ * by taking into account all TCP options, including SACKs.
+ * - Caches 'on the wire' options for reuse.
+ * - Updates receive window which can be reused for a burst.
+ *
+ * This should *only* be called when doing bursts
 */
void
-tcp_update_snd_mss (tcp_connection_t * tc)
+tcp_update_burst_snd_vars (tcp_connection_t * tc)
{
+ tcp_main_t *tm = &tcp_main;
+
 /* Compute options to be used for connection. These may be reused when
 * sending data or to compute the effective mss (snd_mss) */
- tc->snd_opts_len =
- tcp_make_options (tc, &tc->snd_opts, TCP_STATE_ESTABLISHED);
+ tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts,
+ TCP_STATE_ESTABLISHED);
 /* XXX check if MTU has been updated */
 tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len;
 ASSERT (tc->snd_mss > 0);
+
+ /* Pre-serialize the options into the per-thread cache so burst sends
+ * can copy them into each header instead of rewriting them per packet */
+ tcp_options_write (tm->wrk_ctx[tc->c_thread_index].cached_opts,
+ &tc->snd_opts);
+
+ /* Refresh the receive window once for the whole burst */
+ tcp_update_rcv_wnd (tc);
}
void
tc->snd_mss -= TCP_OPTION_LEN_TIMESTAMP;
}
+/**
+ * Grow the per-thread tx buffer cache toward 'wanted' buffers
+ *
+ * @param n_bufs	in/out: current, then updated, cached-buffer count
+ * @param wanted	target cache size; must be greater than *n_bufs
+ * @return		number of buffers actually allocated (may be fewer
+ *			than requested if the buffer pool is short)
+ */
-always_inline int
+static int
tcp_alloc_tx_buffers (tcp_main_t * tm, u8 thread_index, u16 * n_bufs,
 u32 wanted)
{
+ tcp_worker_ctx_t *ctx = &tm->wrk_ctx[thread_index];
 vlib_main_t *vm = vlib_get_main ();
 u32 n_alloc;
 ASSERT (wanted > *n_bufs);
- vec_validate_aligned (tm->tx_buffers[thread_index], wanted - 1,
- CLIB_CACHE_LINE_BYTES);
- n_alloc = vlib_buffer_alloc (vm, &tm->tx_buffers[thread_index][*n_bufs],
+ vec_validate_aligned (ctx->tx_buffers, wanted - 1, CLIB_CACHE_LINE_BYTES);
+ n_alloc = vlib_buffer_alloc (vm, &ctx->tx_buffers[*n_bufs],
 wanted - *n_bufs);
 *n_bufs += n_alloc;
+ /* Shrink vector length back to the buffers actually held */
- _vec_len (tm->tx_buffers[thread_index]) = *n_bufs;
+ _vec_len (ctx->tx_buffers) = *n_bufs;
 return n_alloc;
}
tcp_get_free_buffer_index (tcp_main_t * tm, u32 * bidx)
{
u32 thread_index = vlib_get_thread_index ();
- u16 n_bufs = vec_len (tm->tx_buffers[thread_index]);
+ tcp_worker_ctx_t *ctx = &tm->wrk_ctx[thread_index];
+ u16 n_bufs = vec_len (ctx->tx_buffers);
TCP_DBG_BUFFER_ALLOC_MAYBE_FAIL (thread_index);
return -1;
}
}
- *bidx = tm->tx_buffers[thread_index][--n_bufs];
- _vec_len (tm->tx_buffers[thread_index]) = n_bufs;
+ *bidx = ctx->tx_buffers[--n_bufs];
+ _vec_len (ctx->tx_buffers) = n_bufs;
return 0;
}
-always_inline void *
+static void *
tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b)
{
if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
return vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
}
-always_inline void *
+static void *
tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b)
{
ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
/**
* Prepare ACK
*/
-void
+static void
tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state,
u8 flags)
{
next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
tcp_trajectory_add_start (b, 1);
- f = tm->ip_lookup_tx_frames[!is_ip4][thread_index];
+ f = tm->wrk_ctx[thread_index].ip_lookup_tx_frames[!is_ip4];
if (!f)
{
f = vlib_get_frame_to_node (vm, next_index);
ASSERT (f);
- tm->ip_lookup_tx_frames[!is_ip4][thread_index] = f;
+ tm->wrk_ctx[thread_index].ip_lookup_tx_frames[!is_ip4] = f;
}
to_next = vlib_frame_vector_args (f);
if (flush || f->n_vectors == VLIB_FRAME_SIZE)
{
vlib_put_frame_to_node (vm, next_index, f);
- tm->ip_lookup_tx_frames[!is_ip4][thread_index] = 0;
+ tm->wrk_ctx[thread_index].ip_lookup_tx_frames[!is_ip4] = 0;
}
}
+/* Enqueue buffer to ip lookup and flush the frame immediately
+ * (trailing argument 1 selects the flush-now path) */
-always_inline void
+static void
tcp_enqueue_to_ip_lookup_now (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
 u8 is_ip4, u32 fib_index)
{
 tcp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, fib_index, 1);
}
-always_inline void
+static void
tcp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
u8 is_ip4, u32 fib_index)
{
tcp_trajectory_add_start (b, 2);
/* Get frame to v4/6 output node */
- f = tm->tx_frames[!is_ip4][thread_index];
+ f = tm->wrk_ctx[thread_index].tx_frames[!is_ip4];
if (!f)
{
f = vlib_get_frame_to_node (vm, next_index);
ASSERT (f);
- tm->tx_frames[!is_ip4][thread_index] = f;
+ tm->wrk_ctx[thread_index].tx_frames[!is_ip4] = f;
}
to_next = vlib_frame_vector_args (f);
to_next[f->n_vectors] = bi;
if (flush || f->n_vectors == VLIB_FRAME_SIZE)
{
vlib_put_frame_to_node (vm, next_index, f);
- tm->tx_frames[!is_ip4][thread_index] = 0;
+ tm->wrk_ctx[thread_index].tx_frames[!is_ip4] = 0;
}
}
+/* Enqueue buffer to tcp output without forcing an immediate frame flush
+ * (trailing argument 0 defers the flush) */
-always_inline void
+static void
tcp_enqueue_to_output (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, u8 is_ip4)
{
 tcp_enqueue_to_output_i (vm, b, bi, is_ip4, 0);
}
+/* Enqueue buffer to tcp output and flush the frame immediately
+ * (trailing argument 1 selects the flush-now path) */
-always_inline void
+static void
tcp_enqueue_to_output_now (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
 u8 is_ip4)
{
 tcp_enqueue_to_output_i (vm, b, bi, is_ip4, 1);
}
-int
+static int
tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b0,
tcp_state_t state, u8 thread_index, u8 is_ip4)
{
TCP_EVT_DBG (TCP_EVT_RST_SENT, tc);
}
-void
+static void
tcp_push_ip_hdr (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b)
{
tcp_header_t *th = vlib_buffer_get_current (b);
+/**
+ * Flush this thread's pending tx frame, if any, to the tcp4/tcp6
+ * output node and clear the cached frame pointer
+ */
void
tcp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index, u8 is_ip4)
{
- if (tcp_main.tx_frames[!is_ip4][thread_index])
+ if (tcp_main.wrk_ctx[thread_index].tx_frames[!is_ip4])
 {
 u32 next_index;
 next_index = is_ip4 ? tcp4_output_node.index : tcp6_output_node.index;
 vlib_put_frame_to_node (vm, next_index,
- tcp_main.tx_frames[!is_ip4][thread_index]);
- tcp_main.tx_frames[!is_ip4][thread_index] = 0;
+ tcp_main.
+ wrk_ctx[thread_index].tx_frames[!is_ip4]);
+ tcp_main.wrk_ctx[thread_index].tx_frames[!is_ip4] = 0;
 }
}
/**
 * Flush ip lookup tx frames populated by timer pops
 *
 * Hands this thread's pending ip4/ip6-lookup frame, if any, to the
 * lookup node and clears the cached frame pointer.
 */
-always_inline void
+static void
tcp_flush_frame_to_ip_lookup (vlib_main_t * vm, u8 thread_index, u8 is_ip4)
{
- if (tcp_main.ip_lookup_tx_frames[!is_ip4][thread_index])
+ if (tcp_main.wrk_ctx[thread_index].ip_lookup_tx_frames[!is_ip4])
 {
 u32 next_index;
 next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
 vlib_put_frame_to_node (vm, next_index,
- tcp_main.ip_lookup_tx_frames[!is_ip4]
- [thread_index]);
- tcp_main.ip_lookup_tx_frames[!is_ip4][thread_index] = 0;
+ tcp_main.
+ wrk_ctx[thread_index].ip_lookup_tx_frames
+ [!is_ip4]);
+ tcp_main.wrk_ctx[thread_index].ip_lookup_tx_frames[!is_ip4] = 0;
 }
}
/**
* Push TCP header and update connection variables
*/
-static void
+always_inline void
tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b,
- tcp_state_t next_state, u8 compute_opts)
+ tcp_state_t next_state, u8 compute_opts, u8 maybe_burst)
{
u32 advertise_wnd, data_len;
- u8 tcp_hdr_opts_len, opts_write_len, flags;
+ u8 tcp_hdr_opts_len, flags;
+ tcp_main_t *tm = &tcp_main;
tcp_header_t *th;
- data_len = b->current_length + b->total_length_not_including_first_buffer;
- ASSERT (!b->total_length_not_including_first_buffer
- || (b->flags & VLIB_BUFFER_NEXT_PRESENT));
+ data_len = b->current_length;
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ data_len += b->total_length_not_including_first_buffer;
+
vnet_buffer (b)->tcp.flags = 0;
+ vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
if (compute_opts)
tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
- advertise_wnd = tcp_window_to_advertise (tc, next_state);
+
+ if (maybe_burst)
+ advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
+ else
+ advertise_wnd = tcp_window_to_advertise (tc, next_state);
+
flags = tcp_make_state_flags (tc, next_state);
- /* Push header and options */
th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
tc->rcv_nxt, tcp_hdr_opts_len, flags,
advertise_wnd);
- opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
- ASSERT (opts_write_len == tc->snd_opts_len);
- vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
+ if (maybe_burst)
+ {
+ clib_memcpy ((u8 *) (th + 1),
+ tm->wrk_ctx[tc->c_thread_index].cached_opts,
+ tc->snd_opts_len);
+ }
+ else
+ {
+ u8 len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
+ ASSERT (len == tc->snd_opts_len);
+ }
/*
* Update connection variables
TCP_EVT_DBG (TCP_EVT_PKTIZE, tc);
}
+/**
+ * Push TCP header on a burst-path buffer and update send state
+ *
+ * Burst variant: calls tcp_push_hdr_i with maybe_burst set, so it
+ * reuses the cached options and receive window prepared by
+ * tcp_update_burst_snd_vars. Also starts RTT tracking when no ACK is
+ * being timed and arms the retransmit timer if it is not running.
+ *
+ * @return 0 (always)
+ */
+u32
+tcp_push_header (tcp_connection_t * tc, vlib_buffer_t * b)
+{
+ tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, /* compute opts */ 0,
+ /* burst */ 1);
+ tc->snd_una_max = tc->snd_nxt;
+ ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd));
+ tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
+ /* If not tracking an ACK, start tracking */
+ if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))
+ {
+ tc->rtt_ts = tcp_time_now ();
+ tc->rtt_seq = tc->snd_nxt;
+ }
+ if (PREDICT_FALSE (!tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)))
+ {
+ tcp_retransmit_timer_set (tc);
+ tc->rto_boff = 0;
+ }
+ tcp_trajectory_add_start (b, 3);
+ return 0;
+}
+
void
tcp_send_ack (tcp_connection_t * tc)
{
* @return the number of bytes in the segment or 0 if there's nothing to
* retransmit
*/
-u32
+static u32
tcp_prepare_retransmit_segment (tcp_connection_t * tc, u32 offset,
u32 max_deq_bytes, vlib_buffer_t ** b)
{
max_deq_bytes);
ASSERT (n_bytes == max_deq_bytes);
b[0]->current_length = n_bytes;
- tcp_push_hdr_i (tc, *b, tc->state, 0);
+ tcp_push_hdr_i (tc, *b, tc->state, /* compute opts */ 0, /* burst */ 0);
+ if (seq_gt (tc->snd_nxt, tc->snd_una_max))
+ tc->snd_una_max = tc->snd_nxt;
}
/* Split mss into multiple buffers */
else
/* Make sure we have enough buffers */
n_bufs_per_seg = ceil ((double) seg_size / tm->bytes_per_buffer);
- available_bufs = vec_len (tm->tx_buffers[thread_index]);
+ available_bufs = vec_len (tm->wrk_ctx[thread_index].tx_buffers);
if (n_bufs_per_seg > available_bufs)
{
tcp_alloc_tx_buffers (tm, thread_index, &available_bufs,
b[0]->total_length_not_including_first_buffer += n_peeked;
}
- tcp_push_hdr_i (tc, *b, tc->state, 0);
+ tcp_push_hdr_i (tc, *b, tc->state, /* compute opts */ 0, /* burst */ 0);
+ if (seq_gt (tc->snd_nxt, tc->snd_una_max))
+ tc->snd_una_max = tc->snd_nxt;
}
ASSERT (n_bytes > 0);
tcp_recovery_on (tc);
}
-static void
+static inline void
tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
{
tcp_main_t *tm = vnet_get_tcp_main ();
|| tc->snd_nxt == tc->snd_una_max
|| tc->rto_boff > 1));
- tcp_push_hdr_i (tc, b, tc->state, 0);
+ tcp_push_hdr_i (tc, b, tc->state, /* compute opts */ 0, /* burst */ 0);
tc->snd_una_max = tc->snd_nxt;
tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
tcp_fast_retransmit_no_sack (tc);
}
-always_inline u32
+static u32
tcp_session_has_ooo_data (tcp_connection_t * tc)
{
stream_session_t *s = session_get (tc->c_s_index, tc->c_thread_index);
}
}
-static inline void
+always_inline void
tcp_output_push_ip (vlib_main_t * vm, vlib_buffer_t * b0,
tcp_connection_t * tc0, u8 is_ip4)
{
}
}
-static inline void
+always_inline void
tcp_output_handle_packet (tcp_connection_t * tc0, vlib_buffer_t * b0,
u32 * error0, u16 * next0, u8 is_ip4)
{
if (n_left_from > 1)
{
- vlib_prefetch_buffer_header (b[0], STORE);
- CLIB_PREFETCH (b[0]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (b[1], STORE);
+ CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
}
next[0] = TCP_OUTPUT_NEXT_IP_LOOKUP;
VLIB_NODE_FUNCTION_MULTIARCH (tcp6_output_node, tcp6_output);
-u32
-tcp_session_push_header (transport_connection_t * tconn, vlib_buffer_t * b)
-{
- tcp_connection_t *tc = (tcp_connection_t *) tconn;
- tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, 0);
- tc->snd_una_max = tc->snd_nxt;
- ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd));
- tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
- /* If not tracking an ACK, start tracking */
- if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))
- {
- tc->rtt_ts = tcp_time_now ();
- tc->rtt_seq = tc->snd_nxt;
- }
- if (PREDICT_FALSE (!tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)))
- {
- tcp_retransmit_timer_set (tc);
- tc->rto_boff = 0;
- }
- tcp_trajectory_add_start (b, 3);
- return 0;
-}
-
typedef enum _tcp_reset_next
{
TCP_RESET_NEXT_DROP,