#include <vnet/tcp/tcp.h>
#include <math.h>
-vlib_node_registration_t tcp4_output_node;
-vlib_node_registration_t tcp6_output_node;
-
typedef enum _tcp_output_next
{
TCP_OUTPUT_NEXT_DROP,
tcp_connection_t tcp_connection;
} tcp_tx_trace_t;
+#ifndef CLIB_MARCH_VARIANT
/* Non-static global, initialised to 1460.  With the surrounding guard it is
 * only defined when CLIB_MARCH_VARIANT is not set.
 * NOTE(review): presumably this keeps per-march rebuilds of this file from
 * emitting a second definition of the symbol -- confirm against the build. */
u16 dummy_mtu = 1460;
+#endif /* CLIB_MARCH_VARIANT */
/**
 * Format function with the (u8 * s, va_list * args) signature; the return
 * type line changes from 'u8 *' to 'static u8 *', i.e. the symbol gets
 * internal linkage.
 *
 * The visible body pulls a vlib_main_t * off @args (marked unused) and
 * returns @s.
 * NOTE(review): nothing is appended to @s between those two statements in
 * this excerpt -- the rest of the body may simply not be shown here.
 */
-u8 *
+static u8 *
format_tcp_tx_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
return s;
}
+#ifndef CLIB_MARCH_VARIANT
static u8
tcp_window_compute_scale (u32 window)
{
/* Make sure we have a multiple of rcv_wscale */
if (wnd && tc->rcv_wscale)
{
- wnd &= ~(1 << tc->rcv_wscale);
+ wnd &= ~((1 << tc->rcv_wscale) - 1);
if (wnd == 0)
wnd = 1 << tc->rcv_wscale;
}
if (tcp_opts_sack (opts))
{
int i;
- u32 n_sack_blocks = clib_min (vec_len (opts->sacks),
- TCP_OPTS_MAX_SACK_BLOCKS);
- if (n_sack_blocks != 0)
+ if (opts->n_sack_blocks != 0)
{
*data++ = TCP_OPTION_SACK_BLOCK;
- *data++ = 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
- for (i = 0; i < n_sack_blocks; i++)
+ *data++ = 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
+ for (i = 0; i < opts->n_sack_blocks; i++)
{
buf = clib_host_to_net_u32 (opts->sacks[i].start);
clib_memcpy_fast (data, &buf, seq_len);
clib_memcpy_fast (data, &buf, seq_len);
data += seq_len;
}
- opts_len += 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
+ opts_len += 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
}
}
if (vec_len (tc->snd_sacks))
{
opts->flags |= TCP_OPTS_FLAG_SACK;
- opts->sacks = tc->snd_sacks;
- opts->n_sack_blocks = clib_min (vec_len (tc->snd_sacks),
+ if (tc->snd_sack_pos >= vec_len (tc->snd_sacks))
+ tc->snd_sack_pos = 0;
+ opts->sacks = &tc->snd_sacks[tc->snd_sack_pos];
+ opts->n_sack_blocks = vec_len (tc->snd_sacks) - tc->snd_sack_pos;
+ opts->n_sack_blocks = clib_min (opts->n_sack_blocks,
TCP_OPTS_MAX_SACK_BLOCKS);
+ tc->snd_sack_pos += opts->n_sack_blocks;
len += 2 + TCP_OPTION_LEN_SACK_BLOCK * opts->n_sack_blocks;
}
}
if (tcp_opts_tstamp (&tc->rcv_opts))
tc->snd_mss -= TCP_OPTION_LEN_TIMESTAMP;
}
+#endif /* CLIB_MARCH_VARIANT */
static void *
tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b)
return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN);
}
+#ifndef CLIB_MARCH_VARIANT
static void *
tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b)
{
{
tcp_enqueue_to_output_i (wrk, b, bi, is_ip4, 1);
}
+#endif /* CLIB_MARCH_VARIANT */
static int
tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b0,
return 0;
}
+#ifndef CLIB_MARCH_VARIANT
/**
* Send reset without reusing existing buffer
*
}
}
/*
 * Helper deleted by this change.  It mapped the next connection state to the
 * TCP header flags to send:
 *   ESTABLISHED, CLOSE_WAIT, TIME_WAIT, FIN_WAIT_2 -> ACK
 *   SYN_RCVD                                       -> SYN | ACK
 *   SYN_SENT                                       -> SYN
 *   LAST_ACK, FIN_WAIT_1, CLOSING                  -> ACK while
 *       snd_nxt + 1 < snd_una_max, otherwise FIN
 *   anything else                                  -> warning logged, 0
 * Its caller tcp_push_hdr_i drops the call and starts from TCP_FLAG_ACK
 * instead.
 */
-always_inline u8
-tcp_make_state_flags (tcp_connection_t * tc, tcp_state_t next_state)
-{
- switch (next_state)
- {
- case TCP_STATE_ESTABLISHED:
- case TCP_STATE_CLOSE_WAIT:
- case TCP_STATE_TIME_WAIT:
- case TCP_STATE_FIN_WAIT_2:
- return TCP_FLAG_ACK;
- case TCP_STATE_SYN_RCVD:
- return TCP_FLAG_SYN | TCP_FLAG_ACK;
- case TCP_STATE_SYN_SENT:
- return TCP_FLAG_SYN;
- case TCP_STATE_LAST_ACK:
- case TCP_STATE_FIN_WAIT_1:
- case TCP_STATE_CLOSING:
- if (tc->snd_nxt + 1 < tc->snd_una_max)
- return TCP_FLAG_ACK;
- else
- return TCP_FLAG_FIN;
- default:
- clib_warning ("Shouldn't be here!");
- }
- return 0;
-}
-
/**
- * Push TCP header and update connection variables
+ * Push TCP header and update connection variables. Should only be called
+ * for segments with data, not for 'control' packets.
*/
always_inline void
tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b,
tcp_state_t next_state, u8 compute_opts, u8 maybe_burst)
{
+ u8 tcp_hdr_opts_len, flags = TCP_FLAG_ACK;
u32 advertise_wnd, data_len;
- u8 tcp_hdr_opts_len, flags;
tcp_main_t *tm = &tcp_main;
tcp_header_t *th;
else
advertise_wnd = tcp_window_to_advertise (tc, next_state);
- flags = tcp_make_state_flags (tc, next_state);
if (PREDICT_FALSE (tc->flags & TCP_CONN_PSH_PENDING))
{
if (seq_geq (tc->psh_seq, tc->snd_nxt)
}
u32
-tcp_push_header (tcp_connection_t * tc, vlib_buffer_t * b)
+tcp_session_push_header (transport_connection_t * tconn, vlib_buffer_t * b)
{
+ tcp_connection_t *tc = (tcp_connection_t *) tconn;
tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, /* compute opts */ 0,
/* burst */ 1);
tc->snd_una_max = tc->snd_nxt;
{
tc = tcp_connection_get (pending_acks[i], thread_index);
tc->flags &= ~TCP_CONN_SNDACK;
- n_acks = clib_max (1, tc->pending_dupacks);
+ if (!tc->pending_dupacks)
+ {
+ tcp_send_ack (tc);
+ continue;
+ }
+
/* If we're supposed to send dupacks but have no ooo data
* send only one ack */
- if (tc->pending_dupacks && !vec_len (tc->snd_sacks))
- n_acks = 1;
+ if (!vec_len (tc->snd_sacks))
+ {
+ tcp_send_ack (tc);
+ continue;
+ }
+
+ /* Start with first sack block */
+ tc->snd_sack_pos = 0;
+
+ /* Generate enough dupacks to cover all sack blocks. Do not generate
+ * more sacks than the number of packets received. But do generate at
+ * least 3, i.e., the number needed to signal congestion, if needed. */
+ n_acks = vec_len (tc->snd_sacks) / TCP_OPTS_MAX_SACK_BLOCKS;
+ n_acks = clib_min (n_acks, tc->pending_dupacks);
+ n_acks = clib_max (n_acks, clib_min (tc->pending_dupacks, 3));
for (j = 0; j < n_acks; j++)
tcp_send_ack (tc);
+
tc->pending_dupacks = 0;
+ tc->snd_sack_pos = 0;
}
_vec_len (wrk->pending_acks) = 0;
}
return 0;
*b = vlib_get_buffer (vm, bi);
data = tcp_init_buffer (vm, *b);
- n_bytes = stream_session_peek_bytes (&tc->connection, data, offset,
- max_deq_bytes);
+ n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
+ max_deq_bytes);
ASSERT (n_bytes == max_deq_bytes);
b[0]->current_length = n_bytes;
tcp_push_hdr_i (tc, *b, tc->state, /* compute opts */ 0, /* burst */ 0);
*b = vlib_get_buffer (vm, wrk->tx_buffers[--n_bufs]);
data = tcp_init_buffer (vm, *b);
- n_bytes = stream_session_peek_bytes (&tc->connection, data, offset,
- bytes_per_buffer -
- TRANSPORT_MAX_HDRS_LEN);
+ n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
+ bytes_per_buffer -
+ TRANSPORT_MAX_HDRS_LEN);
b[0]->current_length = n_bytes;
b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
b[0]->total_length_not_including_first_buffer = 0;
chain_b = vlib_get_buffer (vm, chain_bi);
chain_b->current_data = 0;
data = vlib_buffer_get_current (chain_b);
- n_peeked = stream_session_peek_bytes (&tc->connection, data,
- offset + n_bytes, len_to_deq);
+ n_peeked = session_tx_fifo_peek_bytes (&tc->connection, data,
+ offset + n_bytes,
+ len_to_deq);
ASSERT (n_peeked == len_to_deq);
n_bytes += n_peeked;
chain_b->current_length = n_peeked;
/*
* Make sure we can retransmit something
*/
- available_bytes = session_tx_fifo_max_dequeue (&tc->connection);
+ available_bytes = transport_max_tx_dequeue (&tc->connection);
ASSERT (available_bytes >= offset);
available_bytes -= offset;
if (!available_bytes)
u8 *data;
tc = tcp_connection_get_if_valid (index, thread_index);
-
if (!tc)
return;
tc->timers[TCP_TIMER_PERSIST] = TCP_TIMER_HANDLE_INVALID;
/* Problem already solved or worse */
- if (tc->state == TCP_STATE_CLOSED || tc->state > TCP_STATE_ESTABLISHED
- || tc->snd_wnd > tc->snd_mss)
+ if (tc->state == TCP_STATE_CLOSED || tc->snd_wnd > tc->snd_mss
+ || (tc->flags & TCP_CONN_FINSNT))
return;
- available_bytes = session_tx_fifo_max_dequeue (&tc->connection);
+ available_bytes = transport_max_tx_dequeue (&tc->connection);
offset = tc->snd_una_max - tc->snd_una;
/* Reprogram persist if no new bytes available to send. We may have data
tcp_validate_txf_size (tc, offset);
tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
- max_snd_bytes =
- clib_min (tc->snd_mss, tm->bytes_per_buffer - TRANSPORT_MAX_HDRS_LEN);
- n_bytes =
- stream_session_peek_bytes (&tc->connection, data, offset, max_snd_bytes);
+ max_snd_bytes = clib_min (tc->snd_mss,
+ tm->bytes_per_buffer - TRANSPORT_MAX_HDRS_LEN);
+ n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
+ max_snd_bytes);
b->current_length = n_bytes;
ASSERT (n_bytes != 0 && (tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)
|| tc->snd_nxt == tc->snd_una_max
sb = &tc->sack_sb;
hole = scoreboard_get_hole (sb, sb->cur_rxt_hole);
- max_deq = session_tx_fifo_max_dequeue (&tc->connection);
+ max_deq = transport_max_tx_dequeue (&tc->connection);
max_deq -= tc->snd_una_max - tc->snd_una;
while (snd_space > 0 && n_segs < burst_size)
if (snd_space < tc->snd_mss || tc->snd_mss == 0)
goto done;
- max_deq = session_tx_fifo_max_dequeue (&tc->connection);
+ max_deq = transport_max_tx_dequeue (&tc->connection);
max_deq -= tc->snd_una_max - tc->snd_una;
if (max_deq)
{
else
return tcp_fast_retransmit_no_sack (wrk, tc, burst_size);
}
+#endif /* CLIB_MARCH_VARIANT */
static void
tcp_output_handle_link_local (tcp_connection_t * tc0, vlib_buffer_t * b0,
return frame->n_vectors;
}
/**
 * Node function for tcp4_output_node.
 *
 * The former 'static uword tcp4_output (...)' definition is replaced by a
 * VLIB_NODE_FN definition keyed on the node registration name; parameters
 * are unchanged.  Hands the frame to tcp46_output_inline with is_ip4 = 1 and
 * returns whatever that returns.
 */
-static uword
-tcp4_output (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
+VLIB_NODE_FN (tcp4_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
{
return tcp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ );
}
/**
 * Node function for tcp6_output_node.
 *
 * The former 'static uword tcp6_output (...)' definition is replaced by a
 * VLIB_NODE_FN definition keyed on the node registration name; parameters
 * are unchanged.  Hands the frame to tcp46_output_inline with is_ip4 = 0 and
 * returns whatever that returns.
 */
-static uword
-tcp6_output (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
+VLIB_NODE_FN (tcp6_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
{
return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
/* *INDENT-OFF* */
/* Registration for the "tcp4-output" node.  The explicit .function
 * initializer is dropped here.
 * NOTE(review): presumably the function is now bound through the
 * VLIB_NODE_FN (tcp4_output_node) definition earlier in this file --
 * confirm against the VLIB_NODE_FN macro. */
VLIB_REGISTER_NODE (tcp4_output_node) =
{
- .function = tcp4_output,
.name = "tcp4-output",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
};
/* *INDENT-ON* */
-VLIB_NODE_FUNCTION_MULTIARCH (tcp4_output_node, tcp4_output);
-
/* *INDENT-OFF* */
/* Registration for the "tcp6-output" node.  The explicit .function
 * initializer is dropped here.
 * NOTE(review): presumably the function is now bound through the
 * VLIB_NODE_FN (tcp6_output_node) definition earlier in this file --
 * confirm against the VLIB_NODE_FN macro. */
VLIB_REGISTER_NODE (tcp6_output_node) =
{
- .function = tcp6_output,
.name = "tcp6-output",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
};
/* *INDENT-ON* */
-VLIB_NODE_FUNCTION_MULTIARCH (tcp6_output_node, tcp6_output);
-
typedef enum _tcp_reset_next
{
TCP_RESET_NEXT_DROP,
return from_frame->n_vectors;
}
/**
 * Node function for tcp4_reset_node.
 *
 * Replaces the former 'static uword tcp4_send_reset (...)' definition with a
 * VLIB_NODE_FN definition; parameters are unchanged.  Forwards the frame to
 * tcp46_send_reset_inline with 1 as the last argument and returns its
 * result.
 * NOTE(review): the last argument is presumably is_ip4, by analogy with the
 * output node functions in this file -- the callee's signature is not
 * visible here.
 */
-static uword
-tcp4_send_reset (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
+VLIB_NODE_FN (tcp4_reset_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
{
return tcp46_send_reset_inline (vm, node, from_frame, 1);
}
/**
 * Node function for tcp6_reset_node.
 *
 * Replaces the former 'static uword tcp6_send_reset (...)' definition with a
 * VLIB_NODE_FN definition; parameters are unchanged.  Forwards the frame to
 * tcp46_send_reset_inline with 0 as the last argument and returns its
 * result.
 * NOTE(review): the last argument is presumably is_ip4, by analogy with the
 * output node functions in this file -- the callee's signature is not
 * visible here.
 */
-static uword
-tcp6_send_reset (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
+VLIB_NODE_FN (tcp6_reset_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
{
return tcp46_send_reset_inline (vm, node, from_frame, 0);
}
/* *INDENT-OFF* */
/* Registration for the "tcp4-reset" node: u32-sized vector elements and
 * TCP_N_ERROR error counters.  The explicit .function initializer is
 * dropped here.
 * NOTE(review): presumably the function is now bound through the
 * VLIB_NODE_FN (tcp4_reset_node) definition earlier in this file --
 * confirm against the VLIB_NODE_FN macro. */
VLIB_REGISTER_NODE (tcp4_reset_node) = {
- .function = tcp4_send_reset,
.name = "tcp4-reset",
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
};
/* *INDENT-ON* */
-VLIB_NODE_FUNCTION_MULTIARCH (tcp4_reset_node, tcp4_send_reset);
-
/* *INDENT-OFF* */
/* Registration for the "tcp6-reset" node: u32-sized vector elements and
 * TCP_N_ERROR error counters.  The explicit .function initializer is
 * dropped here.
 * NOTE(review): presumably the function is now bound through the
 * VLIB_NODE_FN (tcp6_reset_node) definition earlier in this file --
 * confirm against the VLIB_NODE_FN macro. */
VLIB_REGISTER_NODE (tcp6_reset_node) = {
- .function = tcp6_send_reset,
.name = "tcp6-reset",
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
};
/* *INDENT-ON* */
-VLIB_NODE_FUNCTION_MULTIARCH (tcp6_reset_node, tcp6_send_reset);
-
/*
* fd.io coding-style-patch-verification: ON
*