{
TCP_OUTPUT_NEXT_DROP,
TCP_OUTPUT_NEXT_IP_LOOKUP,
+ TCP_OUTPUT_NEXT_IP_REWRITE,
+ TCP_OUTPUT_NEXT_IP_ARP,
TCP_OUTPUT_N_NEXT
} tcp_output_next_t;
#define foreach_tcp4_output_next \
_ (DROP, "error-drop") \
- _ (IP_LOOKUP, "ip4-lookup")
+ _ (IP_LOOKUP, "ip4-lookup") \
+ _ (IP_REWRITE, "ip4-rewrite") \
+ _ (IP_ARP, "ip4-arp") /* entries follow the order of tcp_output_next_t */
#define foreach_tcp6_output_next \
_ (DROP, "error-drop") \
- _ (IP_LOOKUP, "ip6-lookup")
+ _ (IP_LOOKUP, "ip6-lookup") \
+ _ (IP_REWRITE, "ip6-rewrite") \
+ _ (IP_ARP, "ip6-discover-neighbor") /* entries follow the order of tcp_output_next_t */
static char *tcp_error_strings[] = {
#define tcp_error(n,s) s,
/*
* Figure out how much space we have available
*/
- available_space = stream_session_max_rx_enqueue (&tc->connection);
- max_fifo = stream_session_rx_fifo_size (&tc->connection);
+ available_space = transport_max_rx_enqueue (&tc->connection);
+ max_fifo = transport_rx_fifo_size (&tc->connection);
ASSERT (tc->rcv_opts.mss < max_fifo);
if (available_space < tc->rcv_opts.mss && available_space < max_fifo >> 3)
{
case TCP_STATE_ESTABLISHED:
case TCP_STATE_FIN_WAIT_1:
+ case TCP_STATE_CLOSED:
+ case TCP_STATE_CLOSE_WAIT:
return tcp_make_established_options (tc, opts);
case TCP_STATE_SYN_RCVD:
return tcp_make_synack_options (tc, opts);
u8 is_ip4, u32 fib_index)
{
tcp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, fib_index, 0);
+ if (vm->thread_index == 0 && vlib_num_workers ())
+ session_flush_frames_main_thread (vm);
}
always_inline void
/*
* Make sure we can retransmit something
*/
- available_bytes = stream_session_tx_fifo_max_dequeue (&tc->connection);
+ available_bytes = session_tx_fifo_max_dequeue (&tc->connection);
ASSERT (available_bytes >= offset);
available_bytes -= offset;
if (!available_bytes)
* Reset congestion control, switch cwnd to loss window and try again.
*/
static void
-tcp_rtx_timeout_cc (tcp_connection_t * tc)
+tcp_rxt_timeout_cc (tcp_connection_t * tc)
{
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 6); /* CC debug event, id 6; ids 0 and 1 are used at other call sites in this file */
tc->prev_ssthresh = tc->ssthresh;
tc->prev_cwnd = tc->cwnd;
tcp_cc_fastrecovery_exit (tc);
/* Start again from the beginning */
- tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss);
+ tc->cc_algo->congestion (tc); /* invoke the congestion callback of the connection's cc_algo */
tc->cwnd = tcp_loss_wnd (tc);
tc->snd_congestion = tc->snd_una_max;
tc->rtt_ts = 0;
+ tc->cwnd_acc_bytes = 0; /* zeroed before recovery is switched on below */
+
tcp_recovery_on (tc);
}
tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID;
}
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 1);
+
if (tc->state >= TCP_STATE_ESTABLISHED)
{
/* Lost FIN, retransmit and return */
}
/* Shouldn't be here */
- if (tc->snd_una == tc->snd_una_max)
+ if ((tc->rto_boff == 0 && tc->snd_una == tc->snd_una_max)
+ || (tc->rto_boff > 0 && seq_geq (tc->snd_una, tc->snd_congestion)))
{
tcp_recovery_off (tc);
return;
/* First retransmit timeout */
if (tc->rto_boff == 1)
- tcp_rtx_timeout_cc (tc);
+ tcp_rxt_timeout_cc (tc);
- tc->snd_nxt = tc->snd_una;
+ tc->snd_una_max = tc->snd_nxt = tc->snd_una;
tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
- TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 1);
-
/* Send one segment. Note that n_bytes may be zero due to buffer shortfall */
n_bytes = tcp_prepare_retransmit_segment (tc, 0, tc->snd_mss, &b);
|| tc->snd_wnd > tc->snd_mss || tcp_in_recovery (tc))
return;
- available_bytes = stream_session_tx_fifo_max_dequeue (&tc->connection);
+ available_bytes = session_tx_fifo_max_dequeue (&tc->connection);
offset = tc->snd_una_max - tc->snd_una;
/* Reprogram persist if no new bytes available to send. We may have data
tcp_fast_retransmit_sack (tcp_connection_t * tc)
{
vlib_main_t *vm = vlib_get_main ();
- u32 n_written = 0, offset, max_bytes;
+ u32 n_written = 0, offset, max_bytes, n_segs = 0;
vlib_buffer_t *b = 0;
sack_scoreboard_hole_t *hole;
sack_scoreboard_t *sb;
u8 snd_limited = 0, can_rescue = 0;
ASSERT (tcp_in_fastrecovery (tc));
- TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 0);
old_snd_nxt = tc->snd_nxt;
sb = &tc->sack_sb;
- snd_space = tcp_available_snd_space (tc);
+ snd_space = tcp_available_cc_snd_space (tc);
+ if (snd_space < tc->snd_mss)
+ goto done;
+
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 0);
hole = scoreboard_get_hole (sb, sb->cur_rxt_hole);
- while (hole && snd_space > 0)
+ while (hole && snd_space > 0 && n_segs++ < VLIB_FRAME_SIZE)
{
hole = scoreboard_next_rxt_hole (sb, hole,
tcp_fastrecovery_sent_1_smss (tc),
/* Start resending from first un-acked segment */
old_snd_nxt = tc->snd_nxt;
tc->snd_nxt = tc->snd_una;
- snd_space = tcp_available_snd_space (tc);
+ snd_space = tcp_available_cc_snd_space (tc);
while (snd_space > 0)
{
void
tcp_fast_retransmit (tcp_connection_t * tc)
{
- if (tcp_opts_sack_permitted (&tc->rcv_opts)
- && scoreboard_first_hole (&tc->sack_sb))
+ if (tcp_opts_sack_permitted (&tc->rcv_opts))
tcp_fast_retransmit_sack (tc);
else
tcp_fast_retransmit_no_sack (tc);
return svm_fifo_has_ooo_data (s->server_rx_fifo);
}
+static void
+tcp_output_handle_link_local (tcp_connection_t * tc0, vlib_buffer_t * b0,
+ u32 * next0, u32 * error0)
+{
+ ip_adjacency_t *adj;
+ adj_index_t ai;
+
+ /* Pick the output next node for a buffer of connection tc0. The output
+ * path calls this when the remote IPv6 address is link-local unicast.
+ *
+ * Looks up the IPv6 neighbor adjacency for tc0's remote address on
+ * tc0->sw_if_index and writes the chosen next index to *next0. When the
+ * buffer has to be dropped, TCP_ERROR_LINK_LOCAL_RW is written to *error0;
+ * on the rewrite and ARP paths *error0 is left untouched.
+ *
+ * Not thread safe but as long as the connection exists the adj should
+ * not be removed */
+ ai = adj_nbr_find (FIB_PROTOCOL_IP6, VNET_LINK_IP6, &tc0->c_rmt_ip,
+ tc0->sw_if_index);
+ if (ai == ADJ_INDEX_INVALID) /* no adjacency found: drop */
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
+ *next0 = TCP_OUTPUT_NEXT_DROP;
+ *error0 = TCP_ERROR_LINK_LOCAL_RW;
+ return;
+ }
+
+ adj = adj_get (ai);
+ if (PREDICT_TRUE (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE))
+ *next0 = TCP_OUTPUT_NEXT_IP_REWRITE;
+ else if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP)
+ *next0 = TCP_OUTPUT_NEXT_IP_ARP;
+ else /* neither rewrite nor ARP: drop */
+ {
+ *next0 = TCP_OUTPUT_NEXT_DROP;
+ *error0 = TCP_ERROR_LINK_LOCAL_RW;
+ }
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai; /* also reached from the drop branch just above */
+}
+
always_inline uword
tcp46_output_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
th0 = vlib_buffer_get_current (b0);
TCP_EVT_DBG (TCP_EVT_OUTPUT, tc0, th0->flags, b0->current_length);
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = tc0->c_fib_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
if (is_ip4)
{
vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data;
vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
th0->checksum = 0;
+
+ if (PREDICT_FALSE
+ (ip6_address_is_link_local_unicast (&tc0->c_rmt_ip6)))
+ tcp_output_handle_link_local (tc0, b0, &next0, &error0);
}
/* Filter out DUPACKs if there are no OOO segments left */
vnet_buffer (b0)->ip.adj_index[VLIB_TX] = tc0->c_rmt_dpo.dpoi_index;
#endif
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = tc0->c_fib_index;
-
- b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
done:
b0->error = node->errors[error0];
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))