{
TCP_OUTPUT_NEXT_DROP,
TCP_OUTPUT_NEXT_IP_LOOKUP,
+ TCP_OUTPUT_NEXT_IP_REWRITE,
+ TCP_OUTPUT_NEXT_IP_ARP,
TCP_OUTPUT_N_NEXT
} tcp_output_next_t;
#define foreach_tcp4_output_next \
_ (DROP, "error-drop") \
- _ (IP_LOOKUP, "ip4-lookup")
+ _ (IP_LOOKUP, "ip4-lookup") \
+ _ (IP_REWRITE, "ip4-rewrite") \
+ _ (IP_ARP, "ip4-arp") /* Entries line up with the TCP_OUTPUT_NEXT_*
+ * symbols (DROP, IP_LOOKUP, IP_REWRITE, IP_ARP). The strings are
+ * presumably the IPv4 graph node names for each next index -- confirm
+ * where this list is expanded. */
#define foreach_tcp6_output_next \
_ (DROP, "error-drop") \
- _ (IP_LOOKUP, "ip6-lookup")
+ _ (IP_LOOKUP, "ip6-lookup") \
+ _ (IP_REWRITE, "ip6-rewrite") \
+ _ (IP_ARP, "ip6-discover-neighbor") /* IPv6 counterpart of the tcp4
+ * list: same four TCP_OUTPUT_NEXT_* symbols, with the IP_ARP slot
+ * paired with "ip6-discover-neighbor". The strings are presumably graph
+ * node names -- confirm where this list is expanded. */
static char *tcp_error_strings[] = {
#define tcp_error(n,s) s,
/*
* Figure out how much space we have available
*/
- available_space = stream_session_max_rx_enqueue (&tc->connection);
- max_fifo = stream_session_rx_fifo_size (&tc->connection);
+ available_space = transport_max_rx_enqueue (&tc->connection);
+ max_fifo = transport_rx_fifo_size (&tc->connection);
ASSERT (tc->rcv_opts.mss < max_fifo);
if (available_space < tc->rcv_opts.mss && available_space < max_fifo >> 3)
}
always_inline int
-tcp_alloc_tx_buffers (tcp_main_t * tm, u8 thread_index, u32 n_free_buffers)
+tcp_alloc_tx_buffers (tcp_main_t * tm, u8 thread_index, u16 * n_bufs,
+ u32 wanted)
{
vlib_main_t *vm = vlib_get_main ();
- u32 current_length = vec_len (tm->tx_buffers[thread_index]);
- u32 n_allocated;
-
- vec_validate (tm->tx_buffers[thread_index],
- current_length + n_free_buffers - 1);
- n_allocated =
- vlib_buffer_alloc (vm, &tm->tx_buffers[thread_index][current_length],
- n_free_buffers);
- _vec_len (tm->tx_buffers[thread_index]) = current_length + n_allocated;
- /* buffer shortage, report failure */
- if (vec_len (tm->tx_buffers[thread_index]) == 0)
- {
- clib_warning ("out of buffers");
- return -1;
- }
- return 0;
+ u32 n_alloc;
+ /*
+ * Top up the tx buffer vector of thread 'thread_index' from *n_bufs
+ * entries towards 'wanted' entries.
+ *
+ * tm           tcp main; owns the per-thread tx_buffers vectors
+ * thread_index which per-thread vector to fill
+ * n_bufs       in: entries currently held; out: entries held after the
+ *              allocation attempt
+ * wanted       target number of entries; must be greater than *n_bufs
+ *
+ * Returns the number of buffers newly allocated. Callers in this file
+ * treat 0 as failure, and a partial allocation is visible to them only
+ * through the updated *n_bufs.
+ */
+ ASSERT (wanted > *n_bufs); /* nothing to do (or wrap below) otherwise */
+ vec_validate_aligned (tm->tx_buffers[thread_index], wanted - 1,
+ CLIB_CACHE_LINE_BYTES); /* make index wanted - 1 addressable */
+ n_alloc = vlib_buffer_alloc (vm, &tm->tx_buffers[thread_index][*n_bufs],
+ wanted - *n_bufs); /* fill the tail, starting at slot *n_bufs */
+ *n_bufs += n_alloc; /* entries now held */
+ _vec_len (tm->tx_buffers[thread_index]) = *n_bufs; /* length = entries held, not 'wanted' */
+ return n_alloc;
}
always_inline int
tcp_get_free_buffer_index (tcp_main_t * tm, u32 * bidx)
{
- u32 *my_tx_buffers;
u32 thread_index = vlib_get_thread_index ();
+ u16 n_bufs = vec_len (tm->tx_buffers[thread_index]); /* entries cached.
+ * This function pops one buffer index off the calling thread's
+ * tx_buffers vector into *bidx. When the vector is empty it first asks
+ * tcp_alloc_tx_buffers for up to VLIB_FRAME_SIZE entries. Returns 0 on
+ * success; if nothing could be allocated it returns -1 and sets *bidx
+ * to ~0. */
TCP_DBG_BUFFER_ALLOC_MAYBE_FAIL (thread_index);
- if (PREDICT_FALSE (vec_len (tm->tx_buffers[thread_index]) == 0))
+ if (PREDICT_FALSE (!n_bufs)) /* cache empty: try to refill */
{
- if (tcp_alloc_tx_buffers (tm, thread_index, VLIB_FRAME_SIZE))
- return -1;
+ if (!tcp_alloc_tx_buffers (tm, thread_index, &n_bufs, VLIB_FRAME_SIZE)) /* 0 buffers allocated */
+ {
+ *bidx = ~0; /* invalid index, for callers that check bidx instead of the return value */
+ return -1;
+ }
}
- my_tx_buffers = tm->tx_buffers[thread_index];
- *bidx = my_tx_buffers[vec_len (my_tx_buffers) - 1];
- _vec_len (my_tx_buffers) -= 1;
+ *bidx = tm->tx_buffers[thread_index][--n_bufs]; /* take the last entry */
+ _vec_len (tm->tx_buffers[thread_index]) = n_bufs; /* shrink the vector by one */
return 0;
}
-always_inline void
-tcp_return_buffer (tcp_main_t * tm)
-{
- _vec_len (tm->tx_buffers[vlib_get_thread_index ()]) += 1;
-}
-
always_inline void *
tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b)
{
b->flags &= VLIB_BUFFER_NON_DEFAULT_FREELIST;
b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
b->total_length_not_including_first_buffer = 0;
+ b->current_data = 0;
vnet_buffer (b)->tcp.flags = 0;
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
/* Leave enough space for headers */
always_inline void
tcp_enqueue_to_ip_lookup_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
- u8 is_ip4, u8 flush)
+ u8 is_ip4, u32 fib_index, u8 flush)
{
tcp_main_t *tm = vnet_get_tcp_main ();
u32 thread_index = vlib_get_thread_index ();
b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
b->error = 0;
- /* Default FIB for now */
- vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = fib_index;
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
/* Send to IP lookup */
next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
always_inline void
tcp_enqueue_to_ip_lookup_now (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
- u8 is_ip4)
+ u8 is_ip4, u32 fib_index)
{
- tcp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, 1);
+ tcp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, fib_index, 1); /* Thin
+ * wrapper: forwards every argument to the _i helper and passes 1 as
+ * its 'flush' argument. fib_index is the value the helper stores in
+ * the buffer's sw_if_index[VLIB_TX]. */
}
always_inline void
tcp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
- u8 is_ip4)
+ u8 is_ip4, u32 fib_index)
{
- tcp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, 0);
+ tcp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, fib_index, 0); /* same helper as the _now variant, but with its 'flush' argument set to 0 */
+ if (vm->thread_index == 0 && vlib_num_workers ()) /* on thread 0 while worker threads exist */
+ session_flush_frames_main_thread (vm); /* NOTE(review): thread 0 is presumably the main thread, which needs this explicit flush -- confirm against the session layer */
}
always_inline void
tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt, u8 is_ip4)
{
vlib_buffer_t *b;
- u32 bi;
+ u32 bi, sw_if_index, fib_index;
tcp_main_t *tm = vnet_get_tcp_main ();
vlib_main_t *vm = vlib_get_main ();
u8 tcp_hdr_len, flags = 0;
u32 seq, ack;
ip4_header_t *ih4, *pkt_ih4;
ip6_header_t *ih6, *pkt_ih6;
+ fib_protocol_t fib_proto;
if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
return;
b = vlib_get_buffer (vm, bi);
+ sw_if_index = vnet_buffer (pkt)->sw_if_index[VLIB_RX];
+ fib_proto = is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
+ fib_index = fib_table_get_index_for_sw_if_index (fib_proto, sw_if_index);
tcp_init_buffer (vm, b);
/* Make and write options */
ASSERT (!bogus);
}
- tcp_enqueue_to_ip_lookup_now (vm, b, bi, is_ip4);
+ tcp_enqueue_to_ip_lookup_now (vm, b, bi, is_ip4, fib_index);
TCP_EVT_DBG (TCP_EVT_RST_SENT, tc);
}
th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
ASSERT (!bogus);
}
- tcp_enqueue_to_ip_lookup_now (vm, b, bi, tc->c_is_ip4);
+ tcp_enqueue_to_ip_lookup_now (vm, b, bi, tc->c_is_ip4, tc->c_fib_index);
TCP_EVT_DBG (TCP_EVT_RST_SENT, tc);
}
tc->rto_boff = 0;
tcp_push_ip_hdr (tm, tc, b);
- tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4);
+ tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4, tc->c_fib_index);
TCP_EVT_DBG (TCP_EVT_SYN_SENT, tc);
}
if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
return;
b = vlib_get_buffer (vm, bi);
+ tcp_init_buffer (vm, b);
fin_snt = tc->flags & TCP_CONN_FINSNT;
if (fin_snt)
tc->snd_nxt = tc->snd_una;
if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
return;
b = vlib_get_buffer (vm, bi);
+ tcp_init_buffer (vm, b);
/* Fill in the ACK */
tcp_make_ack (tc, b);
* Allocate and fill in buffer(s)
*/
- if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
- return 0;
- *b = vlib_get_buffer (vm, bi);
- data = tcp_init_buffer (vm, *b);
-
/* Easy case, buffer size greater than mss */
if (PREDICT_TRUE (seg_size <= tm->bytes_per_buffer))
{
+ if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
+ return 0;
+ *b = vlib_get_buffer (vm, bi);
+ data = tcp_init_buffer (vm, *b);
n_bytes = stream_session_peek_bytes (&tc->connection, data, offset,
max_deq_bytes);
ASSERT (n_bytes == max_deq_bytes);
vlib_buffer_t *chain_b, *prev_b;
int i;
- n_bufs_per_seg = ceil ((double) seg_size / tm->bytes_per_buffer);
-
/* Make sure we have enough buffers */
+ n_bufs_per_seg = ceil ((double) seg_size / tm->bytes_per_buffer);
available_bufs = vec_len (tm->tx_buffers[thread_index]);
if (n_bufs_per_seg > available_bufs)
{
- if (tcp_alloc_tx_buffers (tm, thread_index,
- VLIB_FRAME_SIZE - available_bufs))
+ tcp_alloc_tx_buffers (tm, thread_index, &available_bufs,
+ VLIB_FRAME_SIZE);
+
+ if (n_bufs_per_seg > available_bufs)
{
- tcp_return_buffer (tm);
*b = 0;
return 0;
}
}
+ tcp_get_free_buffer_index (tm, &bi);
+ ASSERT (bi != (u32) ~ 0);
+ *b = vlib_get_buffer (vm, bi);
+ data = tcp_init_buffer (vm, *b);
n_bytes = stream_session_peek_bytes (&tc->connection, data, offset,
tm->bytes_per_buffer -
MAX_HDRS_LEN);
ASSERT (n_peeked == len_to_deq);
n_bytes += n_peeked;
chain_b->current_length = n_peeked;
- chain_b->flags &= VLIB_BUFFER_NON_DEFAULT_FREELIST;
chain_b->next_buffer = 0;
/* update previous buffer */
tcp_cc_fastrecovery_exit (tc);
/* Start again from the beginning */
- tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss);
+ tc->cc_algo->congestion (tc);
tc->cwnd = tcp_loss_wnd (tc);
tc->snd_congestion = tc->snd_una_max;
tc->rtt_ts = 0;
+ tc->cwnd_acc_bytes = 0;
+
tcp_recovery_on (tc);
}
}
/* Shouldn't be here */
- if (tc->snd_una == tc->snd_una_max)
+ if (seq_geq (tc->snd_una, tc->snd_congestion))
{
tcp_recovery_off (tc);
return;
if (tc->rto_boff == 1)
tcp_rtx_timeout_cc (tc);
- tc->snd_nxt = tc->snd_una;
+ tc->snd_una_max = tc->snd_nxt = tc->snd_una;
tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 1);
/* This goes straight to ipx_lookup. Retransmit timer set already */
tcp_push_ip_hdr (tm, tc, b);
- tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4);
+ tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4, tc->c_fib_index);
}
/* Retransmit SYN-ACK */
else if (tc->state == TCP_STATE_SYN_RCVD)
}
b = vlib_get_buffer (vm, bi);
+ tcp_init_buffer (vm, b);
tcp_make_synack (tc, b);
TCP_EVT_DBG (TCP_EVT_SYN_RXT, tc, 1);
tc->snd_nxt = tc->snd_una + offset;
n_written = tcp_prepare_retransmit_segment (tc, offset, max_bytes,
&b);
- ASSERT (n_written);
+ if (!n_written)
+ goto done;
+
bi = vlib_get_buffer_index (vm, b);
tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
break;
snd_space -= n_written;
}
+done:
/* If window allows, send 1 SMSS of new data */
tc->snd_nxt = old_snd_nxt;
}
return svm_fifo_has_ooo_data (s->server_rx_fifo);
}
+/*
+ * Pick the output next node for a packet of connection tc0 from the
+ * neighbor adjacency of its remote address on tc0->sw_if_index. The
+ * caller invokes this for IPv6 link-local remote addresses.
+ *
+ * On return *next0 is TCP_OUTPUT_NEXT_IP_REWRITE, TCP_OUTPUT_NEXT_IP_ARP
+ * or TCP_OUTPUT_NEXT_DROP. *error0 is written only on the drop paths,
+ * where it is set to TCP_ERROR_LINK_LOCAL_RW. Whenever an adjacency is
+ * found its index is stored in the buffer's ip.adj_index[VLIB_TX].
+ */
+static void
+tcp_output_handle_link_local (tcp_connection_t * tc0, vlib_buffer_t * b0,
+			      u32 * next0, u32 * error0)
+{
+  adj_index_t nbr_ai;
+  u32 lkup_next;
+
+  /* Lookup is not thread safe; it relies on the adjacency staying in
+   * place for as long as the connection exists */
+  nbr_ai = adj_nbr_find (FIB_PROTOCOL_IP6, VNET_LINK_IP6, &tc0->c_rmt_ip,
+			 tc0->sw_if_index);
+  if (nbr_ai == ADJ_INDEX_INVALID)
+    {
+      /* No adjacency for the peer: drop */
+      *error0 = TCP_ERROR_LINK_LOCAL_RW;
+      *next0 = TCP_OUTPUT_NEXT_DROP;
+      vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
+      return;
+    }
+
+  /* Recorded for every found adjacency, including the drop case below */
+  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = nbr_ai;
+
+  lkup_next = adj_get (nbr_ai)->lookup_next_index;
+  if (PREDICT_TRUE (lkup_next == IP_LOOKUP_NEXT_REWRITE))
+    {
+      *next0 = TCP_OUTPUT_NEXT_IP_REWRITE;
+      return;
+    }
+  if (lkup_next == IP_LOOKUP_NEXT_ARP)
+    {
+      *next0 = TCP_OUTPUT_NEXT_IP_ARP;
+      return;
+    }
+
+  /* Any other adjacency type is not handled here: drop */
+  *next0 = TCP_OUTPUT_NEXT_DROP;
+  *error0 = TCP_ERROR_LINK_LOCAL_RW;
+}
+
+
always_inline uword
tcp46_output_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
th0 = vlib_buffer_get_current (b0);
TCP_EVT_DBG (TCP_EVT_OUTPUT, tc0, th0->flags, b0->current_length);
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = tc0->c_fib_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
if (is_ip4)
{
vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data;
vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
th0->checksum = 0;
+
+ if (PREDICT_FALSE
+ (ip6_address_is_link_local_unicast (&tc0->c_rmt_ip6)))
+ tcp_output_handle_link_local (tc0, b0, &next0, &error0);
}
/* Filter out DUPACKs if there are no OOO segments left */
if (PREDICT_FALSE
(vnet_buffer (b0)->tcp.flags & TCP_BUF_FLAG_DUPACK))
{
+ /* N.B. Should not filter burst of dupacks. Two issues:
+ * 1) dupacks open cwnd on remote peer when congested
+ * 2) acks leaving should have the latest rcv_wnd since the
+ * burst may have eaten up all of it, so only the old ones
+ * could be filtered.
+ */
if (!tcp_session_has_ooo_data (tc0))
{
error0 = TCP_ERROR_FILTERED_DUPACKS;
vnet_buffer (b0)->ip.adj_index[VLIB_TX] = tc0->c_rmt_dpo.dpoi_index;
#endif
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
-
- b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
done:
b0->error = node->errors[error0];
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))