vlib_node_registration_t tcp4_output_node;
vlib_node_registration_t tcp6_output_node;
-typedef enum _tcp_output_nect
+typedef enum _tcp_output_next
{
TCP_OUTPUT_NEXT_DROP,
+ TCP_OUTPUT_NEXT_IP_LOOKUP,
TCP_OUTPUT_N_NEXT
} tcp_output_next_t;
#define foreach_tcp4_output_next \
_ (DROP, "error-drop") \
+ _ (IP_LOOKUP, "ip4-lookup")
#define foreach_tcp6_output_next \
_ (DROP, "error-drop") \
+ _ (IP_LOOKUP, "ip6-lookup")
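+
+/* Illustrative only (not part of this change): the foreach macros above are
+ * typically consumed when the output nodes are registered, expanding into
+ * the next-node table, along the lines of:
+ *
+ *   .n_next_nodes = TCP_OUTPUT_N_NEXT,
+ *   .next_nodes = {
+ *   #define _(s, n) [TCP_OUTPUT_NEXT_##s] = n,
+ *     foreach_tcp4_output_next
+ *   #undef _
+ *   },
+ */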
static char *tcp_error_strings[] = {
#define tcp_error(n,s) s,
}
/**
- * TCP's IW as recommended by RFC6928
+ * Update max segment size we're able to process.
+ *
+ * The value is constrained by our interface's MTU and IP options. It is
+ * also what we advertise to our peer.
+ */
+void
+tcp_update_rcv_mss (tcp_connection_t * tc)
+{
+ /* TODO find our iface MTU */
+ tc->mss = dummy_mtu;
+}
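+
+/* Sketch only (tcp_interface_mtu below is a hypothetical helper, not part of
+ * this change): once the interface MTU lookup above is implemented, the
+ * receive MSS would typically be derived from it rather than from dummy_mtu:
+ *
+ *   u16 mtu = tcp_interface_mtu (tc);
+ *   tc->mss = tc->c_is_ip4 ?
+ *     mtu - sizeof (ip4_header_t) - sizeof (tcp_header_t) :
+ *     mtu - sizeof (ip6_header_t) - sizeof (tcp_header_t);
+ */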
+
+/**
+ * TCP's initial window
*/
always_inline u32
tcp_initial_wnd_unscaled (tcp_connection_t * tc)
{
- return TCP_IW_N_SEGMENTS * tc->mss;
+ /* RFC 6928 recommends the value lower. However at the time our connections
+ * are initialized, fifos may not be allocated. Therefore, advertise the
+ * smallest possible unscaled window size and update once fifos are
+ * assigned to the session.
+ */
+ /*
+ tcp_update_rcv_mss (tc);
+ TCP_IW_N_SEGMENTS * tc->mss;
+ */
+ return TCP_MIN_RX_FIFO_SIZE;
}
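+
+/* Illustrative only (rx_fifo_size below is an assumption, not a real field):
+ * once fifos are attached to the session, the advertised window could be
+ * refreshed roughly as the commented-out lines above suggest, clamped to
+ * what the rx fifo can actually buffer:
+ *
+ *   tcp_update_rcv_mss (tc);
+ *   tc->rcv_wnd = clib_min (TCP_IW_N_SEGMENTS * tc->mss, rx_fifo_size);
+ */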
/**
}
}
-/**
- * Update max segment size we're able to process.
- *
- * The value is constrained by our interface's MTU and IP options. It is
- * also what we advertise to our peer.
- */
-void
-tcp_update_rcv_mss (tcp_connection_t * tc)
-{
- /* TODO find our iface MTU */
- tc->mss = dummy_mtu;
-}
-
/**
* Update snd_mss to reflect the effective segment size that we can send
* by taking into account all TCP options, including SACKs
#define tcp_get_free_buffer_index(tm, bidx) \
do { \
u32 *my_tx_buffers, n_free_buffers; \
- u32 thread_index = vlib_get_thread_index(); \
- my_tx_buffers = tm->tx_buffers[thread_index]; \
+ u32 thread_index = vlib_get_thread_index(); \
+ my_tx_buffers = tm->tx_buffers[thread_index]; \
if (PREDICT_FALSE(vec_len (my_tx_buffers) == 0)) \
{ \
n_free_buffers = 32; /* TODO config or macro */ \
vec_validate (my_tx_buffers, n_free_buffers - 1); \
_vec_len(my_tx_buffers) = vlib_buffer_alloc_from_free_list ( \
- tm->vlib_main, my_tx_buffers, n_free_buffers, \
+ vlib_get_main(), my_tx_buffers, n_free_buffers, \
VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); \
- tm->tx_buffers[thread_index] = my_tx_buffers; \
+ tm->tx_buffers[thread_index] = my_tx_buffers; \
} \
/* buffer shortage */ \
if (PREDICT_FALSE (vec_len (my_tx_buffers) == 0)) \
return; \
*bidx = my_tx_buffers[_vec_len (my_tx_buffers) - 1]; \
_vec_len (my_tx_buffers) -= 1; \
} while (0)
-#define tcp_return_buffer(tm) \
-do { \
- u32 *my_tx_buffers; \
- u32 thread_index = vlib_get_thread_index(); \
- my_tx_buffers = tm->tx_buffers[thread_index]; \
- _vec_len (my_tx_buffers) +=1; \
+#define tcp_return_buffer(tm) \
+do { \
+ u32 *my_tx_buffers; \
+ u32 thread_index = vlib_get_thread_index(); \
+ my_tx_buffers = tm->tx_buffers[thread_index]; \
+ _vec_len (my_tx_buffers) +=1; \
} while (0)
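+
+/* Usage sketch (illustrative, not part of this change): the two macros above
+ * form a small per-thread tx buffer cache. A caller takes an index and hands
+ * it back if it ends up with nothing to send, as the persist timer handler
+ * below does:
+ *
+ *   u32 bi;
+ *   tcp_get_free_buffer_index (tm, &bi);
+ *   ...
+ *   if (n_bytes <= 0)
+ *     {
+ *       tcp_return_buffer (tm);
+ *       return;
+ *     }
+ */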
always_inline void
/* Init retransmit timer */
tcp_retransmit_timer_set (tc);
+ TCP_EVT_DBG (TCP_EVT_SYNACK_SENT, tc);
}
always_inline void
u32 *to_next, next_index;
vlib_frame_t *f;
- b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
+ b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
b->error = 0;
/* Default FIB for now */
if (is_ip4)
{
ih4 = vlib_buffer_push_ip4 (vm, b0, &dst_ip40, &src_ip40,
- IP_PROTOCOL_TCP);
+ IP_PROTOCOL_TCP, 1);
th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih4);
}
else
* Send reset without reusing existing buffer
*/
void
-tcp_send_reset (vlib_buffer_t * pkt, u8 is_ip4)
+tcp_send_reset (tcp_connection_t * tc, vlib_buffer_t * pkt, u8 is_ip4)
{
vlib_buffer_t *b;
u32 bi;
{
flags = TCP_FLAG_RST;
seq = pkt_th->ack_number;
- ack = 0;
+ ack = (tc && tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;
}
else
{
{
ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
- &pkt_ih4->src_address, IP_PROTOCOL_TCP);
+ &pkt_ih4->src_address, IP_PROTOCOL_TCP, 1);
th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
}
else
}
tcp_enqueue_to_ip_lookup (vm, b, bi, is_ip4);
+ TCP_EVT_DBG (TCP_EVT_RST_SENT, tc);
}
void
tcp_push_ip_hdr (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b)
{
tcp_header_t *th = vlib_buffer_get_current (b);
-
+ vlib_main_t *vm = vlib_get_main ();
if (tc->c_is_ip4)
{
ip4_header_t *ih;
- ih = vlib_buffer_push_ip4 (tm->vlib_main, b, &tc->c_lcl_ip4,
- &tc->c_rmt_ip4, IP_PROTOCOL_TCP);
- th->checksum = ip4_tcp_udp_compute_checksum (tm->vlib_main, b, ih);
+ ih = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip4,
+ &tc->c_rmt_ip4, IP_PROTOCOL_TCP, 1);
+ th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih);
}
else
{
ip6_header_t *ih;
int bogus = ~0;
- ih = vlib_buffer_push_ip6 (tm->vlib_main, b, &tc->c_lcl_ip6,
+ ih = vlib_buffer_push_ip6 (vm, b, &tc->c_lcl_ip6,
&tc->c_rmt_ip6, IP_PROTOCOL_TCP);
- th->checksum = ip6_tcp_udp_icmp_compute_checksum (tm->vlib_main, b, ih,
- &bogus);
+ th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih, &bogus);
ASSERT (!bogus);
}
}
tcp_push_ip_hdr (tm, tc, b);
tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4);
+ TCP_EVT_DBG (TCP_EVT_SYN_SENT, tc);
}
always_inline void
u32 *to_next, next_index;
vlib_frame_t *f;
- b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
+ b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
b->error = 0;
/* Decide where to send the packet */
next_index = is_ip4 ? tcp4_output_node.index : tcp6_output_node.index;
+ /* Initialize the trajectory trace, if configured */
+ if (VLIB_BUFFER_TRACE_TRAJECTORY > 0)
+ {
+ b->pre_data[0] = 1;
+ b->pre_data[1] = next_index;
+ }
+
/* Enqueue the packet */
f = vlib_get_frame_to_node (vm, next_index);
to_next = vlib_frame_vector_args (f);
if (is_syn)
{
tc = tcp_half_open_connection_get (index);
+ tc->timers[TCP_TIMER_RETRANSMIT_SYN] = TCP_TIMER_HANDLE_INVALID;
}
else
{
tc = tcp_connection_get (index, thread_index);
+ tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID;
}
- /* Make sure timer handle is set to invalid */
- tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID;
-
if (!tcp_in_recovery (tc) && tc->rto_boff > 0
&& tc->state >= TCP_STATE_ESTABLISHED)
{
/* Retransmit for SYN/SYNACK */
else if (tc->state == TCP_STATE_SYN_RCVD || tc->state == TCP_STATE_SYN_SENT)
{
+ /* The half-open connection has actually moved to established, but we
+ * were waiting for the SYN retransmit timer to pop so that cleanup
+ * runs on the right thread. */
+ if (tc->flags & TCP_CONN_HALF_OPEN_DONE)
+ {
+ ASSERT (tc->state == TCP_STATE_SYN_SENT);
+ if (tcp_half_open_connection_cleanup (tc))
+ {
+ clib_warning ("could not remove half-open connection");
+ ASSERT (0);
+ }
+ return;
+ }
+
/* Try without increasing RTO a number of times. If this fails,
* start growing RTO exponentially */
if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
-
tcp_push_hdr_i (tc, b, tc->state, 1);
/* Account for the SYN */
tc->snd_nxt += 1;
+ tc->rtt_ts = 0;
+ TCP_EVT_DBG (TCP_EVT_SYN_RXT, tc,
+ (tc->state == TCP_STATE_SYN_SENT ? 0 : 1));
}
else
{
{
ASSERT (tc->state == TCP_STATE_SYN_SENT);
- TCP_EVT_DBG (TCP_EVT_SYN_RTX, tc);
-
/* This goes straight to ipx_lookup */
tcp_push_ip_hdr (tm, tc, b);
tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4);
/* Nothing to send */
if (n_bytes <= 0)
{
- clib_warning ("persist found nothing to send");
+ // clib_warning ("persist found nothing to send");
tcp_return_buffer (tm);
return;
}
tcp_connection_t *tc0;
tcp_tx_trace_t *t0;
tcp_header_t *th0 = 0;
- u32 error0 = TCP_ERROR_PKTS_SENT, next0 = TCP_OUTPUT_NEXT_DROP;
+ u32 error0 = TCP_ERROR_PKTS_SENT, next0 = TCP_OUTPUT_NEXT_IP_LOOKUP;
bi0 = from[0];
to_next[0] = bi0;
{
ip4_header_t *ih0;
ih0 = vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4,
- &tc0->c_rmt_ip4, IP_PROTOCOL_TCP);
- th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih0);
+ &tc0->c_rmt_ip4, IP_PROTOCOL_TCP,
+ 1);
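+ /* Request IP and TCP checksum offload: record the header offsets for the
+ * offload path and leave the TCP checksum cleared */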
+ b0->flags |=
+ VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_OFFLOAD_IP_CKSUM |
+ VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
+ vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data;
+ vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
+ th0->checksum = 0;
}
else
{
ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6,
&tc0->c_rmt_ip6, IP_PROTOCOL_TCP);
- th0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ih0,
- &bogus);
+
+ b0->flags |= VNET_BUFFER_F_IS_IP6 |
+ VNET_BUFFER_F_OFFLOAD_IP_CKSUM |
+ VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
+ vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data;
+ vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
+ th0->checksum = 0;
- ASSERT (!bogus);
}
tc0->rto_boff = 0;
}
+#if 0
/* Make sure we haven't lost route to our peer */
if (PREDICT_FALSE (tc0->last_fib_check
< tc0->snd_opts.tsval + TCP_FIB_RECHECK_PERIOD))
/* Use pre-computed dpo to set next node */
next0 = tc0->c_rmt_dpo.dpoi_next_node;
vnet_buffer (b0)->ip.adj_index[VLIB_TX] = tc0->c_rmt_dpo.dpoi_index;
+#endif
+
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
- b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
+ b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
done:
b0->error = node->errors[error0];
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
done:
b0->error = node->errors[error0];
- b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
+ b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
th0 = vlib_buffer_get_current (b0);