/**
 * PAWS (Protection Against Wrapped Sequences) check, RFC 7323 sec. 5.3.
 *
 * Returns non-zero (i.e., "PAWS failed, drop the segment") when the peer
 * negotiated timestamps, we hold a recent timestamp, and the segment's
 * TSval is older than the stored TS.Recent.
 *
 * NOTE(review): the `tc->tsval_recent` truthiness test assumes 0 is never a
 * valid recent timestamp; a dedicated "tsval_recent valid" flag would be
 * more robust — TODO confirm against how tsval_recent is maintained.
 */
always_inline int
tcp_segment_check_paws (tcp_connection_t * tc)
{
  return tcp_opts_tstamp (&tc->opt) && tc->tsval_recent
    && timestamp_lt (tc->opt.tsval, tc->tsval_recent);
}
if (tc0->rcv_wnd == 0
&& tc0->rcv_nxt == vnet_buffer (b0)->tcp.seq_number)
{
- /* Make it look as if there's nothing to dequeue */
- vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number;
+ /* TODO Should segment be tagged? */
}
else
{
if (tc->rtt_seq && seq_gt (ack, tc->rtt_seq) && !tc->rto_boff)
{
mrtt = tcp_time_now () - tc->rtt_ts;
- tc->rtt_seq = 0;
}
/* As per RFC7323 TSecr can be used for RTTM only if the segment advances
tc->rto = clib_min (tc->srtt + (tc->rttvar << 2), TCP_RTO_MAX);
+ /* Allow measuring of RTT and make sure boff is 0 */
+ tc->rtt_seq = 0;
+ tc->rto_boff = 0;
+
return 1;
}
stream_session_dequeue_drop (&tc->connection, tc->bytes_acked);
/* Update rtt and rto */
- if (tcp_update_rtt (tc, ack))
- {
- /* Good ACK received and valid RTT, make sure retransmit backoff is 0 */
- tc->rto_boff = 0;
- }
+ tcp_update_rtt (tc, ack);
}
/**
tc->snd_wl1 = seq;
tc->snd_wl2 = ack;
TCP_EVT_DBG (TCP_EVT_SND_WND, tc);
+
+ /* Set probe timer if we just got 0 wnd */
+ if (tc->snd_wnd < tc->snd_mss
+ && !tcp_timer_is_active (tc, TCP_TIMER_PERSIST))
+ tcp_persist_timer_set (tc);
+ else
+ tcp_persist_timer_reset (tc);
}
}
void
tcp_cc_recover (tcp_connection_t * tc)
{
+ /* TODO: check if time to recover was small. It might be that RTO popped
+ * too soon.
+ */
+
tc->cc_algo->recovered (tc);
tc->rtx_bytes = 0;
tc->cc_algo->rcv_ack (tc);
tc->tsecr_last_ack = tc->opt.tsecr;
- tcp_fastrecovery_1_smss_off (tc);
- tcp_fastrecovery_off (tc);
+ tcp_cong_recovery_off (tc);
TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
}
{
u8 partial_ack;
- if (tcp_in_recovery (tc))
+ if (tcp_in_cong_recovery (tc))
{
partial_ack = seq_lt (tc->snd_una, tc->snd_congestion);
if (!partial_ack)
/* In case snd_nxt is still in the past and output tries to
* shove some new bytes */
- tc->snd_nxt = tc->snd_una;
+ tc->snd_nxt = tc->snd_una_max;
/* XXX need proper RFC6675 support */
- if (tc->sack_sb.last_sacked_bytes)
+ if (tc->sack_sb.last_sacked_bytes && !tcp_in_recovery (tc))
{
tcp_fast_retransmit (tc);
}
{
/* Retransmit first unacked segment */
tcp_retransmit_first_unacked (tc);
- /* If window allows, send 1 SMSS of new data */
- if (seq_lt (tc->snd_nxt, tc->snd_congestion))
- tc->snd_nxt = tc->snd_congestion;
}
}
}
return -1;
}
- tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
- *error = TCP_ERROR_ACK_FUTURE;
TCP_EVT_DBG (TCP_EVT_ACK_RCV_ERR, tc, 2,
vnet_buffer (b)->tcp.ack_number);
+
+ tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
+ *error = TCP_ERROR_ACK_FUTURE;
}
/* If old ACK, probably it's an old dupack */
* timer. */
if (tc->bytes_acked)
{
- TCP_EVT_DBG (TCP_EVT_ACK_RCVD, tc, vnet_buffer (b)->tcp.ack_number);
+ TCP_EVT_DBG (TCP_EVT_ACK_RCVD, tc);
/* Updates congestion control (slow start/congestion avoidance) */
tcp_cc_rcv_ack (tc, b);
* @param start Start sequence number of the newest SACK block
* @param end End sequence of the newest SACK block
*/
-static void
+void
tcp_update_sack_list (tcp_connection_t * tc, u32 start, u32 end)
{
- sack_block_t *new_list = 0, block;
+ sack_block_t *new_list = 0, *block = 0;
int i;
/* If the first segment is ooo add it to the list. Last write might've moved
* rcv_nxt over the first segment. */
if (seq_lt (tc->rcv_nxt, start))
{
- block.start = start;
- block.end = end;
- vec_add1 (new_list, block);
+ vec_add2 (new_list, block, 1);
+ block->start = start;
+ block->end = end;
}
/* Find the blocks still worth keeping. */
for (i = 0; i < vec_len (tc->snd_sacks); i++)
{
- /* Discard if:
- * 1) rcv_nxt advanced beyond current block OR
- * 2) Segment overlapped by the first segment, i.e., it has been merged
- * into it.*/
- if (seq_leq (tc->snd_sacks[i].start, tc->rcv_nxt)
- || seq_leq (tc->snd_sacks[i].start, end))
+ /* Discard if rcv_nxt advanced beyond current block */
+ if (seq_leq (tc->snd_sacks[i].start, tc->rcv_nxt))
continue;
- /* Save to new SACK list. */
- vec_add1 (new_list, tc->snd_sacks[i]);
+ /* Merge or drop if segment overlapped by the new segment */
+ if (block && (seq_geq (tc->snd_sacks[i].end, new_list[0].start)
+ && seq_leq (tc->snd_sacks[i].start, new_list[0].end)))
+ {
+ if (seq_lt (tc->snd_sacks[i].start, new_list[0].start))
+ new_list[0].start = tc->snd_sacks[i].start;
+ if (seq_lt (new_list[0].end, tc->snd_sacks[i].end))
+ new_list[0].end = tc->snd_sacks[i].end;
+ continue;
+ }
+
+ /* Save to new SACK list if we have space. */
+ if (vec_len (new_list) < TCP_MAX_SACK_BLOCKS)
+ {
+ vec_add1 (new_list, tc->snd_sacks[i]);
+ }
+ else
+ {
+ clib_warning ("dropped sack blocks");
+ }
}
- ASSERT (vec_len (new_list) < TCP_MAX_SACK_BLOCKS);
+ ASSERT (vec_len (new_list) <= TCP_MAX_SACK_BLOCKS);
/* Replace old vector with new one */
vec_free (tc->snd_sacks);
tc->rcv_nxt += written;
/* Depending on how fast the app is, all remaining buffers in burst will
- * not be enqueued. Should we inform peer of the damage? XXX */
+ * not be enqueued. Inform peer */
+ tc->flags |= TCP_CONN_SNDACK;
+
return TCP_ERROR_PARTIALLY_ENQUEUED;
}
else
{
+ tc->flags |= TCP_CONN_SNDACK;
return TCP_ERROR_FIFO_FULL;
}
u16 data_len)
{
stream_session_t *s0;
- u32 offset, seq;
+ u32 offset;
int rv;
/* Pure ACK. Do nothing */
}
s0 = stream_session_get (tc->c_s_index, tc->c_thread_index);
- seq = vnet_buffer (b)->tcp.seq_number;
- offset = seq - tc->rcv_nxt;
+ offset = vnet_buffer (b)->tcp.seq_number - tc->irs;
- rv = svm_fifo_enqueue_with_offset (s0->server_rx_fifo, s0->pid, offset,
- data_len, vlib_buffer_get_current (b));
+ clib_warning ("ooo: offset %d len %d", offset, data_len);
+
+ rv = svm_fifo_enqueue_with_offset (s0->server_rx_fifo, offset, data_len,
+ vlib_buffer_get_current (b));
/* Nothing written */
if (rv)
/* Get the newest segment from the fifo */
newest = svm_fifo_newest_ooo_segment (s0->server_rx_fifo);
- start = tc->rcv_nxt + ooo_segment_offset (s0->server_rx_fifo, newest);
- end = tc->rcv_nxt + ooo_segment_end_offset (s0->server_rx_fifo, newest);
+ start = ooo_segment_offset (s0->server_rx_fifo, newest);
+ end = ooo_segment_end_offset (s0->server_rx_fifo, newest);
tcp_update_sack_list (tc, start, end);
}
{
/* Old sequence numbers allowed through because they overlapped
* the rx window */
+
if (seq_lt (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt))
{
error = TCP_ERROR_SEGMENT_OLD;
goto done;
}
- if (PREDICT_FALSE (error == TCP_ERROR_FIFO_FULL))
- *next0 = TCP_NEXT_DROP;
-
/* Check if ACK can be delayed */
- if (!tcp_can_delack (tc))
- {
- /* Nothing to do for pure ACKs XXX */
- if (n_data_bytes == 0)
- goto done;
-
- *next0 = tcp_next_output (tc->c_is_ip4);
- tcp_make_ack (tc, b);
- }
- else
+ if (tcp_can_delack (tc))
{
if (!tcp_timer_is_active (tc, TCP_TIMER_DELACK))
tcp_timer_set (tc, TCP_TIMER_DELACK, TCP_DELACK_TIME);
+ goto done;
}
+ *next0 = tcp_next_output (tc->c_is_ip4);
+ tcp_make_ack (tc, b);
+
done:
return error;
}
/** Packet-trace payload captured by the tcp rx nodes: a copy of the TCP
 * header and a snapshot of the connection state at trace time. */
typedef struct
{
  tcp_header_t tcp_header;		/**< Copy of the segment's TCP header */
  tcp_connection_t tcp_connection;	/**< Snapshot of the connection, if any */
} tcp_rx_trace_t;

+u8 *
+format_tcp_rx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ tcp_rx_trace_t *t = va_arg (*args, tcp_rx_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "%U\n%U%U",
+ format_tcp_header, &t->tcp_header, 128,
+ format_white_space, indent,
+ format_tcp_connection_verbose, &t->tcp_connection);
+
+ return s;
+}
+
+u8 *
+format_tcp_rx_trace_short (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ tcp_rx_trace_t *t = va_arg (*args, tcp_rx_trace_t *);
+
+ s = format (s, "%d -> %d (%U)",
+ clib_net_to_host_u16 (t->tcp_header.src_port),
+ clib_net_to_host_u16 (t->tcp_header.dst_port), format_tcp_state,
+ &t->tcp_connection.state);
+
+ return s;
+}
+
+void
+tcp_set_rx_trace_data (tcp_rx_trace_t * t0, tcp_connection_t * tc0,
+ tcp_header_t * th0, vlib_buffer_t * b0, u8 is_ip4)
+{
+ if (tc0)
+ {
+ clib_memcpy (&t0->tcp_connection, tc0, sizeof (t0->tcp_connection));
+ }
+ else
+ {
+ th0 = tcp_buffer_hdr (b0);
+ }
+ clib_memcpy (&t0->tcp_header, th0, sizeof (t0->tcp_header));
+}
+
always_inline void
tcp_established_inc_counter (vlib_main_t * vm, u8 is_ip4, u8 evt, u8 val)
{
vlib_frame_t * from_frame, int is_ip4)
{
u32 n_left_from, next_index, *from, *to_next;
- u32 my_thread_index = vm->cpu_index, errors = 0;
+ u32 my_thread_index = vm->thread_index, errors = 0;
tcp_main_t *tm = vnet_get_tcp_main ();
+ u8 is_fin = 0;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
vlib_buffer_t *b0;
tcp_header_t *th0 = 0;
tcp_connection_t *tc0;
- ip4_header_t *ip40;
- ip6_header_t *ip60;
- u32 n_advance_bytes0, n_data_bytes0;
u32 next0 = TCP_ESTABLISHED_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED;
bi0 = from[0];
goto done;
}
- /* Checksum computed by ipx_local no need to compute again */
+ th0 = tcp_buffer_hdr (b0);
- if (is_ip4)
- {
- ip40 = vlib_buffer_get_current (b0);
- th0 = ip4_next_header (ip40);
- n_advance_bytes0 = (ip4_header_bytes (ip40)
- + tcp_header_bytes (th0));
- n_data_bytes0 = clib_net_to_host_u16 (ip40->length)
- - n_advance_bytes0;
- }
- else
- {
- ip60 = vlib_buffer_get_current (b0);
- th0 = ip6_next_header (ip60);
- n_advance_bytes0 = tcp_header_bytes (th0);
- n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length)
- - n_advance_bytes0;
- n_advance_bytes0 += sizeof (ip60[0]);
- }
+ is_fin = (th0->flags & TCP_FLAG_FIN) != 0;
/* SYNs, FINs and data consume sequence numbers */
vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number
- + tcp_is_syn (th0) + tcp_is_fin (th0) + n_data_bytes0;
+ + tcp_is_syn (th0) + is_fin + vnet_buffer (b0)->tcp.data_len;
/* TODO header prediction fast path */
/* 7: process the segment text */
- vlib_buffer_advance (b0, n_advance_bytes0);
- error0 = tcp_segment_rcv (tm, tc0, b0, n_data_bytes0, &next0);
+ vlib_buffer_advance (b0, vnet_buffer (b0)->tcp.data_offset);
+ error0 = tcp_segment_rcv (tm, tc0, b0,
+ vnet_buffer (b0)->tcp.data_len, &next0);
+
+ /* N.B. buffer is rewritten if segment is ooo. Thus, th0 becomes a
+ * dangling reference. */
/* 8: check the FIN bit */
- if (tcp_fin (th0))
+ if (is_fin)
{
/* Enter CLOSE-WAIT and notify session. Don't send ACK, instead
* wait for session to call close. To avoid lingering
b0->error = node->errors[error0];
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
-
+ tcp_rx_trace_t *t0 =
+ vlib_add_trace (vm, node, b0, sizeof (*t0));
+ tcp_set_rx_trace_data (t0, tc0, th0, b0, is_ip4);
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
foreach_tcp_state_next
#undef _
},
+ .format_trace = format_tcp_rx_trace_short,
};
/* *INDENT-ON* */
foreach_tcp_state_next
#undef _
},
+ .format_trace = format_tcp_rx_trace_short,
};
/* *INDENT-ON* */
{
tcp_main_t *tm = vnet_get_tcp_main ();
u32 n_left_from, next_index, *from, *to_next;
- u32 my_thread_index = vm->cpu_index, errors = 0;
+ u32 my_thread_index = vm->thread_index, errors = 0;
u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP;
from = vlib_frame_vector_args (from_frame);
{
u32 bi0, ack0, seq0;
vlib_buffer_t *b0;
+ tcp_rx_trace_t *t0;
tcp_header_t *tcp0 = 0;
tcp_connection_t *tc0;
- ip4_header_t *ip40;
- ip6_header_t *ip60;
- u32 n_advance_bytes0, n_data_bytes0;
tcp_connection_t *new_tc0;
u32 next0 = TCP_SYN_SENT_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED;
ack0 = vnet_buffer (b0)->tcp.ack_number;
seq0 = vnet_buffer (b0)->tcp.seq_number;
-
- /* Checksum computed by ipx_local no need to compute again */
-
- if (is_ip4)
- {
- ip40 = vlib_buffer_get_current (b0);
- tcp0 = ip4_next_header (ip40);
- n_advance_bytes0 = (ip4_header_bytes (ip40)
- + tcp_header_bytes (tcp0));
- n_data_bytes0 = clib_net_to_host_u16 (ip40->length)
- - n_advance_bytes0;
- }
- else
- {
- ip60 = vlib_buffer_get_current (b0);
- tcp0 = ip6_next_header (ip60);
- n_advance_bytes0 = tcp_header_bytes (tcp0);
- n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length)
- - n_advance_bytes0;
- n_advance_bytes0 += sizeof (ip60[0]);
- }
+ tcp0 = tcp_buffer_hdr (b0);
if (PREDICT_FALSE
(!tcp_ack (tcp0) && !tcp_rst (tcp0) && !tcp_syn (tcp0)))
/* SYNs, FINs and data consume sequence numbers */
vnet_buffer (b0)->tcp.seq_end = seq0 + tcp_is_syn (tcp0)
- + tcp_is_fin (tcp0) + n_data_bytes0;
+ + tcp_is_fin (tcp0) + vnet_buffer (b0)->tcp.data_len;
/*
* 1. check the ACK bit
}
/* Read data, if any */
- if (n_data_bytes0)
+ if (vnet_buffer (b0)->tcp.data_len)
{
- error0 =
- tcp_segment_rcv (tm, new_tc0, b0, n_data_bytes0, &next0);
+ vlib_buffer_advance (b0, vnet_buffer (b0)->tcp.data_offset);
+ error0 = tcp_segment_rcv (tm, new_tc0, b0,
+ vnet_buffer (b0)->tcp.data_len,
+ &next0);
if (error0 == TCP_ERROR_PURE_ACK)
error0 = TCP_ERROR_SYN_ACKS_RCVD;
}
b0->error = error0 ? node->errors[error0] : 0;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
-
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ clib_memcpy (&t0->tcp_header, tcp0, sizeof (t0->tcp_header));
+ clib_memcpy (&t0->tcp_connection, tc0,
+ sizeof (t0->tcp_connection));
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
foreach_tcp_state_next
#undef _
},
+ .format_trace = format_tcp_rx_trace_short,
};
/* *INDENT-ON* */
#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n,
foreach_tcp_state_next
#undef _
- }
-,};
+ },
+ .format_trace = format_tcp_rx_trace_short,
+};
/* *INDENT-ON* */
VLIB_NODE_FUNCTION_MULTIARCH (tcp6_syn_sent_node, tcp6_syn_sent_rcv);
{
tcp_main_t *tm = vnet_get_tcp_main ();
u32 n_left_from, next_index, *from, *to_next;
- u32 my_thread_index = vm->cpu_index, errors = 0;
+ u32 my_thread_index = vm->thread_index, errors = 0;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
vlib_buffer_t *b0;
tcp_header_t *tcp0 = 0;
tcp_connection_t *tc0;
- ip4_header_t *ip40;
- ip6_header_t *ip60;
- u32 n_advance_bytes0, n_data_bytes0;
u32 next0 = TCP_RCV_PROCESS_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED;
bi0 = from[0];
goto drop;
}
- /* Checksum computed by ipx_local no need to compute again */
-
- if (is_ip4)
- {
- ip40 = vlib_buffer_get_current (b0);
- tcp0 = ip4_next_header (ip40);
- n_advance_bytes0 = (ip4_header_bytes (ip40)
- + tcp_header_bytes (tcp0));
- n_data_bytes0 = clib_net_to_host_u16 (ip40->length)
- - n_advance_bytes0;
- }
- else
- {
- ip60 = vlib_buffer_get_current (b0);
- tcp0 = ip6_next_header (ip60);
- n_advance_bytes0 = tcp_header_bytes (tcp0);
- n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length)
- - n_advance_bytes0;
- n_advance_bytes0 += sizeof (ip60[0]);
- }
+ tcp0 = tcp_buffer_hdr (b0);
/* SYNs, FINs and data consume sequence numbers */
vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number
- + tcp_is_syn (tcp0) + tcp_is_fin (tcp0) + n_data_bytes0;
+ + tcp_is_syn (tcp0) + tcp_is_fin (tcp0)
+ + vnet_buffer (b0)->tcp.data_len;
/*
* Special treatment for CLOSED
case TCP_STATE_ESTABLISHED:
case TCP_STATE_FIN_WAIT_1:
case TCP_STATE_FIN_WAIT_2:
- error0 = tcp_segment_rcv (tm, tc0, b0, n_data_bytes0, &next0);
+ vlib_buffer_advance (b0, vnet_buffer (b0)->tcp.data_offset);
+ error0 = tcp_segment_rcv (tm, tc0, b0,
+ vnet_buffer (b0)->tcp.data_len,
+ &next0);
break;
case TCP_STATE_CLOSE_WAIT:
case TCP_STATE_CLOSING:
}
TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc0);
+ drop:
b0->error = error0 ? node->errors[error0] : 0;
- drop:
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
-
+ tcp_rx_trace_t *t0 =
+ vlib_add_trace (vm, node, b0, sizeof (*t0));
+ tcp_set_rx_trace_data (t0, tc0, tcp0, b0, is_ip4);
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
foreach_tcp_state_next
#undef _
},
+ .format_trace = format_tcp_rx_trace_short,
};
/* *INDENT-ON* */
foreach_tcp_state_next
#undef _
},
+ .format_trace = format_tcp_rx_trace_short,
};
/* *INDENT-ON* */
vlib_frame_t * from_frame, int is_ip4)
{
u32 n_left_from, next_index, *from, *to_next;
- u32 my_thread_index = vm->cpu_index;
+ u32 my_thread_index = vm->thread_index;
tcp_main_t *tm = vnet_get_tcp_main ();
u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP;
{
u32 bi0;
vlib_buffer_t *b0;
+ tcp_rx_trace_t *t0;
tcp_header_t *th0 = 0;
tcp_connection_t *lc0;
ip4_header_t *ip40;
child0->irs = vnet_buffer (b0)->tcp.seq_number;
child0->rcv_nxt = vnet_buffer (b0)->tcp.seq_number + 1;
+ child0->rcv_las = child0->rcv_nxt;
child0->state = TCP_STATE_SYN_RCVD;
/* RFC1323: TSval timestamps sent on {SYN} and {SYN,ACK}
drop:
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
-
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ clib_memcpy (&t0->tcp_header, th0, sizeof (t0->tcp_header));
+ clib_memcpy (&t0->tcp_connection, lc0,
+ sizeof (t0->tcp_connection));
}
b0->error = node->errors[error0];
foreach_tcp_state_next
#undef _
},
+ .format_trace = format_tcp_rx_trace_short,
};
/* *INDENT-ON* */
foreach_tcp_state_next
#undef _
},
+ .format_trace = format_tcp_rx_trace_short,
};
/* *INDENT-ON* */
_ (ESTABLISHED, "tcp6-established") \
_ (RESET, "tcp6-reset")
-typedef struct
-{
- u16 src_port;
- u16 dst_port;
- u8 state;
-} tcp_rx_trace_t;
-
-u8 *
-format_tcp_rx_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- tcp_rx_trace_t *t = va_arg (*args, tcp_rx_trace_t *);
-
- s = format (s, "TCP: src-port %d dst-port %U%s\n",
- clib_net_to_host_u16 (t->src_port),
- clib_net_to_host_u16 (t->dst_port), format_tcp_state, t->state);
-
- return s;
-}
-
#define filter_flags (TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN)
always_inline uword
vlib_frame_t * from_frame, int is_ip4)
{
u32 n_left_from, next_index, *from, *to_next;
- u32 my_thread_index = vm->cpu_index;
+ u32 my_thread_index = vm->thread_index;
tcp_main_t *tm = vnet_get_tcp_main ();
from = vlib_frame_vector_args (from_frame);
while (n_left_from > 0 && n_left_to_next > 0)
{
+ int n_advance_bytes0, n_data_bytes0;
u32 bi0;
vlib_buffer_t *b0;
tcp_header_t *tcp0 = 0;
b0 = vlib_get_buffer (vm, bi0);
vnet_buffer (b0)->tcp.flags = 0;
+ /* Checksum computed by ipx_local no need to compute again */
+
if (is_ip4)
{
ip40 = vlib_buffer_get_current (b0);
tcp0 = ip4_next_header (ip40);
+ n_advance_bytes0 = (ip4_header_bytes (ip40)
+ + tcp_header_bytes (tcp0));
+ n_data_bytes0 = clib_net_to_host_u16 (ip40->length)
+ - n_advance_bytes0;
/* lookup session */
tc0 =
{
ip60 = vlib_buffer_get_current (b0);
tcp0 = ip6_next_header (ip60);
+ n_advance_bytes0 = tcp_header_bytes (tcp0);
+ n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length)
+ - n_advance_bytes0;
+ n_advance_bytes0 += sizeof (ip60[0]);
+
tc0 =
(tcp_connection_t *)
stream_session_lookup_transport6 (&ip60->src_address,
my_thread_index);
}
+ /* Length check */
+ if (PREDICT_FALSE (n_advance_bytes0 < 0))
+ {
+ error0 = TCP_ERROR_LENGTH;
+ goto done;
+ }
+
/* Session exists */
if (PREDICT_TRUE (0 != tc0))
{
vnet_buffer (b0)->tcp.ack_number =
clib_net_to_host_u32 (tcp0->ack_number);
+ vnet_buffer (b0)->tcp.hdr_offset = (u8 *) tcp0
+ - (u8 *) vlib_buffer_get_current (b0);
+ vnet_buffer (b0)->tcp.data_offset = n_advance_bytes0;
+ vnet_buffer (b0)->tcp.data_len = n_data_bytes0;
+
flags0 = tcp0->flags & filter_flags;
next0 = tm->dispatch_table[tc0->state][flags0].next;
error0 = tm->dispatch_table[tc0->state][flags0].error;
if (PREDICT_FALSE (error0 == TCP_ERROR_DISPATCH))
{
+ tcp_state_t state0 = tc0->state;
/* Overload tcp flags to store state */
vnet_buffer (b0)->tcp.flags = tc0->state;
+ clib_warning ("disp error state %U flags %U",
+ format_tcp_state, &state0,
+ format_tcp_flags, (int) flags0);
}
}
else
error0 = TCP_ERROR_NO_LISTENER;
}
+ done:
b0->error = error0 ? node->errors[error0] : 0;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
-
+ tcp_rx_trace_t *t0 =
+ vlib_add_trace (vm, node, b0, sizeof (*t0));
+ tcp_set_rx_trace_data (t0, tc0, tcp0, b0, is_ip4);
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
/* *INDENT-ON* */
VLIB_NODE_FUNCTION_MULTIARCH (tcp6_input_node, tcp6_input);
-void
-tcp_update_time (f64 now, u32 thread_index)
-{
- tcp_main_t *tm = vnet_get_tcp_main ();
- tw_timer_expire_timers_16t_2w_512sl (&tm->timer_wheels[thread_index], now);
-}
static void
tcp_dispatch_table_init (tcp_main_t * tm)