/**
 * PAWS (Protection Against Wrapped Sequences) check, RFC 7323.
 *
 * Uses the timestamp option parsed into tc->opt for the current segment
 * and the last recorded timestamp (tc->tsval_recent).
 *
 * @param tc connection the segment was matched to
 * @return non-zero if the segment FAILS the check, i.e., timestamps are in
 *         use, a previous timestamp was recorded, and the segment's tsval
 *         is strictly older (timestamp_lt) than tsval_recent. Zero means
 *         the segment passes PAWS.
 */
always_inline int
tcp_segment_check_paws (tcp_connection_t * tc)
{
  /* NOTE(review): tc->tsval_recent used as a truth value doubles as the
   * "have we recorded a timestamp yet" flag — assumes 0 is never a valid
   * recorded tsval; confirm against where tsval_recent is set. */
  return tcp_opts_tstamp (&tc->opt) && tc->tsval_recent
    && timestamp_lt (tc->opt.tsval, tc->tsval_recent);
}
* @param start Start sequence number of the newest SACK block
* @param end End sequence of the newest SACK block
*/
-static void
+void
tcp_update_sack_list (tcp_connection_t * tc, u32 start, u32 end)
{
- sack_block_t *new_list = 0, block;
+ sack_block_t *new_list = 0, *block = 0;
int i;
/* If the first segment is ooo add it to the list. Last write might've moved
* rcv_nxt over the first segment. */
if (seq_lt (tc->rcv_nxt, start))
{
- block.start = start;
- block.end = end;
- vec_add1 (new_list, block);
+ vec_add2 (new_list, block, 1);
+ block->start = start;
+ block->end = end;
}
/* Find the blocks still worth keeping. */
for (i = 0; i < vec_len (tc->snd_sacks); i++)
{
- /* Discard if:
- * 1) rcv_nxt advanced beyond current block OR
- * 2) Segment overlapped by the first segment, i.e., it has been merged
- * into it.*/
- if (seq_leq (tc->snd_sacks[i].start, tc->rcv_nxt)
- || seq_leq (tc->snd_sacks[i].start, end))
+ /* Discard if rcv_nxt advanced beyond current block */
+ if (seq_leq (tc->snd_sacks[i].start, tc->rcv_nxt))
continue;
- /* Save to new SACK list. */
- vec_add1 (new_list, tc->snd_sacks[i]);
+ /* Merge or drop if segment overlapped by the new segment */
+ if (block && (seq_geq (tc->snd_sacks[i].end, new_list[0].start)
+ && seq_leq (tc->snd_sacks[i].start, new_list[0].end)))
+ {
+ if (seq_lt (tc->snd_sacks[i].start, new_list[0].start))
+ new_list[0].start = tc->snd_sacks[i].start;
+ if (seq_lt (new_list[0].end, tc->snd_sacks[i].end))
+ new_list[0].end = tc->snd_sacks[i].end;
+ continue;
+ }
+
+ /* Save to new SACK list if we have space. */
+ if (vec_len (new_list) < TCP_MAX_SACK_BLOCKS)
+ {
+ vec_add1 (new_list, tc->snd_sacks[i]);
+ }
+ else
+ {
+ clib_warning ("dropped sack blocks");
+ }
}
- ASSERT (vec_len (new_list) < TCP_MAX_SACK_BLOCKS);
+ ASSERT (vec_len (new_list) <= TCP_MAX_SACK_BLOCKS);
/* Replace old vector with new one */
vec_free (tc->snd_sacks);
u16 data_len)
{
stream_session_t *s0;
- u32 offset, seq;
+ u32 offset;
int rv;
/* Pure ACK. Do nothing */
}
s0 = stream_session_get (tc->c_s_index, tc->c_thread_index);
- seq = vnet_buffer (b)->tcp.seq_number;
- offset = seq - tc->rcv_nxt;
+ offset = vnet_buffer (b)->tcp.seq_number - tc->irs;
+
+ clib_warning ("ooo: offset %d len %d", offset, data_len);
- rv = svm_fifo_enqueue_with_offset (s0->server_rx_fifo, s0->pid, offset,
- data_len, vlib_buffer_get_current (b));
+ rv = svm_fifo_enqueue_with_offset (s0->server_rx_fifo, offset, data_len,
+ vlib_buffer_get_current (b));
/* Nothing written */
if (rv)
/* Get the newest segment from the fifo */
newest = svm_fifo_newest_ooo_segment (s0->server_rx_fifo);
- start = tc->rcv_nxt + ooo_segment_offset (s0->server_rx_fifo, newest);
- end = tc->rcv_nxt + ooo_segment_end_offset (s0->server_rx_fifo, newest);
+ start = ooo_segment_offset (s0->server_rx_fifo, newest);
+ end = ooo_segment_end_offset (s0->server_rx_fifo, newest);
tcp_update_sack_list (tc, start, end);
}
{
/* Old sequence numbers allowed through because they overlapped
* the rx window */
+
if (seq_lt (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt))
{
error = TCP_ERROR_SEGMENT_OLD;
return error;
}
+typedef struct
+{
+ tcp_header_t tcp_header;
+ tcp_connection_t tcp_connection;
+} tcp_rx_trace_t;
+
+u8 *
+format_tcp_rx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ tcp_rx_trace_t *t = va_arg (*args, tcp_rx_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "%U\n%U%U",
+ format_tcp_header, &t->tcp_header, 128,
+ format_white_space, indent,
+ format_tcp_connection_verbose, &t->tcp_connection);
+
+ return s;
+}
+
+u8 *
+format_tcp_rx_trace_short (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ tcp_rx_trace_t *t = va_arg (*args, tcp_rx_trace_t *);
+
+ s = format (s, "%d -> %d (%U)",
+ clib_net_to_host_u16 (t->tcp_header.src_port),
+ clib_net_to_host_u16 (t->tcp_header.dst_port), format_tcp_state,
+ &t->tcp_connection.state);
+
+ return s;
+}
+
always_inline void
tcp_established_inc_counter (vlib_main_t * vm, u8 is_ip4, u8 evt, u8 val)
{
u32 n_left_from, next_index, *from, *to_next;
u32 my_thread_index = vm->thread_index, errors = 0;
tcp_main_t *tm = vnet_get_tcp_main ();
+ u8 is_fin = 0;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
{
u32 bi0;
vlib_buffer_t *b0;
+ tcp_rx_trace_t *t0;
tcp_header_t *th0 = 0;
tcp_connection_t *tc0;
ip4_header_t *ip40;
n_advance_bytes0 += sizeof (ip60[0]);
}
+ is_fin = (th0->flags & TCP_FLAG_FIN) != 0;
+
/* SYNs, FINs and data consume sequence numbers */
vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number
- + tcp_is_syn (th0) + tcp_is_fin (th0) + n_data_bytes0;
+ + tcp_is_syn (th0) + is_fin + n_data_bytes0;
/* TODO header prediction fast path */
vlib_buffer_advance (b0, n_advance_bytes0);
error0 = tcp_segment_rcv (tm, tc0, b0, n_data_bytes0, &next0);
+ /* N.B. buffer is rewritten if segment is ooo. Thus, th0 becomes a
+ * dangling reference. */
+
/* 8: check the FIN bit */
- if (tcp_fin (th0))
+ if (is_fin)
{
/* Enter CLOSE-WAIT and notify session. Don't send ACK, instead
* wait for session to call close. To avoid lingering
b0->error = node->errors[error0];
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
-
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ clib_memcpy (&t0->tcp_header, th0, sizeof (t0->tcp_header));
+ clib_memcpy (&t0->tcp_connection, tc0,
+ sizeof (t0->tcp_connection));
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
foreach_tcp_state_next
#undef _
},
+ .format_trace = format_tcp_rx_trace_short,
};
/* *INDENT-ON* */
foreach_tcp_state_next
#undef _
},
+ .format_trace = format_tcp_rx_trace_short,
};
/* *INDENT-ON* */
{
u32 bi0, ack0, seq0;
vlib_buffer_t *b0;
+ tcp_rx_trace_t *t0;
tcp_header_t *tcp0 = 0;
tcp_connection_t *tc0;
ip4_header_t *ip40;
b0->error = error0 ? node->errors[error0] : 0;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
-
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ clib_memcpy (&t0->tcp_header, tcp0, sizeof (t0->tcp_header));
+ clib_memcpy (&t0->tcp_connection, tc0,
+ sizeof (t0->tcp_connection));
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
foreach_tcp_state_next
#undef _
},
+ .format_trace = format_tcp_rx_trace_short,
};
/* *INDENT-ON* */
#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n,
foreach_tcp_state_next
#undef _
- }
-,};
+ },
+ .format_trace = format_tcp_rx_trace_short,
+};
/* *INDENT-ON* */
VLIB_NODE_FUNCTION_MULTIARCH (tcp6_syn_sent_node, tcp6_syn_sent_rcv);
{
u32 bi0;
vlib_buffer_t *b0;
+ tcp_rx_trace_t *t0;
tcp_header_t *tcp0 = 0;
tcp_connection_t *tc0;
ip4_header_t *ip40;
drop:
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
-
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ clib_memcpy (&t0->tcp_header, tcp0, sizeof (t0->tcp_header));
+ clib_memcpy (&t0->tcp_connection, tc0,
+ sizeof (t0->tcp_connection));
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
foreach_tcp_state_next
#undef _
},
+ .format_trace = format_tcp_rx_trace_short,
};
/* *INDENT-ON* */
foreach_tcp_state_next
#undef _
},
+ .format_trace = format_tcp_rx_trace_short,
};
/* *INDENT-ON* */
{
u32 bi0;
vlib_buffer_t *b0;
+ tcp_rx_trace_t *t0;
tcp_header_t *th0 = 0;
tcp_connection_t *lc0;
ip4_header_t *ip40;
drop:
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
-
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ clib_memcpy (&t0->tcp_header, th0, sizeof (t0->tcp_header));
+ clib_memcpy (&t0->tcp_connection, lc0,
+ sizeof (t0->tcp_connection));
}
b0->error = node->errors[error0];
foreach_tcp_state_next
#undef _
},
+ .format_trace = format_tcp_rx_trace_short,
};
/* *INDENT-ON* */
foreach_tcp_state_next
#undef _
},
+ .format_trace = format_tcp_rx_trace_short,
};
/* *INDENT-ON* */
_ (ESTABLISHED, "tcp6-established") \
_ (RESET, "tcp6-reset")
-typedef struct
-{
- u16 src_port;
- u16 dst_port;
- u8 state;
-} tcp_rx_trace_t;
-
-u8 *
-format_tcp_rx_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- tcp_rx_trace_t *t = va_arg (*args, tcp_rx_trace_t *);
-
- s = format (s, "TCP: src-port %d dst-port %U%s\n",
- clib_net_to_host_u16 (t->src_port),
- clib_net_to_host_u16 (t->dst_port), format_tcp_state, t->state);
-
- return s;
-}
-
#define filter_flags (TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN)
always_inline uword
{
u32 bi0;
vlib_buffer_t *b0;
+ tcp_rx_trace_t *t0;
tcp_header_t *tcp0 = 0;
tcp_connection_t *tc0;
ip4_header_t *ip40;
if (PREDICT_FALSE (error0 == TCP_ERROR_DISPATCH))
{
+ tcp_state_t state0 = tc0->state;
/* Overload tcp flags to store state */
vnet_buffer (b0)->tcp.flags = tc0->state;
+ clib_warning ("disp error state %U flags %U",
+ format_tcp_state, &state0,
+ format_tcp_flags, (int) flags0);
}
}
else
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
-
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ clib_memcpy (&t0->tcp_header, tcp0, sizeof (t0->tcp_header));
+ if (tc0)
+ clib_memcpy (&t0->tcp_connection, tc0, sizeof (*tc0));
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,