From edfe0eea7a938e650074fcb82a971187a7beb12e Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Mon, 29 Jul 2019 18:13:25 -0700 Subject: [PATCH] tcp: add more connection stats Type:feature Change-Id: If02884d0f1f26bfe31ec609ea9611cb27b699868 Signed-off-by: Florin Coras --- src/vnet/tcp/tcp.c | 24 ++++++++++++++++++++++++ src/vnet/tcp/tcp.h | 34 +++++++++++++++++++++++++++++++--- src/vnet/tcp/tcp_input.c | 14 ++++++++++++++ src/vnet/tcp/tcp_output.c | 15 ++++++++++++--- 4 files changed, 81 insertions(+), 6 deletions(-) diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 5a215b658a0..adff77c14ad 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -670,6 +670,8 @@ tcp_connection_init_vars (tcp_connection_t * tc) if (tc->flags & TCP_CONN_RATE_SAMPLE) tcp_bt_init (tc); + + tc->start_ts = tcp_time_now_us (tc->c_thread_index); /* baseline for the duration printed by format_tcp_stats */ } static int @@ -863,6 +865,27 @@ format_tcp_congestion (u8 * s, va_list * args) return s; } +static u8 * +format_tcp_stats (u8 * s, va_list * args) +{ /* Appends the rx, tx, retransmit and soft-error counters of the connection passed in args to s and returns s */ + tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); + u32 indent = format_get_indent (s); /* passed to format_white_space to pad the continuation lines below */ + s = format (s, "in segs %lu dsegs %lu bytes %lu dupacks %u\n", + tc->segs_in, tc->data_segs_in, tc->bytes_in, tc->dupacks_in); + s = format (s, "%Uout segs %lu dsegs %lu bytes %lu dupacks %u\n", + format_white_space, indent, tc->segs_out, + tc->data_segs_out, tc->bytes_out, tc->dupacks_out); + s = format (s, "%Ufr %u tr %u rxt segs %lu bytes %lu duration %.3f\n", + format_white_space, indent, tc->fr_occurences, + tc->tr_occurences, tc->segs_retrans, tc->bytes_retrans, + tcp_time_now_us (tc->c_thread_index) - tc->start_ts); /* time elapsed since start_ts was recorded; NOTE(review): unit follows tcp_time_now_us - confirm */ + s = format (s, "%Uerr wnd data below %u above %u ack below %u above %u", + format_white_space, indent, tc->errors.below_data_wnd, + tc->errors.above_data_wnd, tc->errors.below_ack_wnd, + tc->errors.above_ack_wnd); + return s; +} + static u8 * format_tcp_vars (u8 * s, va_list * args) { @@ -896,6 +919,7 @@ format_tcp_vars (u8 * s, va_list * args) { s = format (s, " 
sboard: %U\n", format_tcp_scoreboard, &tc->sack_sb, tc); + s = format (s, " stats: %U\n", format_tcp_stats, tc); } if (vec_len (tc->snd_sacks)) s = format (s, " sacks tx: %U\n", format_tcp_sacks, tc); diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 698173ee28a..fe02640f249 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -300,6 +300,17 @@ typedef enum tcp_cc_event_ TCP_CC_EVT_START_TX, } tcp_cc_event_t; +/* + * As per RFC4898 tcpEStatsStackSoftErrors + */ +typedef struct tcp_errors_ +{ + u32 below_data_wnd; /**< All data in seg is below snd_una; incremented when the segment ends before rcv_las */ + u32 above_data_wnd; /**< Some data in segment is above snd_wnd; incremented when the segment ends past rcv_las + rcv_wnd */ + u32 below_ack_wnd; /**< Acks for data below snd_una */ + u32 above_ack_wnd; /**< Acks for data not sent */ +} tcp_errors_t; + typedef struct _tcp_connection { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); @@ -309,7 +320,10 @@ typedef struct _tcp_connection u16 flags; /**< Connection flags (see tcp_conn_flags_e) */ u32 timers[TCP_N_TIMERS]; /**< Timer handles into timer wheel */ - /* TODO RFC4898 */ + u64 segs_in; /**< RFC4022/4898 tcpHCInSegs/tcpEStatsPerfSegsIn */ + u64 bytes_in; /**< RFC4898 tcpEStatsPerfHCDataOctetsIn */ + u64 segs_out; /**< RFC4898 tcpEStatsPerfSegsOut */ + u64 bytes_out; /**< RFC4898 tcpEStatsPerfHCDataOctetsOut */ /** Send sequence variables RFC793 */ u32 snd_una; /**< oldest unacknowledged sequence number */ @@ -320,6 +334,9 @@ typedef struct _tcp_connection u32 snd_nxt; /**< next seq number to be sent */ u16 snd_mss; /**< Effective send max seg (data) size */ + u64 data_segs_in; /**< RFC4898 tcpEStatsPerfDataSegsIn */ + u64 data_segs_out; /**< RFC4898 tcpEStatsPerfDataSegsOut */ + /** Receive sequence variables RFC793 */ u32 rcv_nxt; /**< next sequence number expected */ u32 rcv_wnd; /**< receive window we expect */ @@ -342,8 +359,10 @@ typedef struct _tcp_connection sack_block_t *snd_sacks_fl; /**< Vector for building new list */ sack_scoreboard_t sack_sb; /**< SACK "scoreboard" that tracks holes */ - u16 
rcv_dupacks; /**< Number of DUPACKs received */ + u16 rcv_dupacks; /**< Number of recent DUPACKs received */ + u32 dupacks_in; /**< RFC4898 tcpEStatsStackDupAcksIn */ u8 pending_dupacks; /**< Number of DUPACKs to be sent */ + u32 dupacks_out; /**< RFC4898 tcpEStatsPathDupAcksOut */ /* Congestion control */ u32 cwnd; /**< Congestion window */ @@ -361,6 +380,12 @@ typedef struct _tcp_connection tcp_cc_algorithm_t *cc_algo; /**< Congestion control algorithm */ u8 cc_data[TCP_CC_DATA_SZ]; /**< Congestion control algo private data */ + u32 fr_occurences; /**< fast-retransmit occurrences RFC4898 + tcpEStatsStackFastRetran */ + u32 tr_occurences; /**< timer-retransmit occurrences */ + u64 bytes_retrans; /**< RFC4898 tcpEStatsPerfOctetsRetrans */ + u64 segs_retrans; /**< RFC4898 tcpEStatsPerfSegsRetrans */ + /* RTT and RTO */ u32 rto; /**< Retransmission timeout */ u32 rto_boff; /**< Index for RTO backoff */ @@ -382,9 +407,12 @@ typedef struct _tcp_connection f64 delivered_time; /**< Time last bytes were acked */ tcp_byte_tracker_t *bt; /**< Tx byte tracker */ + tcp_errors_t errors; /**< Soft connection errors */ + + f64 start_ts; /**< Timestamp when connection initialized */ u32 last_fib_check; /**< Last time we checked fib route for peer */ u16 mss; /**< Our max seg size that includes options */ - u32 timestamp_delta; + u32 timestamp_delta; /**< Offset for timestamp */ } tcp_connection_t; /* *INDENT-OFF* */ diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 4695fbb1161..2fadab59eb8 100755 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -365,6 +365,9 @@ tcp_segment_validate (tcp_worker_ctx_t * wrk, tcp_connection_t * tc0, *error0 = TCP_ERROR_RCV_WND; + tc0->errors.below_data_wnd += seq_lt (vnet_buffer (b0)->tcp.seq_end, + tc0->rcv_las); /* adds the seq_lt result (presumably 0/1), so only segments ending before rcv_las are counted */ + /* If not RST, send dup ack */ if (!tcp_rst (th0)) { @@ -1174,6 +1177,7 @@ tcp_cc_init_congestion (tcp_connection_t * tc) tc->prev_ssthresh = tc->ssthresh; tc->prev_cwnd = tc->cwnd; 
tc->cc_algo->congestion (tc); + tc->fr_occurences += 1; TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 4); } #endif /* CLIB_MARCH_VARIANT */ @@ -1524,6 +1528,7 @@ tcp_rcv_ack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b, goto process_ack; } + tc->errors.above_ack_wnd += 1; *error = TCP_ERROR_ACK_FUTURE; TCP_EVT_DBG (TCP_EVT_ACK_RCV_ERR, tc, 0, vnet_buffer (b)->tcp.ack_number); @@ -1533,6 +1538,7 @@ tcp_rcv_ack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b, /* If old ACK, probably it's an old dupack */ if (PREDICT_FALSE (seq_lt (vnet_buffer (b)->tcp.ack_number, tc->snd_una))) { + tc->errors.below_ack_wnd += 1; *error = TCP_ERROR_ACK_OLD; TCP_EVT_DBG (TCP_EVT_ACK_RCV_ERR, tc, 1, vnet_buffer (b)->tcp.ack_number); @@ -1547,6 +1553,7 @@ process_ack: /* * Looks okay, process feedback */ + if (tcp_opts_sack_permitted (&tc->rcv_opts)) tcp_rcv_sacks (tc, vnet_buffer (b)->tcp.ack_number); @@ -1577,6 +1584,7 @@ process_ack: if (tcp_ack_is_cc_event (tc, b, prev_snd_wnd, prev_snd_una, &is_dack)) { tcp_cc_handle_event (tc, &rs, is_dack); + tc->dupacks_in += is_dack; /* adds the is_dack flag filled in by tcp_ack_is_cc_event (presumably 0/1) */ if (!tcp_in_cong_recovery (tc)) { *error = TCP_ERROR_ACK_OK; @@ -1741,6 +1749,7 @@ tcp_session_enqueue_data (tcp_connection_t * tc, vlib_buffer_t * b, ASSERT (data_len); written = session_enqueue_stream_connection (&tc->connection, b, 0, 1 /* queue event */ , 1); + tc->bytes_in += written; /* NOTE(review): written is added unchecked - confirm it cannot be negative at this point */ TCP_EVT_DBG (TCP_EVT_INPUT, tc, 0, data_len, written); @@ -1801,6 +1810,7 @@ tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b, } TCP_EVT_DBG (TCP_EVT_INPUT, tc, 1, data_len, data_len); + tc->bytes_in += data_len; /* Update SACK list if in use */ if (tcp_opts_sack_permitted (&tc->rcv_opts)) @@ -1889,6 +1899,7 @@ tcp_segment_rcv (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_advance (b, vnet_buffer (b)->tcp.data_offset); n_data_bytes = vnet_buffer (b)->tcp.data_len; ASSERT (n_data_bytes); + tc->data_segs_in += 1; /* Handle out-of-order data */ if (PREDICT_FALSE (vnet_buffer 
(b)->tcp.seq_number != tc->rcv_nxt)) @@ -1923,6 +1934,8 @@ tcp_segment_rcv (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, error = tcp_session_enqueue_ooo (tc, b, n_data_bytes); tcp_program_dupack (tc); TCP_EVT_DBG (TCP_EVT_DUPACK_SENT, tc, vnet_buffer (b)->tcp); + tc->errors.above_data_wnd += seq_gt (vnet_buffer (b)->tcp.seq_end, + tc->rcv_las + tc->rcv_wnd); goto done; } @@ -3398,6 +3411,7 @@ tcp_input_dispatch_buffer (tcp_main_t * tm, tcp_connection_t * tc, flags = tcp->flags & filter_flags; *next = tm->dispatch_table[tc->state][flags].next; *error = tm->dispatch_table[tc->state][flags].error; + tc->segs_in += 1; if (PREDICT_FALSE (*error == TCP_ERROR_DISPATCH || *next == TCP_INPUT_NEXT_RESET)) diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 010397b9e04..751d8b31382 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -1137,6 +1137,9 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, u32 snd_nxt, tc->snd_nxt += data_len; tc->rcv_las = tc->rcv_nxt; + tc->bytes_out += data_len; + tc->data_segs_out += 1; + TCP_EVT_DBG (TCP_EVT_PKTIZE, tc); } @@ -1410,14 +1413,14 @@ tcp_prepare_retransmit_segment (tcp_worker_ctx_t * wrk, /* Start is beyond snd_congestion */ start = tc->snd_una + offset; if (seq_geq (start, tc->snd_congestion)) - goto done; + return 0; /* leave directly: the done label is removed, so the retransmit counters further down are not touched */ /* Don't overshoot snd_congestion */ if (seq_gt (start + max_deq_bytes, tc->snd_congestion)) { max_deq_bytes = tc->snd_congestion - start; if (max_deq_bytes == 0) - goto done; + return 0; } n_bytes = tcp_prepare_segment (wrk, tc, offset, max_deq_bytes, b); @@ -1431,7 +1434,8 @@ tcp_prepare_retransmit_segment (tcp_worker_ctx_t * wrk, tcp_bt_track_rxt (tc, start, start + n_bytes); } -done: + tc->bytes_retrans += n_bytes; + tc->segs_retrans += 1; TCP_EVT_DBG (TCP_EVT_CC_RTX, tc, offset, n_bytes); return n_bytes; } @@ -1458,6 +1462,7 @@ tcp_cc_init_rxt_timeout (tcp_connection_t * tc) tc->rcv_dupacks = 0; tc->rtt_ts = 0; tc->cwnd_acc_bytes = 0; + tc->tr_occurences += 
1; tcp_connection_tx_pacer_reset (tc, tc->cwnd, 2 * tc->snd_mss); tcp_recovery_on (tc); } @@ -2015,12 +2020,14 @@ tcp_send_acks (tcp_connection_t * tc, u32 max_burst_size) { tc->pending_dupacks = 0; tc->snd_sack_pos = 0; + tc->dupacks_out += n_acks; return n_acks; } else { TCP_DBG ("constrained by burst size"); tc->pending_dupacks = n_acks - max_burst_size; + tc->dupacks_out += max_burst_size; /* count only the acks that fit this burst; the remainder stays in pending_dupacks */ tcp_program_dupack (tc); return max_burst_size; } @@ -2196,6 +2203,8 @@ tcp_output_handle_packet (tcp_connection_t * tc0, vlib_buffer_t * b0, if (!TCP_ALWAYS_ACK) tcp_timer_reset (tc0, TCP_TIMER_DELACK); + + tc0->segs_out += 1; } always_inline uword -- 2.16.6