From 1bd01099a6512b6119bbf337b36222a6f0770d49 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Wed, 15 Mar 2017 15:41:17 -0400 Subject: [PATCH] 64 bit per-thread counters after: TenGigabitEthernet5/0/1-output active 107522 17375708 0 7.22e0 161.60 TenGigabitEthernet5/0/1-tx active 107522 17375708 0 6.93e1 161.60 ip4-input-no-checksum active 107522 17375708 0 2.52e1 161.60 ip4-lookup active 107522 17375708 0 3.10e1 161.60 ip4-rewrite active 107522 17375708 0 2.52e1 161.60 before TenGigabitEthernet5/0/1-output active 433575 110995200 0 6.95e0 256.00 TenGigabitEthernet5/0/1-tx active 433575 110995200 0 7.14e1 256.00 ip4-input-no-checksum active 433575 110995200 0 2.66e1 256.00 ip4-lookup active 433575 110995200 0 3.29e1 256.00 ip4-rewrite active 433575 110995200 0 2.59e1 256.00 Change-Id: I46405bd22189f48a39f06e3443bb7e13f410b539 Signed-off-by: Neale Ranns --- src/vlib/counter.c | 66 +++++++-------- src/vlib/counter.h | 207 ++++++++++++++++------------------------------ src/vnet/ip/ip4_forward.c | 36 ++++---- src/vnet/map/map.c | 2 +- src/vnet/map/map_api.c | 2 +- src/vnet/rewrite.c | 2 +- src/vpp/api/api.c | 2 +- src/vpp/stats/stats.c | 16 ++-- 8 files changed, 132 insertions(+), 201 deletions(-) diff --git a/src/vlib/counter.c b/src/vlib/counter.c index 9f66e04d88e..62f4bd66ddc 100644 --- a/src/vlib/counter.c +++ b/src/vlib/counter.c @@ -42,56 +42,36 @@ void vlib_clear_simple_counters (vlib_simple_counter_main_t * cm) { + counter_t *my_counters; uword i, j; - u16 *my_minis; - for (i = 0; i < vec_len (cm->minis); i++) + for (i = 0; i < vec_len (cm->counters); i++) { - my_minis = cm->minis[i]; + my_counters = cm->counters[i]; - for (j = 0; j < vec_len (my_minis); j++) + for (j = 0; j < vec_len (my_counters); j++) { - cm->maxi[j] += my_minis[j]; - my_minis[j] = 0; + my_counters[j] = 0; } } - - j = vec_len (cm->maxi); - if (j > 0) - vec_validate (cm->value_at_last_clear, j - 1); - for (i = 0; i < j; i++) - cm->value_at_last_clear[i] = cm->maxi[i]; } void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm) { + vlib_counter_t *my_counters; uword i, j; - vlib_mini_counter_t *my_minis; - for (i = 0; i < vec_len (cm->minis); i++) + for (i = 0; i < vec_len (cm->counters); i++) { - my_minis = cm->minis[i]; + my_counters = cm->counters[i]; - for (j = 0; j < vec_len (my_minis); j++) + for (j = 0; j < vec_len (my_counters); j++) { - cm->maxi[j].packets += my_minis[j].packets; - cm->maxi[j].bytes += my_minis[j].bytes; - my_minis[j].packets = 0; - my_minis[j].bytes = 0; + my_counters[j].packets = 0; + my_counters[j].bytes = 0; } } - - j = vec_len (cm->maxi); - if (j > 0) - vec_validate (cm->value_at_last_clear, j - 1); - - for (i = 0; i < j; i++) - { - vlib_counter_t *c = vec_elt_at_index (cm->value_at_last_clear, i); - - c[0] = cm->maxi[i]; - } } void @@ -100,10 +80,9 @@ vlib_validate_simple_counter (vlib_simple_counter_main_t * cm, u32 index) vlib_thread_main_t *tm = vlib_get_thread_main (); int i; - vec_validate (cm->minis, tm->n_vlib_mains - 1); + vec_validate (cm->counters, tm->n_vlib_mains - 1); for (i = 0; i < tm->n_vlib_mains; i++) - vec_validate_aligned (cm->minis[i], index, CLIB_CACHE_LINE_BYTES); - vec_validate_aligned (cm->maxi, index, CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (cm->counters[i], index, CLIB_CACHE_LINE_BYTES); } void @@ -112,10 +91,23 @@ vlib_validate_combined_counter (vlib_combined_counter_main_t * cm, u32 index) vlib_thread_main_t *tm = vlib_get_thread_main (); int i; - vec_validate (cm->minis, tm->n_vlib_mains - 1); + vec_validate (cm->counters, tm->n_vlib_mains - 1); for (i = 0; i < tm->n_vlib_mains; i++) - vec_validate_aligned (cm->minis[i], index, CLIB_CACHE_LINE_BYTES); - vec_validate_aligned (cm->maxi, index, CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (cm->counters[i], index, CLIB_CACHE_LINE_BYTES); +} + +u32 +vlib_combined_counter_n_counters (const vlib_combined_counter_main_t * cm) +{ + ASSERT (cm->counters); + return (vec_len (cm->counters[0])); +} + +u32 +vlib_simple_counter_n_counters (const vlib_simple_counter_main_t * cm) +{ + ASSERT (cm->counters); + return (vec_len (cm->counters[0])); } void diff --git a/src/vlib/counter.h b/src/vlib/counter.h index abfa89eea6d..17a8521776b 100644 --- a/src/vlib/counter.h +++ b/src/vlib/counter.h @@ -44,59 +44,48 @@ Optimized thread-safe counters. - Each vlib_[simple|combined]_counter_main_t consists of a single - vector of thread-safe / atomically-updated u64 counters [the - "maxi" vector], and a (u16 **) per-thread vector [the "minis" - vector] of narrow, per-thread counters. - - The idea is to drastically reduce the number of atomic operations. - In the case of packet counts, we divide the number of atomic ops - by 2**16, etc. + Each vlib_[simple|combined]_counter_main_t consists of a per-thread + vector of per-object counters. + + The idea is to drastically eliminate atomic operations. */ +/** 64bit counters */ +typedef u64 counter_t; + /** A collection of simple counters */ typedef struct { - u16 **minis; /**< Per-thread u16 non-atomic counters */ - u64 *maxi; /**< Shared wide counters */ - u64 *value_at_last_clear; /**< Counter values as of last clear. */ - u64 *value_at_last_serialize; /**< Values as of last serialize. */ + counter_t **counters; /**< Per-thread u64 non-atomic counters */ + counter_t *value_at_last_serialize; /**< Values as of last serialize. */ u32 last_incremental_serialize_index; /**< Last counter index serialized incrementally. */ char *name; /**< The counter collection's name. */ } vlib_simple_counter_main_t; +/** The number of counters (not the number of per-thread counters) */ +u32 vlib_simple_counter_n_counters (const vlib_simple_counter_main_t * cm); + /** Increment a simple counter @param cm - (vlib_simple_counter_main_t *) simple counter main pointer @param cpu_index - (u32) the current cpu index @param index - (u32) index of the counter to increment - @param increment - (u32) quantitiy to add to the counter + @param increment - (u64) quantitiy to add to the counter */ always_inline void vlib_increment_simple_counter (vlib_simple_counter_main_t * cm, - u32 cpu_index, u32 index, u32 increment) + u32 cpu_index, u32 index, u64 increment) { - u16 *my_minis; - u16 *mini; - u32 old, new; - - my_minis = cm->minis[cpu_index]; - mini = vec_elt_at_index (my_minis, index); - old = mini[0]; - new = old + increment; - mini[0] = new; + counter_t *my_counters; - if (PREDICT_FALSE (mini[0] != new)) - { - __sync_fetch_and_add (&cm->maxi[index], new); - my_minis[index] = 0; - } + my_counters = cm->counters[cpu_index]; + my_counters[index] += increment; } /** Get the value of a simple counter - Scrapes the entire set of mini counters. Innacurate unless + Scrapes the entire set of per-thread counters. Innacurate unless worker threads which might increment the counter are barrier-synchronized @@ -104,30 +93,21 @@ vlib_increment_simple_counter (vlib_simple_counter_main_t * cm, @param index - (u32) index of the counter to fetch @returns - (u64) current counter value */ -always_inline u64 +always_inline counter_t vlib_get_simple_counter (vlib_simple_counter_main_t * cm, u32 index) { - u16 *my_minis, *mini; - u64 v; + counter_t *my_counters; + counter_t v; int i; - ASSERT (index < vec_len (cm->maxi)); + ASSERT (index < vlib_simple_counter_n_counters (cm)); v = 0; - for (i = 0; i < vec_len (cm->minis); i++) + for (i = 0; i < vec_len (cm->counters); i++) { - my_minis = cm->minis[i]; - mini = vec_elt_at_index (my_minis, index); - v += mini[0]; - } - - v += cm->maxi[index]; - - if (index < vec_len (cm->value_at_last_clear)) - { - ASSERT (v >= cm->value_at_last_clear[index]); - v -= cm->value_at_last_clear[index]; + my_counters = cm->counters[i]; + v += my_counters[index]; } return v; @@ -142,29 +122,24 @@ vlib_get_simple_counter (vlib_simple_counter_main_t * cm, u32 index) always_inline void vlib_zero_simple_counter (vlib_simple_counter_main_t * cm, u32 index) { - u16 *my_minis; + counter_t *my_counters; int i; - ASSERT (index < vec_len (cm->maxi)); + ASSERT (index < vlib_simple_counter_n_counters (cm)); - for (i = 0; i < vec_len (cm->minis); i++) + for (i = 0; i < vec_len (cm->counters); i++) { - my_minis = cm->minis[i]; - my_minis[index] = 0; + my_counters = cm->counters[i]; + my_counters[index] = 0; } - - cm->maxi[index] = 0; - - if (index < vec_len (cm->value_at_last_clear)) - cm->value_at_last_clear[index] = 0; } /** Combined counter to hold both packets and byte differences. */ typedef struct { - u64 packets; /**< packet counter */ - u64 bytes; /**< byte counter */ + counter_t packets; /**< packet counter */ + counter_t bytes; /**< byte counter */ } vlib_counter_t; /** Add two combined counters, results in the first counter @@ -201,24 +176,19 @@ vlib_counter_zero (vlib_counter_t * a) a->packets = a->bytes = 0; } -/** Mini combined counter */ -typedef struct -{ - u16 packets; /**< Packet count */ - i16 bytes; /**< Byte count */ -} vlib_mini_counter_t; - /** A collection of combined counters */ typedef struct { - vlib_mini_counter_t **minis; /**< Per-thread u16 non-atomic counter pairs */ - vlib_counter_t *maxi; /**< Shared wide counter pairs */ - vlib_counter_t *value_at_last_clear; /**< Counter values as of last clear. */ + vlib_counter_t **counters; /**< Per-thread u64 non-atomic counter pairs */ vlib_counter_t *value_at_last_serialize; /**< Counter values as of last serialize. */ u32 last_incremental_serialize_index; /**< Last counter index serialized incrementally. */ char *name; /**< The counter collection's name. */ } vlib_combined_counter_main_t; +/** The number of counters (not the number of per-thread counters) */ +u32 vlib_combined_counter_n_counters (const vlib_combined_counter_main_t * + cm); + /** Clear a collection of simple counters @param cm - (vlib_simple_counter_main_t *) collection to clear */ @@ -233,62 +203,41 @@ void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm); @param cm - (vlib_combined_counter_main_t *) comined counter main pointer @param cpu_index - (u32) the current cpu index @param index - (u32) index of the counter to increment - @param packet_increment - (u32) number of packets to add to the counter - @param byte_increment - (u32) number of bytes to add to the counter + @param packet_increment - (u64) number of packets to add to the counter + @param byte_increment - (u64) number of bytes to add to the counter */ always_inline void vlib_increment_combined_counter (vlib_combined_counter_main_t * cm, u32 cpu_index, - u32 index, - u32 packet_increment, u32 byte_increment) + u32 index, u64 n_packets, u64 n_bytes) { - vlib_mini_counter_t *my_minis, *mini; - u32 old_packets, new_packets; - i32 old_bytes, new_bytes; - - /* Use this CPU's mini counter array */ - my_minis = cm->minis[cpu_index]; - - mini = vec_elt_at_index (my_minis, index); - old_packets = mini->packets; - old_bytes = mini->bytes; - - new_packets = old_packets + packet_increment; - new_bytes = old_bytes + byte_increment; - - mini->packets = new_packets; - mini->bytes = new_bytes; - - /* Bytes always overflow before packets.. */ - if (PREDICT_FALSE (mini->bytes != new_bytes)) - { - vlib_counter_t *maxi = vec_elt_at_index (cm->maxi, index); + vlib_counter_t *my_counters; - __sync_fetch_and_add (&maxi->packets, new_packets); - __sync_fetch_and_add (&maxi->bytes, new_bytes); + /* Use this CPU's counter array */ + my_counters = cm->counters[cpu_index]; - mini->packets = 0; - mini->bytes = 0; - } + my_counters[index].packets += n_packets; + my_counters[index].bytes += n_bytes; } -#define vlib_prefetch_combined_counter(_cm, _cpu_index, _index) \ -{ \ - vlib_mini_counter_t *_cpu_minis; \ - \ - /* \ - * This CPU's mini index is assumed to already be in cache \ - */ \ - _cpu_minis = (_cm)->minis[(_cpu_index)]; \ - CLIB_PREFETCH(_cpu_minis + (_index), \ - sizeof(*_cpu_minis), \ - STORE); \ +/** Pre-fetch a per-thread combined counter for the given object index */ +always_inline void +vlib_prefetch_combined_counter (const vlib_combined_counter_main_t * cm, + u32 cpu_index, u32 index) +{ + vlib_counter_t *cpu_counters; + + /* + * This CPU's index is assumed to already be in cache + */ + cpu_counters = cm->counters[cpu_index]; + CLIB_PREFETCH (cpu_counters + index, CLIB_CACHE_LINE_BYTES, STORE); } /** Get the value of a combined counter, never called in the speed path - Scrapes the entire set of mini counters. Innacurate unless + Scrapes the entire set of per-thread counters. Innacurate unless worker threads which might increment the counter are barrier-synchronized @@ -298,35 +247,27 @@ vlib_increment_combined_counter (vlib_combined_counter_main_t * cm, */ static inline void -vlib_get_combined_counter (vlib_combined_counter_main_t * cm, +vlib_get_combined_counter (const vlib_combined_counter_main_t * cm, u32 index, vlib_counter_t * result) { - vlib_mini_counter_t *my_minis, *mini; - vlib_counter_t *maxi; + vlib_counter_t *my_counters, *counter; int i; result->packets = 0; result->bytes = 0; - for (i = 0; i < vec_len (cm->minis); i++) + for (i = 0; i < vec_len (cm->counters); i++) { - my_minis = cm->minis[i]; + my_counters = cm->counters[i]; - mini = vec_elt_at_index (my_minis, index); - result->packets += mini->packets; - result->bytes += mini->bytes; + counter = vec_elt_at_index (my_counters, index); + result->packets += counter->packets; + result->bytes += counter->bytes; } - - maxi = vec_elt_at_index (cm->maxi, index); - result->packets += maxi->packets; - result->bytes += maxi->bytes; - - if (index < vec_len (cm->value_at_last_clear)) - vlib_counter_sub (result, &cm->value_at_last_clear[index]); } /** Clear a combined counter - Clears the set of per-thread u16 counters, and the shared vlib_counter_t + Clears the set of per-thread counters. @param cm - (vlib_combined_counter_main_t *) combined counter main pointer @param index - (u32) index of the counter to clear @@ -334,21 +275,17 @@ vlib_get_combined_counter (vlib_combined_counter_main_t * cm, always_inline void vlib_zero_combined_counter (vlib_combined_counter_main_t * cm, u32 index) { - vlib_mini_counter_t *mini, *my_minis; + vlib_counter_t *my_counters, *counter; int i; - for (i = 0; i < vec_len (cm->minis); i++) + for (i = 0; i < vec_len (cm->counters); i++) { - my_minis = cm->minis[i]; + my_counters = cm->counters[i]; - mini = vec_elt_at_index (my_minis, index); - mini->packets = 0; - mini->bytes = 0; + counter = vec_elt_at_index (my_counters, index); + counter->packets = 0; + counter->bytes = 0; } - - vlib_counter_zero (&cm->maxi[index]); - if (index < vec_len (cm->value_at_last_clear)) - vlib_counter_zero (&cm->value_at_last_clear[index]); } /** validate a simple counter diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 0dad61d45d0..7352c2e718d 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -2375,6 +2375,17 @@ ip4_rewrite_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; + /* + * pre-fetch the per-adjacency counters + */ + if (do_counters) + { + vlib_prefetch_combined_counter (&adjacency_counters, + cpu_index, adj_index0); + vlib_prefetch_combined_counter (&adjacency_counters, + cpu_index, adj_index1); + } + /* We should never rewrite a pkt using the MISS adjacency */ ASSERT (adj_index0 && adj_index1); @@ -2480,17 +2491,6 @@ ip4_rewrite_inline (vlib_main_t * vm, rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED : error1); - /* - * pre-fetch the per-adjacency counters - */ - if (do_counters) - { - vlib_prefetch_combined_counter (&adjacency_counters, - cpu_index, adj_index0); - vlib_prefetch_combined_counter (&adjacency_counters, - cpu_index, adj_index1); - } - /* Don't adjust the buffer for ttl issue; icmp-error node wants * to see the IP headerr */ if (PREDICT_TRUE (error0 == IP4_ERROR_NONE)) @@ -2624,8 +2624,9 @@ ip4_rewrite_inline (vlib_main_t * vm, p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; } - vlib_prefetch_combined_counter (&adjacency_counters, - cpu_index, adj_index0); + if (do_counters) + vlib_prefetch_combined_counter (&adjacency_counters, + cpu_index, adj_index0); /* Guess we are only writing on simple Ethernet header. */ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); @@ -2641,10 +2642,11 @@ ip4_rewrite_inline (vlib_main_t * vm, rw_len0 = adj0[0].rewrite_header.data_bytes; vnet_buffer (p0)->ip.save_rewrite_length = rw_len0; - vlib_increment_combined_counter - (&adjacency_counters, - cpu_index, - adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); + if (do_counters) + vlib_increment_combined_counter + (&adjacency_counters, + cpu_index, adj_index0, 1, + vlib_buffer_length_in_chain (vm, p0) + rw_len0); /* Check MTU of outgoing interface. */ error0 = (vlib_buffer_length_in_chain (vm, p0) diff --git a/src/vnet/map/map.c b/src/vnet/map/map.c index 7006b1db8f1..99305afaf41 100644 --- a/src/vnet/map/map.c +++ b/src/vnet/map/map.c @@ -1304,7 +1304,7 @@ show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input, { which = cm - mm->domain_counters; - for (i = 0; i < vec_len (cm->maxi); i++) + for (i = 0; i < vlib_combined_counter_n_counters (cm); i++) { vlib_get_combined_counter (cm, i, &v); total_pkts[which] += v.packets; diff --git a/src/vnet/map/map_api.c b/src/vnet/map/map_api.c index 7febeb3d7a3..d618e7a65bf 100644 --- a/src/vnet/map/map_api.c +++ b/src/vnet/map/map_api.c @@ -211,7 +211,7 @@ vl_api_map_summary_stats_t_handler (vl_api_map_summary_stats_t * mp) { which = cm - mm->domain_counters; - for (i = 0; i < vec_len (cm->maxi); i++) + for (i = 0; i < vlib_combined_counter_n_counters (cm); i++) { vlib_get_combined_counter (cm, i, &v); total_pkts[which] += v.packets; diff --git a/src/vnet/rewrite.c b/src/vnet/rewrite.c index c4a171c17c5..47fb74df01b 100644 --- a/src/vnet/rewrite.c +++ b/src/vnet/rewrite.c @@ -79,7 +79,7 @@ format_vnet_rewrite (u8 * s, va_list * args) if (NULL != si) s = format (s, "%U: ", format_vnet_sw_interface_name, vnm, si); else - s = format (s, "DELETED"); + s = format (s, "DELETED:%d", rw->sw_if_index); } /* Format rewrite string. */ diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index d8301fa69bb..8df40406131 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -849,7 +849,7 @@ vl_api_vnet_get_summary_stats_t_handler (vl_api_vnet_get_summary_stats_t * mp) { which = cm - im->combined_sw_if_counters; - for (i = 0; i < vec_len (cm->maxi); i++) + for (i = 0; i < vlib_combined_counter_n_counters (cm); i++) { vlib_get_combined_counter (cm, i, &v); total_pkts[which] += v.packets; diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c index c46d441a297..1927da0b5ff 100644 --- a/src/vpp/stats/stats.c +++ b/src/vpp/stats/stats.c @@ -134,7 +134,7 @@ do_simple_interface_counters (stats_main_t * sm) vlib_simple_counter_main_t *cm; u32 items_this_message = 0; u64 v, *vp = 0; - int i; + int i, n_counts; /* * Prevent interface registration from expanding / moving the vectors... @@ -144,13 +144,13 @@ do_simple_interface_counters (stats_main_t * sm) vec_foreach (cm, im->sw_if_counters) { - - for (i = 0; i < vec_len (cm->maxi); i++) + n_counts = vlib_simple_counter_n_counters (cm); + for (i = 0; i < n_counts; i++) { if (mp == 0) { items_this_message = clib_min (SIMPLE_COUNTER_BATCH_SIZE, - vec_len (cm->maxi) - i); + n_counts - i); mp = vl_msg_api_alloc_as_if_client (sizeof (*mp) + items_this_message * sizeof (v)); @@ -189,19 +189,19 @@ do_combined_interface_counters (stats_main_t * sm) vlib_combined_counter_main_t *cm; u32 items_this_message = 0; vlib_counter_t v, *vp = 0; - int i; + int i, n_counts; vnet_interface_counter_lock (im); vec_foreach (cm, im->combined_sw_if_counters) { - - for (i = 0; i < vec_len (cm->maxi); i++) + n_counts = vlib_combined_counter_n_counters (cm); + for (i = 0; i < n_counts; i++) { if (mp == 0) { items_this_message = clib_min (COMBINED_COUNTER_BATCH_SIZE, - vec_len (cm->maxi) - i); + n_counts - i); mp = vl_msg_api_alloc_as_if_client (sizeof (*mp) + items_this_message * sizeof (v)); -- 2.16.6