From dbf19ca7f9b93a843503f9204afd0815f3ef8332 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Tue, 15 Mar 2016 10:21:54 +0100 Subject: [PATCH] Make adjacencies shareable Change-Id: I620871ca715b751d2e487f37341b7118797c9176 Signed-off-by: Damjan Marion --- vnet/vnet/ip/ip4_forward.c | 8 ++ vnet/vnet/ip/ip6_forward.c | 8 ++ vnet/vnet/ip/lookup.c | 178 ++++++++++++++++++++++++++++++++++++++++++--- vnet/vnet/ip/lookup.h | 50 ++++++++++++- vppinfra/vppinfra/clib.h | 2 + 5 files changed, 235 insertions(+), 11 deletions(-) diff --git a/vnet/vnet/ip/ip4_forward.c b/vnet/vnet/ip/ip4_forward.c index 45bc22e7550..c5b3e9a5a0a 100644 --- a/vnet/vnet/ip/ip4_forward.c +++ b/vnet/vnet/ip/ip4_forward.c @@ -380,6 +380,14 @@ void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a) old_adj_index = fib->old_hash_values[0]; + /* Avoid spurious reference count increments */ + if (old_adj_index == adj_index) + { + ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index); + if (adj->share_count > 0) + adj->share_count --; + } + ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length, is_del ? old_adj_index : adj_index, is_del); diff --git a/vnet/vnet/ip/ip6_forward.c b/vnet/vnet/ip/ip6_forward.c index fd6874d4def..1d0e21e528c 100644 --- a/vnet/vnet/ip/ip6_forward.c +++ b/vnet/vnet/ip/ip6_forward.c @@ -265,6 +265,14 @@ void ip6_add_del_route (ip6_main_t * im, ip6_add_del_route_args_t * a) BV(clib_bihash_add_del) (&im->ip6_lookup_table, &kv, 1 /* is_add */); } + /* Avoid spurious reference count increments */ + if (old_adj_index == adj_index) + { + ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index); + if (adj->share_count > 0) + adj->share_count --; + } + /* Delete old adjacency index if present and changed. */ { if (! (a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY) diff --git a/vnet/vnet/ip/lookup.c b/vnet/vnet/ip/lookup.c index d0ec4947d7a..9e34bfa9064 100644 --- a/vnet/vnet/ip/lookup.c +++ b/vnet/vnet/ip/lookup.c @@ -60,6 +60,55 @@ ip_add_adjacency (ip_lookup_main_t * lm, ip_adjacency_t * adj; u32 ai, i, handle; + /* See if we know enough to attempt to share an existing adjacency */ + if (copy_adj && n_adj == 1) + { + uword signature; + uword * p; + + switch (copy_adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_DROP: + if (lm->drop_adj_index) + { + adj = ip_get_adjacency (lm, lm->drop_adj_index); + *adj_index_return = lm->drop_adj_index; + return (adj); + } + break; + + case IP_LOOKUP_NEXT_LOCAL: + if (lm->local_adj_index) + { + adj = ip_get_adjacency (lm, lm->local_adj_index); + *adj_index_return = lm->local_adj_index; + return (adj); + } + default: + break; + } + + signature = vnet_ip_adjacency_signature (copy_adj); + p = hash_get (lm->adj_index_by_signature, signature); + if (p) + { + adj = heap_elt_at_index (lm->adjacency_heap, p[0]); + while (1) + { + if (vnet_ip_adjacency_share_compare (adj, copy_adj)) + { + adj->share_count++; + *adj_index_return = p[0]; + return adj; + } + if (adj->next_adj_with_signature == 0) + break; + adj = heap_elt_at_index (lm->adjacency_heap, + adj->next_adj_with_signature); + } + } + } + ai = heap_alloc (lm->adjacency_heap, n_adj, handle); adj = heap_elt_at_index (lm->adjacency_heap, ai); @@ -82,11 +131,37 @@ ip_add_adjacency (ip_lookup_main_t * lm, adj[i].heap_handle = handle; adj[i].n_adj = n_adj; + adj[i].share_count = 0; + adj[i].next_adj_with_signature = 0; /* Zero possibly stale counters for re-used adjacencies. */ vlib_zero_combined_counter (&lm->adjacency_counters, ai + i); } + /* Set up to share the adj later */ + if (copy_adj && n_adj == 1) + { + uword * p; + u32 old_ai; + uword signature = vnet_ip_adjacency_signature (adj); + + p = hash_get (lm->adj_index_by_signature, signature); + /* Hash collision? */ + if (p) + { + /* Save the adj index, p[0] will be toast after the unset! */ + old_ai = p[0]; + hash_unset (lm->adj_index_by_signature, signature); + hash_set (lm->adj_index_by_signature, signature, ai); + adj->next_adj_with_signature = old_ai; + } + else + { + adj->next_adj_with_signature = 0; + hash_set (lm->adj_index_by_signature, signature, ai); + } + } + *adj_index_return = ai; return adj; } @@ -101,6 +176,69 @@ static void ip_del_adjacency2 (ip_lookup_main_t * lm, u32 adj_index, u32 delete_ adj = ip_get_adjacency (lm, adj_index); handle = adj->heap_handle; + /* Special-case local, drop adjs */ + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_LOCAL: + case IP_LOOKUP_NEXT_DROP: + return; + default: + break; + } + + + if (adj->n_adj == 1) + { + uword signature; + uword * p; + u32 this_ai; + ip_adjacency_t * this_adj, * prev_adj = 0; + if (adj->share_count > 0) + { + adj->share_count --; + return; + } + + signature = vnet_ip_adjacency_signature (adj); + p = hash_get (lm->adj_index_by_signature, signature); + if (p == 0) + { + clib_warning ("adj 0x%llx signature %llx not in table", + adj, signature); + goto bag_it; + } + this_ai = p[0]; + /* At the top of the signature chain (likely)? */ + if (this_ai == adj_index) + { + if (adj->next_adj_with_signature == 0) + { + hash_unset (lm->adj_index_by_signature, signature); + goto bag_it; + } + else + { + this_adj = ip_get_adjacency (lm, adj->next_adj_with_signature); + hash_unset (lm->adj_index_by_signature, signature); + hash_set (lm->adj_index_by_signature, signature, + this_adj->heap_handle); + } + } + else /* walk signature chain */ + { + this_adj = ip_get_adjacency (lm, this_ai); + while (this_adj != adj) + { + prev_adj = this_adj; + this_adj = ip_get_adjacency + (lm, this_adj->next_adj_with_signature); + ASSERT(this_adj->heap_handle != 0); + } + prev_adj->next_adj_with_signature = this_adj->next_adj_with_signature; + } + } + + bag_it: if (delete_multipath_adjacency) ip_multipath_del_adjacency (lm, adj_index); @@ -829,6 +967,14 @@ void unserialize_ip_lookup_main (serialize_main_t * m, va_list * va) void ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6) { ip_adjacency_t * adj; + ip_adjacency_t template_adj; + + /* ensure that adjacency is cacheline aligned and sized */ + ASSERT(STRUCT_OFFSET_OF(ip_adjacency_t, cacheline0) == 0); + ASSERT(STRUCT_OFFSET_OF(ip_adjacency_t, cacheline1) == CLIB_CACHE_LINE_BYTES); + + lm->adj_index_by_signature = hash_create (0, sizeof (uword)); + memset (&template_adj, 0, sizeof (template_adj)); /* Hand-craft special miss adjacency to use when nothing matches in the routing table. Same for drop adjacency. */ @@ -836,12 +982,14 @@ void ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6) adj->lookup_next_index = IP_LOOKUP_NEXT_MISS; ASSERT (lm->miss_adj_index == IP_LOOKUP_MISS_ADJ_INDEX); - adj = ip_add_adjacency (lm, /* template */ 0, /* n-adj */ 1, &lm->drop_adj_index); - adj->lookup_next_index = IP_LOOKUP_NEXT_DROP; + /* Make the "drop" adj sharable */ + template_adj.lookup_next_index = IP_LOOKUP_NEXT_DROP; + adj = ip_add_adjacency (lm, &template_adj, /* n-adj */ 1, &lm->drop_adj_index); - adj = ip_add_adjacency (lm, /* template */ 0, /* n-adj */ 1, &lm->local_adj_index); - adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - adj->if_address_index = ~0; + /* Make the "local" adj sharable */ + template_adj.lookup_next_index = IP_LOOKUP_NEXT_LOCAL; + template_adj.if_address_index = ~0; + adj = ip_add_adjacency (lm, &template_adj, /* n-adj */ 1, &lm->local_adj_index); if (! lm->fib_result_n_bytes) lm->fib_result_n_bytes = sizeof (uword); @@ -983,6 +1131,10 @@ u8 * format_ip_adjacency (u8 * s, va_list * args) } if (adj->explicit_fib_index != ~0 && adj->explicit_fib_index != 0) s = format (s, " lookup fib index %d", adj->explicit_fib_index); + if (adj->share_count > 0) + s = format (s, " shared %d", adj->share_count + 1); + if (adj->next_adj_with_signature) + s = format (s, " next_adj_with_signature %d", adj->next_adj_with_signature); return s; } @@ -1083,11 +1235,17 @@ static uword unformat_ip_adjacency (unformat_input_t * input, va_list * args) if (next == IP_LOOKUP_NEXT_LOCAL) (void) unformat (input, "%d", &adj->if_address_index); else if (next == IP_LOOKUP_NEXT_CLASSIFY) - if (!unformat (input, "%d", &adj->classify_table_index)) - { - clib_warning ("classify adj must specify table index"); - return 0; - } + { + if (!unformat (input, "%d", &adj->classify_table_index)) + { + clib_warning ("classify adj must specify table index"); + return 0; + } + } + else if (next == IP_LOOKUP_NEXT_DROP) + { + adj->rewrite_header.node_index = 0; + } } else if (unformat_user (input, diff --git a/vnet/vnet/ip/lookup.h b/vnet/vnet/ip/lookup.h index ccce88935eb..02ab20d11d9 100644 --- a/vnet/vnet/ip/lookup.h +++ b/vnet/vnet/ip/lookup.h @@ -106,9 +106,12 @@ _(reverse, IP_FLOW_HASH_REVERSE_SRC_DST) /* IP unicast adjacency. */ typedef struct { + CLIB_CACHE_LINE_ALIGN_MARK(cacheline0); /* Handle for this adjacency in adjacency heap. */ u32 heap_handle; + STRUCT_MARK(signature_start); + /* Interface address index for this local/arp adjacency. */ u32 if_address_index; @@ -131,9 +134,51 @@ typedef struct { /* Highest possible perf subgraph arc interposition, e.g. for ip6 ioam */ u16 saved_lookup_next_index; - vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE - 5*sizeof(u32)); + STRUCT_MARK(signature_end); + + /* Number of FIB entries sharing this adjacency */ + u32 share_count; + /* Use this adjacency instead */ + u32 next_adj_with_signature; + + CLIB_CACHE_LINE_ALIGN_MARK(cacheline1); + + /* Rewrite in second/third cache lines */ + vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE); } ip_adjacency_t; +static inline uword +vnet_ip_adjacency_signature (ip_adjacency_t * adj) +{ + uword signature = 0xfeedfaceULL; + + /* Skip heap handle, sum everything up to but not including share_count */ + signature = hash_memory64 + (STRUCT_MARK_PTR(adj, signature_start), + STRUCT_OFFSET_OF(ip_adjacency_t, signature_end) + - STRUCT_OFFSET_OF(ip_adjacency_t, signature_start), + signature); + + /* and the rewrite */ + signature = hash_memory64 (&adj->rewrite_header, VLIB_BUFFER_PRE_DATA_SIZE, + signature); + return signature; +} + +static inline int +vnet_ip_adjacency_share_compare (ip_adjacency_t * a1, ip_adjacency_t *a2) +{ + if (memcmp (STRUCT_MARK_PTR(a1, signature_start), + STRUCT_MARK_PTR(a2, signature_start), + STRUCT_OFFSET_OF(ip_adjacency_t, signature_end) + - STRUCT_OFFSET_OF(ip_adjacency_t, signature_start))) + return 0; + if (memcmp (&a1->rewrite_header, &a2->rewrite_header, + VLIB_BUFFER_PRE_DATA_SIZE)) + return 0; + return 1; +} + /* Index into adjacency table. */ typedef u32 ip_adjacency_index_t; @@ -261,6 +306,9 @@ typedef struct ip_lookup_main_t { /* Indexed by heap_handle from ip_adjacency_t. */ ip_multipath_adjacency_t * multipath_adjacencies; + /* Adjacency by signature hash */ + uword * adj_index_by_signature; + /* Temporary vectors for looking up next hops in hash. */ ip_multipath_next_hop_t * next_hop_hash_lookup_key; ip_multipath_next_hop_t * next_hop_hash_lookup_key_normalized; diff --git a/vppinfra/vppinfra/clib.h b/vppinfra/vppinfra/clib.h index e043025a272..9e9a97789db 100644 --- a/vppinfra/vppinfra/clib.h +++ b/vppinfra/vppinfra/clib.h @@ -64,6 +64,8 @@ #define STRUCT_SIZE_OF(t,f) (sizeof (_STRUCT_FIELD (t, f))) #define STRUCT_BITS_OF(t,f) (BITS (_STRUCT_FIELD (t, f))) #define STRUCT_ARRAY_LEN(t,f) ARRAY_LEN (_STRUCT_FIELD (t, f)) +#define STRUCT_MARK(mark) u8 mark[0] +#define STRUCT_MARK_PTR(v, f) &(v)->f /* Stride in bytes between struct array elements. */ #define STRUCT_STRIDE_OF(t,f) \ -- 2.16.6