#include <vnet/ethernet/arp_packet.h>
#include <vnet/fib/fib_walk.h>
-#include <vppinfra/bihash_24_8.h>
-
/*
* Vector Hash tables of neighbour (traditional) adjacencies
 * Key: interface (for the vector index), address (and its proto),
* link-type/ether-type.
*/
-static BVT(clib_bihash) **adj_nbr_tables[FIB_PROTOCOL_MAX];
-
-// FIXME SIZE APPROPRIATELY. ASK DAVEB.
-#define ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS (64 * 64)
-#define ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+/* Per-interface hash tables of neighbour adjacencies, one vector of
+ * tables per IP protocol: adj_nbr_tables[proto][sw_if_index] maps an
+ * adj_nbr_key_t to an adj_index_t.
+ * NOTE(review): sized FIB_PROTOCOL_IP_MAX (was FIB_PROTOCOL_MAX), so
+ * there is deliberately no MPLS slot — confirm no MPLS caller indexes it. */
+static uword **adj_nbr_tables[FIB_PROTOCOL_IP_MAX];
+/* Hash key: next-hop address plus link-type. The key is hashed as raw
+ * memory (hash_create_mem uses sizeof(adj_nbr_key_t)), so the link-type
+ * is widened to u64; NOTE(review): assumes the struct has no padding
+ * holes that could hold uninitialised bytes — confirm. */
+typedef struct adj_nbr_key_t_
+{
+ ip46_address_t ank_ip;
+ u64 ank_linkt;
+} adj_nbr_key_t;
#define ADJ_NBR_SET_KEY(_key, _lt, _nh) \
{ \
- _key.key[0] = (_nh)->as_u64[0]; \
- _key.key[1] = (_nh)->as_u64[1]; \
- _key.key[2] = (_lt); \
+ ip46_address_copy(&(_key).ank_ip, (_nh)); \
+ _key.ank_linkt = (_lt); \
}
/*
 * True iff a neighbour DB exists for protocol _proto on interface _itf.
 * Fix: the second clause previously referenced the caller's local
 * `sw_if_index` variable instead of the macro parameter `_itf`, making
 * the macro unhygienic — it only worked because every current call site
 * happens to name its argument `sw_if_index`.
 */
#define ADJ_NBR_ITF_OK(_proto, _itf)			\
    (((_itf) < vec_len(adj_nbr_tables[_proto])) &&	\
     (NULL != adj_nbr_tables[_proto][(_itf)]))
+/*
+ * Range-check a next-hop protocol: assert in debug images; in release
+ * images log a warning and return `err` from the calling function
+ * (pass nothing for void functions).
+ * Fixes: evaluate the argument exactly once (the ASSERT previously
+ * re-evaluated `nh_proto`), and report ">=" to match the actual test.
+ */
+#define ADJ_NBR_ASSERT_NH_PROTO(nh_proto, err) \
+ do { \
+ const fib_protocol_t nh_proto__ = (nh_proto); \
+ ASSERT (nh_proto__ < FIB_PROTOCOL_IP_MAX); \
+ if (nh_proto__ >= FIB_PROTOCOL_IP_MAX) \
+ { \
+ clib_warning ("BUG: protocol %d >= %d", \
+ (int)nh_proto__, \
+ FIB_PROTOCOL_IP_MAX); \
+ return err; \
+ } \
+ } while (0)
+
static void
adj_nbr_insert (fib_protocol_t nh_proto,
vnet_link_t link_type,
u32 sw_if_index,
adj_index_t adj_index)
{
- BVT(clib_bihash_kv) kv;
+ adj_nbr_key_t kv;
+
+ ADJ_NBR_ASSERT_NH_PROTO (nh_proto,);
if (sw_if_index >= vec_len(adj_nbr_tables[nh_proto]))
{
if (NULL == adj_nbr_tables[nh_proto][sw_if_index])
{
adj_nbr_tables[nh_proto][sw_if_index] =
- clib_mem_alloc_aligned(sizeof(BVT(clib_bihash)),
- CLIB_CACHE_LINE_BYTES);
- clib_memset(adj_nbr_tables[nh_proto][sw_if_index],
- 0,
- sizeof(BVT(clib_bihash)));
-
- BV(clib_bihash_init) (adj_nbr_tables[nh_proto][sw_if_index],
- "Adjacency Neighbour table",
- ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS,
- ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE);
+ hash_create_mem(0, sizeof(adj_nbr_key_t), sizeof(adj_index_t));
}
ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
- kv.value = adj_index;
- BV(clib_bihash_add_del) (adj_nbr_tables[nh_proto][sw_if_index], &kv, 1);
+ hash_set_mem_alloc (&adj_nbr_tables[nh_proto][sw_if_index],
+ &kv, adj_index);
}
void
const ip46_address_t *nh_addr,
u32 sw_if_index)
{
- BVT(clib_bihash_kv) kv;
+ adj_nbr_key_t kv;
+
+ ADJ_NBR_ASSERT_NH_PROTO (nh_proto,);
if (!ADJ_NBR_ITF_OK(nh_proto, sw_if_index))
return;
ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
- kv.value = ai;
- BV(clib_bihash_add_del) (adj_nbr_tables[nh_proto][sw_if_index], &kv, 0);
+ hash_unset_mem_free(&adj_nbr_tables[nh_proto][sw_if_index], &kv);
+
+ if (0 == hash_elts(adj_nbr_tables[nh_proto][sw_if_index]))
+ {
+ hash_free(adj_nbr_tables[nh_proto][sw_if_index]);
+ }
}
adj_index_t
const ip46_address_t *nh_addr,
u32 sw_if_index)
{
- BVT(clib_bihash_kv) kv;
+ adj_nbr_key_t kv;
+ uword *p;
+
+ ADJ_NBR_ASSERT_NH_PROTO (nh_proto, ADJ_INDEX_INVALID);
ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
if (!ADJ_NBR_ITF_OK(nh_proto, sw_if_index))
return (ADJ_INDEX_INVALID);
- if (BV(clib_bihash_search)(adj_nbr_tables[nh_proto][sw_if_index],
- &kv, &kv) < 0)
- {
- return (ADJ_INDEX_INVALID);
- }
- else
+ p = hash_get_mem(adj_nbr_tables[nh_proto][sw_if_index], &kv);
+
+ if (p)
{
- return (kv.value);
+ return (p[0]);
}
+ return (ADJ_INDEX_INVALID);
}
static inline u32
{
feature_count = fm->feature_count_by_sw_if_index[arc_index][sw_if_index];
if (feature_count > 0)
+ {
+ vnet_feature_config_main_t *cm;
+
adj->rewrite_header.flags |= VNET_REWRITE_HAS_FEATURES;
- }
+ cm = &fm->feature_config_mains[arc_index];
+ adj->ia_cfg_index = vec_elt (cm->config_index_by_sw_if_index,
+ sw_if_index);
+ }
+ }
return;
}
u32 sw_if_index)
{
adj_index_t adj_index;
- ip_adjacency_t *adj;
adj_index = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index);
if (ADJ_INDEX_INVALID == adj_index)
{
+ ip_adjacency_t *adj;
vnet_main_t *vnm;
vnm = vnet_get_main();
adj_lock(adj_index);
}
+ adj_delegate_adj_created(adj_get(adj_index));
return (adj_index);
}
ADJ_NBR_REWRITE_FLAG_COMPLETE,
rewrite);
+ adj_delegate_adj_created(adj_get(adj_index));
+
return (adj_index);
}
* adj_nbr_update_rewrite
*
* Update the adjacency's rewrite string. A NULL string implies the
- * rewirte is reset (i.e. when ARP/ND etnry is gone).
+ * rewrite is reset (i.e. when ARP/ND entry is gone).
* NB: the adj being updated may be handling traffic in the DP.
*/
void
* adj_nbr_update_rewrite_internal
*
* Update the adjacency's rewrite string. A NULL string implies the
- * rewirte is reset (i.e. when ARP/ND etnry is gone).
+ * rewrite is reset (i.e. when ARP/ND entry is gone).
* NB: the adj being updated may be handling traffic in the DP.
*/
void
u8 *rewrite)
{
ip_adjacency_t *walk_adj;
- adj_index_t walk_ai;
+ adj_index_t walk_ai, ai;
vlib_main_t * vm;
u32 old_next;
int do_walk;
vm = vlib_get_main();
old_next = adj->lookup_next_index;
- walk_ai = adj_get_index(adj);
+ ai = walk_ai = adj_get_index(adj);
if (VNET_LINK_MPLS == adj->ia_link)
{
/*
* The link type MPLS has no children in the control plane graph, it only
- * has children in the data-palne graph. The backwalk is up the former.
+ * has children in the data-plane graph. The backwalk is up the former.
* So we need to walk from its IP cousin.
*/
walk_ai = adj_nbr_find(adj->ia_nh_proto,
* lock the adjacencies that are affected by updates this walk will provoke.
* Since the aim of the walk is to update children to link to a different
* DPO, this adj will no longer be in use and its lock count will drop to 0.
- * We don't want it to be deleted as part of this endevour.
+ * We don't want it to be deleted as part of this endeavour.
*/
- adj_lock(adj_get_index(adj));
+ adj_lock(ai);
adj_lock(walk_ai);
/*
* ideally we would only want to suspend forwarding via this adj whilst we
* do this, but we do not have that level of granularity - it's suspend all
* worker threads or nothing.
- * The other chioces are:
+ * The other choices are:
* - to mark the adj down and back walk so child load-balances drop this adj
* from the set.
* - update the next_node index of this adj to point to error-drop
* node. So from the options above, updating the next_node of the adj to
* be drop will work, but it relies on each graph node v4/v6/mpls, rewrite/
* arp/midchain always be valid w.r.t. a mis-match of adj type and node type
- * (i.e. a rewrite adj in the arp node). This is not enforcable. Getting it
+ * (i.e. a rewrite adj in the arp node). This is not enforceable. Getting it
* wrong will lead to hard to find bugs since its a race condition. So we
* choose the more reliable method of updating the children to use the drop,
* then switching adj's type, then updating the children again. Did I mention
fib_node_back_walk_ctx_t bw_ctx = {
.fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_DOWN,
/*
- * force this walk to be synchrous. if we don't and a node in the graph
+ * force this walk to be synchronous. if we don't and a node in the graph
* (a heavily shared path-list) chooses to back-ground the walk (make it
* async) then it will pause and we will do the adj update below, before
* all the children are updated. not good.
};
fib_walk_sync(FIB_NODE_TYPE_ADJ, walk_ai, &bw_ctx);
+ /*
+ * fib_walk_sync may allocate a new adjacency and potentially cause a
+ * realloc for adj_pool. When that happens, adj pointer is no longer
+ * valid here. We refresh the adj pointer accordingly.
+ */
+ adj = adj_get (ai);
}
/*
vlib_worker_thread_barrier_sync(vm);
adj->lookup_next_index = adj_next_index;
+ adj->ia_node_index = this_node;
if (NULL != rewrite)
{
next_node);
/*
- * done with the rewirte update - let the workers loose.
+ * done with the rewrite update - let the workers loose.
*/
vlib_worker_thread_barrier_release(vm);
*/
if (do_walk)
{
+ walk_adj = adj_get(walk_ai);
walk_adj->ia_flags &= ~ADJ_FLAG_SYNC_WALK_ACTIVE;
}
- adj_unlock(adj_get_index(adj));
+ adj_delegate_adj_modified(adj);
+ adj_unlock(ai);
adj_unlock(walk_ai);
}
-typedef struct adj_db_count_ctx_t_ {
- u64 count;
-} adj_db_count_ctx_t;
-
-static void
-adj_db_count (BVT(clib_bihash_kv) * kvp,
- void *arg)
-{
- adj_db_count_ctx_t * ctx = arg;
- ctx->count++;
-}
-
u32
adj_nbr_db_size (void)
{
- adj_db_count_ctx_t ctx = {
- .count = 0,
- };
fib_protocol_t proto;
u32 sw_if_index = 0;
+ u64 count = 0;
for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
{
{
if (NULL != adj_nbr_tables[proto][sw_if_index])
{
- BV(clib_bihash_foreach_key_value_pair) (
- adj_nbr_tables[proto][sw_if_index],
- adj_db_count,
- &ctx);
+ count += hash_elts(adj_nbr_tables[proto][sw_if_index]);
}
}
}
- return (ctx.count);
+ return (count);
}
/**
- * @brief Context for a walk of the adjacency neighbour DB
+ * @brief Walk all adjacencies on a link for a given next-hop protocol
*/
-typedef struct adj_walk_ctx_t_
-{
- adj_walk_cb_t awc_cb;
- void *awc_ctx;
-} adj_walk_ctx_t;
-
-static void
-adj_nbr_walk_cb (BVT(clib_bihash_kv) * kvp,
- void *arg)
-{
- adj_walk_ctx_t *ctx = arg;
-
- // FIXME: can't stop early...
- ctx->awc_cb(kvp->value, ctx->awc_ctx);
-}
-
void
adj_nbr_walk (u32 sw_if_index,
fib_protocol_t adj_nh_proto,
adj_walk_cb_t cb,
void *ctx)
{
+ adj_index_t ai, *ais, *aip;
+ adj_nbr_key_t *key;
+
+ ADJ_NBR_ASSERT_NH_PROTO (adj_nh_proto,);
+
if (!ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index))
return;
- adj_walk_ctx_t awc = {
- .awc_ctx = ctx,
- .awc_cb = cb,
- };
+ ais = NULL;
+
+ /* elements may be removed from the table during the walk, so
+ * collect the set first then process them */
+ hash_foreach_mem (key, ai, adj_nbr_tables[adj_nh_proto][sw_if_index],
+ ({
+ vec_add1(ais, ai);
+ }));
- BV(clib_bihash_foreach_key_value_pair) (
- adj_nbr_tables[adj_nh_proto][sw_if_index],
- adj_nbr_walk_cb,
- &awc);
+ vec_foreach(aip, ais)
+ {
+ /* An adj may be deleted during the walk so check first */
+ if (!pool_is_free_index(adj_pool, *aip))
+ cb(*aip, ctx);
+ }
+ vec_free(ais);
}
/**
adj_walk_cb_t cb,
void *ctx)
{
+ ADJ_NBR_ASSERT_NH_PROTO (adj_nh_proto,);
+
if (!ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index))
return;
- vnet_link_t linkt;
- adj_index_t ai;
-
- FOR_EACH_VNET_LINK(linkt)
+ switch (adj_nh_proto)
{
- ai = adj_nbr_find (FIB_PROTOCOL_IP4, linkt, nh, sw_if_index);
-
- if (INDEX_INVALID != ai)
- cb(ai, ctx);
+ case FIB_PROTOCOL_IP4:
+ adj_nbr_walk_nh4(sw_if_index, &nh->ip4, cb, ctx);
+ break;
+ case FIB_PROTOCOL_IP6:
+ adj_nbr_walk_nh6(sw_if_index, &nh->ip6, cb, ctx);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ ASSERT(0);
+ break;
}
}
};
ip_adjacency_t *adj;
+ adj_lock(ai);
+
adj = adj_get(ai);
adj->ia_flags |= ADJ_FLAG_SYNC_WALK_ACTIVE;
fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
adj->ia_flags &= ~ADJ_FLAG_SYNC_WALK_ACTIVE;
+ adj_unlock(ai);
return (ADJ_WALK_RC_CONTINUE);
}
vlib_cli_command_t * cmd)
{
adj_index_t ai = ADJ_INDEX_INVALID;
+ ip46_address_t nh = ip46_address_initializer;
u32 sw_if_index = ~0;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "%d", &ai))
+ if (unformat (input, "%U",
+ unformat_vnet_sw_interface, vnet_get_main(),
+ &sw_if_index))
;
else if (unformat (input, "%U",
- unformat_vnet_sw_interface, vnet_get_main(),
- &sw_if_index))
+ unformat_ip46_address, &nh, IP46_TYPE_ANY))
+ ;
+ else if (unformat (input, "%d", &ai))
;
else
break;
{
fib_protocol_t proto;
- for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
- {
- adj_nbr_walk(sw_if_index, proto,
- adj_nbr_show_one,
- vm);
- }
+ if (ip46_address_is_zero(&nh))
+ {
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_show_one,
+ vm);
+ }
+ }
+ else
+ {
+ proto = (ip46_address_is_ip4(&nh) ?
+ FIB_PROTOCOL_IP4 :
+ FIB_PROTOCOL_IP6);
+ adj_nbr_walk_nh(sw_if_index, proto, &nh,
+ adj_nbr_show_one,
+ vm);
+ }
}
else
{