2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <vnet/adj/adj_nbr.h>
17 #include <vnet/adj/adj_internal.h>
18 #include <vnet/ethernet/arp_packet.h>
19 #include <vnet/fib/fib_walk.h>
22 * Vector Hash tables of neighbour (traditional) adjacencies
23 * Key: interface(for the vector index), address (and its proto),
24 * link-type/ether-type.
/*
 * Indexed as adj_nbr_tables[proto][sw_if_index]. The inner vector is grown
 * and each per-interface table is allocated on first insert (see
 * adj_nbr_insert), so a slot may be NULL for an interface that has never
 * had an adjacency.
 */
26 static BVT(clib_bihash) **adj_nbr_tables[FIB_PROTOCOL_MAX];
28 // FIXME SIZE APPROPRIATELY. ASK DAVEB.
/*
 * Sizing passed to clib_bihash_init for every per-interface table:
 * the bucket count and the memory size.
 * NOTE(review): (32<<20) is presumably bytes, i.e. 32MB per interface
 * table - confirm against the bihash API.
 */
29 #define ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS (64 * 64)
30 #define ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE (32<<20)
/*
 * Fill the key words of a bihash key/value pair: words 0 and 1 take the
 * two 64-bit halves of the next-hop address, word 2 takes the link type.
 * The value field is not written here.
 * NOTE(review): _key is used unparenthesised, so pass a plain lvalue.
 */
33 #define ADJ_NBR_SET_KEY(_key, _lt, _nh) \
35 _key.key[0] = (_nh)->as_u64[0]; \
36 _key.key[1] = (_nh)->as_u64[1]; \
37 _key.key[2] = (_lt); \
/*
 * True when the per-protocol table vector is long enough to hold _itf and
 * a hash table has actually been allocated in that slot.
 *
 * The second test indexes with the macro's own _itf argument (it used to
 * reach for whatever variable named sw_if_index was in the caller's scope).
 * Both arguments are evaluated more than once, so pass side-effect-free
 * expressions only.
 */
#define ADJ_NBR_ITF_OK(_proto, _itf)                        \
    (((_itf) < vec_len(adj_nbr_tables[(_proto)])) &&        \
     (NULL != adj_nbr_tables[(_proto)][(_itf)]))
/*
 * Add an entry to the neighbour DB of (nh_proto, sw_if_index), keyed on
 * the link type and next-hop address. The per-interface hash table is
 * created the first time an adjacency is inserted on that interface.
 */
45 adj_nbr_insert (fib_protocol_t nh_proto,
47 const ip46_address_t *nh_addr,
49 adj_index_t adj_index)
51 BVT(clib_bihash_kv) kv;
/* grow the per-protocol vector so that sw_if_index is a valid slot */
53 if (sw_if_index >= vec_len(adj_nbr_tables[nh_proto]))
55 vec_validate(adj_nbr_tables[nh_proto], sw_if_index);
/* no table on this interface yet: allocate, clear and initialise one */
57 if (NULL == adj_nbr_tables[nh_proto][sw_if_index])
59 adj_nbr_tables[nh_proto][sw_if_index] =
60 clib_mem_alloc_aligned(sizeof(BVT(clib_bihash)),
61 CLIB_CACHE_LINE_BYTES);
62 memset(adj_nbr_tables[nh_proto][sw_if_index],
64 sizeof(BVT(clib_bihash)));
66 BV(clib_bihash_init) (adj_nbr_tables[nh_proto][sw_if_index],
67 "Adjacency Neighbour table",
68 ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS,
69 ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE);
72 ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
/* last argument 1 = add (adj_nbr_remove passes 0 for the same call) */
75 BV(clib_bihash_add_del) (adj_nbr_tables[nh_proto][sw_if_index], &kv, 1);
/*
 * Delete the entry keyed on (link_type, nh_addr) from the neighbour DB of
 * (nh_proto, sw_if_index).
 */
79 adj_nbr_remove (fib_protocol_t nh_proto,
81 const ip46_address_t *nh_addr,
84 BVT(clib_bihash_kv) kv;
/*
 * no table was ever created for this interface.
 * NOTE(review): presumably returns early here - confirm.
 */
86 if (!ADJ_NBR_ITF_OK(nh_proto, sw_if_index))
89 ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
/* last argument 0 = delete (adj_nbr_insert passes 1 for the same call) */
91 BV(clib_bihash_add_del) (adj_nbr_tables[nh_proto][sw_if_index], &kv, 0);
/*
 * Look up the adjacency keyed on (link_type, nh_addr) in the neighbour DB
 * of (nh_proto, sw_if_index).
 * Returns ADJ_INDEX_INVALID when the interface has no table or the search
 * does not succeed.
 * NOTE(review): on success the stored value is presumably returned as the
 * adjacency index - confirm.
 */
95 adj_nbr_find (fib_protocol_t nh_proto,
97 const ip46_address_t *nh_addr,
100 BVT(clib_bihash_kv) kv;
102 ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
/* guard before indexing adj_nbr_tables[nh_proto][sw_if_index] below */
104 if (!ADJ_NBR_ITF_OK(nh_proto, sw_if_index))
105 return (ADJ_INDEX_INVALID);
107 if (BV(clib_bihash_search)(adj_nbr_tables[nh_proto][sw_if_index],
110 return (ADJ_INDEX_INVALID);
/*
 * Map a next-hop protocol to the graph node registration used to resolve
 * neighbours: the ARP node for IPv4, the discover-neighbor node for IPv6.
 * NOTE(review): confirm what is returned for FIB_PROTOCOL_MPLS; callers
 * dereference the result (->index) without a NULL check.
 */
118 static inline vlib_node_registration_t*
119 adj_get_nd_node (fib_protocol_t proto)
122 case FIB_PROTOCOL_IP4:
123 return (&ip4_arp_node);
124 case FIB_PROTOCOL_IP6:
125 return (&ip6_discover_neighbor_node);
126 case FIB_PROTOCOL_MPLS:
/*
 * Build an ARP request for the adjacency's IPv4 next hop from the ip4
 * ARP-request packet template and hand it to the output node of the
 * hardware interface underneath the adjacency's sw_if_index.
 */
134 adj_ip4_nbr_probe (ip_adjacency_t *adj)
136 vnet_main_t * vnm = vnet_get_main();
137 ip4_main_t * im = &ip4_main;
138 ip_interface_address_t * ia;
139 ethernet_arp_header_t * h;
140 vnet_hw_interface_t * hi;
141 vnet_sw_interface_t * si;
147 vm = vlib_get_main();
149 si = vnet_get_sw_interface (vnm,
150 adj->rewrite_header.sw_if_index);
/*
 * interface is admin-down.
 * NOTE(review): presumably no probe is sent in this case - confirm.
 */
152 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
158 ip4_interface_address_matching_destination(im,
159 &adj->sub_type.nbr.next_hop.ip4,
160 adj->rewrite_header.sw_if_index,
/*
 * NOTE(review): h is dereferenced below; confirm the template allocation
 * cannot return NULL here or add a check.
 */
167 h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
169 hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
171 clib_memcpy (h->ip4_over_ethernet[0].ethernet,
173 sizeof (h->ip4_over_ethernet[0].ethernet));
/* entry [0] carries our source address, entry [1] the next hop to resolve */
175 h->ip4_over_ethernet[0].ip4 = src[0];
176 h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4;
178 b = vlib_get_buffer (vm, bi);
179 vnet_buffer (b)->sw_if_index[VLIB_RX] =
180 vnet_buffer (b)->sw_if_index[VLIB_TX] =
181 adj->rewrite_header.sw_if_index;
183 /* Add encapsulation string for software interface (e.g. ethernet header). */
184 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
/* negative advance: move the buffer start back over the rewrite bytes */
185 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* enqueue straight to the hardware interface's output node */
188 vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
189 u32 * to_next = vlib_frame_vector_args (f);
192 vlib_put_frame_to_node (vm, hi->output_node_index, f);
/*
 * Build an ICMPv6 neighbor solicitation for the adjacency's IPv6 next hop
 * from the ip6 discover-neighbor packet template and hand it to the output
 * node of the hardware interface underneath the adjacency's sw_if_index.
 */
197 adj_ip6_nbr_probe (ip_adjacency_t *adj)
199 icmp6_neighbor_solicitation_header_t * h;
200 vnet_main_t * vnm = vnet_get_main();
201 ip6_main_t * im = &ip6_main;
202 ip_interface_address_t * ia;
203 ip6_address_t * dst, *src;
204 vnet_hw_interface_t * hi;
205 vnet_sw_interface_t * si;
211 vm = vlib_get_main();
213 si = vnet_get_sw_interface(vnm, adj->rewrite_header.sw_if_index);
214 dst = &adj->sub_type.nbr.next_hop.ip6;
/*
 * interface is admin-down.
 * NOTE(review): presumably no probe is sent in this case - confirm.
 */
216 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
220 src = ip6_interface_address_matching_destination(im, dst,
221 adj->rewrite_header.sw_if_index,
/*
 * NOTE(review): h is dereferenced below; confirm the template allocation
 * cannot return NULL here or add a check.
 */
228 h = vlib_packet_template_get_packet(vm,
229 &im->discover_neighbor_packet_template,
232 hi = vnet_get_sup_hw_interface(vnm, adj->rewrite_header.sw_if_index);
/*
 * only the last three bytes of the IP destination are overwritten.
 * NOTE(review): presumably the template pre-fills the rest as the
 * solicited-node multicast prefix - confirm.
 */
234 h->ip.dst_address.as_u8[13] = dst->as_u8[13];
235 h->ip.dst_address.as_u8[14] = dst->as_u8[14];
236 h->ip.dst_address.as_u8[15] = dst->as_u8[15];
237 h->ip.src_address = src[0];
238 h->neighbor.target_address = dst[0];
240 clib_memcpy (h->link_layer_option.ethernet_address,
242 vec_len(hi->hw_address));
/* checksum is computed last, after every header field has been filled in */
244 h->neighbor.icmp.checksum =
245 ip6_tcp_udp_icmp_compute_checksum(vm, 0, &h->ip, &bogus_length);
246 ASSERT(bogus_length == 0);
248 b = vlib_get_buffer (vm, bi);
249 vnet_buffer (b)->sw_if_index[VLIB_RX] =
250 vnet_buffer (b)->sw_if_index[VLIB_TX] =
251 adj->rewrite_header.sw_if_index;
253 /* Add encapsulation string for software interface (e.g. ethernet header). */
254 vnet_rewrite_one_header(adj[0], h, sizeof (ethernet_header_t));
/* negative advance: move the buffer start back over the rewrite bytes */
255 vlib_buffer_advance(b, -adj->rewrite_header.data_bytes);
/* enqueue straight to the hardware interface's output node */
258 vlib_frame_t * f = vlib_get_frame_to_node(vm, hi->output_node_index);
259 u32 * to_next = vlib_frame_vector_args(f);
262 vlib_put_frame_to_node(vm, hi->output_node_index, f);
/*
 * Allocate a new adjacency, register it in the neighbour DB and record
 * its next-hop address, link type and next-hop protocol. The adjacency
 * starts with lookup_next_index set to IP_LOOKUP_NEXT_ARP.
 */
266 static ip_adjacency_t*
267 adj_nbr_alloc (fib_protocol_t nh_proto,
268 fib_link_t link_type,
269 const ip46_address_t *nh_addr,
274 adj = adj_alloc(nh_proto);
276 adj_nbr_insert(nh_proto, link_type, nh_addr,
281 * since we just added the ADJ we have no rewrite string for it,
284 adj->lookup_next_index = IP_LOOKUP_NEXT_ARP;
285 adj->sub_type.nbr.next_hop = *nh_addr;
286 adj->ia_link = link_type;
287 adj->ia_nh_proto = nh_proto;
/*
 * NOTE(review): this clears a midchain member of sub_type after the nbr
 * member was written above; if the two share storage, confirm next_dpo
 * does not overlap nbr.next_hop.
 */
288 memset(&adj->sub_type.midchain.next_dpo, 0,
289 sizeof(adj->sub_type.midchain.next_dpo));
297 * Add an adjacency for the neighbour requested.
299 * The key for an adj is:
300 * - the Next-hops protocol (i.e. v4 or v6)
301 * - the address of the next-hop
302 * - the interface the next-hop is reachable through
303 * - fib_index; this is broken. i will fix it.
304 * the adj lookup currently occurs in the FIB.
/*
 * Find the neighbour adjacency for (nh_proto, link_type, nh_addr,
 * sw_if_index), creating it when the lookup fails. A lock is taken on
 * the adjacency before its heap_handle is returned.
 */
307 adj_nbr_add_or_lock (fib_protocol_t nh_proto,
308 fib_link_t link_type,
309 const ip46_address_t *nh_addr,
312 adj_index_t adj_index;
315 adj_index = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index);
317 if (ADJ_INDEX_INVALID == adj_index)
319 adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
322 * If there is no next-hop, this is the 'auto-adj' used on p2p
323 * links instead of a glean.
325 if (ip46_address_is_zero(nh_addr))
327 adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
329 vnet_rewrite_for_sw_interface(vnet_get_main(),
330 adj_fib_link_2_vnet(link_type),
332 adj_get_rewrite_node(link_type)->index,
333 VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
334 &adj->rewrite_header,
335 sizeof (adj->rewrite_data));
/*
 * NOTE(review): presumably the non-zero next-hop branch. The rewrite is
 * built for the ARP/ND packet type and targets the resolution node, with
 * the broadcast address as destination.
 */
339 vnet_rewrite_for_sw_interface(vnet_get_main(),
340 adj_fib_proto_2_nd(nh_proto),
342 adj_get_nd_node(nh_proto)->index,
343 VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
344 &adj->rewrite_header,
345 sizeof (adj->rewrite_data));
/* send a probe for the next hop, per protocol */
349 case FIB_PROTOCOL_IP4:
350 adj_ip4_nbr_probe(adj);
352 case FIB_PROTOCOL_IP6:
353 adj_ip6_nbr_probe(adj);
355 case FIB_PROTOCOL_MPLS:
362 adj = adj_get(adj_index);
/* the caller gets a lock whether the adjacency is new or already existed */
365 adj_lock(adj->heap_handle);
367 return (adj->heap_handle);
/*
 * As adj_nbr_add_or_lock, but the caller supplies the rewrite string:
 * find or create the adjacency, lock it, apply the rewrite via
 * adj_nbr_update_rewrite, and return its heap_handle.
 */
371 adj_nbr_add_or_lock_w_rewrite (fib_protocol_t nh_proto,
372 fib_link_t link_type,
373 const ip46_address_t *nh_addr,
377 adj_index_t adj_index;
380 adj_index = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index);
382 if (ADJ_INDEX_INVALID == adj_index)
384 adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
/* adj_nbr_update_rewrite reads the interface from the rewrite header */
385 adj->rewrite_header.sw_if_index = sw_if_index;
389 adj = adj_get(adj_index);
392 adj_lock(adj->heap_handle);
/*
 * NOTE(review): the rewrite appears to be applied on every call, i.e.
 * also when the adjacency already existed - confirm this is intended.
 */
393 adj_nbr_update_rewrite(adj->heap_handle, rewrite);
395 return (adj->heap_handle);
399 * adj_nbr_update_rewrite
401 * Update the adjacency's rewrite string. A NULL string implies the
402 * rewrite is reset (i.e. when the ARP/ND entry is gone).
403 * NB: the adj being updated may be handling traffic in the DP.
406 adj_nbr_update_rewrite (adj_index_t adj_index,
411 ASSERT(ADJ_INDEX_INVALID != adj_index);
413 adj = adj_get(adj_index);
418 * new rewrite provided.
419 * use a dummy rewrite header to get the interface to print into.
421 ip_adjacency_t dummy;
423 vnet_rewrite_for_sw_interface(vnet_get_main(),
424 adj_fib_link_2_vnet(adj->ia_link),
425 adj->rewrite_header.sw_if_index,
426 adj_get_rewrite_node(adj->ia_link)->index,
428 &dummy.rewrite_header,
429 sizeof (dummy.rewrite_data));
431 if (IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index)
434 * this is an update of an existing rewrite.
435 * we can't just paste in the new rewrite as that is not atomic.
436 * So we briefly swap the ADJ to ARP type, paste, then swap back.
438 adj->lookup_next_index = IP_LOOKUP_NEXT_ARP;
/* order the switch to ARP before the rewrite bytes are overwritten */
439 CLIB_MEMORY_BARRIER();
443 * this is the first time the rewrite is added.
444 * paste it on then swap the next type.
446 clib_memcpy(&adj->rewrite_header,
447 &dummy.rewrite_header,
448 VLIB_BUFFER_PRE_DATA_SIZE);
/*
 * NOTE(review): the copy length above is VLIB_BUFFER_PRE_DATA_SIZE, not a
 * sizeof of the rewrite header/data; confirm both source and destination
 * span at least that many bytes starting at rewrite_header.
 */
450 adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
457 adj->lookup_next_index = IP_LOOKUP_NEXT_ARP;
458 CLIB_MEMORY_BARRIER();
/* after the barrier: mark the adjacency as carrying no rewrite bytes */
460 adj->rewrite_header.data_bytes = 0;
464 * time for walkies fido.
465 * The link type MPLS Adj never has children. So if it is this adj
466 * that is updated, we need to walk from its IP sibling.
468 if (FIB_LINK_MPLS == adj->ia_link)
/*
 * adj_index is re-pointed at the adjacency with the same next hop and
 * interface but the IP link type; the walk below starts from that index.
 */
470 adj_index = adj_nbr_find(adj->ia_nh_proto,
471 fib_proto_to_link(adj->ia_nh_proto),
472 &adj->sub_type.nbr.next_hop,
473 adj->rewrite_header.sw_if_index);
475 ASSERT(ADJ_INDEX_INVALID != adj_index);
478 fib_node_back_walk_ctx_t bw_ctx = {
479 .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE,
481 * This walk only needs to go back one level, but there is no control here.
482 * the first receiving fib_entry_t will quash the walk
486 fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_index, &bw_ctx);
/*
 * Context handed as the opaque argument to adj_db_count during the walk
 * performed by adj_nbr_db_size.
 */
489 typedef struct adj_db_count_ctx_t_ {
491 } adj_db_count_ctx_t;
/*
 * Hash-walk callback; arg is an adj_db_count_ctx_t.
 * NOTE(review): presumably bumps a counter in the context once per
 * key/value pair - confirm.
 */
494 adj_db_count (BVT(clib_bihash_kv) * kvp,
497 adj_db_count_ctx_t * ctx = arg;
/*
 * Walk every allocated per-interface table for IPv4 and IPv6 with a
 * counting context. The protocol loop stops at FIB_PROTOCOL_IP6, so any
 * tables for other protocols are not visited.
 */
502 adj_nbr_db_size (void)
504 adj_db_count_ctx_t ctx = {
507 fib_protocol_t proto;
510 for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
512 vec_foreach_index(sw_if_index, adj_nbr_tables[proto])
/* slots for interfaces that never had an adjacency are skipped */
514 if (NULL != adj_nbr_tables[proto][sw_if_index])
516 BV(clib_bihash_foreach_key_value_pair) (
517 adj_nbr_tables[proto][sw_if_index],
527 * Context for the state change walk of the DB
529 typedef struct adj_nbr_interface_state_change_ctx_t_
532 * Flags passed from the vnet notify function
535 } adj_nbr_interface_state_change_ctx_t;
/*
 * Hash-walk callback for one adjacency (kvp->value is its index): start
 * a synchronous back-walk whose reason is INTERFACE_UP when the context
 * flags have ADMIN_UP set, INTERFACE_DOWN otherwise.
 */
538 adj_nbr_interface_state_change_one (BVT(clib_bihash_kv) * kvp,
542 * Back walk the graph to inform the forwarding entries
543 * that this interface state has changed.
545 adj_nbr_interface_state_change_ctx_t *ctx = arg;
547 fib_node_back_walk_ctx_t bw_ctx = {
548 .fnbw_reason = (ctx->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ?
549 FIB_NODE_BW_REASON_FLAG_INTERFACE_UP :
550 FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN),
553 fib_walk_sync(FIB_NODE_TYPE_ADJ, kvp->value, &bw_ctx);
/*
 * Admin up/down handler, registered below. For IPv4 and IPv6, if the
 * interface has a neighbour table, every adjacency in it is passed to
 * adj_nbr_interface_state_change_one.
 */
556 static clib_error_t *
557 adj_nbr_interface_state_change (vnet_main_t * vnm,
561 fib_protocol_t proto;
564 * walk each adj on the interface and trigger a walk from that adj
566 for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
/* nothing to walk when this interface has no table for the protocol */
568 if (!ADJ_NBR_ITF_OK(proto, sw_if_index))
571 adj_nbr_interface_state_change_ctx_t ctx = {
575 BV(clib_bihash_foreach_key_value_pair) (
576 adj_nbr_tables[proto][sw_if_index],
577 adj_nbr_interface_state_change_one,
584 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(adj_nbr_interface_state_change);
/*
 * Hash-walk callback for one adjacency (kvp->value is its index): start
 * a synchronous back-walk with reason INTERFACE_DELETE.
 */
587 adj_nbr_interface_delete_one (BVT(clib_bihash_kv) * kvp,
591 * Back walk the graph to inform the forwarding entries
592 * that this interface has been deleted.
594 fib_node_back_walk_ctx_t bw_ctx = {
595 .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE,
598 fib_walk_sync(FIB_NODE_TYPE_ADJ, kvp->value, &bw_ctx);
602 * adj_nbr_interface_add_del
604 * Registered to receive interface Add and delete notifications
/*
 * Interface add/delete handler, registered below. For IPv4 and IPv6, if
 * the interface has a neighbour table, every adjacency in it is passed
 * to adj_nbr_interface_delete_one.
 * NOTE(review): per the comment in the body, additions are presumably
 * filtered out before the walk - confirm.
 */
606 static clib_error_t *
607 adj_nbr_interface_add_del (vnet_main_t * vnm,
611 fib_protocol_t proto;
616 * not interested in interface additions. we will not back walk
617 * to resolve paths through newly added interfaces. Why? The control
618 * plane should have the brains to add interfaces first, then routes.
619 * So the case where there are paths with a interface that matches
620 * one just created is the case where the path resolved through an
621 * interface that was deleted, and still has not been removed. The
622 * new interface added, is NO GUARANTEE that the interface being
623 * added now, even though it may have the same sw_if_index, is the
624 * same interface that the path needs. So tough!
625 * If the control plane wants these routes to resolve it needs to
626 * remove and add them again.
631 for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
/* nothing to walk when this interface has no table for the protocol */
633 if (!ADJ_NBR_ITF_OK(proto, sw_if_index))
636 BV(clib_bihash_foreach_key_value_pair) (
637 adj_nbr_tables[proto][sw_if_index],
638 adj_nbr_interface_delete_one,
646 VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_nbr_interface_add_del);
/*
 * Hash-walk callback: print one adjacency in brief form
 * (FORMAT_IP_ADJACENCY_NONE). arg is passed to vlib_cli_output as its
 * first argument, so the caller must supply the vlib_main_t there.
 */
650 adj_nbr_show_one (BVT(clib_bihash_kv) * kvp,
653 vlib_cli_output (arg, "[@%d] %U",
656 vnet_get_main(), kvp->value,
657 FORMAT_IP_ADJACENCY_NONE);
/*
 * CLI handler for "show adj nbr". With an adjacency index on the command
 * line that one adjacency is printed in detail; otherwise the IPv4 and
 * IPv6 neighbour tables of every interface are walked.
 */
660 static clib_error_t *
661 adj_nbr_show (vlib_main_t * vm,
662 unformat_input_t * input,
663 vlib_cli_command_t * cmd)
665 adj_index_t ai = ADJ_INDEX_INVALID;
667 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
669 if (unformat (input, "%d", &ai))
/* an index was parsed: show just that adjacency, in detail */
675 if (ADJ_INDEX_INVALID != ai)
677 vlib_cli_output (vm, "[@%d] %U",
682 FORMAT_IP_ADJACENCY_DETAIL);
/* no index given: walk every allocated table */
686 fib_protocol_t proto;
688 for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
692 vec_foreach_index(sw_if_index, adj_nbr_tables[proto])
694 if (!ADJ_NBR_ITF_OK(proto, sw_if_index))
697 BV(clib_bihash_foreach_key_value_pair) (
698 adj_nbr_tables[proto][sw_if_index],
/*
 * Registers "show adj nbr" with adj_nbr_show as its handler.
 * NOTE(review): the registration symbol is named ip4_show_fib_command
 * although it registers the adjacency command, and short_help advertises
 * "sw_if_index <index>" while the handler only visibly parses a bare
 * adjacency index - confirm and align.
 */
708 VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
709 .path = "show adj nbr",
710 .short_help = "show adj nbr [<adj_index>] [sw_if_index <index>]",
711 .function = adj_nbr_show,
/*
 * Format callback for an incomplete neighbour adjacency. Consumes an
 * index_t and a u32 indent (unused) from the argument list and appends
 * "arp-<link>: via <next-hop> <interface>" to s.
 * NOTE(review): ap is declared as va_list * but is passed to va_arg
 * without dereferencing; va_arg requires a va_list, so this should
 * presumably read va_arg(*ap, ...) - confirm and fix.
 */
715 format_adj_nbr_incomplete (u8* s, va_list *ap)
717 index_t index = va_arg(ap, index_t);
718 CLIB_UNUSED(u32 indent) = va_arg(ap, u32);
719 vnet_main_t * vnm = vnet_get_main();
720 ip_adjacency_t * adj = adj_get(index);
722 s = format (s, "arp-%U", format_fib_link, adj->ia_link);
723 s = format (s, ": via %U",
724 format_ip46_address, &adj->sub_type.nbr.next_hop);
725 s = format (s, " %U",
726 format_vnet_sw_interface_name,
728 vnet_get_sw_interface(vnm,
729 adj->rewrite_header.sw_if_index));
/*
 * Format callback for a complete neighbour adjacency. Consumes an index_t
 * and a u32 indent (unused) from the argument list and appends the link
 * type, " via <next-hop> " and output derived from the rewrite header.
 * NOTE(review): ap is declared as va_list * but is passed to va_arg
 * without dereferencing; va_arg requires a va_list, so this should
 * presumably read va_arg(*ap, ...) - confirm and fix.
 */
735 format_adj_nbr (u8* s, va_list *ap)
737 index_t index = va_arg(ap, index_t);
738 CLIB_UNUSED(u32 indent) = va_arg(ap, u32);
739 vnet_main_t * vnm = vnet_get_main();
740 ip_adjacency_t * adj = adj_get(index);
742 s = format (s, "%U", format_fib_link, adj->ia_link);
743 s = format (s, " via %U ",
744 format_ip46_address, &adj->sub_type.nbr.next_hop);
747 vnm->vlib_main, &adj->rewrite_header, sizeof (adj->rewrite_data), 0);
/* DPO lock hook: takes a lock on the adjacency whose index the DPO carries. */
753 adj_dpo_lock (dpo_id_t *dpo)
755 adj_lock(dpo->dpoi_index);
/* DPO unlock hook: releases a lock on the adjacency whose index the DPO carries. */
758 adj_dpo_unlock (dpo_id_t *dpo)
760 adj_unlock(dpo->dpoi_index);
/*
 * Virtual function tables for the two neighbour DPO types. Both share the
 * lock/unlock hooks and differ only in the formatter: complete
 * adjacencies use format_adj_nbr, incomplete ones
 * format_adj_nbr_incomplete.
 */
763 const static dpo_vft_t adj_nbr_dpo_vft = {
764 .dv_lock = adj_dpo_lock,
765 .dv_unlock = adj_dpo_unlock,
766 .dv_format = format_adj_nbr,
768 const static dpo_vft_t adj_nbr_incompl_dpo_vft = {
769 .dv_lock = adj_dpo_lock,
770 .dv_unlock = adj_dpo_unlock,
771 .dv_format = format_adj_nbr_incomplete,
775 * @brief The per-protocol VLIB graph nodes that are assigned to an adjacency
778 * this means that these graph nodes are ones from which a nbr is the
779 * parent object in the DPO-graph.
/* Graph node names for complete neighbour adjacencies, one list per protocol. */
781 const static char* const nbr_ip4_nodes[] =
783 "ip4-rewrite-transit",
786 const static char* const nbr_ip6_nodes[] =
791 const static char* const nbr_mpls_nodes[] =
/* Per-DPO-protocol lookup of the lists above. */
796 const static char* const * const nbr_nodes[DPO_PROTO_NUM] =
798 [DPO_PROTO_IP4] = nbr_ip4_nodes,
799 [DPO_PROTO_IP6] = nbr_ip6_nodes,
800 [DPO_PROTO_MPLS] = nbr_mpls_nodes,
/* Graph node names for incomplete neighbour adjacencies, one list per protocol. */
803 const static char* const nbr_incomplete_ip4_nodes[] =
808 const static char* const nbr_incomplete_ip6_nodes[] =
810 "ip6-discover-neighbor",
813 const static char* const nbr_incomplete_mpls_nodes[] =
815 "mpls-adj-incomplete",
/*
 * Per-DPO-protocol lookup of the incomplete lists; registered with
 * DPO_ADJACENCY_INCOMPLETE in adj_nbr_module_init.
 */
819 const static char* const * const nbr_incomplete_nodes[DPO_PROTO_NUM] =
821 [DPO_PROTO_IP4] = nbr_incomplete_ip4_nodes,
822 [DPO_PROTO_IP6] = nbr_incomplete_ip6_nodes,
823 [DPO_PROTO_MPLS] = nbr_incomplete_mpls_nodes,
/*
 * Module initialisation: registers the DPO_ADJACENCY and
 * DPO_ADJACENCY_INCOMPLETE DPO types. The incomplete type is registered
 * with adj_nbr_incompl_dpo_vft and nbr_incomplete_nodes.
 */
827 adj_nbr_module_init (void)
829 dpo_register(DPO_ADJACENCY,
832 dpo_register(DPO_ADJACENCY_INCOMPLETE,
833 &adj_nbr_incompl_dpo_vft,
834 nbr_incomplete_nodes);