2 * Copyright (c) 2019 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <sys/socket.h>
19 //#include <vlib/vlib.h>
20 #include <vlib/unix/plugin.h>
21 #include <linux-cp/lcp_nl.h>
22 #include <linux-cp/lcp_interface.h>
24 #include <netlink/msg.h>
25 #include <netlink/netlink.h>
26 #include <netlink/socket.h>
27 #include <netlink/route/link.h>
28 #include <netlink/route/route.h>
29 #include <netlink/route/neighbour.h>
30 #include <netlink/route/addr.h>
31 #include <netlink/route/link/vlan.h>
33 #include <vnet/fib/fib_table.h>
34 #include <vnet/mfib/mfib_table.h>
35 #include <vnet/ip/ip6_ll_table.h>
36 #include <vnet/ip-neighbor/ip_neighbor.h>
37 #include <vnet/ip/ip6_link.h>
/*
 * Per-table record; instances live in lcp_router_table_pool.
 * Only nlt_proto is visible in this excerpt. Other code in this file also
 * reads/writes nlt_id, nlt_fib_index, nlt_mfib_index and nlt_refs.
 */
39 typedef struct lcp_router_table_t_
/* FIB protocol this table record was created for */
42 fib_protocol_t nlt_proto;
/* Per-FIB-protocol hash: table id -> index into lcp_router_table_pool
 * (populated in lcp_router_table_add_or_lock, read in lcp_router_table_find)
 */
48 static uword *lcp_router_table_db[FIB_PROTOCOL_MAX];
/* Pool backing all lcp_router_table_t records */
49 static lcp_router_table_t *lcp_router_table_pool;
/* Log class used by the LCP_ROUTER_* macros; registered in lcp_router_init */
50 static vlib_log_class_t lcp_router_logger;
/*
 * IPv4 prefix added as a special (local) entry when an IPv4 table is created
 * and removed again when the table's last reference is dropped.
 * NOTE(review): the address/length members of the initializer are not visible
 * in this excerpt; nearby comments call it the "all 1s address".
 */
52 const static fib_prefix_t pfx_all1s = {
58 .fp_proto = FIB_PROTOCOL_IP4,
/*
 * FIB sources under which routes learned from netlink are installed.
 * lcp_router_proto_fib_source() selects lcp_rt_fib_src for route protocols
 * <= RTPROT_STATIC and lcp_rt_fib_src_dynamic for everything else.
 */
62 static fib_source_t lcp_rt_fib_src;
63 static fib_source_t lcp_rt_fib_src_dynamic;
/*
 * Logging shorthands bound to lcp_router_logger (debug / notice / error).
 * NOTE(review): each expansion already ends with ';', so a call written as
 * "LCP_ROUTER_DBG (...);" produces an extra empty statement. That is harmless
 * inside braces but would break an unbraced if/else.
 */
65 #define LCP_ROUTER_DBG(...) vlib_log_debug (lcp_router_logger, __VA_ARGS__);
67 #define LCP_ROUTER_INFO(...) vlib_log_notice (lcp_router_logger, __VA_ARGS__);
69 #define LCP_ROUTER_ERROR(...) vlib_log_err (lcp_router_logger, __VA_ARGS__);
/*
 * IPv4 multicast prefixes handled specially by this file:
 * lcp_router_ip4_mroutes_add_del() adds/removes an accepting interface path
 * for each of them, and lcp_router_table_add_or_lock() installs a local
 * forwarding path for each when an IPv4 table is created.
 */
71 static const mfib_prefix_t ip4_specials[] = {
72 /* ALL prefixes are in network order */
74 /* (*,224.0.0.0)/24 - all local subnet */
76 .ip4.data_u32 = 0x000000e0,
79 .fp_proto = FIB_PROTOCOL_IP4,
/*
 * IPv6 counterpart of ip4_specials: multicast prefixes for which
 * lcp_router_ip6_mroutes_add_del() manages an accepting interface path and
 * lcp_router_table_add_or_lock() installs a local forwarding path.
 */
83 static const mfib_prefix_t ip6_specials[] = {
84 /* ALL prefixes are in network order */
86 /* (*,ff00::)/8 - all local subnet */
88 .ip6.as_u64[0] = 0x00000000000000ff,
91 .fp_proto = FIB_PROTOCOL_IP6,
95 /* VIF to PHY DB of managed interfaces */
/* Keyed by host interface index; consulted first by lcp_router_intf_h2p().
 * No writer of this hash is visible in this excerpt. */
96 static uword *lcp_routing_itf_db;
/*
 * Map a host-side (Linux) interface index to a VPP software interface index.
 *
 * Lookup order visible here: lcp_routing_itf_db first, then the interface
 * pair registered for that VIF, whose phy sw_if_index is returned.
 *
 * NOTE(review): the value returned on a hash hit and the value returned when
 * no pair exists are on lines not visible in this excerpt; every caller in
 * this file treats ~0 as "no mapping".
 */
99 lcp_router_intf_h2p (u32 host)
106 * first check the linux side created interface (i.e. vlans, tunnels etc)
108 p = hash_get (lcp_routing_itf_db, host);
114 * then check the paired phys
116 lipi = lcp_itf_pair_find_by_vif (host);
118 if (INDEX_INVALID == lipi)
121 lip = lcp_itf_pair_get (lipi);
123 return lip->lip_phy_sw_if_index;
127 * Check timestamps on netlink message and interface pair to decide whether
128 * the message should be applied. See the declaration of nl_msg_info_t for
129 * an explanation on why this is necessary.
130 * If timestamps are good (message ts is newer than intf pair ts), return 0.
/*
 * Compare the netlink message timestamp with the interface pair's creation
 * timestamp. A message strictly newer than the pair is accepted; otherwise
 * an "Early message received" notice is logged for the pair's phy interface.
 * Callers skip processing when the result is non-zero.
 * NOTE(review): the return statements themselves are not visible in this
 * excerpt.
 */
134 lcp_router_lip_ts_check (nl_msg_info_t *msg_info, lcp_itf_pair_t *lip)
139 if (msg_info->ts > lip->lip_create_ts)
142 LCP_ROUTER_INFO ("Early message received for %U",
143 format_vnet_sw_if_index_name, vnet_get_main (),
144 lip->lip_phy_sw_if_index);
/*
 * Netlink link-delete handler.
 *
 * rl:  the deleted link.
 * ctx: passed to lcp_router_lip_ts_check() as an nl_msg_info_t.
 *
 * Acts only when lcp_auto_subint() is enabled. If an interface pair exists
 * for the link's ifindex and the timestamp check passes, the pair is deleted;
 * for VLAN links both sub-interfaces (phy and host) are deleted as well.
 * Links without a pair are logged and ignored.
 */
149 lcp_router_link_del (struct rtnl_link *rl, void *ctx)
153 if (!lcp_auto_subint ())
156 lipi = lcp_itf_pair_find_by_vif (rtnl_link_get_ifindex (rl));
158 if (INDEX_INVALID != lipi)
162 lip = lcp_itf_pair_get (lipi);
164 if (lcp_router_lip_ts_check ((nl_msg_info_t *) ctx, lip))
167 LCP_ROUTER_INFO ("delete link: %s - %U", rtnl_link_get_type (rl),
168 format_vnet_sw_if_index_name, vnet_get_main (),
169 lip->lip_phy_sw_if_index);
170 lcp_itf_pair_delete (lip->lip_phy_sw_if_index);
/* NOTE(review): lip is still dereferenced below, after the pair it points to
 * has been deleted - confirm the pair storage remains readable at this point,
 * or cache the two sw_if_index values before the delete. */
172 if (rtnl_link_is_vlan (rl))
174 LCP_ROUTER_INFO ("delete vlan: %s -> %U", rtnl_link_get_name (rl),
175 format_vnet_sw_if_index_name, vnet_get_main (),
176 lip->lip_phy_sw_if_index);
177 vnet_delete_sub_interface (lip->lip_phy_sw_if_index);
178 vnet_delete_sub_interface (lip->lip_host_sw_if_index);
182 LCP_ROUTER_INFO ("ignore link del: %s - %s", rtnl_link_get_type (rl),
183 rtnl_link_get_name (rl));
/*
 * Add or remove an accepting (MFIB_ITF_FLAG_ACCEPT) path on sw_if_index for
 * every prefix in ip4_specials, in the IPv4 mfib table the interface is
 * bound to. The update and remove calls are both visible; which one runs is
 * presumably selected by is_add (the branch lines are not visible here).
 */
187 lcp_router_ip4_mroutes_add_del (u32 sw_if_index, u8 is_add)
189 const fib_route_path_t path = {
190 .frp_proto = DPO_PROTO_IP4,
191 .frp_addr = zero_addr,
192 .frp_sw_if_index = sw_if_index,
195 .frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT,
201 mfib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
203 for (ii = 0; ii < ARRAY_LEN (ip4_specials); ii++)
207 mfib_table_entry_path_update (mfib_index, &ip4_specials[ii],
208 MFIB_SOURCE_PLUGIN_LOW,
209 MFIB_ENTRY_FLAG_NONE, &path);
213 mfib_table_entry_path_remove (mfib_index, &ip4_specials[ii],
214 MFIB_SOURCE_PLUGIN_LOW, &path);
/*
 * IPv6 counterpart of lcp_router_ip4_mroutes_add_del(): add or remove an
 * accepting path on sw_if_index for every prefix in ip6_specials, in the
 * IPv6 mfib table the interface is bound to. The add/remove selection is
 * presumably driven by is_add (branch lines not visible here).
 */
220 lcp_router_ip6_mroutes_add_del (u32 sw_if_index, u8 is_add)
222 const fib_route_path_t path = {
223 .frp_proto = DPO_PROTO_IP6,
224 .frp_addr = zero_addr,
225 .frp_sw_if_index = sw_if_index,
228 .frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT,
234 mfib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
236 for (ii = 0; ii < ARRAY_LEN (ip6_specials); ii++)
240 mfib_table_entry_path_update (mfib_index, &ip6_specials[ii],
241 MFIB_SOURCE_PLUGIN_LOW,
242 MFIB_ENTRY_FLAG_NONE, &path);
246 mfib_table_entry_path_remove (mfib_index, &ip6_specials[ii],
247 MFIB_SOURCE_PLUGIN_LOW, &path);
/*
 * Apply the MTU reported by the netlink link to the VPP interface
 * sw_if_index. When the interface is its own super-interface and its hw
 * class is ethernet, the hardware MTU is changed; the software-interface MTU
 * call follows.
 * NOTE(review): whether the sw MTU call is an else-branch, and any guard on
 * the mtu value, are on lines not visible in this excerpt.
 */
253 lcp_router_link_mtu (struct rtnl_link *rl, u32 sw_if_index)
255 vnet_main_t *vnm = vnet_get_main ();
257 vnet_sw_interface_t *sw;
258 vnet_hw_interface_t *hw;
260 mtu = rtnl_link_get_mtu (rl);
264 sw = vnet_get_sw_interface (vnm, sw_if_index);
265 hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
267 /* If HW interface, try to change hw link */
268 if ((sw->sw_if_index == sw->sup_sw_if_index) &&
269 (hw->hw_class_index == ethernet_hw_interface_class.index))
270 vnet_hw_interface_set_mtu (vnm, hw->hw_if_index, mtu);
272 vnet_sw_interface_set_mtu (vnm, sw->sw_if_index, mtu);
/*
 * Walk callback used by lcp_router_link_addr(): for the interface pair whose
 * phy is sw_if_index, refresh the pair's IPv4 and IPv6 phy adjacencies.
 * Always continues the walk. arg is not used in the visible code.
 * NOTE(review): the condition guarding the first WALK_CONTINUE is not
 * visible in this excerpt.
 */
276 lcp_router_link_addr_adj_upd_cb (vnet_main_t *vnm, u32 sw_if_index, void *arg)
280 lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
283 return WALK_CONTINUE;
286 vnet_update_adjacency_for_sw_interface (vnm, lip->lip_phy_sw_if_index,
287 lip->lip_phy_adjs.adj_index[AF_IP4]);
288 vnet_update_adjacency_for_sw_interface (vnm, lip->lip_phy_sw_if_index,
289 lip->lip_phy_adjs.adj_index[AF_IP6]);
291 return WALK_CONTINUE;
/*
 * Synchronise the link-layer (MAC) address of the pair's phy interface with
 * the address carried by the netlink link.
 *
 * Checks made before any change: the link has an AF_LLC address, the phy is
 * a hardware interface (it is its own super-interface), and the hw interface
 * has a non-empty hw_address. If the bytes differ, the hw MAC is changed,
 * and the adjacency-update callback is then walked over the hw interface's
 * software interfaces.
 * NOTE(review): the early-exit statements after each check, and whether the
 * walk is conditional on the MAC change, are not visible in this excerpt.
 */
295 lcp_router_link_addr (struct rtnl_link *rl, lcp_itf_pair_t *lip)
297 vnet_main_t *vnm = vnet_get_main ();
298 struct nl_addr *mac_addr;
299 vnet_sw_interface_t *sw;
300 vnet_hw_interface_t *hw;
301 void *mac_addr_bytes;
303 mac_addr = rtnl_link_get_addr (rl);
304 if (!mac_addr || (nl_addr_get_family (mac_addr) != AF_LLC))
307 sw = vnet_get_sw_interface (vnm, lip->lip_phy_sw_if_index);
309 /* can only change address on hw interface */
310 if (sw->sw_if_index != sw->sup_sw_if_index)
313 hw = vnet_get_sup_hw_interface (vnm, lip->lip_phy_sw_if_index);
314 if (!vec_len (hw->hw_address))
317 mac_addr_bytes = nl_addr_get_binary_addr (mac_addr);
318 if (clib_memcmp (mac_addr_bytes, hw->hw_address, nl_addr_get_len (mac_addr)))
319 vnet_hw_interface_change_mac_address (vnm, hw->hw_if_index,
322 /* mcast adjacencies need to be updated */
323 vnet_hw_interface_walk_sw (vnm, hw->hw_if_index,
324 lcp_router_link_addr_adj_upd_cb, NULL);
/* Forward declaration: the definition appears later in the file, but the
 * link add and link up/down handlers below need to call it. */
327 static void lcp_router_table_flush (lcp_router_table_t *nlt,
328 u32 *sw_if_index_to_bool,
329 fib_source_t source);
/*
 * Netlink link-add/update handler.
 *
 * rl:  the link being added or updated.
 * ctx: passed to lcp_router_lip_ts_check() as an nl_msg_info_t.
 *
 * Three cases are visible:
 *  1. A pair already exists for the link's ifindex: after the sw-interface
 *     and timestamp checks, the phy's admin state is brought in line with
 *     the link's IFF_UP flag. On an up->down transition, IPv4 routes that
 *     resolve through the phy are flushed from every matching table (see the
 *     in-line comment on kernel behaviour). Finally the MTU and MAC address
 *     are synchronised.
 *  2. No pair, auto-subint enabled and the link is a VLAN: sub-interfaces
 *     are created on the parent's phy and host interfaces, paired under the
 *     link's name/ifindex, and both are set admin-up.
 *  3. Anything else is logged and ignored.
 *
 * NOTE(review): several branch/return lines, local declarations (up,
 * sw_if_up, vlan, if_name, if_namev, ...) and the source argument of the
 * first lcp_router_table_flush() call are not visible in this excerpt.
 */
332 lcp_router_link_add (struct rtnl_link *rl, void *ctx)
336 vnet_main_t *vnm = vnet_get_main ();
338 lipi = lcp_itf_pair_find_by_vif (rtnl_link_get_ifindex (rl));
339 up = IFF_UP & rtnl_link_get_flags (rl);
341 if (INDEX_INVALID != lipi)
347 lip = lcp_itf_pair_get (lipi);
348 if (!vnet_get_sw_interface (vnm, lip->lip_phy_sw_if_index))
351 if (lcp_router_lip_ts_check ((nl_msg_info_t *) ctx, lip))
355 vnet_sw_interface_get_flags (vnm, lip->lip_phy_sw_if_index);
356 sw_if_up = (sw_if_flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP);
360 vnet_sw_interface_admin_up (vnet_get_main (),
361 lip->lip_phy_sw_if_index);
363 else if (sw_if_up && !up)
365 vnet_sw_interface_admin_down (vnet_get_main (),
366 lip->lip_phy_sw_if_index);
368 /* When an interface is brought down administratively, the kernel
369 * removes routes which resolve through that interface. For IPv4
370 * routes, the kernel will not send any explicit RTM_DELROUTE
371 * messages about removing them. In order to synchronize with the
372 * kernel, affected IPv4 routes need to be manually removed from the
373 * FIB. The behavior is different for IPv6 routes. Explicit
374 * RTM_DELROUTE messages are sent about IPv6 routes being removed.
377 lcp_router_table_t *nlt;
379 fib_index = fib_table_get_index_for_sw_if_index (
380 FIB_PROTOCOL_IP4, lip->lip_phy_sw_if_index);
382 pool_foreach (nlt, lcp_router_table_pool)
384 if (fib_index == nlt->nlt_fib_index &&
385 FIB_PROTOCOL_IP4 == nlt->nlt_proto)
387 u32 *sw_if_index_to_bool = NULL;
389 vec_validate_init_empty (sw_if_index_to_bool,
390 lip->lip_phy_sw_if_index, false);
391 sw_if_index_to_bool[lip->lip_phy_sw_if_index] = true;
393 lcp_router_table_flush (nlt, sw_if_index_to_bool,
395 lcp_router_table_flush (nlt, sw_if_index_to_bool,
396 lcp_rt_fib_src_dynamic);
398 vec_free (sw_if_index_to_bool);
404 LCP_ROUTER_DBG ("link: %s (%d) -> %U/%U %s", rtnl_link_get_name (rl),
405 rtnl_link_get_ifindex (rl), format_vnet_sw_if_index_name,
406 vnm, lip->lip_phy_sw_if_index,
407 format_vnet_sw_if_index_name, vnm,
408 lip->lip_host_sw_if_index, (up ? "up" : "down"));
410 lcp_router_link_mtu (rl, lip->lip_phy_sw_if_index);
411 lcp_router_link_addr (rl, lip);
413 else if (lcp_auto_subint () && rtnl_link_is_vlan (rl))
415 /* Find the pair based on the parent VIF */
416 lipi = lcp_itf_pair_find_by_vif (rtnl_link_get_link (rl));
418 if (INDEX_INVALID != lipi)
420 u32 sub_phy_sw_if_index, sub_host_sw_if_index;
421 const lcp_itf_pair_t *lip;
423 u8 *ns = 0; /* FIXME */
425 lip = lcp_itf_pair_get (lipi);
427 vlan = rtnl_link_vlan_get_id (rl);
429 /* create the vlan interface on the parent phy */
430 if (vnet_create_sub_interface (lip->lip_phy_sw_if_index, vlan, 18, 0,
431 vlan, &sub_phy_sw_if_index))
433 LCP_ROUTER_INFO ("failed create phy vlan: %s on %U",
434 rtnl_link_get_name (rl),
435 format_vnet_sw_if_index_name, vnet_get_main (),
436 lip->lip_phy_sw_if_index);
440 /* pool could grow during the previous operation */
441 lip = lcp_itf_pair_get (lipi);
443 /* create the vlan interface on the parent host */
444 if (vnet_create_sub_interface (lip->lip_host_sw_if_index, vlan, 18,
445 0, vlan, &sub_host_sw_if_index))
447 LCP_ROUTER_INFO ("failed create vlan: %s on %U",
448 rtnl_link_get_name (rl),
449 format_vnet_sw_if_index_name, vnet_get_main (),
450 lip->lip_host_sw_if_index);
458 "create vlan: %s -> (%U, %U) : (%U, %U)", rtnl_link_get_name (rl),
459 format_vnet_sw_if_index_name, vnet_get_main (),
460 lip->lip_phy_sw_if_index, format_vnet_sw_if_index_name,
461 vnet_get_main (), sub_phy_sw_if_index,
462 format_vnet_sw_if_index_name, vnet_get_main (),
463 lip->lip_host_sw_if_index, format_vnet_sw_if_index_name,
464 vnet_get_main (), sub_host_sw_if_index);
466 if ((if_name = rtnl_link_get_name (rl)) != NULL)
467 vec_validate_init_c_string (if_namev, if_name,
468 strnlen (if_name, IFNAMSIZ));
469 lcp_itf_pair_add (sub_host_sw_if_index, sub_phy_sw_if_index,
470 if_namev, rtnl_link_get_ifindex (rl),
471 lip->lip_host_type, ns);
473 vnet_sw_interface_admin_up (vnet_get_main (), sub_phy_sw_if_index);
474 vnet_sw_interface_admin_up (vnet_get_main (), sub_host_sw_if_index);
480 LCP_ROUTER_INFO ("ignore parent-link add: %s - %s",
481 rtnl_link_get_type (rl), rtnl_link_get_name (rl));
485 LCP_ROUTER_INFO ("ignore link add: %s - %s", rtnl_link_get_type (rl),
486 rtnl_link_get_name (rl));
/* Start-of-resync hook for links; the visible code only logs a notice. */
490 lcp_router_link_sync_begin (void)
492 LCP_ROUTER_INFO ("Begin synchronization of interface configurations");
/* End-of-resync hook for links; the visible code only logs a notice. */
496 lcp_router_link_sync_end (void)
498 LCP_ROUTER_INFO ("End synchronization of interface configurations");
/*
 * Hardware link up/down callback, registered through
 * VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION at the end of this block.
 *
 * Does nothing for interfaces without an interface pair. When the link goes
 * down and at least one of the del-static / del-dynamic-on-link-down knobs
 * is set, it builds a two-level vector (IPv4 fib index -> sw_if_index ->
 * bool) covering the hw interface itself and each of its sub-interfaces,
 * then flushes every matching IPv4 table for the enabled source(s) and frees
 * the vectors.
 *
 * NOTE(review): the NULL check on hi, the return statements and some call
 * arguments are on lines not visible in this excerpt.
 */
501 static clib_error_t *
502 lcp_router_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
504 vnet_hw_interface_t *hi;
507 hi = vnet_get_hw_interface_or_null (vnm, hw_if_index);
511 lipi = lcp_itf_pair_find_by_phy (hi->sw_if_index);
512 if (lipi == INDEX_INVALID)
515 /* When the link goes down on an interface, the kernel processes routes which
516 * resolve through that interface depending on how they were created:
517 * - Legacy Route API: the kernel retains the routes and marks them as
519 * - Nexthop API: the kernel removes the next-hop objects and the routes
520 * which reference them.
522 * For IPv4 routes created with Nexthop API, the kernel will not send any
523 * explicit RTM_DELROUTE messages about removing them. In order to
524 * synchronize with the kernel, affected routes need to be manually removed
527 * The behavior is different for IPv6 routes created with Nexthop API. The
528 * kernel will send explicit RTM_DELROUTE messages about IPv6 routes being
531 if (!(flags & VNET_HW_INTERFACE_FLAG_LINK_UP) &&
532 (lcp_get_del_static_on_link_down () ||
533 lcp_get_del_dynamic_on_link_down ()))
536 u32 **fib_index_to_sw_if_index_to_bool = NULL;
538 lcp_router_table_t *nlt;
540 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
543 vec_validate_init_empty (fib_index_to_sw_if_index_to_bool, fib_index,
545 vec_validate_init_empty (fib_index_to_sw_if_index_to_bool[fib_index],
546 hi->sw_if_index, false);
547 fib_index_to_sw_if_index_to_bool[fib_index][hi->sw_if_index] = true;
549 /* clang-format off */
550 hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
552 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
554 vec_validate_init_empty (fib_index_to_sw_if_index_to_bool, fib_index,
556 vec_validate_init_empty (fib_index_to_sw_if_index_to_bool[fib_index],
558 fib_index_to_sw_if_index_to_bool[fib_index][sw_if_index] = true;
560 /* clang-format on */
562 vec_foreach_index (fib_index, fib_index_to_sw_if_index_to_bool)
564 u32 *sw_if_index_to_bool;
566 sw_if_index_to_bool = fib_index_to_sw_if_index_to_bool[fib_index];
567 if (NULL == sw_if_index_to_bool)
570 pool_foreach (nlt, lcp_router_table_pool)
572 if (fib_index == nlt->nlt_fib_index &&
573 FIB_PROTOCOL_IP4 == nlt->nlt_proto)
575 if (lcp_get_del_static_on_link_down ())
576 lcp_router_table_flush (nlt, sw_if_index_to_bool,
578 if (lcp_get_del_dynamic_on_link_down ())
579 lcp_router_table_flush (nlt, sw_if_index_to_bool,
580 lcp_rt_fib_src_dynamic);
585 vec_free (sw_if_index_to_bool);
588 vec_free (fib_index_to_sw_if_index_to_bool);
594 VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_router_link_up_down);
/*
 * Convert a kernel address family value to a FIB protocol. Callers pass the
 * result of nl_addr_get_family(). Returns FIB_PROTOCOL_IP6 or
 * FIB_PROTOCOL_IP4.
 * NOTE(review): the condition selecting the IPv6 return is not visible in
 * this excerpt; IPv4 is the fall-through result.
 */
596 static fib_protocol_t
597 lcp_router_proto_k2f (uint32_t k)
600 return (FIB_PROTOCOL_IP6);
601 return (FIB_PROTOCOL_IP4);
/*
 * Fill the ip_address_t *ia from a libnl address: ia is reset, then set from
 * rna's binary address with the version (AF_IP4/AF_IP6) derived from rna's
 * family. rna is dereferenced without a NULL check in the visible code.
 */
605 lcp_router_mk_addr (const struct nl_addr *rna, ip_address_t *ia)
607 fib_protocol_t fproto;
609 ip_address_reset (ia);
610 fproto = lcp_router_proto_k2f (nl_addr_get_family (rna));
612 ip_address_set (ia, nl_addr_get_binary_addr (rna),
613 FIB_PROTOCOL_IP4 == fproto ? AF_IP4 : AF_IP6);
/*
 * Fill the ip46_address_t *ia from a libnl address and report its FIB
 * protocol. ia is reset first, then nl_addr_get_len(rna) bytes are copied
 * into the ip4 or ip6 member depending on rna's family.
 * NOTE(review): the return statement is not visible in this excerpt; callers
 * store the result as a prefix/path protocol, so fproto is presumably
 * returned.
 */
616 static fib_protocol_t
617 lcp_router_mk_addr46 (const struct nl_addr *rna, ip46_address_t *ia)
619 fib_protocol_t fproto;
621 fproto = lcp_router_proto_k2f (nl_addr_get_family (rna));
622 ip46_address_reset (ia);
623 if (FIB_PROTOCOL_IP4 == fproto)
624 memcpy (&ia->ip4, nl_addr_get_binary_addr (rna), nl_addr_get_len (rna));
626 memcpy (&ia->ip6, nl_addr_get_binary_addr (rna), nl_addr_get_len (rna));
/*
 * Add (is_del == 0) or delete (is_del != 0) an interface address reported by
 * netlink on the VPP interface mapped from the address's ifindex. Nothing is
 * done when no mapping exists (~0).
 *
 * IPv4: the address is added/removed and the ip4 special mroute paths follow
 * with the opposite sense (!is_del).
 * IPv6: link-local unicast addresses go through the ip6_link enable /
 * set-local-address / disable calls; other addresses use
 * ip6_add_del_interface_address(). The ip6 special mroute paths are updated
 * with !is_del.
 * NOTE(review): the if/else lines that choose between the ip6_link calls and
 * the regular address call are not visible in this excerpt.
 */
632 lcp_router_link_addr_add_del (struct rtnl_addr *rla, int is_del)
636 sw_if_index = lcp_router_intf_h2p (rtnl_addr_get_ifindex (rla));
638 if (~0 != sw_if_index)
642 lcp_router_mk_addr (rtnl_addr_get_local (rla), &nh);
644 if (AF_IP4 == ip_addr_version (&nh))
646 ip4_add_del_interface_address (
647 vlib_get_main (), sw_if_index, &ip_addr_v4 (&nh),
648 rtnl_addr_get_prefixlen (rla), is_del);
649 lcp_router_ip4_mroutes_add_del (sw_if_index, !is_del);
651 else if (AF_IP6 == ip_addr_version (&nh))
653 if (ip6_address_is_link_local_unicast (&ip_addr_v6 (&nh)))
655 ip6_link_disable (sw_if_index);
658 ip6_link_enable (sw_if_index, NULL);
659 ip6_link_set_local_address (sw_if_index, &ip_addr_v6 (&nh));
662 ip6_add_del_interface_address (
663 vlib_get_main (), sw_if_index, &ip_addr_v6 (&nh),
664 rtnl_addr_get_prefixlen (rla), is_del);
665 lcp_router_ip6_mroutes_add_del (sw_if_index, !is_del);
668 LCP_ROUTER_DBG ("link-addr: %U %U/%d", format_vnet_sw_if_index_name,
669 vnet_get_main (), sw_if_index, format_ip_address, &nh,
670 rtnl_addr_get_prefixlen (rla));
/* Netlink address-delete handler: forwards to
 * lcp_router_link_addr_add_del() with is_del = 1. */
675 lcp_router_link_addr_del (struct rtnl_addr *la)
677 lcp_router_link_addr_add_del (la, 1);
/* Netlink address-add handler: forwards to
 * lcp_router_link_addr_add_del() with is_del = 0. */
681 lcp_router_link_addr_add (struct rtnl_addr *la)
683 lcp_router_link_addr_add_del (la, 0);
/*
 * Interface-pair walk callback: marks the interface addresses of the pair's
 * phy interface so that a later ip_interface_address_sweep() can remove the
 * ones not refreshed during resync. Always continues the walk. ctx is not
 * used in the visible code.
 * NOTE(review): the condition guarding the first WALK_CONTINUE is not
 * visible in this excerpt.
 */
687 lcp_router_address_mark (index_t index, void *ctx)
689 vnet_main_t *vnm = vnet_get_main ();
691 lcp_itf_pair_t *lip = lcp_itf_pair_get (index);
693 return WALK_CONTINUE;
695 ip_interface_address_mark_one_interface (
696 vnm, vnet_get_sw_interface (vnm, lip->lip_phy_sw_if_index), 0);
698 return WALK_CONTINUE;
/* Start of address resync: mark the addresses on every interface pair via
 * lcp_router_address_mark(), then log a notice. */
702 lcp_router_link_addr_sync_begin (void)
704 lcp_itf_pair_walk (lcp_router_address_mark, 0);
706 LCP_ROUTER_INFO ("Begin synchronization of interface addresses");
/* End of address resync: run the interface-address sweep, then log a
 * notice. */
710 lcp_router_link_addr_sync_end (void)
712 ip_interface_address_sweep ();
714 LCP_ROUTER_INFO ("End synchronization of interface addresses");
/* Fill *mac from the binary bytes of a libnl address. The length of rna is
 * not checked in the visible code. */
718 lcp_router_mk_mac_addr (const struct nl_addr *rna, mac_address_t *mac)
720 mac_address_from_bytes (mac, nl_addr_get_binary_addr (rna));
/*
 * Netlink neighbour-delete handler (also called from lcp_router_neigh_add()
 * for neighbours that are not in a usable state).
 *
 * The neighbour's ifindex is mapped to a VPP interface; unmapped interfaces
 * are logged and ignored. A neighbour without a destination address is
 * checked for, and multicast destinations are logged as ignored; otherwise
 * ip_neighbor_del() is called and the outcome is logged.
 * NOTE(review): the early returns and the rv test are on lines not visible
 * in this excerpt.
 */
724 lcp_router_neigh_del (struct rtnl_neigh *rn)
728 sw_if_index = lcp_router_intf_h2p (rtnl_neigh_get_ifindex (rn));
730 if (~0 != sw_if_index)
736 if ((rna = rtnl_neigh_get_dst (rn)) == NULL)
738 lcp_router_mk_addr (rna, &nh);
740 if (ip46_address_is_multicast (&ip_addr_46 (&nh)))
742 LCP_ROUTER_DBG ("ignore neighbor del: %U %U", format_ip_address, &nh,
743 format_vnet_sw_if_index_name, vnet_get_main (),
748 rv = ip_neighbor_del (&nh, sw_if_index);
753 "Failed to delete neighbor: %U %U", format_ip_address, &nh,
754 format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index);
758 LCP_ROUTER_DBG ("neighbor del: %U %U", format_ip_address, &nh,
759 format_vnet_sw_if_index_name, vnet_get_main (),
764 LCP_ROUTER_INFO ("ignore neighbour del on: %d",
765 rtnl_neigh_get_ifindex (rn));
770 (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE | \
/*
 * Netlink neighbour-add/update handler.
 *
 * The neighbour's ifindex is mapped to a VPP interface; unmapped interfaces
 * are logged and ignored, as are multicast destinations. When the neighbour
 * has a link-layer address and its state intersects NUD_VALID, it is added
 * via ip_neighbor_add() - static if the state has NUD_NOARP or
 * NUD_PERMANENT, dynamic otherwise. The remaining path hands the neighbour
 * to lcp_router_neigh_del().
 * NOTE(review): the else lines, early returns and the rv test are not
 * visible in this excerpt.
 */
775 lcp_router_neigh_add (struct rtnl_neigh *rn)
779 sw_if_index = lcp_router_intf_h2p (rtnl_neigh_get_ifindex (rn));
781 if (~0 != sw_if_index)
788 if ((rna = rtnl_neigh_get_dst (rn)) == NULL)
790 lcp_router_mk_addr (rna, &nh);
792 if (ip46_address_is_multicast (&ip_addr_46 (&nh)))
794 LCP_ROUTER_DBG ("ignore neighbor add: %U %U", format_ip_address, &nh,
795 format_vnet_sw_if_index_name, vnet_get_main (),
800 ll = rtnl_neigh_get_lladdr (rn);
801 state = rtnl_neigh_get_state (rn);
803 if (ll && (state & NUD_VALID))
806 ip_neighbor_flags_t flags;
809 lcp_router_mk_mac_addr (ll, &mac);
811 if (state & (NUD_NOARP | NUD_PERMANENT))
812 flags = IP_NEIGHBOR_FLAG_STATIC;
814 flags = IP_NEIGHBOR_FLAG_DYNAMIC;
816 rv = ip_neighbor_add (&nh, &mac, sw_if_index, flags, NULL);
821 "Failed to create neighbor: %U %U", format_ip_address, &nh,
822 format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index);
826 LCP_ROUTER_DBG ("neighbor add: %U %U", format_ip_address, &nh,
827 format_vnet_sw_if_index_name, vnet_get_main (),
833 lcp_router_neigh_del (rn);
836 LCP_ROUTER_INFO ("ignore neighbour add on: %d",
837 rtnl_neigh_get_ifindex (rn));
/*
 * Interface-pair walk callback: marks every IPv4 and IPv6 neighbour on the
 * pair's phy interface (ip_neighbor_mark_one) ahead of a sweep. Always
 * continues the walk. ctx is not used in the visible code.
 * NOTE(review): the condition guarding the first WALK_CONTINUE is not
 * visible in this excerpt.
 */
841 lcp_router_neighbor_mark (index_t index, void *ctx)
843 lcp_itf_pair_t *lip = lcp_itf_pair_get (index);
845 return WALK_CONTINUE;
847 ip_neighbor_walk (AF_IP4, lip->lip_phy_sw_if_index, ip_neighbor_mark_one, 0);
848 ip_neighbor_walk (AF_IP6, lip->lip_phy_sw_if_index, ip_neighbor_mark_one, 0);
850 return WALK_CONTINUE;
/* Start of neighbour resync: mark neighbours on every interface pair via
 * lcp_router_neighbor_mark(), then log a notice. */
854 lcp_router_neigh_sync_begin (void)
856 lcp_itf_pair_walk (lcp_router_neighbor_mark, 0);
858 LCP_ROUTER_INFO ("Begin synchronization of neighbors");
/* End of neighbour resync: sweep IPv4 and IPv6 neighbours, then log a
 * notice. */
862 lcp_router_neigh_sync_end (void)
864 ip_neighbor_sweep (AF_IP4);
865 ip_neighbor_sweep (AF_IP6);
867 LCP_ROUTER_INFO ("End synchronization of neighbors");
/*
 * Look up the table record for (id, fproto) in lcp_router_table_db and
 * return the matching element of lcp_router_table_pool.
 * NOTE(review): the miss path is not visible in this excerpt; presumably
 * NULL is returned when the id is not in the hash. lcp_router_route_del()
 * dereferences the result, so confirm it checks for that.
 */
870 static lcp_router_table_t *
871 lcp_router_table_find (uint32_t id, fib_protocol_t fproto)
875 p = hash_get (lcp_router_table_db[fproto], id);
878 return pool_elt_at_index (lcp_router_table_pool, p[0]);
/*
 * Translate a kernel routing-table id into the table id used on the VPP
 * side. Kernel ids 255 and 254 are special-cased.
 * NOTE(review): the values returned are not visible in this excerpt. The
 * comment below mentions only 255 while the test also covers 254; in
 * linux/rtnetlink.h 254 is RT_TABLE_MAIN and 255 is RT_TABLE_LOCAL, so the
 * comment likely needs updating - confirm.
 */
884 lcp_router_table_k2f (uint32_t k)
886 // the kernel's table ID 255 is the default table
887 if (k == 255 || k == 254)
/*
 * Find the table record for (kernel table id, fproto), creating it on first
 * use. id is first translated with lcp_router_table_k2f().
 *
 * On creation the record is taken zeroed from the pool, the FIB and MFIB
 * tables are found-or-created and locked, and the record is entered in
 * lcp_router_table_db. IPv4 tables additionally get the pfx_all1s special
 * entry and a local forwarding path for each ip4_specials prefix; IPv6
 * tables get the same for ip6_specials.
 *
 * NOTE(review): the "not found" test, the nlt_id / nlt_refs updates and the
 * return statement are on lines not visible in this excerpt.
 */
892 static lcp_router_table_t *
893 lcp_router_table_add_or_lock (uint32_t id, fib_protocol_t fproto)
895 lcp_router_table_t *nlt;
897 id = lcp_router_table_k2f (id);
898 nlt = lcp_router_table_find (id, fproto);
902 pool_get_zero (lcp_router_table_pool, nlt);
905 nlt->nlt_proto = fproto;
907 nlt->nlt_fib_index = fib_table_find_or_create_and_lock (
908 nlt->nlt_proto, nlt->nlt_id, lcp_rt_fib_src);
909 nlt->nlt_mfib_index = mfib_table_find_or_create_and_lock (
910 nlt->nlt_proto, nlt->nlt_id, MFIB_SOURCE_PLUGIN_LOW);
912 hash_set (lcp_router_table_db[fproto], nlt->nlt_id,
913 nlt - lcp_router_table_pool);
915 if (FIB_PROTOCOL_IP4 == fproto)
917 /* Set the all 1s address in this table to punt */
918 fib_table_entry_special_add (nlt->nlt_fib_index, &pfx_all1s,
919 lcp_rt_fib_src, FIB_ENTRY_FLAG_LOCAL);
921 const fib_route_path_t path = {
922 .frp_proto = DPO_PROTO_IP4,
923 .frp_addr = zero_addr,
924 .frp_sw_if_index = ~0,
927 .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
928 .frp_flags = FIB_ROUTE_PATH_LOCAL,
932 for (ii = 0; ii < ARRAY_LEN (ip4_specials); ii++)
934 mfib_table_entry_path_update (
935 nlt->nlt_mfib_index, &ip4_specials[ii], MFIB_SOURCE_PLUGIN_LOW,
936 MFIB_ENTRY_FLAG_NONE, &path);
939 else if (FIB_PROTOCOL_IP6 == fproto)
941 const fib_route_path_t path = {
942 .frp_proto = DPO_PROTO_IP6,
943 .frp_addr = zero_addr,
944 .frp_sw_if_index = ~0,
947 .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
948 .frp_flags = FIB_ROUTE_PATH_LOCAL,
952 for (ii = 0; ii < ARRAY_LEN (ip6_specials); ii++)
954 mfib_table_entry_path_update (
955 nlt->nlt_mfib_index, &ip6_specials[ii], MFIB_SOURCE_PLUGIN_LOW,
956 MFIB_ENTRY_FLAG_NONE, &path);
/*
 * Release one reference on a table record. When nlt_refs reaches zero the
 * record is torn down: for IPv4 the pfx_all1s special entry is removed, the
 * FIB table lock taken with lcp_rt_fib_src is released, and the record is
 * removed from lcp_router_table_db and returned to the pool.
 * NOTE(review): the nlt_refs decrement is on a line not visible in this
 * excerpt, and no mfib unlock is visible either, although add_or_lock takes
 * an mfib lock - confirm.
 */
967 lcp_router_table_unlock (lcp_router_table_t *nlt)
971 if (0 == nlt->nlt_refs)
973 if (FIB_PROTOCOL_IP4 == nlt->nlt_proto)
975 /* Set the all 1s address in this table to punt */
976 fib_table_entry_special_remove (nlt->nlt_fib_index, &pfx_all1s,
980 fib_table_unlock (nlt->nlt_fib_index, nlt->nlt_proto, lcp_rt_fib_src);
982 hash_unset (lcp_router_table_db[nlt->nlt_proto], nlt->nlt_id);
983 pool_put (lcp_router_table_pool, nlt);
/* Build a unicast fib_prefix_t from the route's destination: prefix length,
 * address and protocol are all taken from rtnl_route_get_dst(). */
988 lcp_router_route_mk_prefix (struct rtnl_route *r, fib_prefix_t *p)
990 const struct nl_addr *addr = rtnl_route_get_dst (r);
992 p->fp_len = nl_addr_get_prefixlen (addr);
993 p->fp_proto = lcp_router_mk_addr46 (addr, &p->fp_addr);
/*
 * Build a multicast mfib_prefix_t from a route: the group address and
 * prefix length come from the route's destination, the source address from
 * the route's source.
 * NOTE(review): fp_proto is assigned twice, so the value derived from the
 * source address wins. Any guard on the source address being present is not
 * visible in this excerpt.
 */
997 lcp_router_route_mk_mprefix (struct rtnl_route *r, mfib_prefix_t *p)
999 const struct nl_addr *addr;
1001 addr = rtnl_route_get_dst (r);
1003 p->fp_len = nl_addr_get_prefixlen (addr);
1004 p->fp_proto = lcp_router_mk_addr46 (addr, &p->fp_grp_addr);
1006 addr = rtnl_route_get_src (r);
1008 p->fp_proto = lcp_router_mk_addr46 (addr, &p->fp_src_addr);
/*
 * Context threaded through rtnl_route_foreach_nexthop() while converting a
 * route's next-hops into FIB paths. Initialisers elsewhere in this file also
 * set is_mcast and preference members, which are not visible in this
 * excerpt.
 */
1011 typedef struct lcp_router_route_path_parse_t_
/* vector of paths accumulated so far (freed by the caller with vec_free) */
1013 fib_route_path_t *paths;
/* protocol of the route; used when a next-hop carries no address */
1014 fib_protocol_t route_proto;
/* path flags derived from the route type, OR-ed into every path */
1016 fib_route_path_flags_t type_flags;
1018 } lcp_router_route_path_parse_t;
/*
 * Per-next-hop callback for rtnl_route_foreach_nexthop(); arg is a
 * lcp_router_route_path_parse_t.
 *
 * Next-hops whose interface has no VPP mapping (~0) are skipped. Otherwise a
 * path is appended to ctx->paths with the mapped interface, the next-hop's
 * weight, the context's preference and type flags, and the gateway (or
 * "via") address when present. The path protocol comes from that address,
 * or from ctx->route_proto when there is none. The mitf flag assignment is
 * presumably limited to multicast routes (guard not visible here).
 */
1021 lcp_router_route_path_parse (struct rtnl_nexthop *rnh, void *arg)
1023 lcp_router_route_path_parse_t *ctx = arg;
1024 fib_route_path_t *path;
1027 sw_if_index = lcp_router_intf_h2p (rtnl_route_nh_get_ifindex (rnh));
1029 if (~0 != sw_if_index)
1031 fib_protocol_t fproto;
1032 struct nl_addr *addr;
1034 vec_add2 (ctx->paths, path, 1);
1036 path->frp_flags = FIB_ROUTE_PATH_FLAG_NONE | ctx->type_flags;
1037 path->frp_sw_if_index = sw_if_index;
1038 path->frp_weight = rtnl_route_nh_get_weight (rnh);
1039 path->frp_preference = ctx->preference;
1041 addr = rtnl_route_nh_get_gateway (rnh);
1043 addr = rtnl_route_nh_get_via (rnh);
1046 fproto = lcp_router_mk_addr46 (addr, &path->frp_addr);
1048 fproto = ctx->route_proto;
1050 path->frp_proto = fib_proto_to_dpo (fproto);
1053 path->frp_mitf_flags = MFIB_ITF_FLAG_FORWARD;
1055 LCP_ROUTER_DBG (" path:[%U]", format_fib_route_path, path);
1060 * blackhole, unreachable, prohibit will not have a next hop in an
1061 * RTM_NEWROUTE. Add a path for them.
/*
 * Append a synthetic path for route types that carry no next-hop. Route
 * types below RTN_BLACKHOLE and routes that already produced at least one
 * path are tested for first (their early returns are not visible in this
 * excerpt). The added path has no interface (~0), the route's protocol, and
 * the context's type flags and preference.
 */
1064 lcp_router_route_path_add_special (struct rtnl_route *rr,
1065 lcp_router_route_path_parse_t *ctx)
1067 fib_route_path_t *path;
1069 if (rtnl_route_get_type (rr) < RTN_BLACKHOLE)
1072 /* if it already has a path, it does not need us to add one */
1073 if (vec_len (ctx->paths) > 0)
1076 vec_add2 (ctx->paths, path, 1);
1078 path->frp_flags = FIB_ROUTE_PATH_FLAG_NONE | ctx->type_flags;
1079 path->frp_sw_if_index = ~0;
1080 path->frp_proto = fib_proto_to_dpo (ctx->route_proto);
1081 path->frp_preference = ctx->preference;
1083 LCP_ROUTER_DBG (" path:[%U]", format_fib_route_path, path);
1087 * Map of supported route types. Some types are omitted:
1088 * RTN_LOCAL - interface address addition creates these automatically
1089 * RTN_BROADCAST - same as RTN_LOCAL
1090 * RTN_UNSPEC, RTN_ANYCAST, RTN_THROW, RTN_NAT, RTN_XRESOLVE -
1091 * There's not a VPP equivalent for these currently.
/* Indexed by kernel route type (RTN_*): non-zero for the types that
 * lcp_router_route_add()/del() process; all other types are skipped. */
1093 static const u8 lcp_router_route_type_valid[__RTN_MAX] = {
1094 [RTN_UNICAST] = 1, [RTN_MULTICAST] = 1, [RTN_BLACKHOLE] = 1,
1095 [RTN_UNREACHABLE] = 1, [RTN_PROHIBIT] = 1,
1098 /* Map of fib entry flags by route type */
/* Indexed by RTN_*; types not listed get no flags from this table. Read by
 * lcp_router_route_mk_entry_flags(). */
1099 static const fib_entry_flag_t lcp_router_route_type_feflags[__RTN_MAX] = {
1100 [RTN_LOCAL] = FIB_ENTRY_FLAG_LOCAL | FIB_ENTRY_FLAG_CONNECTED,
1101 [RTN_BROADCAST] = FIB_ENTRY_FLAG_DROP | FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT,
1102 [RTN_BLACKHOLE] = FIB_ENTRY_FLAG_DROP,
1105 /* Map of fib route path flags by route type */
/* Indexed by RTN_*; used to seed type_flags in the path-parse context so
 * every path of such a route carries the matching drop/ICMP behaviour. */
1106 static const fib_route_path_flags_t
1107 lcp_router_route_type_frpflags[__RTN_MAX] = {
1108 [RTN_UNREACHABLE] = FIB_ROUTE_PATH_ICMP_UNREACH,
1109 [RTN_PROHIBIT] = FIB_ROUTE_PATH_ICMP_PROHIBIT,
1110 [RTN_BLACKHOLE] = FIB_ROUTE_PATH_DROP,
/* Choose the FIB source for a route from its kernel routing protocol:
 * values up to and including RTPROT_STATIC use lcp_rt_fib_src, anything
 * higher uses lcp_rt_fib_src_dynamic. */
1113 static inline fib_source_t
1114 lcp_router_proto_fib_source (u8 rt_proto)
1116 return (rt_proto <= RTPROT_STATIC) ? lcp_rt_fib_src : lcp_rt_fib_src_dynamic;
/*
 * Compute FIB entry flags for a route: start from the per-type flags in
 * lcp_router_route_type_feflags, and add ATTACHED|CONNECTED for routes with
 * protocol RTPROT_KERNEL or in kernel table 255.
 * rtype indexes a table of size __RTN_MAX without a range check in the
 * visible code. The return statement is not visible in this excerpt;
 * callers use the result as the entry flags.
 */
1119 static fib_entry_flag_t
1120 lcp_router_route_mk_entry_flags (uint8_t rtype, int table_id, uint8_t rproto)
1122 fib_entry_flag_t fef = FIB_ENTRY_FLAG_NONE;
1124 fef |= lcp_router_route_type_feflags[rtype];
1125 if ((rproto == RTPROT_KERNEL) || PREDICT_FALSE (255 == table_id))
1126 /* kernel proto is interface prefixes, 255 is linux's 'local' table */
1127 fef |= FIB_ENTRY_FLAG_ATTACHED | FIB_ENTRY_FLAG_CONNECTED;
/*
 * Netlink route-delete handler.
 *
 * Unsupported route types and kernel table 255 are filtered first. The
 * prefix and entry flags are derived from the route, the table record is
 * looked up, and the route's next-hops are converted to paths. If any path
 * results, IPv6 routes are removed as a whole entry while the other
 * (IPv4) call removes the listed paths. The path vector is freed and one
 * table reference is released.
 *
 * NOTE(review): the early returns, the else line, any NULL check on nlt
 * (which is dereferenced below) and the trailing arguments of some calls
 * are on lines not visible in this excerpt.
 */
1133 lcp_router_route_del (struct rtnl_route *rr)
1135 fib_entry_flag_t entry_flags;
1138 lcp_router_table_t *nlt;
1139 uint8_t rtype, rproto;
1141 rtype = rtnl_route_get_type (rr);
1142 table_id = rtnl_route_get_table (rr);
1143 rproto = rtnl_route_get_protocol (rr);
1145 /* skip unsupported route types and local table */
1146 if (!lcp_router_route_type_valid[rtype] || (table_id == 255))
1149 lcp_router_route_mk_prefix (rr, &pfx);
1150 entry_flags = lcp_router_route_mk_entry_flags (rtype, table_id, rproto);
1151 nlt = lcp_router_table_find (lcp_router_table_k2f (table_id), pfx.fp_proto);
1153 LCP_ROUTER_DBG ("route del: %d:%U %U", rtnl_route_get_table (rr),
1154 format_fib_prefix, &pfx, format_fib_entry_flags,
1160 lcp_router_route_path_parse_t np = {
1161 .route_proto = pfx.fp_proto,
1162 .type_flags = lcp_router_route_type_frpflags[rtype],
1165 rtnl_route_foreach_nexthop (rr, lcp_router_route_path_parse, &np);
1166 lcp_router_route_path_add_special (rr, &np);
1168 if (0 != vec_len (np.paths))
1170 fib_source_t fib_src;
1172 fib_src = lcp_router_proto_fib_source (rproto);
1174 if (pfx.fp_proto == FIB_PROTOCOL_IP6)
1175 fib_table_entry_delete (nlt->nlt_fib_index, &pfx, fib_src);
1177 fib_table_entry_path_remove2 (nlt->nlt_fib_index, &pfx, fib_src,
1181 vec_free (np.paths);
1183 lcp_router_table_unlock (nlt);
/*
 * Netlink route-add/replace handler.
 *
 * rr:         the route.
 * is_replace: used in the log line; presumably also selects between the
 *             entry-update and path-add calls near the end (branch lines
 *             not visible here).
 *
 * Unsupported route types and kernel table 255 are filtered first. Kernel
 * protocol routes and IPv6 multicast / link-local prefixes are logged as
 * skipped. The route's next-hops are converted to paths; with at least one
 * path, multicast routes go to the mfib (accept-all-interfaces entry plus
 * the paths), and other routes go to the FIB under the source chosen by
 * lcp_router_proto_fib_source(), with ATTACHED added if any path is
 * attached. The path vector is freed at the end.
 *
 * NOTE(review): several branch/return lines are not visible in this
 * excerpt.
 */
1187 lcp_router_route_add (struct rtnl_route *rr, int is_replace)
1189 fib_entry_flag_t entry_flags;
1192 lcp_router_table_t *nlt;
1193 uint8_t rtype, rproto;
1195 rtype = rtnl_route_get_type (rr);
1196 table_id = rtnl_route_get_table (rr);
1197 rproto = rtnl_route_get_protocol (rr);
1199 /* skip unsupported route types and local table */
1200 if (!lcp_router_route_type_valid[rtype] || (table_id == 255))
1203 lcp_router_route_mk_prefix (rr, &pfx);
1204 entry_flags = lcp_router_route_mk_entry_flags (rtype, table_id, rproto);
1206 nlt = lcp_router_table_add_or_lock (table_id, pfx.fp_proto);
/* NOTE(review): the table is created/locked above before the skip test
 * below; if the skip path returns without unlocking (not visible here), the
 * reference taken for a skipped route stays held - confirm. */
1207 /* Skip any kernel routes and IPv6 LL or multicast routes */
1208 if (rproto == RTPROT_KERNEL ||
1209 (FIB_PROTOCOL_IP6 == pfx.fp_proto &&
1210 (ip6_address_is_multicast (&pfx.fp_addr.ip6) ||
1211 ip6_address_is_link_local_unicast (&pfx.fp_addr.ip6))))
1213 LCP_ROUTER_DBG ("route skip: %d:%U %U", rtnl_route_get_table (rr),
1214 format_fib_prefix, &pfx, format_fib_entry_flags,
1218 LCP_ROUTER_DBG ("route %s: %d:%U %U", is_replace ? "replace" : "add",
1219 rtnl_route_get_table (rr), format_fib_prefix, &pfx,
1220 format_fib_entry_flags, entry_flags);
1222 lcp_router_route_path_parse_t np = {
1223 .route_proto = pfx.fp_proto,
1224 .is_mcast = (rtype == RTN_MULTICAST),
1225 .type_flags = lcp_router_route_type_frpflags[rtype],
1226 .preference = (u8) rtnl_route_get_priority (rr),
1229 rtnl_route_foreach_nexthop (rr, lcp_router_route_path_parse, &np);
1230 lcp_router_route_path_add_special (rr, &np);
1232 if (0 != vec_len (np.paths))
1234 if (rtype == RTN_MULTICAST)
1236 /* it's not clear to me how linux expresses the RPF parameters
1237 * so we'll allow from all interfaces and hope for the best */
1238 mfib_prefix_t mpfx = {};
1240 lcp_router_route_mk_mprefix (rr, &mpfx);
1242 mfib_table_entry_update (nlt->nlt_mfib_index, &mpfx,
1243 MFIB_SOURCE_PLUGIN_LOW, MFIB_RPF_ID_NONE,
1244 MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF);
1246 mfib_table_entry_paths_update (nlt->nlt_mfib_index, &mpfx,
1247 MFIB_SOURCE_PLUGIN_LOW,
1248 MFIB_ENTRY_FLAG_NONE, np.paths);
1252 fib_source_t fib_src;
1253 const fib_route_path_t *rpath;
1255 vec_foreach (rpath, np.paths)
1257 if (fib_route_path_is_attached (rpath))
1259 entry_flags |= FIB_ENTRY_FLAG_ATTACHED;
1264 fib_src = lcp_router_proto_fib_source (rproto);
1267 fib_table_entry_update (nlt->nlt_fib_index, &pfx, fib_src,
1268 entry_flags, np.paths);
1270 fib_table_entry_path_add2 (nlt->nlt_fib_index, &pfx, fib_src,
1271 entry_flags, np.paths);
1276 LCP_ROUTER_DBG ("no paths for route: %d:%U %U",
1277 rtnl_route_get_table (rr), format_fib_prefix, &pfx,
1278 format_fib_entry_flags, entry_flags);
1280 vec_free (np.paths);
/* Start of route resync: for every known table, mark the entries of both
 * FIB sources (static and dynamic) and log a notice per table. */
1284 lcp_router_route_sync_begin (void)
1286 lcp_router_table_t *nlt;
1288 pool_foreach (nlt, lcp_router_table_pool)
1290 fib_table_mark (nlt->nlt_fib_index, nlt->nlt_proto, lcp_rt_fib_src);
1291 fib_table_mark (nlt->nlt_fib_index, nlt->nlt_proto,
1292 lcp_rt_fib_src_dynamic);
1294 LCP_ROUTER_INFO ("Begin synchronization of %U routes in table %u",
1295 format_fib_protocol, nlt->nlt_proto,
1296 nlt->nlt_fib_index);
/* End of route resync: for every known table, sweep both FIB sources
 * (static and dynamic) and log a notice per table. */
1301 lcp_router_route_sync_end (void)
1303 lcp_router_table_t *nlt;
1305 pool_foreach (nlt, lcp_router_table_pool)
1307 fib_table_sweep (nlt->nlt_fib_index, nlt->nlt_proto, lcp_rt_fib_src);
1308 fib_table_sweep (nlt->nlt_fib_index, nlt->nlt_proto,
1309 lcp_rt_fib_src_dynamic);
1311 LCP_ROUTER_INFO ("End synchronization of %U routes in table %u",
1312 format_fib_protocol, nlt->nlt_proto,
1313 nlt->nlt_fib_index);
/* Context handed to lcp_router_table_flush_cb() during a table walk. */
1317 typedef struct lcp_router_table_flush_ctx_t_
/* output: vector of FIB entry indices selected for deletion */
1319 fib_node_index_t *lrtf_entries;
/* input: vector indexed by sw_if_index; non-zero selects that interface */
1320 u32 *lrtf_sw_if_index_to_bool;
/* input: FIB source whose resolving interface is examined */
1321 fib_source_t lrtf_source;
1322 } lcp_router_table_flush_ctx_t;
/*
 * fib_table_walk() callback; arg is a lcp_router_table_flush_ctx_t.
 * Records the entry in ctx->lrtf_entries when the interface it resolves
 * through for ctx->lrtf_source is within range of, and set in, the
 * sw_if_index_to_bool vector. Entries are only collected here; deletion is
 * done by the caller after the walk. Always continues the walk.
 */
1324 static fib_table_walk_rc_t
1325 lcp_router_table_flush_cb (fib_node_index_t fib_entry_index, void *arg)
1327 lcp_router_table_flush_ctx_t *ctx = arg;
1330 sw_if_index = fib_entry_get_resolving_interface_for_source (
1331 fib_entry_index, ctx->lrtf_source);
1333 if (sw_if_index < vec_len (ctx->lrtf_sw_if_index_to_bool) &&
1334 ctx->lrtf_sw_if_index_to_bool[sw_if_index])
1336 vec_add1 (ctx->lrtf_entries, fib_entry_index);
1338 return (FIB_TABLE_WALK_CONTINUE);
/*
 * Remove from table nlt every entry of the given source that resolves
 * through an interface flagged in sw_if_index_to_bool.
 *
 * The table is walked first to collect matching entries, which are deleted
 * only after the walk has finished. One table reference is released per
 * deleted entry. The log line prints vec_len - 1 as the highest sw_if_index
 * covered by the vector.
 */
1342 lcp_router_table_flush (lcp_router_table_t *nlt, u32 *sw_if_index_to_bool,
1343 fib_source_t source)
1345 fib_node_index_t *fib_entry_index;
1346 lcp_router_table_flush_ctx_t ctx = {
1347 .lrtf_entries = NULL,
1348 .lrtf_sw_if_index_to_bool = sw_if_index_to_bool,
1349 .lrtf_source = source,
1353 "Flush table: proto %U, fib-index %u, max sw_if_index %u, source %U",
1354 format_fib_protocol, nlt->nlt_proto, nlt->nlt_fib_index,
1355 vec_len (sw_if_index_to_bool) - 1, format_fib_source, source);
1357 fib_table_walk (nlt->nlt_fib_index, nlt->nlt_proto,
1358 lcp_router_table_flush_cb, &ctx);
1360 LCP_ROUTER_DBG ("Flush table: entries number to delete %u",
1361 vec_len (ctx.lrtf_entries));
1363 vec_foreach (fib_entry_index, ctx.lrtf_entries)
1365 fib_table_entry_delete_index (*fib_entry_index, source);
1366 lcp_router_table_unlock (nlt);
1369 vec_free (ctx.lrtf_entries);
/*
 * Callback table registered with the netlink layer by lcp_router_init().
 * Every link/address/neighbour handler and all sync begin/end hooks are
 * declared with is_mp_safe = 0; only the route add and route del handlers
 * are declared with is_mp_safe = 1.
 */
1372 const nl_vft_t lcp_router_vft = {
1373 .nvl_rt_link_add = { .is_mp_safe = 0, .cb = lcp_router_link_add },
1374 .nvl_rt_link_del = { .is_mp_safe = 0, .cb = lcp_router_link_del },
1375 .nvl_rt_link_sync_begin = { .is_mp_safe = 0,
1376 .cb = lcp_router_link_sync_begin },
1377 .nvl_rt_link_sync_end = { .is_mp_safe = 0, .cb = lcp_router_link_sync_end },
1378 .nvl_rt_addr_add = { .is_mp_safe = 0, .cb = lcp_router_link_addr_add },
1379 .nvl_rt_addr_del = { .is_mp_safe = 0, .cb = lcp_router_link_addr_del },
1380 .nvl_rt_addr_sync_begin = { .is_mp_safe = 0,
1381 .cb = lcp_router_link_addr_sync_begin },
1382 .nvl_rt_addr_sync_end = { .is_mp_safe = 0,
1383 .cb = lcp_router_link_addr_sync_end },
1384 .nvl_rt_neigh_add = { .is_mp_safe = 0, .cb = lcp_router_neigh_add },
1385 .nvl_rt_neigh_del = { .is_mp_safe = 0, .cb = lcp_router_neigh_del },
1386 .nvl_rt_neigh_sync_begin = { .is_mp_safe = 0,
1387 .cb = lcp_router_neigh_sync_begin },
1388 .nvl_rt_neigh_sync_end = { .is_mp_safe = 0,
1389 .cb = lcp_router_neigh_sync_end },
1390 .nvl_rt_route_add = { .is_mp_safe = 1, .cb = lcp_router_route_add },
1391 .nvl_rt_route_del = { .is_mp_safe = 1, .cb = lcp_router_route_del },
1392 .nvl_rt_route_sync_begin = { .is_mp_safe = 0,
1393 .cb = lcp_router_route_sync_begin },
1394 .nvl_rt_route_sync_end = { .is_mp_safe = 0,
1395 .cb = lcp_router_route_sync_end },
/*
 * Plugin init: registers the "linux-cp"/"router" log class, registers
 * lcp_router_vft with the netlink layer, and allocates the two FIB sources
 * "lcp-rt" (FIB_SOURCE_PRIORITY_HI) and "lcp-rt-dynamic"
 * (FIB_SOURCE_PRIORITY_HI + 1).
 * NOTE(review): the assignment target of the first fib_source_allocate()
 * call (presumably lcp_rt_fib_src) and the return statement are on lines
 * not visible in this excerpt. vm is not used in the visible code.
 */
1398 static clib_error_t *
1399 lcp_router_init (vlib_main_t *vm)
1401 lcp_router_logger = vlib_log_register_class ("linux-cp", "router");
1403 nl_register_vft (&lcp_router_vft);
1406 * allocate 2 route sources. The low priority source will be for
1407 * dynamic routes. If a dynamic route daemon (FRR) tries to remove its
1408 * route, it will use the low priority source to ensure it will not
1409 * remove static routes which were added with the higher priority source.
1412 fib_source_allocate ("lcp-rt", FIB_SOURCE_PRIORITY_HI, FIB_SOURCE_BH_API);
1414 lcp_rt_fib_src_dynamic = fib_source_allocate (
1415 "lcp-rt-dynamic", FIB_SOURCE_PRIORITY_HI + 1, FIB_SOURCE_BH_API);
/* Register lcp_router_init as a vlib init function, ordered to run before
 * "lcp_nl_init". */
1420 VLIB_INIT_FUNCTION (lcp_router_init) = {
1421 .runs_before = VLIB_INITS ("lcp_nl_init"),
1425 * fd.io coding-style-patch-verification: ON
1428 * eval: (c-set-style "gnu")