linux-cp: fix setting mtu on hardware interfaces
[vpp.git] / src / plugins / linux-cp / lcp_router.c
index c70acff..af01b54 100644 (file)
@@ -133,6 +133,9 @@ lcp_router_intf_h2p (u32 host)
 static int
 lcp_router_lip_ts_check (nl_msg_info_t *msg_info, lcp_itf_pair_t *lip)
 {
+  if (!msg_info)
+    return 0;
+
   if (msg_info->ts > lip->lip_create_ts)
     return 0;
 
@@ -262,7 +265,7 @@ lcp_router_link_mtu (struct rtnl_link *rl, u32 sw_if_index)
   hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
 
   /* If HW interface, try to change hw link */
-  if ((sw->type == sw->sup_sw_if_index) &&
+  if ((sw->sw_if_index == sw->sup_sw_if_index) &&
       (hw->hw_class_index == ethernet_hw_interface_class.index))
     vnet_hw_interface_set_mtu (vnm, hw->hw_if_index, mtu);
   else
@@ -304,6 +307,10 @@ lcp_router_link_addr (struct rtnl_link *rl, lcp_itf_pair_t *lip)
                                          lip->lip_phy_adjs.adj_index[AF_IP6]);
 }
 
+static void lcp_router_table_flush (lcp_router_table_t *nlt,
+                                   u32 *sw_if_index_to_bool,
+                                   fib_source_t source); /* forward declaration:
+                                    * lcp_router_link_add () below calls this
+                                    * function ahead of its definition */
+
 static void
 lcp_router_link_add (struct rtnl_link *rl, void *ctx)
 {
@@ -317,6 +324,8 @@ lcp_router_link_add (struct rtnl_link *rl, void *ctx)
   if (INDEX_INVALID != lipi)
     {
       lcp_itf_pair_t *lip;
+      u32 sw_if_flags;
+      u32 sw_if_up;
 
       lip = lcp_itf_pair_get (lipi);
       if (!vnet_get_sw_interface (vnm, lip->lip_phy_sw_if_index))
@@ -325,16 +334,56 @@ lcp_router_link_add (struct rtnl_link *rl, void *ctx)
       if (lcp_router_lip_ts_check ((nl_msg_info_t *) ctx, lip))
        return;
 
-      if (up)
+      sw_if_flags =
+       vnet_sw_interface_get_flags (vnm, lip->lip_phy_sw_if_index);
+      sw_if_up = (sw_if_flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+      if (!sw_if_up && up)
        {
          vnet_sw_interface_admin_up (vnet_get_main (),
                                      lip->lip_phy_sw_if_index);
        }
-      else
+      else if (sw_if_up && !up)
        {
          vnet_sw_interface_admin_down (vnet_get_main (),
                                        lip->lip_phy_sw_if_index);
+
+         /* When an interface is brought down administratively, the kernel
+          * removes routes which resolve through that interface. For IPv4
+          * routes, the kernel will not send any explicit RTM_DELROUTE
+          * messages about removing them. In order to synchronize with the
+          * kernel, affected IPv4 routes need to be manually removed from the
+          * FIB. The behavior is different for IPv6 routes. Explicit
+          * RTM_DELROUTE messages are sent about IPv6 routes being removed.
+          */
+         u32 fib_index;
+         lcp_router_table_t *nlt;
+
+         fib_index = fib_table_get_index_for_sw_if_index (
+           FIB_PROTOCOL_IP4, lip->lip_phy_sw_if_index);
+
+         pool_foreach (nlt, lcp_router_table_pool)
+           {
+             if (fib_index == nlt->nlt_fib_index &&
+                 FIB_PROTOCOL_IP4 == nlt->nlt_proto)
+               {
+                 u32 *sw_if_index_to_bool = NULL;
+
+                 vec_validate_init_empty (sw_if_index_to_bool,
+                                          lip->lip_phy_sw_if_index, false);
+                 sw_if_index_to_bool[lip->lip_phy_sw_if_index] = true;
+
+                 lcp_router_table_flush (nlt, sw_if_index_to_bool,
+                                         lcp_rt_fib_src);
+                 lcp_router_table_flush (nlt, sw_if_index_to_bool,
+                                         lcp_rt_fib_src_dynamic);
+
+                 vec_free (sw_if_index_to_bool);
+                 break;
+               }
+           }
        }
+
       LCP_ROUTER_DBG ("link: %s (%d) -> %U/%U %s", rtnl_link_get_name (rl),
                      rtnl_link_get_ifindex (rl), format_vnet_sw_if_index_name,
                      vnm, lip->lip_phy_sw_if_index,
@@ -416,6 +465,113 @@ lcp_router_link_add (struct rtnl_link *rl, void *ctx)
                     rtnl_link_get_name (rl));
 }
 
+/*
+ * Installed as the nvl_rt_link_sync_begin callback in lcp_router_vft.
+ * Changes no state; it only emits an info-level log line.
+ */
+static void
+lcp_router_link_sync_begin (void)
+{
+  LCP_ROUTER_INFO ("Begin synchronization of interface configurations");
+}
+
+/*
+ * Installed as the nvl_rt_link_sync_end callback in lcp_router_vft.
+ * Changes no state; it only emits an info-level log line.
+ */
+static void
+lcp_router_link_sync_end (void)
+{
+  LCP_ROUTER_INFO ("End synchronization of interface configurations");
+}
+
+/*
+ * Flag sw_if_index in a two-level vector: the outer level is indexed by the
+ * IPv4 FIB index of the interface, the inner level by sw_if_index. Both
+ * levels are grown on demand. Returns the outer vector, which the caller
+ * must store back because growing it may move it.
+ */
+static u32 **
+lcp_router_link_down_select_itf (u32 **itfs_by_fib, u32 sw_if_index)
+{
+  u32 fib_index;
+
+  fib_index =
+    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
+
+  vec_validate_init_empty (itfs_by_fib, fib_index, NULL);
+  vec_validate_init_empty (itfs_by_fib[fib_index], sw_if_index, false);
+  itfs_by_fib[fib_index][sw_if_index] = true;
+
+  return itfs_by_fib;
+}
+
+/*
+ * Hardware link up/down handler (registered through
+ * VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION).
+ *
+ * Does nothing unless the hardware interface exists, its sw_if_index is the
+ * phy side of an interface pair, the link went down, and at least one of
+ * the del-static / del-dynamic on-link-down options is enabled. In that
+ * case the IPv4 routes resolving through the interface or any of its
+ * sub-interfaces are flushed from the matching router tables.
+ *
+ * Always returns 0.
+ */
+static clib_error_t *
+lcp_router_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+  u32 **itfs_by_fib = NULL;
+  lcp_router_table_t *nlt;
+  vnet_hw_interface_t *hi;
+  u32 id, sw_if_index;
+  u32 fib_index;
+
+  hi = vnet_get_hw_interface_or_null (vnm, hw_if_index);
+  if (!hi)
+    return 0;
+
+  if (INDEX_INVALID == lcp_itf_pair_find_by_phy (hi->sw_if_index))
+    return 0;
+
+  /* When the link goes down on an interface, the kernel processes routes which
+   * resolve through that interface depending on how they were created:
+   *   - Legacy Route API: the kernel retains the routes and marks them as
+   *     "linkdown";
+   *   - Nexthop API: the kernel removes the next-hop objects and the routes
+   *     which reference them.
+   *
+   * For IPv4 routes created with Nexthop API, the kernel will not send any
+   * explicit RTM_DELROUTE messages about removing them. In order to
+   * synchronize with the kernel, affected routes need to be manually removed
+   * from the FIB.
+   *
+   * The behavior is different for IPv6 routes created with Nexthop API. The
+   * kernel will send explicit RTM_DELROUTE messages about IPv6 routes being
+   * removed.
+   */
+  if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP)
+    return 0;
+
+  if (!lcp_get_del_static_on_link_down () &&
+      !lcp_get_del_dynamic_on_link_down ())
+    return 0;
+
+  /* Select the hardware interface itself, then each of its sub-interfaces */
+  itfs_by_fib = lcp_router_link_down_select_itf (itfs_by_fib, hi->sw_if_index);
+
+  /* clang-format off */
+  hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
+  ({
+    itfs_by_fib = lcp_router_link_down_select_itf (itfs_by_fib, sw_if_index);
+  }));
+  /* clang-format on */
+
+  vec_foreach_index (fib_index, itfs_by_fib)
+    {
+      u32 *selected = itfs_by_fib[fib_index];
+
+      /* No selected interface uses this FIB index */
+      if (NULL == selected)
+       continue;
+
+      pool_foreach (nlt, lcp_router_table_pool)
+       {
+         if (fib_index == nlt->nlt_fib_index &&
+             FIB_PROTOCOL_IP4 == nlt->nlt_proto)
+           {
+             if (lcp_get_del_static_on_link_down ())
+               lcp_router_table_flush (nlt, selected, lcp_rt_fib_src);
+             if (lcp_get_del_dynamic_on_link_down ())
+               lcp_router_table_flush (nlt, selected,
+                                       lcp_rt_fib_src_dynamic);
+             /* Only the first matching table is flushed */
+             break;
+           }
+       }
+
+      vec_free (selected);
+    }
+
+  vec_free (itfs_by_fib);
+
+  return 0;
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_router_link_up_down);
+
 static fib_protocol_t
 lcp_router_proto_k2f (uint32_t k)
 {
@@ -506,6 +662,22 @@ lcp_router_link_addr_add (struct rtnl_addr *la)
   lcp_router_link_addr_add_del (la, 0);
 }
 
+/*
+ * Installed as the nvl_rt_addr_sync_begin callback in lcp_router_vft.
+ * Calls ip_interface_address_mark () and then logs the start of the
+ * synchronization. The matching sweep is issued by
+ * lcp_router_link_addr_sync_end ().
+ */
+static void
+lcp_router_link_addr_sync_begin (void)
+{
+  ip_interface_address_mark ();
+
+  LCP_ROUTER_INFO ("Begin synchronization of interface addresses");
+}
+
+/*
+ * Installed as the nvl_rt_addr_sync_end callback in lcp_router_vft.
+ * Calls ip_interface_address_sweep () and then logs the end of the
+ * synchronization. The matching mark is issued by
+ * lcp_router_link_addr_sync_begin ().
+ */
+static void
+lcp_router_link_addr_sync_end (void)
+{
+  ip_interface_address_sweep ();
+
+  LCP_ROUTER_INFO ("End synchronization of interface addresses");
+}
+
 static void
 lcp_router_mk_mac_addr (const struct nl_addr *rna, mac_address_t *mac)
 {
@@ -526,6 +698,14 @@ lcp_router_neigh_del (struct rtnl_neigh *rn)
 
       lcp_router_mk_addr (rtnl_neigh_get_dst (rn), &nh);
 
+      if (ip46_address_is_multicast (&ip_addr_46 (&nh)))
+       {
+         LCP_ROUTER_DBG ("ignore neighbor del: %U %U", format_ip_address, &nh,
+                         format_vnet_sw_if_index_name, vnet_get_main (),
+                         sw_if_index);
+         return;
+       }
+
       rv = ip_neighbor_del (&nh, sw_if_index);
 
       if (rv)
@@ -566,6 +746,15 @@ lcp_router_neigh_add (struct rtnl_neigh *rn)
       int state;
 
       lcp_router_mk_addr (rtnl_neigh_get_dst (rn), &nh);
+
+      if (ip46_address_is_multicast (&ip_addr_46 (&nh)))
+       {
+         LCP_ROUTER_DBG ("ignore neighbor add: %U %U", format_ip_address, &nh,
+                         format_vnet_sw_if_index_name, vnet_get_main (),
+                         sw_if_index);
+         return;
+       }
+
       ll = rtnl_neigh_get_lladdr (rn);
       state = rtnl_neigh_get_state (rn);
 
@@ -606,6 +795,24 @@ lcp_router_neigh_add (struct rtnl_neigh *rn)
                     rtnl_neigh_get_ifindex (rn));
 }
 
+/*
+ * Installed as the nvl_rt_neigh_sync_begin callback in lcp_router_vft.
+ * Marks neighbors for both address families (IPv4 first, then IPv6) and
+ * logs the start of the synchronization.
+ */
+static void
+lcp_router_neigh_sync_begin (void)
+{
+  ip_neighbor_mark (AF_IP4);
+  ip_neighbor_mark (AF_IP6);
+
+  LCP_ROUTER_INFO ("Begin synchronization of neighbors");
+}
+
+/*
+ * Installed as the nvl_rt_neigh_sync_end callback in lcp_router_vft.
+ * Sweeps neighbors for both address families (IPv4 first, then IPv6) and
+ * logs the end of the synchronization.
+ */
+static void
+lcp_router_neigh_sync_end (void)
+{
+  ip_neighbor_sweep (AF_IP4);
+  ip_neighbor_sweep (AF_IP6);
+
+  LCP_ROUTER_INFO ("End synchronization of neighbors");
+}
+
 static lcp_router_table_t *
 lcp_router_table_find (uint32_t id, fib_protocol_t fproto)
 {
@@ -1008,15 +1215,119 @@ lcp_router_route_add (struct rtnl_route *rr)
     }
 }
 
+/*
+ * Installed as the nvl_rt_route_sync_begin callback in lcp_router_vft.
+ * For every table in lcp_router_table_pool, marks the FIB table for both
+ * route sources (lcp_rt_fib_src, then lcp_rt_fib_src_dynamic) and logs one
+ * line per table.
+ */
+static void
+lcp_router_route_sync_begin (void)
+{
+  lcp_router_table_t *table;
+
+  pool_foreach (table, lcp_router_table_pool)
+    {
+      const u32 fib_index = table->nlt_fib_index;
+      const fib_protocol_t proto = table->nlt_proto;
+
+      fib_table_mark (fib_index, proto, lcp_rt_fib_src);
+      fib_table_mark (fib_index, proto, lcp_rt_fib_src_dynamic);
+
+      LCP_ROUTER_INFO ("Begin synchronization of %U routes in table %u",
+                      format_fib_protocol, proto, fib_index);
+    }
+}
+
+/*
+ * Installed as the nvl_rt_route_sync_end callback in lcp_router_vft.
+ * For every table in lcp_router_table_pool, sweeps the FIB table for both
+ * route sources (lcp_rt_fib_src, then lcp_rt_fib_src_dynamic) and logs one
+ * line per table.
+ */
+static void
+lcp_router_route_sync_end (void)
+{
+  lcp_router_table_t *table;
+
+  pool_foreach (table, lcp_router_table_pool)
+    {
+      const u32 fib_index = table->nlt_fib_index;
+      const fib_protocol_t proto = table->nlt_proto;
+
+      fib_table_sweep (fib_index, proto, lcp_rt_fib_src);
+      fib_table_sweep (fib_index, proto, lcp_rt_fib_src_dynamic);
+
+      LCP_ROUTER_INFO ("End synchronization of %U routes in table %u",
+                      format_fib_protocol, proto, fib_index);
+    }
+}
+
+typedef struct lcp_router_table_flush_ctx_t_ /* state passed from lcp_router_table_flush () to its table-walk callback */
+{
+  fib_node_index_t *lrtf_entries; /* vector of FIB entry indices the callback selected for deletion */
+  u32 *lrtf_sw_if_index_to_bool; /* vector indexed by sw_if_index; a non-zero element selects that interface */
+  fib_source_t lrtf_source; /* source used for the resolving-interface lookup */
+} lcp_router_table_flush_ctx_t;
+
+/*
+ * fib_table_walk () callback used by lcp_router_table_flush ().
+ *
+ * arg is a lcp_router_table_flush_ctx_t. The entry index is appended to
+ * lrtf_entries when the interface the entry resolves through (for
+ * lrtf_source) is within the bounds of lrtf_sw_if_index_to_bool and
+ * flagged there. The walk always continues.
+ */
+static fib_table_walk_rc_t
+lcp_router_table_flush_cb (fib_node_index_t fib_entry_index, void *arg)
+{
+  lcp_router_table_flush_ctx_t *flush = arg;
+  u32 resolving_itf;
+
+  resolving_itf = fib_entry_get_resolving_interface_for_source (
+    fib_entry_index, flush->lrtf_source);
+
+  /* Out of range means the interface was never flagged */
+  if (resolving_itf >= vec_len (flush->lrtf_sw_if_index_to_bool))
+    return (FIB_TABLE_WALK_CONTINUE);
+
+  if (!flush->lrtf_sw_if_index_to_bool[resolving_itf])
+    return (FIB_TABLE_WALK_CONTINUE);
+
+  vec_add1 (flush->lrtf_entries, fib_entry_index);
+
+  return (FIB_TABLE_WALK_CONTINUE);
+}
+
+/*
+ * Delete from router table nlt the FIB entries whose resolving interface
+ * (for the given source) is flagged in sw_if_index_to_bool.
+ *
+ * nlt                  table whose FIB (nlt_fib_index / nlt_proto) is walked
+ * sw_if_index_to_bool  vector indexed by sw_if_index; non-zero selects it
+ * source               FIB source to look up and to delete entries for
+ *
+ * Works in two passes: the walk only collects matching entry indices, and
+ * the deletions happen afterwards (presumably so that the table is not
+ * modified while it is being walked). Each deletion is followed by one
+ * lcp_router_table_unlock () on nlt.
+ */
+static void
+lcp_router_table_flush (lcp_router_table_t *nlt, u32 *sw_if_index_to_bool,
+                       fib_source_t source)
+{
+  lcp_router_table_flush_ctx_t walk;
+  u32 i;
+
+  walk.lrtf_entries = NULL;
+  walk.lrtf_sw_if_index_to_bool = sw_if_index_to_bool;
+  walk.lrtf_source = source;
+
+  LCP_ROUTER_DBG (
+    "Flush table: proto %U, fib-index %u, max sw_if_index %u, source %U",
+    format_fib_protocol, nlt->nlt_proto, nlt->nlt_fib_index,
+    vec_len (sw_if_index_to_bool) - 1, format_fib_source, source);
+
+  /* Pass 1: collect the entries to remove */
+  fib_table_walk (nlt->nlt_fib_index, nlt->nlt_proto,
+                 lcp_router_table_flush_cb, &walk);
+
+  LCP_ROUTER_DBG ("Flush table: entries number to delete %u",
+                 vec_len (walk.lrtf_entries));
+
+  /* Pass 2: delete them */
+  vec_foreach_index (i, walk.lrtf_entries)
+    {
+      fib_table_entry_delete_index (walk.lrtf_entries[i], source);
+      lcp_router_table_unlock (nlt);
+    }
+
+  vec_free (walk.lrtf_entries);
+}
+
 const nl_vft_t lcp_router_vft = { /* callback table: add/del plus sync begin/end hooks for links, addresses, neighbors and routes */
   .nvl_rt_link_add = { .is_mp_safe = 0, .cb = lcp_router_link_add },
   .nvl_rt_link_del = { .is_mp_safe = 0, .cb = lcp_router_link_del },
+  .nvl_rt_link_sync_begin = { .is_mp_safe = 0,
+                             .cb = lcp_router_link_sync_begin },
+  .nvl_rt_link_sync_end = { .is_mp_safe = 0, .cb = lcp_router_link_sync_end },
   .nvl_rt_addr_add = { .is_mp_safe = 0, .cb = lcp_router_link_addr_add },
   .nvl_rt_addr_del = { .is_mp_safe = 0, .cb = lcp_router_link_addr_del },
+  .nvl_rt_addr_sync_begin = { .is_mp_safe = 0,
+                             .cb = lcp_router_link_addr_sync_begin },
+  .nvl_rt_addr_sync_end = { .is_mp_safe = 0,
+                           .cb = lcp_router_link_addr_sync_end },
   .nvl_rt_neigh_add = { .is_mp_safe = 0, .cb = lcp_router_neigh_add },
   .nvl_rt_neigh_del = { .is_mp_safe = 0, .cb = lcp_router_neigh_del },
+  .nvl_rt_neigh_sync_begin = { .is_mp_safe = 0,
+                              .cb = lcp_router_neigh_sync_begin },
+  .nvl_rt_neigh_sync_end = { .is_mp_safe = 0,
+                            .cb = lcp_router_neigh_sync_end },
   .nvl_rt_route_add = { .is_mp_safe = 1, .cb = lcp_router_route_add }, /* route add/del are the only entries with is_mp_safe set to 1 */
   .nvl_rt_route_del = { .is_mp_safe = 1, .cb = lcp_router_route_del },
+  .nvl_rt_route_sync_begin = { .is_mp_safe = 0,
+                              .cb = lcp_router_route_sync_begin },
+  .nvl_rt_route_sync_end = { .is_mp_safe = 0,
+                            .cb = lcp_router_route_sync_end },
 };
 
 static clib_error_t *