Thread-safe ARP / ND throttling
[vpp.git] / src / vnet / ip / ip4_forward.c
index 3444eb8..11bcf3a 100644 (file)
@@ -114,14 +114,14 @@ VLIB_REGISTER_NODE (ip4_lookup_node) =
 
 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
 
-always_inline uword
+static uword
 ip4_load_balance (vlib_main_t * vm,
                  vlib_node_runtime_t * node, vlib_frame_t * frame)
 {
   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
   u32 n_left_from, n_left_to_next, *from, *to_next;
   ip_lookup_next_t next;
-  u32 thread_index = vlib_get_thread_index ();
+  u32 thread_index = vm->thread_index;
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -340,6 +340,45 @@ ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
   return result;
 }
 
+static void
+ip4_add_subnet_bcast_route (u32 fib_index,
+                            fib_prefix_t *pfx,
+                            u32 sw_if_index)
+{
+  vnet_sw_interface_flags_t iflags;
+
+  iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
+
+  fib_table_entry_special_remove(fib_index,
+                                 pfx,
+                                 FIB_SOURCE_INTERFACE);
+
+  if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
+    {
+      fib_table_entry_update_one_path (fib_index, pfx,
+                                       FIB_SOURCE_INTERFACE,
+                                       FIB_ENTRY_FLAG_NONE,
+                                       DPO_PROTO_IP4,
+                                       /* No next-hop address */
+                                       &ADJ_BCAST_ADDR,
+                                       sw_if_index,
+                                       // invalid FIB index
+                                       ~0,
+                                       1,
+                                       // no out-label stack
+                                       NULL,
+                                       FIB_ROUTE_PATH_FLAG_NONE);
+    }
+  else
+    {
+        fib_table_entry_special_add(fib_index,
+                                    pfx,
+                                    FIB_SOURCE_INTERFACE,
+                                    (FIB_ENTRY_FLAG_DROP |
+                                     FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
+    }
+}
+
 static void
 ip4_add_interface_routes (u32 sw_if_index,
                          ip4_main_t * im, u32 fib_index,
@@ -385,11 +424,7 @@ ip4_add_interface_routes (u32 sw_if_index,
                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
-        fib_table_entry_special_add(fib_index,
-                                    &net_pfx,
-                                    FIB_SOURCE_INTERFACE,
-                                    (FIB_ENTRY_FLAG_DROP |
-                                     FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
+        ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
     }
   else if (pfx.fp_len == 31)
     {
@@ -637,6 +672,45 @@ ip4_add_del_interface_address (vlib_main_t * vm,
     (vm, sw_if_index, address, address_length, is_del);
 }
 
+void
+ip4_directed_broadcast (u32 sw_if_index, u8 enable)
+{
+  ip_interface_address_t *ia;
+  ip4_main_t *im;
+
+  im = &ip4_main;
+
+  /*
+   * when directed broadcast is enabled, the subnet braodcast route will forward
+   * packets using an adjacency with a broadcast MAC. otherwise it drops
+   */
+  /* *INDENT-OFF* */
+  foreach_ip_interface_address(&im->lookup_main, ia,
+                               sw_if_index, 0,
+     ({
+       if (ia->address_length <= 30)
+         {
+           ip4_address_t *ipa;
+
+           ipa = ip_interface_address_get_address (&im->lookup_main, ia);
+
+           fib_prefix_t pfx = {
+             .fp_len = 32,
+             .fp_proto = FIB_PROTOCOL_IP4,
+             .fp_addr = {
+               .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
+             },
+           };
+
+           ip4_add_subnet_bcast_route
+             (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
+                                                  sw_if_index),
+              &pfx, sw_if_index);
+         }
+     }));
+  /* *INDENT-ON* */
+}
+
 /* Built-in ip4 unicast rx feature path definition */
 /* *INDENT-OFF* */
 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
@@ -1648,29 +1722,23 @@ ip4_arp_inline (vlib_main_t * vm,
   ip_lookup_main_t *lm = &im->lookup_main;
   u32 *from, *to_next_drop;
   uword n_left_from, n_left_to_next_drop, next_index;
-  static f64 time_last_seed_change = -1e100;
-  static u32 hash_seeds[3];
-  static uword hash_bitmap[256 / BITS (uword)];
+  u32 thread_index = vm->thread_index;
+  u32 seed;
   f64 time_now;
 
   if (node->flags & VLIB_NODE_FLAG_TRACE)
     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
 
   time_now = vlib_time_now (vm);
-  if (time_now - time_last_seed_change > 1e-3)
+  if (time_now - im->arp_throttle_last_seed_change_time[thread_index] > 1e-3)
     {
-      uword i;
-      u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
-                                           sizeof (hash_seeds));
-      for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
-       hash_seeds[i] = r[i];
-
-      /* Mark all hash keys as been no-seen before. */
-      for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
-       hash_bitmap[i] = 0;
+      (void) random_u32 (&im->arp_throttle_seeds[thread_index]);
+      memset (im->arp_throttle_bitmaps[thread_index], 0,
+             ARP_THROTTLE_BITS / BITS (u8));
 
-      time_last_seed_change = time_now;
+      im->arp_throttle_last_seed_change_time[thread_index] = time_now;
     }
+  seed = im->arp_throttle_seeds[thread_index];
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -1685,11 +1753,11 @@ ip4_arp_inline (vlib_main_t * vm,
 
       while (n_left_from > 0 && n_left_to_next_drop > 0)
        {
-         u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
+         u32 pi0, adj_index0, r0, w0, sw_if_index0, drop0;
+         uword m0;
          ip_adjacency_t *adj0;
          vlib_buffer_t *p0;
          ip4_header_t *ip0;
-         uword bm0;
 
          pi0 = from[0];
 
@@ -1699,39 +1767,30 @@ ip4_arp_inline (vlib_main_t * vm,
          adj0 = adj_get (adj_index0);
          ip0 = vlib_buffer_get_current (p0);
 
-         a0 = hash_seeds[0];
-         b0 = hash_seeds[1];
-         c0 = hash_seeds[2];
-
          sw_if_index0 = adj0->rewrite_header.sw_if_index;
          vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
 
-         if (is_glean)
+         if (PREDICT_TRUE (is_glean))
            {
              /*
               * this is the Glean case, so we are ARPing for the
               * packet's destination
               */
-             a0 ^= ip0->dst_address.data_u32;
+             r0 = ip0->dst_address.data_u32;
            }
          else
            {
-             a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
+             r0 = adj0->sub_type.nbr.next_hop.ip4.data_u32;
            }
-         b0 ^= sw_if_index0;
-
-         hash_v3_mix32 (a0, b0, c0);
-         hash_v3_finalize32 (a0, b0, c0);
-
-         c0 &= BITS (hash_bitmap) - 1;
-         m0 = (uword) 1 << (c0 % BITS (uword));
-         c0 = c0 / BITS (uword);
 
-         bm0 = hash_bitmap[c0];
-         drop0 = (bm0 & m0) != 0;
+         r0 ^= seed;
+         /* Select bit number */
+         r0 &= ARP_THROTTLE_BITS - 1;
+         w0 = r0 / BITS (uword);
+         m0 = (uword) 1 << (r0 % BITS (uword));
 
-         /* Mark it as seen. */
-         hash_bitmap[c0] = bm0 | m0;
+         drop0 = (im->arp_throttle_bitmaps[thread_index][w0] & m0) != 0;
+         im->arp_throttle_bitmaps[thread_index][w0] |= m0;
 
          from += 1;
          n_left_from -= 1;
@@ -1918,7 +1977,8 @@ VLIB_INIT_FUNCTION (arp_notrace_init);
 
 /* Send an ARP request to see if given destination is reachable on given interface. */
 clib_error_t *
-ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
+ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
+                   u8 refresh)
 {
   vnet_main_t *vnm = vnet_get_main ();
   ip4_main_t *im = &ip4_main;
@@ -1931,6 +1991,7 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
   vlib_buffer_t *b;
   adj_index_t ai;
   u32 bi = 0;
+  u8 unicast_rewrite = 0;
 
   si = vnet_get_sw_interface (vnm, sw_if_index);
 
@@ -1958,6 +2019,9 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
                                       &im->ip4_arp_request_packet_template,
                                       &bi);
 
+  if (!h)
+    return clib_error_return (0, "ARP request packet allocation failed");
+
   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
   if (PREDICT_FALSE (!hi->hw_address))
     {
@@ -1988,14 +2052,24 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
   /* Peer has been previously resolved, retrieve glean adj instead */
   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
     {
-      adj_unlock (ai);
-      ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
-                                 VNET_LINK_IP4, sw_if_index, &nh);
-      adj = adj_get (ai);
+      if (refresh)
+       unicast_rewrite = 1;
+      else
+       {
+         adj_unlock (ai);
+         ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
+                                     VNET_LINK_IP4, sw_if_index, &nh);
+         adj = adj_get (ai);
+       }
     }
 
   /* Add encapsulation string for software interface (e.g. ethernet header). */
   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
+  if (unicast_rewrite)
+    {
+      u16 *etype = vlib_buffer_get_current (b) - 2;
+      etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
+    }
   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
 
   {
@@ -2067,7 +2141,7 @@ ip4_rewrite_inline (vlib_main_t * vm,
 
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
-  u32 thread_index = vlib_get_thread_index ();
+  u32 thread_index = vm->thread_index;
 
   while (n_left_from > 0)
     {
@@ -2506,6 +2580,16 @@ ip4_rewrite (vlib_main_t * vm,
     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
 }
 
+static uword
+ip4_rewrite_bcast (vlib_main_t * vm,
+                  vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  if (adj_are_counters_enabled ())
+    return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
+  else
+    return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
+}
+
 static uword
 ip4_midchain (vlib_main_t * vm,
              vlib_node_runtime_t * node, vlib_frame_t * frame)
@@ -2551,7 +2635,16 @@ VLIB_REGISTER_NODE (ip4_rewrite_node) = {
     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
   },
 };
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
+
+VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
+  .function = ip4_rewrite_bcast,
+  .name = "ip4-rewrite-bcast",
+  .vector_size = sizeof (u32),
+
+  .format_trace = format_ip4_rewrite_trace,
+  .sibling_of = "ip4-rewrite",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_bcast_node, ip4_rewrite_bcast)
 
 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
   .function = ip4_rewrite_mcast,