bonding: add support for numa-only in lacp mode
[vpp.git] / src / vnet / bonding / cli.c
index bccbb2c..371e3c1 100644 (file)
@@ -28,6 +28,8 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
   bond_if_t *bif;
   int i;
   uword p;
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_hw_interface_t *hw;
   u8 switching_active = 0;
 
   bif = bond_get_master_by_dev_instance (sif->bif_dev_instance);
@@ -37,22 +39,64 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
     p = *vec_elt_at_index (bif->active_slaves, i);
     if (p == sif->sw_if_index)
       {
-       /* Are we disabling the very 1st slave? */
-       if (sif->sw_if_index == *vec_elt_at_index (bif->active_slaves, 0))
-         switching_active = 1;
-
+       if (sif->sw_if_index == bif->sw_if_index_working)
+         {
+           switching_active = 1;
+           if (bif->mode == BOND_MODE_ACTIVE_BACKUP)
+             bif->is_local_numa = 0;
+         }
        vec_del1 (bif->active_slaves, i);
        hash_unset (bif->active_slave_by_sw_if_index, sif->sw_if_index);
-
-       /* We got a new slave just becoming active? */
-       if ((vec_len (bif->active_slaves) >= 1) &&
-           (bif->mode == BOND_MODE_ACTIVE_BACKUP) && switching_active)
-         vlib_process_signal_event (bm->vlib_main, bond_process_node.index,
-                                    BOND_SEND_GARP_NA, bif->hw_if_index);
+       if (sif->lacp_enabled && bif->numa_only)
+         {
+           /* For lacp mode, if the slave being removed is on the local numa
+              node, bif->n_numa_slaves should be decreased by 1 because the first
+              bif->n_numa_slaves entries are all slaves on the local numa node */
+           if (i < bif->n_numa_slaves)
+             {
+               bif->n_numa_slaves--;
+               ASSERT (bif->n_numa_slaves >= 0);
+             }
+         }
        break;
       }
   }
+
+  /* We get a new slave just becoming active */
+  if ((bif->mode == BOND_MODE_ACTIVE_BACKUP) && switching_active)
+    {
+      if ((vec_len (bif->active_slaves) >= 1))
+       {
+         /* scan all slaves and try to find the first slave with local numa node. */
+         vec_foreach_index (i, bif->active_slaves)
+         {
+           p = *vec_elt_at_index (bif->active_slaves, i);
+           hw = vnet_get_sup_hw_interface (vnm, p);
+           if (vm->numa_node == hw->numa_node)
+             {
+               bif->sw_if_index_working = p;
+               bif->is_local_numa = 1;
+               vlib_process_signal_event (bm->vlib_main,
+                                          bond_process_node.index,
+                                          BOND_SEND_GARP_NA,
+                                          bif->hw_if_index);
+               break;
+             }
+         }
+       }
+
+      /* No local numa node is found in the active slave set. Use the first slave */
+      if ((bif->is_local_numa == 0) && (vec_len (bif->active_slaves) >= 1))
+       {
+         p = *vec_elt_at_index (bif->active_slaves, 0);
+         bif->sw_if_index_working = p;
+         vlib_process_signal_event (bm->vlib_main, bond_process_node.index,
+                                    BOND_SEND_GARP_NA, bif->hw_if_index);
+       }
+    }
   clib_spinlock_unlock_if_init (&bif->lockp);
+
+  return;
 }
 
 void
@@ -60,6 +104,10 @@ bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
 {
   bond_if_t *bif;
   bond_main_t *bm = &bond_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index);
+  int i;
+  uword p;
 
   bif = bond_get_master_by_dev_instance (sif->bif_dev_instance);
   clib_spinlock_lock_if_init (&bif->lockp);
@@ -67,15 +115,57 @@ bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
     {
       hash_set (bif->active_slave_by_sw_if_index, sif->sw_if_index,
                sif->sw_if_index);
-      vec_add1 (bif->active_slaves, sif->sw_if_index);
+
+      if ((sif->lacp_enabled && bif->numa_only)
+         && (vm->numa_node == hw->numa_node))
+       {
+         vec_insert_elts (bif->active_slaves, &sif->sw_if_index, 1,
+                          bif->n_numa_slaves);
+         bif->n_numa_slaves++;
+       }
+      else
+       {
+         vec_add1 (bif->active_slaves, sif->sw_if_index);
+       }
 
       /* First slave becomes active? */
       if ((vec_len (bif->active_slaves) == 1) &&
          (bif->mode == BOND_MODE_ACTIVE_BACKUP))
-       vlib_process_signal_event (bm->vlib_main, bond_process_node.index,
-                                  BOND_SEND_GARP_NA, bif->hw_if_index);
+       {
+         bif->sw_if_index_working = sif->sw_if_index;
+         bif->is_local_numa = (vm->numa_node == hw->numa_node) ? 1 : 0;
+         vlib_process_signal_event (bm->vlib_main, bond_process_node.index,
+                                    BOND_SEND_GARP_NA, bif->hw_if_index);
+       }
+      else if ((vec_len (bif->active_slaves) > 1)
+              && (bif->mode == BOND_MODE_ACTIVE_BACKUP)
+              && bif->is_local_numa == 0)
+       {
+         if (vm->numa_node == hw->numa_node)
+           {
+             vec_foreach_index (i, bif->active_slaves)
+             {
+               p = *vec_elt_at_index (bif->active_slaves, 0);
+               if (p == sif->sw_if_index)
+                 break;
+
+               vec_del1 (bif->active_slaves, 0);
+               hash_unset (bif->active_slave_by_sw_if_index, p);
+               vec_add1 (bif->active_slaves, p);
+               hash_set (bif->active_slave_by_sw_if_index, p, p);
+             }
+             bif->sw_if_index_working = sif->sw_if_index;
+             bif->is_local_numa = 1;
+             vlib_process_signal_event (bm->vlib_main,
+                                        bond_process_node.index,
+                                        BOND_SEND_GARP_NA, bif->hw_if_index);
+
+           }
+       }
     }
   clib_spinlock_unlock_if_init (&bif->lockp);
+
+  return;
 }
 
 int
@@ -100,6 +190,7 @@ bond_dump_ifs (bond_interface_details_t ** out_bondifs)
                      strlen ((const char *) hi->name)));
     bondif->mode = bif->mode;
     bondif->lb = bif->lb;
+    bondif->numa_only = bif->numa_only;
     bondif->active_slaves = vec_len (bif->active_slaves);
     bondif->slaves = vec_len (bif->slaves);
   );
@@ -320,6 +411,7 @@ bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args)
   sw = vnet_get_hw_sw_interface (vnm, bif->hw_if_index);
   bif->sw_if_index = sw->sw_if_index;
   bif->group = bif->sw_if_index;
+  bif->numa_only = args->numa_only;
   if (vlib_get_thread_main ()->n_vlib_mains > 1)
     clib_spinlock_init (&bif->lockp);
 
@@ -360,6 +452,14 @@ bond_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
        args.hw_addr_set = 1;
       else if (unformat (line_input, "id %u", &args.id))
        ;
+      else if (unformat (line_input, "numa-only"))
+       {
+         if (args.mode == BOND_MODE_LACP)
+           args.numa_only = 1;
+         else
+           return clib_error_return (0,
+                                     "Only lacp mode supports numa-only so far!");
+       }
       else
        return clib_error_return (0, "unknown input `%U'",
                                  format_unformat_error, input);
@@ -378,7 +478,7 @@ bond_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
 VLIB_CLI_COMMAND (bond_create_command, static) = {
   .path = "create bond",
   .short_help = "create bond mode {round-robin | active-backup | broadcast | "
-    "{lacp | xor} [load-balance { l2 | l23 | l34 }]} [hw-addr <mac-address>] "
+    "{lacp | xor} [load-balance { l2 | l23 | l34 } {numa-only}]} [hw-addr <mac-address>] "
     "[id <if-id>]",
   .function = bond_create_command_fn,
 };