X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fbonding%2Fcli.c;h=09ef10f1b0598c53b998bad6ac06e08b9f893570;hb=073d74d0b;hp=cb344c611c28363901d88f79018771d9cbb0d0fa;hpb=6865d3c2ac01d78a2f313c4fb3e74cf409328e58;p=vpp.git diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c index cb344c611c2..09ef10f1b05 100644 --- a/src/vnet/bonding/cli.c +++ b/src/vnet/bonding/cli.c @@ -20,6 +20,7 @@ #include #include #include +#include void bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) @@ -28,8 +29,6 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) bond_if_t *bif; int i; uword p; - vnet_main_t *vnm = vnet_get_main (); - vnet_hw_interface_t *hw; u8 switching_active = 0; bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); @@ -39,53 +38,96 @@ bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) p = *vec_elt_at_index (bif->active_slaves, i); if (p == sif->sw_if_index) { - if (sif->sw_if_index == bif->sw_if_index_working) + if ((bif->mode == BOND_MODE_ACTIVE_BACKUP) && (i == 0) && + (vec_len (bif->active_slaves) > 1)) + /* deleting the active slave for active-backup */ + switching_active = 1; + vec_del1 (bif->active_slaves, i); + if (sif->lacp_enabled && bif->numa_only) { - switching_active = 1; - if (bif->mode == BOND_MODE_ACTIVE_BACKUP) - bif->is_local_numa = 0; + /* For lacp mode, if we check it is a slave on local numa node, + bif->n_numa_slaves should be decreased by 1 becasue the first + bif->n_numa_slaves are all slaves on local numa node */ + if (i < bif->n_numa_slaves) + { + bif->n_numa_slaves--; + ASSERT (bif->n_numa_slaves >= 0); + } } - vec_del1 (bif->active_slaves, i); - hash_unset (bif->active_slave_by_sw_if_index, sif->sw_if_index); break; } } /* We get a new slave just becoming active */ - if ((bif->mode == BOND_MODE_ACTIVE_BACKUP) && switching_active) - { - if ((vec_len (bif->active_slaves) >= 1)) - { - /* scan all slaves and try to find the first slave with local numa node. */ - vec_foreach_index (i, bif->active_slaves) - { - p = *vec_elt_at_index (bif->active_slaves, i); - hw = vnet_get_sup_hw_interface (vnm, p); - if (vm->numa_node == hw->numa_node) - { - bif->sw_if_index_working = p; - bif->is_local_numa = 1; - vlib_process_signal_event (bm->vlib_main, - bond_process_node.index, - BOND_SEND_GARP_NA, - bif->hw_if_index); - break; - } - } - } + if (switching_active) + vlib_process_signal_event (bm->vlib_main, bond_process_node.index, + BOND_SEND_GARP_NA, bif->hw_if_index); + clib_spinlock_unlock_if_init (&bif->lockp); +} + +/* + * return 1 if s2 is preferred. + * return -1 if s1 is preferred. + */ +static int +bond_slave_sort (void *a1, void *a2) +{ + u32 *s1 = a1; + u32 *s2 = a2; + slave_if_t *sif1 = bond_get_slave_by_sw_if_index (*s1); + slave_if_t *sif2 = bond_get_slave_by_sw_if_index (*s2); + bond_if_t *bif; - /* No local numa node is found in the active slave set. Use the first slave */ - if ((bif->is_local_numa == 0) && (vec_len (bif->active_slaves) >= 1)) + ALWAYS_ASSERT (sif1); + ALWAYS_ASSERT (sif2); + /* + * sort entries according to preference rules: + * 1. biggest weight + * 2. numa-node + * 3. current active slave (to prevent churning) + * 4. lowest sw_if_index (for deterministic behavior) + * + */ + if (sif2->weight > sif1->weight) + return 1; + if (sif2->weight < sif1->weight) + return -1; + else + { + if (sif2->is_local_numa > sif1->is_local_numa) + return 1; + if (sif2->is_local_numa < sif1->is_local_numa) + return -1; + else { - p = *vec_elt_at_index (bif->active_slaves, 0); - bif->sw_if_index_working = p; - vlib_process_signal_event (bm->vlib_main, bond_process_node.index, - BOND_SEND_GARP_NA, bif->hw_if_index); + bif = bond_get_master_by_dev_instance (sif1->bif_dev_instance); + /* Favor the current active slave to avoid churning */ + if (bif->active_slaves[0] == sif2->sw_if_index) + return 1; + if (bif->active_slaves[0] == sif1->sw_if_index) + return -1; + /* go for the tiebreaker as the last resort */ + if (sif1->sw_if_index > sif2->sw_if_index) + return 1; + if (sif1->sw_if_index < sif2->sw_if_index) + return -1; + else + ASSERT (0); } } - clib_spinlock_unlock_if_init (&bif->lockp); + return 0; +} + +static void +bond_sort_slaves (bond_if_t * bif) +{ + bond_main_t *bm = &bond_main; + u32 old_active = bif->active_slaves[0]; - return; + vec_sort_with_function (bif->active_slaves, bond_slave_sort); + if (old_active != bif->active_slaves[0]) + vlib_process_signal_event (bm->vlib_main, bond_process_node.index, + BOND_SEND_GARP_NA, bif->hw_if_index); } void @@ -100,50 +142,35 @@ bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif) bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); clib_spinlock_lock_if_init (&bif->lockp); - if (!hash_get (bif->active_slave_by_sw_if_index, sif->sw_if_index)) - { - hash_set (bif->active_slave_by_sw_if_index, sif->sw_if_index, - sif->sw_if_index); - vec_add1 (bif->active_slaves, sif->sw_if_index); + vec_foreach_index (i, bif->active_slaves) + { + p = *vec_elt_at_index (bif->active_slaves, i); + if (p == sif->sw_if_index) + goto done; + } - /* First slave becomes active? */ - if ((vec_len (bif->active_slaves) == 1) && - (bif->mode == BOND_MODE_ACTIVE_BACKUP)) - { - bif->sw_if_index_working = sif->sw_if_index; - bif->is_local_numa = (vm->numa_node == hw->numa_node) ? 1 : 0; - vlib_process_signal_event (bm->vlib_main, bond_process_node.index, - BOND_SEND_GARP_NA, bif->hw_if_index); - } - else if ((vec_len (bif->active_slaves) > 1) - && (bif->mode == BOND_MODE_ACTIVE_BACKUP) - && bif->is_local_numa == 0) - { - if (vm->numa_node == hw->numa_node) - { - vec_foreach_index (i, bif->active_slaves) - { - p = *vec_elt_at_index (bif->active_slaves, 0); - if (p == sif->sw_if_index) - break; - - vec_del1 (bif->active_slaves, 0); - hash_unset (bif->active_slave_by_sw_if_index, p); - vec_add1 (bif->active_slaves, p); - hash_set (bif->active_slave_by_sw_if_index, p, p); - } - bif->sw_if_index_working = sif->sw_if_index; - bif->is_local_numa = 1; - vlib_process_signal_event (bm->vlib_main, - bond_process_node.index, - BOND_SEND_GARP_NA, bif->hw_if_index); + if (sif->lacp_enabled && bif->numa_only && (vm->numa_node == hw->numa_node)) + { + vec_insert_elts (bif->active_slaves, &sif->sw_if_index, 1, + bif->n_numa_slaves); + bif->n_numa_slaves++; + } + else + vec_add1 (bif->active_slaves, sif->sw_if_index); - } - } + sif->is_local_numa = (vm->numa_node == hw->numa_node) ? 1 : 0; + if (bif->mode == BOND_MODE_ACTIVE_BACKUP) + { + if (vec_len (bif->active_slaves) == 1) + /* First slave becomes active? */ + vlib_process_signal_event (bm->vlib_main, bond_process_node.index, + BOND_SEND_GARP_NA, bif->hw_if_index); + else + bond_sort_slaves (bif); } - clib_spinlock_unlock_if_init (&bif->lockp); - return; +done: + clib_spinlock_unlock_if_init (&bif->lockp); } int @@ -165,9 +192,12 @@ bond_dump_ifs (bond_interface_details_t ** out_bondifs) hi = vnet_get_hw_interface (vnm, bif->hw_if_index); clib_memcpy(bondif->interface_name, hi->name, MIN (ARRAY_LEN (bondif->interface_name) - 1, - strlen ((const char *) hi->name))); + vec_len ((const char *) hi->name))); + /* enforce by memset() above */ + ASSERT(0 == bondif->interface_name[ARRAY_LEN (bondif->interface_name) - 1]); bondif->mode = bif->mode; bondif->lb = bif->lb; + bondif->numa_only = bif->numa_only; bondif->active_slaves = vec_len (bif->active_slaves); bondif->slaves = vec_len (bif->slaves); ); @@ -206,10 +236,16 @@ bond_dump_slave_ifs (slave_interface_details_t ** out_slaveifs, hi = vnet_get_hw_interface (vnm, sw->hw_if_index); clib_memcpy (slaveif->interface_name, hi->name, MIN (ARRAY_LEN (slaveif->interface_name) - 1, - strlen ((const char *) hi->name))); + vec_len ((const char *) hi->name))); + /* enforce by memset() above */ + ASSERT (0 == + slaveif->interface_name[ARRAY_LEN (slaveif->interface_name) - + 1]); slaveif->sw_if_index = sif->sw_if_index; slaveif->is_passive = sif->is_passive; slaveif->is_long_timeout = sif->is_long_timeout; + slaveif->is_local_numa = sif->is_local_numa; + slaveif->weight = sif->weight; } } *out_slaveifs = r_slaveifs; @@ -217,6 +253,31 @@ bond_dump_slave_ifs (slave_interface_details_t ** out_slaveifs, return 0; } +/* + * Manage secondary mac addresses when attaching/detaching a slave. + * If adding, copies any secondary addresses from master to slave + * If deleting, deletes the master's secondary addresses from the slave + * + */ +static void +bond_slave_add_del_mac_addrs (bond_if_t * bif, u32 sif_sw_if_index, u8 is_add) +{ + vnet_main_t *vnm = vnet_get_main (); + ethernet_interface_t *b_ei; + mac_address_t *sec_mac; + vnet_hw_interface_t *s_hwif; + + b_ei = ethernet_get_interface (ðernet_main, bif->hw_if_index); + if (!b_ei || !b_ei->secondary_addrs) + return; + + s_hwif = vnet_get_sup_hw_interface (vnm, sif_sw_if_index); + + vec_foreach (sec_mac, b_ei->secondary_addrs) + vnet_hw_interface_add_del_mac_address (vnm, s_hwif->hw_if_index, + sec_mac->bytes, is_add); +} + static void bond_delete_neighbor (vlib_main_t * vm, bond_if_t * bif, slave_if_t * sif) { @@ -246,15 +307,27 @@ bond_delete_neighbor (vlib_main_t * vm, bond_if_t * bif, slave_if_t * sif) bond_disable_collecting_distributing (vm, sif); vnet_feature_enable_disable ("device-input", "bond-input", - sif_hw->hw_if_index, 0, 0, 0); + sif->sw_if_index, 0, 0, 0); /* Put back the old mac */ vnet_hw_interface_change_mac_address (vnm, sif_hw->hw_if_index, sif->persistent_hw_address); + /* delete the bond's secondary/virtual mac addrs from the slave */ + bond_slave_add_del_mac_addrs (bif, sif->sw_if_index, 0 /* is_add */ ); + + if ((bif->mode == BOND_MODE_LACP) && bm->lacp_enable_disable) (*bm->lacp_enable_disable) (vm, bif, sif, 0); + if (bif->mode == BOND_MODE_LACP) + { + stat_segment_deregister_state_counter + (bm->stats[bif->sw_if_index][sif->sw_if_index].actor_state); + stat_segment_deregister_state_counter + (bm->stats[bif->sw_if_index][sif->sw_if_index].partner_state); + } + pool_put (bm->neighbors, sif); } @@ -267,8 +340,7 @@ bond_delete_if (vlib_main_t * vm, u32 sw_if_index) slave_if_t *sif; vnet_hw_interface_t *hw; u32 *sif_sw_if_index; - u32 **s_list = 0; - u32 i; + u32 *s_list = 0; hw = vnet_get_sup_hw_interface (vnm, sw_if_index); if (hw == NULL || bond_dev_class.index != hw->dev_class_index) @@ -276,21 +348,14 @@ bond_delete_if (vlib_main_t * vm, u32 sw_if_index) bif = bond_get_master_by_dev_instance (hw->dev_instance); - vec_foreach (sif_sw_if_index, bif->slaves) + vec_append (s_list, bif->slaves); + vec_foreach (sif_sw_if_index, s_list) { - vec_add1 (s_list, sif_sw_if_index); + sif = bond_get_slave_by_sw_if_index (*sif_sw_if_index); + if (sif) + bond_delete_neighbor (vm, bif, sif); } - - for (i = 0; i < vec_len (s_list); i++) - { - sif_sw_if_index = s_list[i]; - sif = bond_get_slave_by_sw_if_index (*sif_sw_if_index); - if (sif) - bond_delete_neighbor (vm, bif, sif); - } - - if (s_list) - vec_free (s_list); + vec_free (s_list); /* bring down the interface */ vnet_hw_interface_set_flags (vnm, bif->hw_if_index, 0); @@ -314,6 +379,7 @@ bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args) vnet_main_t *vnm = vnet_get_main (); vnet_sw_interface_t *sw; bond_if_t *bif; + vnet_hw_interface_t *hw; if ((args->mode == BOND_MODE_LACP) && bm->lacp_plugin_loaded == 0) { @@ -339,6 +405,7 @@ bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args) bif->id = args->id; bif->lb = args->lb; bif->mode = args->mode; + bif->gso = args->gso; // Adjust requested interface id if (bif->id == ~0) @@ -388,6 +455,11 @@ bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args) sw = vnet_get_hw_sw_interface (vnm, bif->hw_if_index); bif->sw_if_index = sw->sw_if_index; bif->group = bif->sw_if_index; + bif->numa_only = args->numa_only; + + hw = vnet_get_hw_interface (vnm, bif->hw_if_index); + hw->flags |= (VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO | + VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD); if (vlib_get_thread_main ()->n_vlib_mains > 1) clib_spinlock_init (&bif->lockp); @@ -398,6 +470,7 @@ bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args) // for return args->sw_if_index = bif->sw_if_index; + args->rv = 0; } static clib_error_t * @@ -415,6 +488,7 @@ bond_create_command_fn (vlib_main_t * vm, unformat_input_t * input, args.id = ~0; args.mode = -1; args.lb = BOND_LB_L2; + args.rv = -1; while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { if (unformat (line_input, "mode %U", unformat_bond_mode, &args.mode)) @@ -428,6 +502,16 @@ bond_create_command_fn (vlib_main_t * vm, unformat_input_t * input, args.hw_addr_set = 1; else if (unformat (line_input, "id %u", &args.id)) ; + else if (unformat (line_input, "gso")) + args.gso = 1; + else if (unformat (line_input, "numa-only")) + { + if (args.mode == BOND_MODE_LACP) + args.numa_only = 1; + else + return clib_error_return (0, + "Only lacp mode supports numa-only so far!"); + } else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); @@ -439,6 +523,10 @@ bond_create_command_fn (vlib_main_t * vm, unformat_input_t * input, bond_create_if (vm, &args); + if (!args.rv) + vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, + vnet_get_main (), args.sw_if_index); + return args.error; } @@ -446,8 +534,8 @@ bond_create_command_fn (vlib_main_t * vm, unformat_input_t * input, VLIB_CLI_COMMAND (bond_create_command, static) = { .path = "create bond", .short_help = "create bond mode {round-robin | active-backup | broadcast | " - "{lacp | xor} [load-balance { l2 | l23 | l34 }]} [hw-addr ] " - "[id ]", + "{lacp | xor} [load-balance { l2 | l23 | l34 } [numa-only]]} " + "[hw-addr ] [id ] [gso]", .function = bond_create_command_fn, }; /* *INDENT-ON* */ @@ -535,10 +623,48 @@ bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args) clib_error_return (0, "bond interface cannot be enslaved"); return; } + if (bif->gso && !(sif_hw->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO)) + { + args->rv = VNET_API_ERROR_INVALID_INTERFACE; + args->error = + clib_error_return (0, "slave interface is not gso capable"); + return; + } + if (bif->mode == BOND_MODE_LACP) + { + u8 *name = format (0, "/if/lacp/%u/%u/state%c", bif->sw_if_index, + args->slave, 0); + + vec_validate (bm->stats, bif->sw_if_index); + vec_validate (bm->stats[bif->sw_if_index], args->slave); + + args->error = stat_segment_register_state_counter + (name, &bm->stats[bif->sw_if_index][args->slave].actor_state); + if (args->error != 0) + { + args->rv = VNET_API_ERROR_INVALID_INTERFACE; + vec_free (name); + return; + } + + vec_reset_length (name); + name = format (0, "/if/lacp/%u/%u/partner-state%c", bif->sw_if_index, + args->slave, 0); + args->error = stat_segment_register_state_counter + (name, &bm->stats[bif->sw_if_index][args->slave].partner_state); + vec_free (name); + if (args->error != 0) + { + args->rv = VNET_API_ERROR_INVALID_INTERFACE; + return; + } + } + pool_get (bm->neighbors, sif); clib_memset (sif, 0, sizeof (*sif)); sw = pool_elt_at_index (im->sw_interfaces, args->slave); - sif->port_enabled = sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP; + /* port_enabled is both admin up and hw link up */ + sif->port_enabled = vnet_sw_interface_is_up (vnm, sw->sw_if_index); sif->sw_if_index = sw->sw_if_index; sif->hw_if_index = sw->hw_if_index; sif->packet_template_index = (u8) ~ 0; @@ -591,6 +717,9 @@ bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args) } } + /* if there are secondary/virtual mac addrs, propagate to the slave */ + bond_slave_add_del_mac_addrs (bif, sif->sw_if_index, 1 /* is_add */ ); + if (bif_hw->l2_if_count) { ethernet_set_flags (vnm, sif_hw->hw_if_index, @@ -604,8 +733,7 @@ bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args) if (bm->lacp_enable_disable) (*bm->lacp_enable_disable) (vm, bif, sif, 1); } - else if (sif->port_enabled && - (sif_hw->flags & VNET_HW_INTERFACE_FLAG_LINK_UP)) + else if (sif->port_enabled) { bond_enable_collecting_distributing (vm, sif); } @@ -625,7 +753,7 @@ bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args) } args->rv = vnet_feature_enable_disable ("device-input", "bond-input", - sif_hw->hw_if_index, 1, 0, 0); + sif->sw_if_index, 1, 0, 0); if (args->rv) { @@ -788,6 +916,8 @@ show_bond_details (vlib_main_t * vm) format_bond_mode, bif->mode); vlib_cli_output (vm, " load balance: %U", format_bond_load_balance, bif->lb); + if (bif->gso) + vlib_cli_output (vm, " gso enable"); if (bif->mode == BOND_MODE_ROUND_ROBIN) vlib_cli_output (vm, " last xmit slave index: %u", bif->lb_rr_last_index); @@ -797,6 +927,14 @@ show_bond_details (vlib_main_t * vm) { vlib_cli_output (vm, " %U", format_vnet_sw_if_index_name, vnet_get_main (), *sw_if_index); + if (bif->mode == BOND_MODE_ACTIVE_BACKUP) + { + slave_if_t *sif = bond_get_slave_by_sw_if_index (*sw_if_index); + if (sif) + vlib_cli_output (vm, " weight: %u, is_local_numa: %u, " + "sw_if_index: %u", sif->weight, + sif->is_local_numa, sif->sw_if_index); + } } vlib_cli_output (vm, " number of slaves: %d", vec_len (bif->slaves)); vec_foreach (sw_if_index, bif->slaves) @@ -845,6 +983,113 @@ VLIB_CLI_COMMAND (show_bond_command, static) = { }; /* *INDENT-ON* */ +void +bond_set_intf_weight (vlib_main_t * vm, bond_set_intf_weight_args_t * args) +{ + slave_if_t *sif; + bond_if_t *bif; + vnet_main_t *vnm; + u32 old_weight; + + sif = bond_get_slave_by_sw_if_index (args->sw_if_index); + if (!sif) + { + args->rv = VNET_API_ERROR_INVALID_INTERFACE; + args->error = clib_error_return (0, "Interface not enslaved"); + return; + } + bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); + if (!bif) + { + args->rv = VNET_API_ERROR_INVALID_INTERFACE; + args->error = clib_error_return (0, "bond interface not found"); + return; + } + if (bif->mode != BOND_MODE_ACTIVE_BACKUP) + { + args->rv = VNET_API_ERROR_INVALID_ARGUMENT; + args->error = + clib_error_return (0, "Weight valid for active-backup only"); + return; + } + + old_weight = sif->weight; + sif->weight = args->weight; + vnm = vnet_get_main (); + /* + * No need to sort the list if the affected slave is not up (not in active + * slave set), active slave count is 1, or the current slave is already the + * primary slave and new weight > old weight. + */ + if (!vnet_sw_interface_is_up (vnm, sif->sw_if_index) || + (vec_len (bif->active_slaves) == 1) || + ((bif->active_slaves[0] == sif->sw_if_index) && + (sif->weight >= old_weight))) + return; + + bond_sort_slaves (bif); +} + +static clib_error_t * +bond_set_intf_cmd (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + bond_set_intf_weight_args_t args = { 0 }; + u32 sw_if_index = (u32) ~ 0; + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t *vnm = vnet_get_main (); + u8 weight_enter = 0; + u32 weight = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "Missing required arguments."); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm, + &sw_if_index)) + ; + else if (unformat (line_input, "weight %u", &weight)) + weight_enter = 1; + else + { + clib_error_return (0, "unknown input `%U'", format_unformat_error, + input); + break; + } + } + + unformat_free (line_input); + if (sw_if_index == (u32) ~ 0) + { + args.rv = VNET_API_ERROR_INVALID_INTERFACE; + clib_error_return (0, "Interface name is invalid!"); + } + if (weight_enter == 0) + { + args.rv = VNET_API_ERROR_INVALID_ARGUMENT; + clib_error_return (0, "weight missing"); + } + + args.sw_if_index = sw_if_index; + args.weight = weight; + bond_set_intf_weight (vm, &args); + + return args.error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND(set_interface_bond_cmd, static) = { + .path = "set interface bond", + .short_help = "set interface bond | sw_if_index " + " weight ", + .function = bond_set_intf_cmd, +}; +/* *INDENT-ON* */ + clib_error_t * bond_cli_init (vlib_main_t * vm) {