X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Fdpdk%2Fdevice%2Finit.c;h=8a7080352e7937eb7982c398c1a031340e9bc41b;hb=2c0a4f407f565d8dd33ff3a9fada346860d30ad2;hp=e20b25852cde62ce77ae83320aeec438e2b5f363;hpb=63c7e14f2e62caa1246349cfe341a93176ec4a43;p=vpp.git diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index e20b25852cd..8a7080352e7 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -37,8 +37,7 @@ dpdk_main_t dpdk_main; #define LINK_STATE_ELOGS 0 -#define DEFAULT_HUGE_DIR "/run/vpp/hugepages" -#define VPP_RUN_DIR "/run/vpp" +#define DEFAULT_HUGE_DIR (VPP_RUN_DIR "/hugepages") /* Port configuration, mildly modified Intel app values */ @@ -55,81 +54,27 @@ static struct rte_eth_conf port_conf_template = { }, }; -clib_error_t * -dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) +static dpdk_port_type_t +port_type_from_speed_capa (struct rte_eth_dev_info *dev_info) { - int rv; - int j; - ASSERT (vlib_get_thread_index () == 0); - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - vnet_hw_interface_set_flags (dm->vnet_main, xd->hw_if_index, 0); - rte_eth_dev_stop (xd->device_index); - } - - rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used, - xd->tx_q_used, &xd->port_conf); - - if (rv < 0) - return clib_error_return (0, "rte_eth_dev_configure[%d]: err %d", - xd->device_index, rv); - - /* Set up one TX-queue per worker thread */ - for (j = 0; j < xd->tx_q_used; j++) - { - rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, - xd->cpu_socket, &xd->tx_conf); - - /* retry with any other CPU socket */ - if (rv < 0) - rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, - SOCKET_ID_ANY, &xd->tx_conf); - if (rv < 0) - break; - } - - if (rv < 0) - return clib_error_return (0, "rte_eth_tx_queue_setup[%d]: err %d", - xd->device_index, rv); - - for (j = 0; j < xd->rx_q_used; j++) - { - uword tidx = vnet_get_device_input_thread_index (dm->vnet_main, - xd->hw_if_index, j); - unsigned lcore = vlib_worker_threads[tidx].lcore_id; - u16 socket_id = rte_lcore_to_socket_id (lcore); - - rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, - xd->cpu_socket, 0, - dm->pktmbuf_pools[socket_id]); - - /* retry with any other CPU socket */ - if (rv < 0) - rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, - SOCKET_ID_ANY, 0, - dm->pktmbuf_pools[socket_id]); - if (rv < 0) - return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d", - xd->device_index, rv); - } - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - int rv; - rv = rte_eth_dev_start (xd->device_index); - if (!rv && xd->default_mac_address) - rv = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) - xd->default_mac_address); - if (rv < 0) - clib_warning ("rte_eth_dev_start %d returned %d", - xd->device_index, rv); - } - return 0; + if (dev_info->speed_capa & ETH_LINK_SPEED_100G) + return VNET_DPDK_PORT_TYPE_ETH_100G; + else if (dev_info->speed_capa & ETH_LINK_SPEED_50G) + return VNET_DPDK_PORT_TYPE_ETH_50G; + else if (dev_info->speed_capa & ETH_LINK_SPEED_40G) + return VNET_DPDK_PORT_TYPE_ETH_40G; + else if (dev_info->speed_capa & ETH_LINK_SPEED_25G) + return VNET_DPDK_PORT_TYPE_ETH_25G; + else if (dev_info->speed_capa & ETH_LINK_SPEED_10G) + return VNET_DPDK_PORT_TYPE_ETH_10G; + else if (dev_info->speed_capa & ETH_LINK_SPEED_1G) + return VNET_DPDK_PORT_TYPE_ETH_1G; + + return VNET_DPDK_PORT_TYPE_UNKNOWN; } + static u32 dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) { @@ -161,7 +106,7 @@ dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes; if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - rte_eth_dev_stop (xd->device_index); + dpdk_device_stop (xd); rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf); @@ -174,16 +119,7 @@ dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - int rv = rte_eth_dev_start (xd->device_index); - if (!rv && xd->default_mac_address) - rv = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) - xd->default_mac_address); - if (rv < 0) - clib_warning ("rte_eth_dev_start %d returned %d", - xd->device_index, rv); - } + dpdk_device_start (xd); } return old; @@ -260,8 +196,8 @@ dpdk_lib_init (dpdk_main_t * dm) "dpdk rx"); if (dm->conf->enable_tcp_udp_checksum) - dm->buffer_flags_template &= ~(IP_BUFFER_L4_CHECKSUM_CORRECT - | IP_BUFFER_L4_CHECKSUM_COMPUTED); + dm->buffer_flags_template &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT + | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED); /* vlib_buffer_t template */ vec_validate_aligned (dm->buffer_templates, tm->n_vlib_mains - 1, @@ -284,7 +220,6 @@ dpdk_lib_init (dpdk_main_t * dm) u8 vlan_strip = 0; int j; struct rte_eth_dev_info dev_info; - clib_error_t *rv; struct rte_eth_link l; dpdk_device_config_t *devconf = 0; vlib_pci_addr_t pci_addr; @@ -411,19 +346,34 @@ dpdk_lib_init (dpdk_main_t * dm) switch (xd->pmd) { - /* 1G adapters */ + /* Drivers with valid speed_capa set */ case VNET_DPDK_PMD_E1000EM: case VNET_DPDK_PMD_IGB: - case VNET_DPDK_PMD_IGBVF: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + case VNET_DPDK_PMD_IXGBE: + case VNET_DPDK_PMD_I40E: + xd->port_type = port_type_from_speed_capa (&dev_info); + xd->flags |= DPDK_DEVICE_FLAG_TX_OFFLOAD | + DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM; + + break; + case VNET_DPDK_PMD_CXGBE: + case VNET_DPDK_PMD_MLX4: + case VNET_DPDK_PMD_MLX5: + xd->port_type = port_type_from_speed_capa (&dev_info); break; - /* 10G adapters */ - case VNET_DPDK_PMD_IXGBE: + /* SR-IOV VFs */ + case VNET_DPDK_PMD_IGBVF: case VNET_DPDK_PMD_IXGBEVF: + case VNET_DPDK_PMD_I40EVF: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF; + xd->port_conf.rxmode.hw_strip_crc = 1; + break; + case VNET_DPDK_PMD_THUNDERX: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF; break; + case VNET_DPDK_PMD_DPAA2: xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; break; @@ -431,116 +381,16 @@ dpdk_lib_init (dpdk_main_t * dm) /* Cisco VIC */ case VNET_DPDK_PMD_ENIC: rte_eth_link_get_nowait (i, &l); - xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; if (l.link_speed == 40000) xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; else xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; break; - /* Intel Fortville */ - case VNET_DPDK_PMD_I40E: - case VNET_DPDK_PMD_I40EVF: - xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - - switch (dev_info.pci_dev->id.device_id) - { - case I40E_DEV_ID_10G_BASE_T: - case I40E_DEV_ID_SFP_XL710: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - case I40E_DEV_ID_QSFP_A: - case I40E_DEV_ID_QSFP_B: - case I40E_DEV_ID_QSFP_C: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - break; - case I40E_DEV_ID_VF: - rte_eth_link_get_nowait (i, &l); - xd->port_type = l.link_speed == 10000 ? - VNET_DPDK_PORT_TYPE_ETH_10G : VNET_DPDK_PORT_TYPE_ETH_40G; - break; - default: - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - } - break; - - case VNET_DPDK_PMD_CXGBE: - switch (dev_info.pci_dev->id.device_id) - { - case 0x540d: /* T580-CR */ - case 0x5410: /* T580-LP-cr */ - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - break; - case 0x5403: /* T540-CR */ - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - default: - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - } - break; - - case VNET_DPDK_PMD_MLX5: - { - char *pn_100g[] = { "MCX415A-CCAT", "MCX416A-CCAT", - "MCX556A-ECAT", "MCX556A-EDAT", "MCX555A-ECAT", - "MCX515A-CCAT", "MCX516A-CCAT", "MCX516A-CDAT", 0 - }; - char *pn_40g[] = { "MCX413A-BCAT", "MCX414A-BCAT", - "MCX415A-BCAT", "MCX416A-BCAT", "MCX4131A-BCAT", 0 - }; - char *pn_10g[] = { "MCX4111A-XCAT", "MCX4121A-XCAT", 0 }; - - vlib_pci_device_t *pd = vlib_get_pci_device (&pci_addr); - u8 *pn = 0; - char **c; - int found = 0; - pn = format (0, "%U%c", - format_vlib_pci_vpd, pd->vpd_r, "PN", 0); - - if (!pn) - break; - - c = pn_100g; - while (!found && c[0]) - { - if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) - { - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_100G; - break; - } - c++; - } - - c = pn_40g; - while (!found && c[0]) - { - if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) - { - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - break; - } - c++; - } - - c = pn_10g; - while (!found && c[0]) - { - if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) - { - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - } - c++; - } - - vec_free (pn); - } - - break; /* Intel Red Rock Canyon */ case VNET_DPDK_PMD_FM10K: xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH; + xd->port_conf.rxmode.hw_strip_crc = 1; break; /* virtio */ @@ -562,11 +412,14 @@ dpdk_lib_init (dpdk_main_t * dm) break; case VNET_DPDK_PMD_BOND: - xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND; xd->port_id = bond_ether_port_id++; break; + case VNET_DPDK_PMD_VIRTIO_USER: + xd->port_type = VNET_DPDK_PORT_TYPE_VIRTIO_USER; + break; + default: xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; } @@ -704,8 +557,8 @@ dpdk_lib_init (dpdk_main_t * dm) sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->hw_if_index); xd->vlib_sw_if_index = sw->sw_if_index; - vnet_set_device_input_node (dm->vnet_main, xd->hw_if_index, - dpdk_input_node.index); + vnet_hw_interface_set_input_node (dm->vnet_main, xd->hw_if_index, + dpdk_input_node.index); if (devconf->workers) { @@ -713,7 +566,7 @@ dpdk_lib_init (dpdk_main_t * dm) q = 0; /* *INDENT-OFF* */ clib_bitmap_foreach (i, devconf->workers, ({ - vnet_device_input_assign_thread (dm->vnet_main, xd->hw_if_index, q++, + vnet_hw_interface_assign_rx_thread (dm->vnet_main, xd->hw_if_index, q++, vdm->first_worker_thread_index + i); })); /* *INDENT-ON* */ @@ -721,19 +574,25 @@ dpdk_lib_init (dpdk_main_t * dm) else for (q = 0; q < xd->rx_q_used; q++) { - vnet_device_input_assign_thread (dm->vnet_main, xd->hw_if_index, q, /* any */ - ~1); + vnet_hw_interface_assign_rx_thread (dm->vnet_main, xd->hw_if_index, q, /* any */ + ~1); } hi = vnet_get_hw_interface (dm->vnet_main, xd->hw_if_index); - rv = dpdk_port_setup (dm, xd); + if (xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD) + hi->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD; - if (rv) - return rv; + dpdk_device_setup (xd); + + if (vec_len (xd->errors)) + clib_warning ("setup failed for device %U. Errors:\n %U", + format_dpdk_device_name, i, + format_dpdk_device_errors, xd); if (devconf->hqos_enabled) { + clib_error_t *rv; rv = dpdk_port_setup_hqos (xd, &devconf->hqos); if (rv) return rv; @@ -964,6 +823,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) dpdk_device_config_t *devconf; vlib_pci_addr_t pci_addr; unformat_input_t sub_input; + uword x; u8 *s, *tmp = 0; u8 *rte_cmd = 0, *ethname = 0; u32 log_level; @@ -976,6 +836,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) u8 *socket_mem = 0; conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword)); + log_level = RTE_LOG_NOTICE; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -992,6 +853,9 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "decimal-interface-names")) conf->interface_name_format_decimal = 1; + else if (unformat (input, "log-level %U", unformat_dpdk_log_level, &x)) + log_level = x; + else if (unformat (input, "no-multi-seg")) conf->no_multi_seg = 1; @@ -1028,8 +892,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) } else if (unformat (input, "num-mbufs %d", &conf->num_mbufs)) ; - else if (unformat (input, "kni %d", &conf->num_kni)) - ; else if (unformat (input, "uio-driver %s", &conf->uio_driver_name)) ; else if (unformat (input, "socket-mem %s", &socket_mem)) @@ -1194,13 +1056,10 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) vec_free (mem_by_socket); - rv = mkdir (VPP_RUN_DIR, 0755); - if (rv && errno != EEXIST) - { - error = clib_error_return (0, "mkdir '%s' failed errno %d", - VPP_RUN_DIR, errno); - goto done; - } + /* Make sure VPP_RUN_DIR exists */ + error = unix_make_vpp_run_dir (); + if (error) + goto done; rv = mkdir (DEFAULT_HUGE_DIR, 0755); if (rv && errno != EEXIST) @@ -1331,9 +1190,11 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) /* Set up DPDK eal and packet mbuf pool early. */ - log_level = (CLIB_DEBUG > 0) ? RTE_LOG_DEBUG : RTE_LOG_NOTICE; - +#if RTE_VERSION >= RTE_VERSION_NUM(17, 5, 0, 0) + rte_log_set_global_level (log_level); +#else rte_set_log_level (log_level); +#endif vm = vlib_get_main (); @@ -1357,13 +1218,13 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) rte_dump_physmem_layout (stdout); /* main thread 1st */ - error = vlib_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ()); + error = dpdk_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ()); if (error) return error; for (i = 0; i < RTE_MAX_LCORE; i++) { - error = vlib_buffer_pool_create (vm, conf->num_mbufs, + error = dpdk_buffer_pool_create (vm, conf->num_mbufs, rte_lcore_to_socket_id (i)); if (error) return error; @@ -1415,9 +1276,9 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now) ed->new_link_state = (u8) xd->link.link_status; } - if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) && - ((xd->link.link_status != 0) ^ - vnet_hw_interface_is_link_up (vnm, xd->hw_if_index))) + if ((xd->flags & (DPDK_DEVICE_FLAG_ADMIN_UP | DPDK_DEVICE_FLAG_BOND_SLAVE)) + && ((xd->link.link_status != 0) ^ + vnet_hw_interface_is_link_up (vnm, xd->hw_if_index))) { hw_flags_chg = 1; hw_flags |= (xd->link.link_status ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); @@ -1518,8 +1379,10 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) /* * Extra set up for bond interfaces: * 1. Setup MACs for bond interfaces and their slave links which was set - * in dpdk_port_setup() but needs to be done again here to take effect. - * 2. Set up info for bond interface related CLI support. + * in dpdk_device_setup() but needs to be done again here to take + * effect. + * 2. Set up info and register slave link state change callback handling. + * 3. Set up info for bond interface related CLI support. */ int nports = rte_eth_dev_count (); if (nports > 0) @@ -1544,7 +1407,8 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) (slink[0], (struct ether_addr *) addr); /* Set MAC of bounded interface to that of 1st slave link */ - clib_warning ("Set MAC for bond dev# %d", i); + clib_warning ("Set MAC for bond port %d BondEthernet%d", + i, xd->port_id); rv = rte_eth_bond_mac_address_set (i, (struct ether_addr *) addr); if (rv) @@ -1573,34 +1437,38 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) /* Add MAC to all slave links except the first one */ if (nlink) { - clib_warning ("Add MAC for slave dev# %d", slave); + clib_warning ("Add MAC for slave port %d", slave); rv = rte_eth_dev_mac_addr_add (slave, (struct ether_addr *) addr, 0); if (rv) clib_warning ("Add MAC addr failure rv=%d", rv); } + /* Setup slave link state change callback handling */ + rte_eth_dev_callback_register + (slave, RTE_ETH_EVENT_INTR_LSC, + dpdk_port_state_callback, NULL); + dpdk_device_t *sxd = &dm->devices[slave]; + sxd->flags |= DPDK_DEVICE_FLAG_BOND_SLAVE; + sxd->bond_port = i; /* Set slaves bitmap for bonded interface */ bhi->bond_info = clib_bitmap_set (bhi->bond_info, sdev->hw_if_index, 1); - /* Set slave link flags on slave interface */ + /* Set MACs and slave link flags on slave interface */ shi = vnet_get_hw_interface (vnm, sdev->hw_if_index); ssi = vnet_get_sw_interface (vnm, sdev->vlib_sw_if_index); sei = pool_elt_at_index (em->interfaces, shi->hw_instance); - shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE; ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE; clib_memcpy (shi->hw_address, addr, 6); clib_memcpy (sei->address, addr, 6); - /* Set l3 packet size allowed as the lowest of slave */ if (bhi->max_l3_packet_bytes[VLIB_RX] > shi->max_l3_packet_bytes[VLIB_RX]) bhi->max_l3_packet_bytes[VLIB_RX] = bhi->max_l3_packet_bytes[VLIB_TX] = shi->max_l3_packet_bytes[VLIB_RX]; - /* Set max packet size allowed as the lowest of slave */ if (bhi->max_packet_bytes > shi->max_packet_bytes) bhi->max_packet_bytes = shi->max_packet_bytes; @@ -1679,17 +1547,13 @@ dpdk_init (vlib_main_t * vm) dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF; vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet"); - dm->dpdk_device_by_kni_port_id = hash_create (0, sizeof (uword)); - dm->vu_sw_if_index_by_listener_fd = hash_create (0, sizeof (uword)); - dm->vu_sw_if_index_by_sock_fd = hash_create (0, sizeof (uword)); - - /* $$$ use n_thread_stacks since it's known-good at this point */ vec_validate (dm->recycle, tm->n_thread_stacks - 1); /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */ dm->buffer_flags_template = (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID - | IP_BUFFER_L4_CHECKSUM_COMPUTED | IP_BUFFER_L4_CHECKSUM_CORRECT); + | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | + VNET_BUFFER_F_L4_CHECKSUM_CORRECT); dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL; dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL;