X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=vnet%2Fvnet%2Fdevices%2Fdpdk%2Finit.c;h=8bb253a3a66be724923e8283e7ee0576a2de7276;hb=ad8b4728cbe40057f9a5809cdb0cd5adc629ad67;hp=a4b0f01475fda86df2b1f5ae1d8e5c2236f2145b;hpb=cb9cadad578297ffd78fa8a33670bdf1ab669e7e;p=vpp.git diff --git a/vnet/vnet/devices/dpdk/init.c b/vnet/vnet/devices/dpdk/init.c index a4b0f01475f..8bb253a3a66 100644 --- a/vnet/vnet/devices/dpdk/init.c +++ b/vnet/vnet/devices/dpdk/init.c @@ -158,23 +158,6 @@ static u32 dpdk_flag_change (vnet_main_t * vnm, { int rv; - /* - * DAW-FIXME: The DPDK VMXNET3 driver does not currently support - * multi-buffer packets. Max out at 1518 bytes for now. - * - * If/when the driver gets fixed, then this should be - * removed. - */ - if ((xd->pmd == VNET_DPDK_PMD_VMXNET3) && - (hi->max_packet_bytes > 1518)) - { - hi->max_packet_bytes = 1518; - - vlib_cli_output (vlib_get_main(), - "VMXNET3 driver does not support jumbo frames " - "yet -- setting mtu to 1518!"); - } - xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes; if (xd->admin_up) @@ -204,6 +187,32 @@ static u32 dpdk_flag_change (vnet_main_t * vnm, extern int rte_netmap_probe(void); #endif +void +dpdk_device_lock_init(dpdk_device_t * xd) +{ + int q; + vec_validate(xd->lockp, xd->tx_q_used - 1); + for (q = 0; q < xd->tx_q_used; q++) + { + xd->lockp[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES); + } + xd->need_txlock = 1; +} + +void +dpdk_device_lock_free(dpdk_device_t * xd) +{ + int q; + + for (q = 0; q < vec_len(xd->lockp); q++) + clib_mem_free((void *) xd->lockp[q]); + vec_free(xd->lockp); + xd->lockp = 0; + xd->need_txlock = 0; +} + static clib_error_t * dpdk_lib_init (dpdk_main_t * dm) { @@ -304,8 +313,10 @@ dpdk_lib_init (dpdk_main_t * dm) memcpy(&xd->port_conf, &port_conf_template, sizeof(struct rte_eth_conf)); - xd->tx_q_used = dev_info.max_tx_queues < tm->n_vlib_mains ? 
- 1 : tm->n_vlib_mains; + xd->tx_q_used = clib_min(dev_info.max_tx_queues, tm->n_vlib_mains); + + if (dm->max_tx_queues) + xd->tx_q_used = clib_min(xd->tx_q_used, dm->max_tx_queues); if (dm->use_rss > 1 && dev_info.max_rx_queues >= dm->use_rss) { @@ -317,6 +328,12 @@ dpdk_lib_init (dpdk_main_t * dm) xd->rx_q_used = 1; xd->dev_type = VNET_DPDK_DEV_ETH; + + /* workaround for drivers not setting driver_name */ + if (!dev_info.driver_name) + dev_info.driver_name = dev_info.pci_dev->driver->name; + ASSERT(dev_info.driver_name); + if (!xd->pmd) { @@ -341,6 +358,7 @@ dpdk_lib_init (dpdk_main_t * dm) /* 10G adapters */ case VNET_DPDK_PMD_IXGBE: case VNET_DPDK_PMD_IXGBEVF: + case VNET_DPDK_PMD_THUNDERX: xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; xd->nb_rx_desc = DPDK_NB_RX_DESC_10GE; xd->nb_tx_desc = DPDK_NB_TX_DESC_10GE; @@ -349,7 +367,7 @@ dpdk_lib_init (dpdk_main_t * dm) /* Cisco VIC */ case VNET_DPDK_PMD_VICE: case VNET_DPDK_PMD_ENIC: - rte_eth_link_get_nowait(xd->device_index, &l); + rte_eth_link_get_nowait(i, &l); if (l.link_speed == 40000) { xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; @@ -382,7 +400,7 @@ dpdk_lib_init (dpdk_main_t * dm) xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; break; case I40E_DEV_ID_VF: - rte_eth_link_get_nowait(xd->device_index, &l); + rte_eth_link_get_nowait(i, &l); xd->port_type = l.link_speed == 10000 ? 
VNET_DPDK_PORT_TYPE_ETH_10G : VNET_DPDK_PORT_TYPE_ETH_40G; break; @@ -391,6 +409,20 @@ dpdk_lib_init (dpdk_main_t * dm) } break; + case VNET_DPDK_PMD_CXGBE: + switch (dev_info.pci_dev->id.device_id) { + case 0x5410: /* T580-LP-cr */ + xd->nb_rx_desc = DPDK_NB_RX_DESC_40GE; + xd->nb_tx_desc = DPDK_NB_TX_DESC_40GE; + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + break; + default: + xd->nb_rx_desc = DPDK_NB_RX_DESC_10GE; + xd->nb_tx_desc = DPDK_NB_TX_DESC_10GE; + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } + break; + /* Intel Red Rock Canyon */ case VNET_DPDK_PMD_FM10K: xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH; @@ -416,6 +448,10 @@ dpdk_lib_init (dpdk_main_t * dm) xd->af_packet_port_id = af_packet_port_id++; break; + case VNET_DPDK_PMD_BOND: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND; + break; + default: xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; } @@ -463,11 +499,7 @@ dpdk_lib_init (dpdk_main_t * dm) rte_eth_macaddr_get(i,(struct ether_addr *)addr); if (xd->tx_q_used < tm->n_vlib_mains) - { - xd->lockp = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, - CLIB_CACHE_LINE_BYTES); - memset ((void *) xd->lockp, 0, CLIB_CACHE_LINE_BYTES); - } + dpdk_device_lock_init(xd); xd->device_index = xd - dm->devices; ASSERT(i == xd->device_index); @@ -577,6 +609,7 @@ dpdk_lib_init (dpdk_main_t * dm) rte_eth_dev_set_mtu(xd->device_index, hi->max_packet_bytes); } +#ifdef RTE_LIBRTE_KNI if (dm->num_kni) { clib_warning("Initializing KNI interfaces..."); rte_kni_init(dm->num_kni); @@ -653,6 +686,7 @@ dpdk_lib_init (dpdk_main_t * dm) hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index); } } +#endif if (nb_desc > dm->num_mbufs) clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n", @@ -664,17 +698,6 @@ dpdk_lib_init (dpdk_main_t * dm) return 0; } -/* - * Tell the vlib physical memory allocator that we've handled - * the initialization. We don't actually do so until - * vlib_main(...) callls the dpdk config function. 
- */ -int vlib_app_physmem_init (vlib_main_t * vm, physmem_main_t * pm, - int physmem_required) -{ - return 1; -} - static clib_error_t * write_sys_fs (char * file_name, char * fmt, ...) { @@ -841,25 +864,9 @@ static clib_error_t * dpdk_bind_eth_kernel_drivers (vlib_main_t * vm, */ if (bind_uio) { - int pci_vendor_id = strtol((char *) pci_vid, NULL, 16); - int pci_device_id = strtol((char *) pci_did, NULL, 16); - - /* - * Set PCI ID to ".../virtio-pci/new_id" for Intel fortvile adapaters - */ - if (pci_vendor_id == 0x8086 && - (pci_device_id == I40E_DEV_ID_10G_BASE_T || - pci_device_id == I40E_DEV_ID_SFP_XL710 || - pci_device_id == I40E_DEV_ID_QSFP_A || - pci_device_id == I40E_DEV_ID_QSFP_B || - pci_device_id == I40E_DEV_ID_QSFP_C)) - { - _vec_len (path) = 0; - path = format (path, "/sys/bus/pci/drivers/%s/new_id%c", driver_name, 0); - error = write_sys_fs ((char *) path, "%s %s", pci_vid, pci_did); - if (error) - continue; - } + _vec_len (path) = 0; + path = format (path, "/sys/bus/pci/drivers/%s/new_id%c", driver_name, 0); + error = write_sys_fs ((char *) path, "%s %s", pci_vid, pci_did); _vec_len (path) = 0; path = format (path, "/sys/bus/pci/drivers/%s/bind%c", driver_name, 0); @@ -883,33 +890,6 @@ static clib_error_t * dpdk_bind_eth_kernel_drivers (vlib_main_t * vm, return error; } -static uword -unformat_socket_mem (unformat_input_t * input, va_list * va) -{ - uword ** r = va_arg (* va, uword **); - int i = 0; - u32 mem; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, ",")) - hash_set (*r, i, 1024); - else if (unformat (input, "%u,", &mem)) - hash_set (*r, i, mem); - else if (unformat (input, "%u", &mem)) - hash_set (*r, i, mem); - else - { - unformat_put_input (input); - goto done; - } - i++; - } - -done: - return 1; -} - static u32 get_node_free_hugepages_num (u32 node, u32 page_size) { @@ -979,6 +959,12 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) no_huge = 1; } + else if (unformat (input, 
"enable-tcp-udp-checksum")) + { + dm->buffer_flags_template &= + ~(IP_BUFFER_L4_CHECKSUM_CORRECT | IP_BUFFER_L4_CHECKSUM_COMPUTED); + } + else if (unformat (input, "decimal-interface-names")) dm->interface_name_format_decimal = 1; @@ -1026,6 +1012,8 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "num-mbufs %d", &dm->num_mbufs)) ; + else if (unformat (input, "max-tx-queues %d", &dm->max_tx_queues)) + ; else if (unformat (input, "kni %d", &dm->num_kni)) ; else if (unformat (input, "uio-driver %s", &dm->uio_driver_name)) @@ -1141,11 +1129,11 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) u32 pages_avail; pages_avail = get_node_free_hugepages_num(c, 1048576); - if (!(pages_avail >= pages_num_1g)) + if (!pages_avail || !(pages_avail >= pages_num_1g)) use_1g = 0; pages_avail = get_node_free_hugepages_num(c, 2048); - if (!(pages_avail >= pages_num_2m)) + if (!pages_avail || !(pages_avail >= pages_num_2m)) use_2m = 0; } } @@ -1296,24 +1284,23 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) if (!dm->coremask_set_manually) { vlib_thread_registration_t * tr; - uword coremask; + uword * coremask = 0; int i; /* main thread core */ - coremask = 1 << tm->main_lcore; + coremask = clib_bitmap_set(coremask, tm->main_lcore, 1); for (i = 0; i < vec_len (tm->registrations); i++) { tr = tm->registrations[i]; - if (clib_bitmap_is_zero(tr->coremask)) - continue; - coremask |= tr->coremask[0]; + coremask = clib_bitmap_or(coremask, tr->coremask); } vec_insert (dm->eal_init_args, 2, 1); dm->eal_init_args[1] = (u8 *) "-c"; - tmp = format (0, "%x%c", coremask, 0); + tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0); dm->eal_init_args[2] = tmp; + clib_bitmap_free(coremask); } if (!dm->nchannels_set_manually) @@ -1400,6 +1387,11 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) vm = dm->vlib_main; + /* make copy of args as rte_eal_init tends to mess up with arg array */ + for (i = 1; i < vec_len(dm->eal_init_args); 
i++) + dm->eal_init_args_str = format(dm->eal_init_args_str, "%s ", + dm->eal_init_args[i]); + ret = rte_eal_init(vec_len(dm->eal_init_args), (char **) dm->eal_init_args); /* lazy umount hugepages */ @@ -1408,14 +1400,18 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) if (ret < 0) return clib_error_return (0, "rte_eal_init returned %d", ret); + /* Dump the physical memory layout prior to creating the mbuf_pool */ + fprintf(stdout, "DPDK physical memory layout:\n"); + rte_dump_physmem_layout(stdout); + /* main thread 1st */ - error = vlib_buffer_pool_create(vm, dm->num_mbufs, MBUF_SIZE, rte_socket_id()); + error = vlib_buffer_pool_create(vm, dm->num_mbufs, rte_socket_id()); if (error) return error; for (i = 0; i < RTE_MAX_LCORE; i++) { - error = vlib_buffer_pool_create(vm, dm->num_mbufs, MBUF_SIZE, + error = vlib_buffer_pool_create(vm, dm->num_mbufs, rte_lcore_to_socket_id(i)); if (error) return error; @@ -1544,7 +1540,9 @@ dpdk_process (vlib_main_t * vm, vlib_frame_t * f) { clib_error_t * error; + vnet_main_t * vnm = vnet_get_main(); dpdk_main_t * dm = &dpdk_main; + ethernet_main_t * em = &ethernet_main; dpdk_device_t * xd; vlib_thread_main_t * tm = vlib_get_thread_main(); void *vu_state; @@ -1585,9 +1583,54 @@ dpdk_process (vlib_main_t * vm, dpdk_update_link_state (xd, now); } +{ // Setup MACs for bond interfaces and their links which was initialized in + // dpdk_port_setup() but needs to be done again here to take effect. 
+ int nports = rte_eth_dev_count(); + if (nports > 0) { + for (i = 0; i < nports; i++) { + struct rte_eth_dev_info dev_info; + rte_eth_dev_info_get(i, &dev_info); + if (!dev_info.driver_name) + dev_info.driver_name = dev_info.pci_dev->driver->name; + ASSERT(dev_info.driver_name); + if (strncmp(dev_info.driver_name, "rte_bond_pmd", 12) == 0) { + u8 addr[6]; + u8 slink[16]; + int nlink = rte_eth_bond_slaves_get(i, slink, 16); + if (nlink > 0) { + vnet_hw_interface_t * hi; + ethernet_interface_t * ei; + /* Get MAC of 1st slave link */ + rte_eth_macaddr_get(slink[0], (struct ether_addr *)addr); + /* Set MAC of bonded interface to that of 1st slave link */ + rte_eth_bond_mac_address_set(i, (struct ether_addr *)addr); + /* Populate MAC of bonded interface in VPP hw tables */ + hi = vnet_get_hw_interface ( + vnm, dm->devices[i].vlib_hw_if_index); + ei = pool_elt_at_index (em->interfaces, hi->hw_instance); + memcpy (hi->hw_address, addr, 6); + memcpy (ei->address, addr, 6); + /* Add MAC to other slave links */ + while (nlink > 1) { + nlink--; + rte_eth_dev_mac_addr_add( + slink[nlink], (struct ether_addr *)addr, 0); + } + } + } + } + } +} + while (1) { - vlib_process_wait_for_event_or_clock (vm, 5.0); + /* + * check each time through the loop in case intervals are changed + */ + f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ? 
+ dm->link_state_poll_interval : dm->stat_poll_interval; + + vlib_process_wait_for_event_or_clock (vm, min_wait); if (dpdk_get_admin_up_down_in_progress()) /* skip the poll if an admin up down is in progress (on any interface) */ @@ -1596,9 +1639,9 @@ dpdk_process (vlib_main_t * vm, vec_foreach (xd, dm->devices) { f64 now = vlib_time_now (vm); - if ((now - xd->time_last_stats_update) >= DPDK_STATS_POLL_INTERVAL) + if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval) dpdk_update_counters (xd, now); - if ((now - xd->time_last_link_update) >= DPDK_LINK_POLL_INTERVAL) + if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval) dpdk_update_link_state (xd, now); if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER) @@ -1619,6 +1662,26 @@ VLIB_REGISTER_NODE (dpdk_process_node,static) = { .process_log2_n_stack_bytes = 17, }; +int dpdk_set_stat_poll_interval (f64 interval) +{ + if (interval < DPDK_MIN_STATS_POLL_INTERVAL) + return (VNET_API_ERROR_INVALID_VALUE); + + dpdk_main.stat_poll_interval = interval; + + return 0; +} + +int dpdk_set_link_state_poll_interval (f64 interval) +{ + if (interval < DPDK_MIN_LINK_POLL_INTERVAL) + return (VNET_API_ERROR_INVALID_VALUE); + + dpdk_main.link_state_poll_interval = interval; + + return 0; +} + clib_error_t * dpdk_init (vlib_main_t * vm) { @@ -1684,6 +1747,30 @@ do { \ _(pmd_af_packet_drv) #endif +#ifdef RTE_LIBRTE_CXGBE_PMD + _(rte_cxgbe_driver) +#endif + +#ifdef RTE_LIBRTE_PMD_BOND + _(bond_drv) +#endif + +#undef _ + +/* + * At the moment, the ThunderX NIC driver doesn't have + * an entry point named "devinitfn_rte_xxx_driver" + */ +#define _(d) \ +do { \ + void d(void); \ + __attribute__((unused)) void (* volatile pf)(void); \ + pf = d; \ +} while(0); + +#ifdef RTE_LIBRTE_THUNDERVNIC_PMD +_(rte_nicvf_pmd_init) +#endif #undef _ dm->vlib_main = vm; @@ -1717,6 +1804,15 @@ do { \ dm->vhost_coalesce_frames = 32; dm->vhost_coalesce_time = 1e-3; + /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... 
*/ + dm->buffer_flags_template = + (VLIB_BUFFER_TOTAL_LENGTH_VALID + | IP_BUFFER_L4_CHECKSUM_COMPUTED + | IP_BUFFER_L4_CHECKSUM_CORRECT); + + dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL; + dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL; + /* init CLI */ if ((error = vlib_call_init_function (vm, dpdk_cli_init))) return error;