X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Fdpdk%2Fdevice%2Finit.c;h=20272853035698b5079cb40c2041d4b6e37bbfc5;hb=cef87f1a5eb4d69cf11ce1cd3c5506edcfba74c4;hp=ee61f94e39953af9b765748a849669a626c3c4ad;hpb=01914ce45729833cec88c65689de9a0336cd40cc;p=vpp.git diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index ee61f94e399..20272853035 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -24,6 +24,8 @@ #include #include +#include + #include #include #include @@ -35,6 +37,7 @@ #include dpdk_main_t dpdk_main; +dpdk_config_main_t dpdk_config_main; #define LINK_STATE_ELOGS 0 @@ -100,26 +103,8 @@ dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) } else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU (flags)) { - int rv; - xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes; - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - dpdk_device_stop (xd); - - rv = rte_eth_dev_configure - (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf); - - if (rv < 0) - vlib_cli_output (vlib_get_main (), - "rte_eth_dev_configure[%d]: err %d", - xd->device_index, rv); - - rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - dpdk_device_start (xd); - + dpdk_device_setup (xd); } return old; } @@ -137,6 +122,60 @@ dpdk_device_lock_init (dpdk_device_t * xd) } } +static struct rte_mempool_ops * +get_ops_by_name (i8 * ops_name) +{ + u32 i; + + for (i = 0; i < rte_mempool_ops_table.num_ops; i++) + { + if (!strcmp (ops_name, rte_mempool_ops_table.ops[i].name)) + return &rte_mempool_ops_table.ops[i]; + } + + return 0; +} + +static int +dpdk_ring_alloc (struct rte_mempool *mp) +{ + u32 rg_flags = 0, count; + i32 ret; + i8 rg_name[RTE_RING_NAMESIZE]; + struct rte_ring *r; + + ret = snprintf (rg_name, sizeof (rg_name), RTE_MEMPOOL_MZ_FORMAT, mp->name); + if (ret < 0 || ret >= (i32) sizeof (rg_name)) + return -ENAMETOOLONG; + + /* ring flags */ + if (mp->flags & MEMPOOL_F_SP_PUT) + rg_flags |= RING_F_SP_ENQ; + if (mp->flags & MEMPOOL_F_SC_GET) + rg_flags |= RING_F_SC_DEQ; + + count = rte_align32pow2 (mp->size + 1); + /* + * Allocate the ring that will be used to store objects. + * Ring functions will return appropriate errors if we are + * running as a secondary process etc., so no checks made + * in this function for that condition. + */ + /* XXX can we get memory from the right socket? */ + r = clib_mem_alloc_aligned (rte_ring_get_memsize (count), + CLIB_CACHE_LINE_BYTES); + + /* XXX rte_ring_lookup will not work */ + + ret = rte_ring_init (r, rg_name, count, rg_flags); + if (ret) + return ret; + + mp->pool_data = r; + + return 0; +} + static clib_error_t * dpdk_lib_init (dpdk_main_t * dm) { @@ -209,7 +248,6 @@ dpdk_lib_init (dpdk_main_t * dm) VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); vlib_buffer_init_for_free_list (bt, fl); bt->flags = dm->buffer_flags_template; - bt->current_data = -RTE_PKTMBUF_HEADROOM; vnet_buffer (bt)->sw_if_index[VLIB_TX] = (u32) ~ 0; } @@ -275,6 +313,10 @@ dpdk_lib_init (dpdk_main_t * dm) clib_memcpy (&xd->tx_conf, &dev_info.default_txconf, sizeof (struct rte_eth_txconf)); + + if (dm->conf->no_tx_checksum_offload == 0) + xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOXSUMS; + if (dm->conf->no_multi_seg) { xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; @@ -298,11 +340,6 @@ dpdk_lib_init (dpdk_main_t * dm) && devconf->num_tx_queues < xd->tx_q_used) xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues); - if (devconf->num_rx_queues > 1 && dm->use_rss == 0) - { - dm->use_rss = 1; - } - if (devconf->num_rx_queues > 1 && dev_info.max_rx_queues >= devconf->num_rx_queues) { @@ -420,10 +457,6 @@ dpdk_lib_init (dpdk_main_t * dm) xd->port_type = VNET_DPDK_PORT_TYPE_VIRTIO_USER; break; - case VNET_DPDK_PMD_VHOST_ETHER: - xd->port_type = VNET_DPDK_PORT_TYPE_VHOST_ETHER; - break; - default: xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; } @@ -489,34 +522,26 @@ dpdk_lib_init (dpdk_main_t * dm) xd->per_interface_next_index = ~0; /* assign interface to input thread */ - dpdk_device_and_queue_t *dq; int q; if (devconf->hqos_enabled) { xd->flags |= DPDK_DEVICE_FLAG_HQOS; + int cpu; if (devconf->hqos.hqos_thread_valid) { - int cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread; - if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count) return clib_error_return (0, "invalid HQoS thread index"); - vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = 0; + cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread; } else { - int cpu = dm->hqos_cpu_first_index + next_hqos_cpu; - if (dm->hqos_cpu_count == 0) return clib_error_return (0, "no HQoS threads available"); - vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = 0; + cpu = dm->hqos_cpu_first_index + next_hqos_cpu; next_hqos_cpu++; if (next_hqos_cpu == dm->hqos_cpu_count) @@ -525,6 +550,11 @@ dpdk_lib_init (dpdk_main_t * dm) devconf->hqos.hqos_thread_valid = 1; devconf->hqos.hqos_thread = cpu; } + + dpdk_device_and_queue_t *dq; + vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = 0; } vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, @@ -584,8 +614,9 @@ dpdk_lib_init (dpdk_main_t * dm) hi = vnet_get_hw_interface (dm->vnet_main, xd->hw_if_index); - if (xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD) - hi->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD; + if (dm->conf->no_tx_checksum_offload == 0) + if (xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD) + hi->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD; dpdk_device_setup (xd); @@ -646,24 +677,37 @@ dpdk_lib_init (dpdk_main_t * dm) static void dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) { - vlib_pci_main_t *pm = &pci_main; clib_error_t *error; - vlib_pci_device_t *d; u8 *pci_addr = 0; int num_whitelisted = vec_len (conf->dev_confs); + vlib_pci_device_info_t *d = 0; + vlib_pci_addr_t *addr = 0, *addrs; + addrs = vlib_pci_get_all_dev_addrs (); /* *INDENT-OFF* */ - pool_foreach (d, pm->pci_devs, ({ + vec_foreach (addr, addrs) + { dpdk_device_config_t * devconf = 0; vec_reset_length (pci_addr); - pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, &d->bus_address, 0); + pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, addr, 0); + if (d) + { + vlib_pci_free_device_info (d); + d = 0; + } + d = vlib_pci_get_device_info (addr, &error); + if (error) + { + clib_error_report (error); + continue; + } if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO) continue; if (num_whitelisted) { - uword * p = hash_get (conf->device_config_index_by_pci_addr, d->bus_address.as_u32); + uword * p = hash_get (conf->device_config_index_by_pci_addr, addr->as_u32); if (!p) continue; @@ -672,7 +716,9 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) } /* virtio */ - if (d->vendor_id == 0x1af4 && d->device_id == 0x1000) + if (d->vendor_id == 0x1af4 && + (d->device_id == VIRTIO_PCI_LEGACY_DEVICEID_NET || + d->device_id == VIRTIO_PCI_MODERN_DEVICEID_NET)) ; /* vmxnet3 */ else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0) @@ -692,7 +738,9 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) ; /* Mellanox */ else if (d->vendor_id == 0x15b3 && d->device_id >= 0x1013 && d->device_id <= 0x101a) - ; + { + continue; + } else { clib_warning ("Unsupported PCI device 0x%04x:0x%04x found " @@ -701,23 +749,24 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) continue; } - error = vlib_pci_bind_to_uio (d, (char *) conf->uio_driver_name); + error = vlib_pci_bind_to_uio (addr, (char *) conf->uio_driver_name); if (error) { if (devconf == 0) { pool_get (conf->dev_confs, devconf); - hash_set (conf->device_config_index_by_pci_addr, d->bus_address.as_u32, + hash_set (conf->device_config_index_by_pci_addr, addr->as_u32, devconf - conf->dev_confs); - devconf->pci_addr.as_u32 = d->bus_address.as_u32; + devconf->pci_addr.as_u32 = addr->as_u32; } devconf->is_blacklisted = 1; clib_error_report (error); } - })); + } /* *INDENT-ON* */ vec_free (pci_addr); + vlib_pci_free_device_info (d); } static clib_error_t * @@ -861,6 +910,9 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "enable-tcp-udp-checksum")) conf->enable_tcp_udp_checksum = 1; + else if (unformat (input, "no-tx-checksum-offload")) + conf->no_tx_checksum_offload = 1; + else if (unformat (input, "decimal-interface-names")) conf->interface_name_format_decimal = 1; @@ -985,9 +1037,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) { u32 x, *mem_by_socket = 0; uword c = 0; - u8 use_1g = 1; - u8 use_2m = 1; - u8 less_than_1g = 1; int rv; umount ((char *) huge_dir_path); @@ -1009,9 +1058,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) break; vec_add1 (mem_by_socket, x); - - if (x > 1023) - less_than_1g = 0; } /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... */ unformat_free (&in); @@ -1023,39 +1069,22 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( { vec_validate(mem_by_socket, c); - mem_by_socket[c] = 256; /* default per-socket mem */ + mem_by_socket[c] = 64; /* default per-socket mem */ } )); /* *INDENT-ON* */ } - /* check if available enough 1GB pages for each socket */ /* *INDENT-OFF* */ clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( { - int pages_avail, page_size, mem; - clib_error_t *e = 0; + clib_error_t *e; vec_validate(mem_by_socket, c); - mem = mem_by_socket[c]; - - page_size = 1024; - e = clib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail); - - if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem) - use_1g = 0; + e = clib_sysfs_prealloc_hugepages(c, 2 << 10, mem_by_socket[c] / 2); if (e) - clib_error_free (e); - - page_size = 2; - e = clib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail); - - if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem) - use_2m = 0; - - if (e) - clib_error_free (e); + clib_error_report (e); })); /* *INDENT-ON* */ @@ -1080,19 +1109,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) goto done; } - if (use_1g && !(less_than_1g && use_2m)) - { - rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, - "pagesize=1G"); - } - else if (use_2m) - { - rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, NULL); - } - else - { - return clib_error_return (0, "not enough free huge pages"); - } + rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, NULL); if (rv) { @@ -1210,6 +1227,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ", conf->eal_init_args[i]); + clib_warning ("EAL init args: %s", conf->eal_init_args_str); ret = rte_eal_init (vec_len (conf->eal_init_args), (char **) conf->eal_init_args); @@ -1226,6 +1244,23 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) fprintf (stdout, "DPDK physical memory layout:\n"); rte_dump_physmem_layout (stdout); + /* set custom ring memory allocator */ + { + struct rte_mempool_ops *ops = NULL; + + ops = get_ops_by_name ("ring_sp_sc"); + ops->alloc = dpdk_ring_alloc; + + ops = get_ops_by_name ("ring_mp_sc"); + ops->alloc = dpdk_ring_alloc; + + ops = get_ops_by_name ("ring_sp_mc"); + ops->alloc = dpdk_ring_alloc; + + ops = get_ops_by_name ("ring_mp_mc"); + ops->alloc = dpdk_ring_alloc; + } + /* main thread 1st */ error = dpdk_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ()); if (error) @@ -1403,7 +1438,7 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) if (xd->pmd == VNET_DPDK_PMD_BOND) { u8 addr[6]; - u8 slink[16]; + dpdk_portid_t slink[16]; int nlink = rte_eth_bond_slaves_get (i, slink, 16); if (nlink > 0) { @@ -1529,7 +1564,6 @@ static clib_error_t * dpdk_init (vlib_main_t * vm) { dpdk_main_t *dm = &dpdk_main; - vlib_node_t *ei; clib_error_t *error = 0; vlib_thread_main_t *tm = vlib_get_thread_main (); @@ -1546,12 +1580,6 @@ dpdk_init (vlib_main_t * vm) dm->vnet_main = vnet_get_main (); dm->conf = &dpdk_config_main; - ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input"); - if (ei == 0) - return clib_error_return (0, "ethernet-input node AWOL"); - - dm->ethernet_input_node_index = ei->index; - dm->conf->nchannels = 4; dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF; vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet"); @@ -1562,7 +1590,7 @@ dpdk_init (vlib_main_t * vm) dm->buffer_flags_template = (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | - VNET_BUFFER_F_L4_CHECKSUM_CORRECT); + VNET_BUFFER_F_L4_CHECKSUM_CORRECT | VNET_BUFFER_F_L2_HDR_OFFSET_VALID); dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL; dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL;