X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Fdpdk%2Fdevice%2Finit.c;h=767a7a4c9a7cb56d014b5a4f3f73098288278856;hb=f4d5fb71cccc281e11cd95eee691232cde142ead;hp=95176fb84907e6b8bb1d0145e724f17bf17be87d;hpb=2504ac699e423f1ca840a63247ce55cb27735e0a;p=vpp.git diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index 95176fb8490..767a7a4c9a7 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -17,13 +17,15 @@ #include #include #include -#include +#include #include #include #include #include +#include + #include #include #include @@ -35,6 +37,7 @@ #include dpdk_main_t dpdk_main; +dpdk_config_main_t dpdk_config_main; #define LINK_STATE_ELOGS 0 @@ -100,26 +103,8 @@ dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) } else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU (flags)) { - int rv; - xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes; - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - dpdk_device_stop (xd); - - rv = rte_eth_dev_configure - (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf); - - if (rv < 0) - vlib_cli_output (vlib_get_main (), - "rte_eth_dev_configure[%d]: err %d", - xd->device_index, rv); - - rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - dpdk_device_start (xd); - + dpdk_device_setup (xd); } return old; } @@ -137,6 +122,60 @@ dpdk_device_lock_init (dpdk_device_t * xd) } } +static struct rte_mempool_ops * +get_ops_by_name (i8 * ops_name) +{ + u32 i; + + for (i = 0; i < rte_mempool_ops_table.num_ops; i++) + { + if (!strcmp (ops_name, rte_mempool_ops_table.ops[i].name)) + return &rte_mempool_ops_table.ops[i]; + } + + return 0; +} + +static int +dpdk_ring_alloc (struct rte_mempool *mp) +{ + u32 rg_flags = 0, count; + i32 ret; + i8 rg_name[RTE_RING_NAMESIZE]; + struct rte_ring *r; + + ret = snprintf (rg_name, sizeof (rg_name), RTE_MEMPOOL_MZ_FORMAT, mp->name); + if (ret < 0 || ret >= (i32) sizeof (rg_name)) + return -ENAMETOOLONG; + + /* ring flags */ + if (mp->flags & MEMPOOL_F_SP_PUT) + rg_flags |= RING_F_SP_ENQ; + if (mp->flags & MEMPOOL_F_SC_GET) + rg_flags |= RING_F_SC_DEQ; + + count = rte_align32pow2 (mp->size + 1); + /* + * Allocate the ring that will be used to store objects. + * Ring functions will return appropriate errors if we are + * running as a secondary process etc., so no checks made + * in this function for that condition. + */ + /* XXX can we get memory from the right socket? */ + r = clib_mem_alloc_aligned (rte_ring_get_memsize (count), + CLIB_CACHE_LINE_BYTES); + + /* XXX rte_ring_lookup will not work */ + + ret = rte_ring_init (r, rg_name, count, rg_flags); + if (ret) + return ret; + + mp->pool_data = r; + + return 0; +} + static clib_error_t * dpdk_lib_init (dpdk_main_t * dm) { @@ -209,7 +248,6 @@ dpdk_lib_init (dpdk_main_t * dm) VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); vlib_buffer_init_for_free_list (bt, fl); bt->flags = dm->buffer_flags_template; - bt->current_data = -RTE_PKTMBUF_HEADROOM; vnet_buffer (bt)->sw_if_index[VLIB_TX] = (u32) ~ 0; } @@ -275,6 +313,10 @@ dpdk_lib_init (dpdk_main_t * dm) clib_memcpy (&xd->tx_conf, &dev_info.default_txconf, sizeof (struct rte_eth_txconf)); + + if (dm->conf->no_tx_checksum_offload == 0) + xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOXSUMS; + if (dm->conf->no_multi_seg) { xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; @@ -420,10 +462,6 @@ dpdk_lib_init (dpdk_main_t * dm) xd->port_type = VNET_DPDK_PORT_TYPE_VIRTIO_USER; break; - case VNET_DPDK_PMD_VHOST_ETHER: - xd->port_type = VNET_DPDK_PORT_TYPE_VHOST_ETHER; - break; - default: xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; } @@ -584,8 +622,9 @@ dpdk_lib_init (dpdk_main_t * dm) hi = vnet_get_hw_interface (dm->vnet_main, xd->hw_if_index); - if (xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD) - hi->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD; + if (dm->conf->no_tx_checksum_offload == 0) + if (xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD) + hi->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD; dpdk_device_setup (xd); @@ -672,7 +711,9 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) } /* virtio */ - if (d->vendor_id == 0x1af4 && d->device_id == 0x1000) + if (d->vendor_id == 0x1af4 && + (d->device_id == VIRTIO_PCI_LEGACY_DEVICEID_NET || + d->device_id == VIRTIO_PCI_MODERN_DEVICEID_NET)) ; /* vmxnet3 */ else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0) @@ -692,7 +733,9 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) ; /* Mellanox */ else if (d->vendor_id == 0x15b3 && d->device_id >= 0x1013 && d->device_id <= 0x101a) - ; + { + continue; + } else { clib_warning ("Unsupported PCI device 0x%04x:0x%04x found " @@ -861,6 +904,9 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "enable-tcp-udp-checksum")) conf->enable_tcp_udp_checksum = 1; + else if (unformat (input, "no-tx-checksum-offload")) + conf->no_tx_checksum_offload = 1; + else if (unformat (input, "decimal-interface-names")) conf->interface_name_format_decimal = 1; @@ -985,9 +1031,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) { u32 x, *mem_by_socket = 0; uword c = 0; - u8 use_1g = 1; - u8 use_2m = 1; - u8 less_than_1g = 1; int rv; umount ((char *) huge_dir_path); @@ -1009,9 +1052,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) break; vec_add1 (mem_by_socket, x); - - if (x > 1023) - less_than_1g = 0; } /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... */ unformat_free (&in); @@ -1023,39 +1063,22 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( { vec_validate(mem_by_socket, c); - mem_by_socket[c] = 256; /* default per-socket mem */ + mem_by_socket[c] = 64; /* default per-socket mem */ } )); /* *INDENT-ON* */ } - /* check if available enough 1GB pages for each socket */ /* *INDENT-OFF* */ clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( { - int pages_avail, page_size, mem; - clib_error_t *e = 0; + clib_error_t *e; vec_validate(mem_by_socket, c); - mem = mem_by_socket[c]; - - page_size = 1024; - e = vlib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail); - - if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem) - use_1g = 0; - - if (e) - clib_error_free (e); - - page_size = 2; - e = vlib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail); - - if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem) - use_2m = 0; + e = clib_sysfs_prealloc_hugepages(c, 2 << 10, mem_by_socket[c] / 2); if (e) - clib_error_free (e); + clib_error_report (e); })); /* *INDENT-ON* */ @@ -1080,19 +1103,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) goto done; } - if (use_1g && !(less_than_1g && use_2m)) - { - rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, - "pagesize=1G"); - } - else if (use_2m) - { - rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, NULL); - } - else - { - return clib_error_return (0, "not enough free huge pages"); - } + rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, NULL); if (rv) { @@ -1210,6 +1221,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ", conf->eal_init_args[i]); + clib_warning ("EAL init args: %s", conf->eal_init_args_str); ret = rte_eal_init (vec_len (conf->eal_init_args), (char **) conf->eal_init_args); @@ -1226,6 +1238,23 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) fprintf (stdout, "DPDK physical memory layout:\n"); rte_dump_physmem_layout (stdout); + /* set custom ring memory allocator */ + { + struct rte_mempool_ops *ops = NULL; + + ops = get_ops_by_name ("ring_sp_sc"); + ops->alloc = dpdk_ring_alloc; + + ops = get_ops_by_name ("ring_mp_sc"); + ops->alloc = dpdk_ring_alloc; + + ops = get_ops_by_name ("ring_sp_mc"); + ops->alloc = dpdk_ring_alloc; + + ops = get_ops_by_name ("ring_mp_mc"); + ops->alloc = dpdk_ring_alloc; + } + /* main thread 1st */ error = dpdk_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ()); if (error) @@ -1403,7 +1432,7 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) if (xd->pmd == VNET_DPDK_PMD_BOND) { u8 addr[6]; - u8 slink[16]; + dpdk_portid_t slink[16]; int nlink = rte_eth_bond_slaves_get (i, slink, 16); if (nlink > 0) { @@ -1529,7 +1558,6 @@ static clib_error_t * dpdk_init (vlib_main_t * vm) { dpdk_main_t *dm = &dpdk_main; - vlib_node_t *ei; clib_error_t *error = 0; vlib_thread_main_t *tm = vlib_get_thread_main (); @@ -1546,12 +1574,6 @@ dpdk_init (vlib_main_t * vm) dm->vnet_main = vnet_get_main (); dm->conf = &dpdk_config_main; - ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input"); - if (ei == 0) - return clib_error_return (0, "ethernet-input node AWOL"); - - dm->ethernet_input_node_index = ei->index; - dm->conf->nchannels = 4; dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF; vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet"); @@ -1562,7 +1584,7 @@ dpdk_init (vlib_main_t * vm) dm->buffer_flags_template = (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | - VNET_BUFFER_F_L4_CHECKSUM_CORRECT); + VNET_BUFFER_F_L4_CHECKSUM_CORRECT | VNET_BUFFER_F_L2_HDR_OFFSET_VALID); dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL; dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL;