X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=vnet%2Fvnet%2Fdevices%2Fdpdk%2Finit.c;h=7514ff86ac6ec66c9ca2a76b3d631433236027ea;hb=ce3e971;hp=8ee59ff8327eedf044f78abdb88f7424b7487ee5;hpb=1c80e831b728ab378949714d5059a0b5b1822a0a;p=vpp.git diff --git a/vnet/vnet/devices/dpdk/init.c b/vnet/vnet/devices/dpdk/init.c index 8ee59ff8327..7514ff86ac6 100644 --- a/vnet/vnet/devices/dpdk/init.c +++ b/vnet/vnet/devices/dpdk/init.c @@ -152,8 +152,7 @@ static u32 dpdk_flag_change (vnet_main_t * vnm, * driver to dynamically change the mtu. If/when the * VIC firmware gets fixed, then this should be removed. */ - if (xd->pmd == VNET_DPDK_PMD_VICE || - xd->pmd == VNET_DPDK_PMD_ENIC) + if (xd->pmd == VNET_DPDK_PMD_ENIC) { struct rte_eth_dev_info dev_info; @@ -196,10 +195,6 @@ static u32 dpdk_flag_change (vnet_main_t * vnm, return old; } -#ifdef NETMAP -extern int rte_netmap_probe(void); -#endif - void dpdk_device_lock_init(dpdk_device_t * xd) { @@ -235,6 +230,7 @@ dpdk_lib_init (dpdk_main_t * dm) clib_error_t * error; vlib_main_t * vm = vlib_get_main(); vlib_thread_main_t * tm = vlib_get_thread_main(); + vlib_node_runtime_t * rt; vnet_sw_interface_t * sw; vnet_hw_interface_t * hi; dpdk_device_t * xd; @@ -247,21 +243,13 @@ dpdk_lib_init (dpdk_main_t * dm) dm->input_cpu_first_index = 0; dm->input_cpu_count = 1; + rt = vlib_node_get_runtime (vm, dpdk_input_node.index); + rt->function = dpdk_input_multiarch_select(); + /* find out which cpus will be used for input */ - p = hash_get_mem (tm->thread_registrations_by_name, "io"); + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); tr = p ? (vlib_thread_registration_t *) p[0] : 0; - if (!tr || tr->count == 0) - { - /* no io threads, workers doing input */ - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - tr = p ? 
(vlib_thread_registration_t *) p[0] : 0; - } - else - { - dm->have_io_threads = 1; - } - if (tr && tr->count > 0) { dm->input_cpu_first_index = tr->first_index; @@ -274,11 +262,6 @@ dpdk_lib_init (dpdk_main_t * dm) vec_validate_aligned (dm->workers, tm->n_vlib_mains - 1, CLIB_CACHE_LINE_BYTES); -#ifdef NETMAP - if(rte_netmap_probe() < 0) - return clib_error_return (0, "rte netmap probe failed"); -#endif - nports = rte_eth_dev_count(); if (nports < 1) { @@ -296,24 +279,46 @@ dpdk_lib_init (dpdk_main_t * dm) vlib_buffer_get_or_create_free_list ( vm, VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, "dpdk rx"); + if (dm->conf->enable_tcp_udp_checksum) + dm->buffer_flags_template &= ~(IP_BUFFER_L4_CHECKSUM_CORRECT + | IP_BUFFER_L4_CHECKSUM_COMPUTED); + for (i = 0; i < nports; i++) { u8 addr[6]; + u8 vlan_strip = 0; int j; struct rte_eth_dev_info dev_info; clib_error_t * rv; struct rte_eth_link l; + dpdk_device_config_t * devconf = 0; + vlib_pci_addr_t pci_addr; + uword * p = 0; + + rte_eth_dev_info_get(i, &dev_info); + if (dev_info.pci_dev) /* bonded interface has no pci info */ + { + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + p = hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); + } + + if (p) + devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + devconf = &dm->conf->default_devconf; /* Create vnet interface */ vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; xd->cpu_socket = (i8) rte_eth_dev_socket_id(i); - rte_eth_dev_info_get(i, &dev_info); clib_memcpy(&xd->tx_conf, &dev_info.default_txconf, sizeof(struct rte_eth_txconf)); - if (dm->no_multi_seg) + if (dm->conf->no_multi_seg) { xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; port_conf_template.rxmode.jumbo_frame = 0; @@ -328,14 +333,23 @@ 
dpdk_lib_init (dpdk_main_t * dm) xd->tx_q_used = clib_min(dev_info.max_tx_queues, tm->n_vlib_mains); - if (dm->max_tx_queues) - xd->tx_q_used = clib_min(xd->tx_q_used, dm->max_tx_queues); + if (devconf->num_tx_queues > 0 && devconf->num_tx_queues < xd->tx_q_used) + xd->tx_q_used = clib_min(xd->tx_q_used, devconf->num_tx_queues); + + if (devconf->num_rx_queues > 1 && dm->use_rss == 0) + { + rt->function = dpdk_input_rss_multiarch_select(); + dm->use_rss = 1; + } - if (dm->use_rss > 1 && dev_info.max_rx_queues >= dm->use_rss) + if (devconf->num_rx_queues > 1 && dev_info.max_rx_queues >= devconf->num_rx_queues) { - xd->rx_q_used = dm->use_rss; + xd->rx_q_used = devconf->num_rx_queues; xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS; - xd->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP; + if (devconf->rss_fn == 0) + xd->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP; + else + xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn; } else xd->rx_q_used = 1; @@ -343,7 +357,7 @@ dpdk_lib_init (dpdk_main_t * dm) xd->dev_type = VNET_DPDK_DEV_ETH; /* workaround for drivers not setting driver_name */ - if (!dev_info.driver_name) + if ((!dev_info.driver_name) && (dev_info.pci_dev)) dev_info.driver_name = dev_info.pci_dev->driver->name; ASSERT(dev_info.driver_name); @@ -376,21 +390,22 @@ dpdk_lib_init (dpdk_main_t * dm) xd->nb_rx_desc = DPDK_NB_RX_DESC_10GE; xd->nb_tx_desc = DPDK_NB_TX_DESC_10GE; break; + case VNET_DPDK_PMD_DPAA2: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; /* Cisco VIC */ - case VNET_DPDK_PMD_VICE: case VNET_DPDK_PMD_ENIC: rte_eth_link_get_nowait(i, &l); + xd->nb_rx_desc = DPDK_NB_RX_DESC_ENIC; if (l.link_speed == 40000) { xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - xd->nb_rx_desc = DPDK_NB_RX_DESC_40GE; xd->nb_tx_desc = DPDK_NB_TX_DESC_40GE; } else { xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - xd->nb_rx_desc = DPDK_NB_RX_DESC_10GE; xd->nb_tx_desc = DPDK_NB_TX_DESC_10GE; } 
break; @@ -469,15 +484,11 @@ dpdk_lib_init (dpdk_main_t * dm) xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; } - #ifdef NETMAP - if(strncmp(dev_info.driver_name, "vale", 4) == 0 - || strncmp(dev_info.driver_name, "netmap", 6) == 0) - { - xd->pmd = VNET_DPDK_PMD_NETMAP; - xd->port_type = VNET_DPDK_PORT_TYPE_NETMAP; - } - #endif + if (devconf->num_rx_desc) + xd->nb_rx_desc = devconf->num_rx_desc; + if (devconf->num_tx_desc) + xd->nb_tx_desc = devconf->num_tx_desc; } /* @@ -548,29 +559,44 @@ dpdk_lib_init (dpdk_main_t * dm) dpdk_device_and_queue_t * dq; int q; - for (q = 0; q < xd->rx_q_used; q++) - { - int cpu = dm->input_cpu_first_index + next_cpu; - unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id; - - /* - * numa node for worker thread handling this queue - * needed for taking buffers from the right mempool - */ - vec_validate(xd->cpu_socket_id_by_queue, q); - xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore); - - /* - * construct vector of (device,queue) pairs for each worker thread - */ - vec_add2(dm->devices_by_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = q; - - next_cpu++; - if (next_cpu == dm->input_cpu_count) - next_cpu = 0; - } + if (devconf->workers) + { + int i; + q = 0; + clib_bitmap_foreach (i, devconf->workers, ({ + int cpu = dm->input_cpu_first_index + i; + unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id; + vec_validate(xd->cpu_socket_id_by_queue, q); + xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore); + vec_add2(dm->devices_by_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = q++; + })); + } + else + for (q = 0; q < xd->rx_q_used; q++) + { + int cpu = dm->input_cpu_first_index + next_cpu; + unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id; + + /* + * numa node for worker thread handling this queue + * needed for taking buffers from the right mempool + */ + vec_validate(xd->cpu_socket_id_by_queue, q); + xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore); + 
+ /* + * construct vector of (device,queue) pairs for each worker thread + */ + vec_add2(dm->devices_by_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = q; + + next_cpu++; + if (next_cpu == dm->input_cpu_count) + next_cpu = 0; + } vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, CLIB_CACHE_LINE_BYTES); @@ -620,22 +646,30 @@ dpdk_lib_init (dpdk_main_t * dm) * driver to dynamically change the mtu. If/when the * VIC firmware gets fixed, then this should be removed. */ - if (xd->pmd == VNET_DPDK_PMD_VICE || - xd->pmd == VNET_DPDK_PMD_ENIC) + if (xd->pmd == VNET_DPDK_PMD_ENIC) { /* * Initialize mtu to what has been set by CIMC in the firmware cfg. */ hi->max_packet_bytes = dev_info.max_rx_pktlen; - /* - * remove vlan tag from VIC port to fix VLAN0 issue. - * TODO Handle VLAN tagged traffic - */ - int vlan_off; - vlan_off = rte_eth_dev_get_vlan_offload(xd->device_index); - vlan_off |= ETH_VLAN_STRIP_OFFLOAD; - rte_eth_dev_set_vlan_offload(xd->device_index, vlan_off); + if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF) + vlan_strip = 1; /* remove vlan tag from VIC port by default */ + else + clib_warning("VLAN strip disabled for interface\n"); } + else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON) + vlan_strip = 1; + + if (vlan_strip) + { + int vlan_off; + vlan_off = rte_eth_dev_get_vlan_offload(xd->device_index); + vlan_off |= ETH_VLAN_STRIP_OFFLOAD; + if (rte_eth_dev_set_vlan_offload(xd->device_index, vlan_off) == 0) + clib_warning("VLAN strip enabled for interface\n"); + else + clib_warning("VLAN strip cannot be supported by interface\n"); + } #if RTE_VERSION < RTE_VERSION_NUM(16, 4, 0, 0) /* @@ -652,10 +686,10 @@ dpdk_lib_init (dpdk_main_t * dm) } #ifdef RTE_LIBRTE_KNI - if (dm->num_kni) { + if (dm->conf->num_kni) { clib_warning("Initializing KNI interfaces..."); - rte_kni_init(dm->num_kni); - for (i = 0; i < dm->num_kni; i++) + rte_kni_init(dm->conf->num_kni); + for (i = 0; i < dm->conf->num_kni; i++) { u8 
addr[6]; int j; @@ -730,9 +764,9 @@ dpdk_lib_init (dpdk_main_t * dm) } #endif - if (nb_desc > dm->num_mbufs) + if (nb_desc > dm->conf->num_mbufs) clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n", - dm->num_mbufs, nb_desc); + dm->conf->num_mbufs, nb_desc); /* init next vhost-user if index */ dm->next_vu_if_id = 0; @@ -741,15 +775,17 @@ dpdk_lib_init (dpdk_main_t * dm) } static void -dpdk_bind_devices_to_uio (dpdk_main_t * dm) +dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) { vlib_pci_main_t * pm = &pci_main; clib_error_t * error; vlib_pci_device_t * d; pci_config_header_t * c; u8 * pci_addr = 0; + int num_whitelisted = vec_len (conf->dev_confs); pool_foreach (d, pm->pci_devs, ({ + dpdk_device_config_t * devconf = 0; c = &d->config0.header; vec_reset_length (pci_addr); pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, &d->bus_address, 0); @@ -757,10 +793,15 @@ dpdk_bind_devices_to_uio (dpdk_main_t * dm) if (c->device_class != PCI_CLASS_NETWORK_ETHERNET) continue; - /* if whitelist exists process only whitelisted devices */ - if (dm->eth_if_whitelist && - !strstr ((char *) dm->eth_if_whitelist, (char *) pci_addr)) - continue; + if (num_whitelisted) + { + uword * p = hash_get (conf->device_config_index_by_pci_addr, d->bus_address.as_u32); + + if (!p) + continue; + + devconf = pool_elt_at_index (conf->dev_confs, p[0]); + } /* virtio */ if (c->vendor_id == 0x1af4 && c->device_id == 0x1000) @@ -785,124 +826,186 @@ dpdk_bind_devices_to_uio (dpdk_main_t * dm) continue; } - error = vlib_pci_bind_to_uio (d, (char *) dm->uio_driver_name); + error = vlib_pci_bind_to_uio (d, (char *) conf->uio_driver_name); if (error) { - if (!dm->eth_if_whitelist) - dm->eth_if_blacklist = format (dm->eth_if_blacklist, "%U ", - format_vlib_pci_addr, &d->bus_address); + if (devconf == 0) + { + pool_get (conf->dev_confs, devconf); + hash_set (conf->device_config_index_by_pci_addr, d->bus_address.as_u32, + devconf - conf->dev_confs); + devconf->pci_addr.as_u32 = 
d->bus_address.as_u32; + } + devconf->is_blacklisted = 1; clib_error_report (error); } })); vec_free (pci_addr); }
+/*
+ * Parse one "dev" configuration stanza into a per-device config entry.
+ *
+ * conf       - configuration state; its dev_confs pool and
+ *              device_config_index_by_pci_addr hash are updated here.
+ * pci_addr   - PCI address the stanza applies to. It is recorded in the
+ *              selected entry in both the default and the per-device case.
+ * input      - stanza body to parse, or 0 when the device was named without
+ *              options (the entry is then only registered).
+ * is_default - non-zero selects conf->default_devconf instead of a
+ *              per-device pool entry.
+ *
+ * Returns 0 on success. Otherwise returns a clib error for a duplicate PCI
+ * address, an unknown keyword, an error reported by unformat_rss_fn, or a
+ * worker list whose size differs from an explicitly given rx queue count.
+ *
+ * NOTE(review): fields that the stanza does not set keep whatever value
+ * pool_get handed back -- confirm that new pool entries are zeroed.
+ * NOTE(review): sub_input is not released in this function -- confirm
+ * whether it needs to be freed after unformat_rss_fn.
+ */
+static clib_error_t *
+dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, unformat_input_t * input, u8 is_default)
+{
+  clib_error_t * error = 0;
+  uword * p;
+  dpdk_device_config_t * devconf;
+  unformat_input_t sub_input;
+
+  if (is_default)
+    {
+      devconf = &conf->default_devconf;
+    }
+  else
+    {
+      p = hash_get (conf->device_config_index_by_pci_addr, pci_addr.as_u32);
+
+      /* a PCI address may be configured only once */
+      if (p)
+        return clib_error_return(0, "duplicate configuration for PCI address %U",
+                                 format_vlib_pci_addr, &pci_addr);
+
+      /* new device: allocate an entry and index it by PCI address */
+      pool_get (conf->dev_confs, devconf);
+      hash_set (conf->device_config_index_by_pci_addr, pci_addr.as_u32, devconf - conf->dev_confs);
+    }
+
+  devconf->pci_addr.as_u32 = pci_addr.as_u32;
+
+  /* device listed without a stanza body: nothing more to parse */
+  if (!input)
+    return 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "num-rx-queues %u", &devconf->num_rx_queues))
+        ;
+      else if (unformat (input, "num-tx-queues %u", &devconf->num_tx_queues))
+        ;
+      else if (unformat (input, "num-rx-desc %u", &devconf->num_rx_desc))
+        ;
+      else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc))
+        ;
+      else if (unformat (input, "workers %U", unformat_bitmap_list,
+                         &devconf->workers))
+        ;
+      else if (unformat (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input))
+        {
+          error = unformat_rss_fn(&sub_input, &devconf->rss_fn);
+          if (error)
+            break;
+        }
+      else if (unformat (input, "vlan-strip-offload off"))
+        devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF;
+      else if (unformat (input, "vlan-strip-offload on"))
+        devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON;
+      else
+        {
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, input);
+          break;
+        }
+    }
+
+  if (error)
+    return error;
+
+  if (devconf->workers)
+    {
+      uword n_workers = clib_bitmap_count_set_bits(devconf->workers);
+
+      /*
+       * With no explicit rx queue count, use one queue per listed worker;
+       * an explicit count must match the worker list exactly.
+       */
+      if (devconf->num_rx_queues == 0)
+        devconf->num_rx_queues = n_workers;
+      else if (n_workers != devconf->num_rx_queues)
+        error = clib_error_return (0, "%U: number of worker threads must be "
+                                   "equal to number of rx queues",
+                                   format_vlib_pci_addr, &pci_addr);
+    }
+
+  return error;
+}
+
 static clib_error_t * dpdk_config (vlib_main_t * vm, unformat_input_t * input) { clib_error_t * error = 0; dpdk_main_t * dm = &dpdk_main; + dpdk_config_main_t * conf = &dpdk_config_main; vlib_thread_main_t * tm = vlib_get_thread_main(); - vlib_node_runtime_t * rt = vlib_node_get_runtime (vm, dpdk_input_node.index); + dpdk_device_config_t * devconf; + vlib_pci_addr_t pci_addr; + unformat_input_t sub_input; u8 * s, * tmp = 0; - u8 * pci_dev_id = 0; u8 * rte_cmd = 0, * ethname = 0; u32 log_level; int ret, i; - char * fmt; -#ifdef NETMAP - int rxrings, txrings, rxslots, txslots, txburst; - char * nmnam; -#endif - unformat_input_t _in; - unformat_input_t * in = &_in; + int num_whitelisted = 0; u8 no_pci = 0; u8 no_huge = 0; u8 huge_dir = 0; u8 file_prefix = 0; u8 * socket_mem = 0; + conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword)); + // MATT-FIXME: inverted virtio-vhost logic to use virtio by default - dm->use_virtio_vhost = 1; + conf->use_virtio_vhost = 1; while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) { /* Prime the pump */ if (unformat (input, "no-hugetlb")) { - vec_add1 (dm->eal_init_args, (u8 *) "no-huge"); + vec_add1 (conf->eal_init_args, (u8 *) "no-huge"); no_huge = 1; } else if (unformat (input, "enable-tcp-udp-checksum")) - { - dm->buffer_flags_template &= - ~(IP_BUFFER_L4_CHECKSUM_CORRECT | IP_BUFFER_L4_CHECKSUM_COMPUTED); - } + conf->enable_tcp_udp_checksum = 1; else if (unformat (input, "decimal-interface-names")) - dm->interface_name_format_decimal = 1; + conf->interface_name_format_decimal = 1; else if (unformat (input, "no-multi-seg")) - dm->no_multi_seg = 1; + conf->no_multi_seg = 1; - else if (unformat (input, "dev %s", &pci_dev_id)) + else if (unformat
(input, "dev default %U", unformat_vlib_cli_sub_input, + &sub_input)) { - if (dm->eth_if_whitelist) - { - /* - * Don't add duplicate device id's. - */ - if (strstr ((char *)dm->eth_if_whitelist, (char *)pci_dev_id)) - continue; - - _vec_len (dm->eth_if_whitelist) -= 1; // chomp trailing NULL. - dm->eth_if_whitelist = format (dm->eth_if_whitelist, " %s%c", - pci_dev_id, 0); - } - else - dm->eth_if_whitelist = format (0, "%s%c", pci_dev_id, 0); + error = dpdk_device_config (conf, (vlib_pci_addr_t) (u32) ~1, &sub_input, 1); + + if (error) + return error; } + else if (unformat (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr, + unformat_vlib_cli_sub_input, &sub_input)) + { + error = dpdk_device_config (conf, pci_addr, &sub_input, 0); -#ifdef NETMAP - else if (unformat(input, "netmap %s/%d:%d/%d:%d/%d", - &nmname, &rxrings, &rxslots, &txrings, &txslots, &txburst)) { - char * rv; - rv = (char *) - eth_nm_args(nmname, rxrings, rxslots, txrings, txslots, txburst); - if (rv) { - error = clib_error_return (0, "%s", rv); - goto done; - } - }else if (unformat(input, "netmap %s", &nmname)) { - char * rv; - rv = (char *) - eth_nm_args(nmname, 0, 0, 0, 0, 0); - if (rv) { - error = clib_error_return (0, "%s", rv); - goto done; - } - } -#endif + if (error) + return error; - else if (unformat (input, "num-mbufs %d", &dm->num_mbufs)) - ; - else if (unformat (input, "max-tx-queues %d", &dm->max_tx_queues)) + num_whitelisted++; + } + else if (unformat (input, "dev %U", unformat_vlib_pci_addr, &pci_addr)) + { + error = dpdk_device_config (conf, pci_addr, 0, 0); + + if (error) + return error; + + num_whitelisted++; + } + else if (unformat (input, "num-mbufs %d", &conf->num_mbufs)) ; - else if (unformat (input, "kni %d", &dm->num_kni)) + else if (unformat (input, "kni %d", &conf->num_kni)) ; - else if (unformat (input, "uio-driver %s", &dm->uio_driver_name)) + else if (unformat (input, "uio-driver %s", &conf->uio_driver_name)) ; else if (unformat (input, "socket-mem %s", 
&socket_mem)) ; - else if (unformat (input, "vhost-user-coalesce-frames %d", &dm->vhost_coalesce_frames)) + else if (unformat (input, "vhost-user-coalesce-frames %d", &conf->vhost_coalesce_frames)) ; - else if (unformat (input, "vhost-user-coalesce-time %f", &dm->vhost_coalesce_time)) + else if (unformat (input, "vhost-user-coalesce-time %f", &conf->vhost_coalesce_time)) ; else if (unformat (input, "enable-vhost-user")) - dm->use_virtio_vhost = 0; - else if (unformat (input, "rss %d", &dm->use_rss)) + conf->use_virtio_vhost = 0; + else if (unformat (input, "poll-sleep %d", &dm->poll_sleep)) ; #define _(a) \ @@ -911,7 +1014,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) if (!strncmp(#a, "no-pci", 6)) \ no_pci = 1; \ tmp = format (0, "--%s%c", #a, 0); \ - vec_add1 (dm->eal_init_args, tmp); \ + vec_add1 (conf->eal_init_args, tmp); \ } foreach_eal_double_hyphen_predicate_arg #undef _ @@ -924,9 +1027,9 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) else if (!strncmp(#a, "file-prefix", 11)) \ file_prefix = 1; \ tmp = format (0, "--%s%c", #a, 0); \ - vec_add1 (dm->eal_init_args, tmp); \ + vec_add1 (conf->eal_init_args, tmp); \ vec_add1 (s, 0); \ - vec_add1 (dm->eal_init_args, s); \ + vec_add1 (conf->eal_init_args, s); \ } foreach_eal_double_hyphen_arg #undef _ @@ -935,9 +1038,9 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) else if (unformat(input, #a " %s", &s)) \ { \ tmp = format (0, "-%s%c", #b, 0); \ - vec_add1 (dm->eal_init_args, tmp); \ + vec_add1 (conf->eal_init_args, tmp); \ vec_add1 (s, 0); \ - vec_add1 (dm->eal_init_args, s); \ + vec_add1 (conf->eal_init_args, s); \ } foreach_eal_single_hyphen_arg #undef _ @@ -946,10 +1049,10 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) else if (unformat(input, #a " %s", &s)) \ { \ tmp = format (0, "-%s%c", #b, 0); \ - vec_add1 (dm->eal_init_args, tmp); \ + vec_add1 (conf->eal_init_args, tmp); \ vec_add1 (s, 0); \ - vec_add1 (dm->eal_init_args, s); \ - dm->a##_set_manually 
= 1; \ + vec_add1 (conf->eal_init_args, s); \ + conf->a##_set_manually = 1; \ } foreach_eal_single_hyphen_mandatory_arg #undef _ @@ -965,8 +1068,8 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) } } - if (!dm->uio_driver_name) - dm->uio_driver_name = format (0, "igb_uio%c", 0); + if (!conf->uio_driver_name) + conf->uio_driver_name = format (0, "igb_uio%c", 0); /* * Use 1G huge pages if available. @@ -1012,7 +1115,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( { vec_validate(mem_by_socket, c); - mem_by_socket[c] = 512; /* default per-socket mem */ + mem_by_socket[c] = 256; /* default per-socket mem */ } )); } @@ -1116,15 +1219,15 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) } tmp = format (0, "--huge-dir%c", 0); - vec_add1 (dm->eal_init_args, tmp); + vec_add1 (conf->eal_init_args, tmp); tmp = format (0, "%s%c", DEFAULT_HUGE_DIR, 0); - vec_add1 (dm->eal_init_args, tmp); + vec_add1 (conf->eal_init_args, tmp); if (!file_prefix) { tmp = format (0, "--file-prefix%c", 0); - vec_add1 (dm->eal_init_args, tmp); + vec_add1 (conf->eal_init_args, tmp); tmp = format (0, "vpp%c", 0); - vec_add1 (dm->eal_init_args, tmp); + vec_add1 (conf->eal_init_args, tmp); } } @@ -1135,7 +1238,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) return error; /* I'll bet that -c and -n must be the first and second args... 
*/ - if (!dm->coremask_set_manually) + if (!conf->coremask_set_manually) { vlib_thread_registration_t * tr; uword * coremask = 0; @@ -1150,68 +1253,67 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) coremask = clib_bitmap_or(coremask, tr->coremask); } - vec_insert (dm->eal_init_args, 2, 1); - dm->eal_init_args[1] = (u8 *) "-c"; + vec_insert (conf->eal_init_args, 2, 1); + conf->eal_init_args[1] = (u8 *) "-c"; tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0); - dm->eal_init_args[2] = tmp; + conf->eal_init_args[2] = tmp; clib_bitmap_free(coremask); } - if (!dm->nchannels_set_manually) + if (!conf->nchannels_set_manually) { - vec_insert (dm->eal_init_args, 2, 3); - dm->eal_init_args[3] = (u8 *) "-n"; - tmp = format (0, "%d", dm->nchannels); - dm->eal_init_args[4] = tmp; + vec_insert (conf->eal_init_args, 2, 3); + conf->eal_init_args[3] = (u8 *) "-n"; + tmp = format (0, "%d", conf->nchannels); + conf->eal_init_args[4] = tmp; } if (no_pci == 0 && geteuid() == 0) - dpdk_bind_devices_to_uio(dm); + dpdk_bind_devices_to_uio(conf); - /* - * If there are whitelisted devices, - * add the whitelist option & device list to the dpdk arg list... - */ - if (dm->eth_if_whitelist) - { - unformat_init_string (in, (char *)dm->eth_if_whitelist, - vec_len(dm->eth_if_whitelist) - 1); - fmt = "-w%c"; - } +#define _(x) \ + if (devconf->x == 0 && conf->default_devconf.x > 0) \ + devconf->x = conf->default_devconf.x ; - /* - * Otherwise add the blacklisted devices to the dpdk arg list. 
- */ - else - { - unformat_init_string (in, (char *)dm->eth_if_blacklist, - vec_len(dm->eth_if_blacklist) - 1); - fmt = "-b%c"; - } + pool_foreach (devconf, conf->dev_confs, ({ - while (unformat_check_input (in) != UNFORMAT_END_OF_INPUT) - { - tmp = format (0, fmt, 0); - vec_add1 (dm->eal_init_args, tmp); - unformat (in, "%s", &pci_dev_id); - vec_add1 (dm->eal_init_args, pci_dev_id); - } + /* default per-device config items */ + foreach_dpdk_device_config_item + + /* add DPDK EAL whitelist/blacklist entry */ + if (num_whitelisted > 0 && devconf->is_blacklisted == 0) + { + tmp = format (0, "-w%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); + vec_add1 (conf->eal_init_args, tmp); + } + else if (num_whitelisted == 0 && devconf->is_blacklisted != 0) + { + tmp = format (0, "-b%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); + vec_add1 (conf->eal_init_args, tmp); + } + })); + +#undef _ /* set master-lcore */ tmp = format (0, "--master-lcore%c", 0); - vec_add1 (dm->eal_init_args, tmp); + vec_add1 (conf->eal_init_args, tmp); tmp = format (0, "%u%c", tm->main_lcore, 0); - vec_add1 (dm->eal_init_args, tmp); + vec_add1 (conf->eal_init_args, tmp); /* set socket-mem */ tmp = format (0, "--socket-mem%c", 0); - vec_add1 (dm->eal_init_args, tmp); + vec_add1 (conf->eal_init_args, tmp); tmp = format (0, "%s%c", socket_mem, 0); - vec_add1 (dm->eal_init_args, tmp); + vec_add1 (conf->eal_init_args, tmp); /* NULL terminate the "argv" vector, in case of stupidity */ - vec_add1 (dm->eal_init_args, 0); - _vec_len(dm->eal_init_args) -= 1; + vec_add1 (conf->eal_init_args, 0); + _vec_len(conf->eal_init_args) -= 1; /* Set up DPDK eal and packet mbuf pool early. 
*/ @@ -1219,14 +1321,14 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) rte_set_log_level (log_level); - vm = dm->vlib_main; + vm = vlib_get_main (); /* make copy of args as rte_eal_init tends to mess up with arg array */ - for (i = 1; i < vec_len(dm->eal_init_args); i++) - dm->eal_init_args_str = format(dm->eal_init_args_str, "%s ", - dm->eal_init_args[i]); + for (i = 1; i < vec_len(conf->eal_init_args); i++) + conf->eal_init_args_str = format(conf->eal_init_args_str, "%s ", + conf->eal_init_args[i]); - ret = rte_eal_init(vec_len(dm->eal_init_args), (char **) dm->eal_init_args); + ret = rte_eal_init(vec_len(conf->eal_init_args), (char **) conf->eal_init_args); /* lazy umount hugepages */ umount2(DEFAULT_HUGE_DIR, MNT_DETACH); @@ -1239,22 +1341,18 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) rte_dump_physmem_layout(stdout); /* main thread 1st */ - error = vlib_buffer_pool_create(vm, dm->num_mbufs, rte_socket_id()); + error = vlib_buffer_pool_create(vm, conf->num_mbufs, rte_socket_id()); if (error) return error; for (i = 0; i < RTE_MAX_LCORE; i++) { - error = vlib_buffer_pool_create(vm, dm->num_mbufs, + error = vlib_buffer_pool_create(vm, conf->num_mbufs, rte_lcore_to_socket_id(i)); if (error) return error; } - if (dm->use_rss) - rt->function = dpdk_input_rss_multiarch_select(); - else - rt->function = dpdk_input_multiarch_select(); done: return error; } @@ -1423,10 +1521,7 @@ dpdk_process (vlib_main_t * vm, if (tm->n_vlib_mains == 1) vlib_node_set_state (vm, dpdk_input_node.index, VLIB_NODE_STATE_POLLING); - else if (tm->main_thread_is_io_node) - vlib_node_set_state (vm, dpdk_io_input_node.index, - VLIB_NODE_STATE_POLLING); - else if (!dm->have_io_threads) + else for (i=0; i < tm->n_vlib_mains; i++) if (vec_len(dm->devices_by_cpu[i]) > 0) vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, @@ -1438,7 +1533,7 @@ dpdk_process (vlib_main_t * vm, dpdk_vhost_user_process_init(&vu_state); - dm->io_thread_release = 1; + 
tm->worker_thread_release = 1; f64 now = vlib_time_now (vm); vec_foreach (xd, dm->devices) @@ -1586,6 +1681,7 @@ dpdk_init (vlib_main_t * vm) dm->vlib_main = vm; dm->vnet_main = vnet_get_main(); + dm->conf = &dpdk_config_main; ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input"); if (ei == 0) @@ -1593,9 +1689,9 @@ dpdk_init (vlib_main_t * vm) dm->ethernet_input_node_index = ei->index; - dm->nchannels = 4; - dm->num_mbufs = dm->num_mbufs ? dm->num_mbufs : NB_MBUF; - vec_add1 (dm->eal_init_args, (u8 *) "vnet"); + dm->conf->nchannels = 4; + dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF; + vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet"); dm->dpdk_device_by_kni_port_id = hash_create (0, sizeof (uword)); dm->vu_sw_if_index_by_listener_fd = hash_create (0, sizeof (uword)); @@ -1612,8 +1708,8 @@ dpdk_init (vlib_main_t * vm) DPDK_EFD_DEFAULT_CONSEC_FULL_FRAMES_HI_THRESH; /* vhost-user coalescence frames defaults */ - dm->vhost_coalesce_frames = 32; - dm->vhost_coalesce_time = 1e-3; + dm->conf->vhost_coalesce_frames = 32; + dm->conf->vhost_coalesce_time = 1e-3; /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */ dm->buffer_flags_template =