X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=vnet%2Fvnet%2Fdevices%2Fdpdk%2Finit.c;h=7045e454048863ed3ba87fd2e5dcb036883117ef;hb=f530a5526a1f501462ff4247a5bb38e80c13678d;hp=7b657209ccb4ef99c94fdcaef890a985c7012504;hpb=614480262738dc6de382023a03656e1045d17f04;p=vpp.git diff --git a/vnet/vnet/devices/dpdk/init.c b/vnet/vnet/devices/dpdk/init.c index 7b657209ccb..7045e454048 100644 --- a/vnet/vnet/devices/dpdk/init.c +++ b/vnet/vnet/devices/dpdk/init.c @@ -221,7 +221,6 @@ dpdk_device_lock_init (dpdk_device_t * xd) CLIB_CACHE_LINE_BYTES); memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES); } - xd->need_txlock = 1; } void @@ -233,7 +232,6 @@ dpdk_device_lock_free (dpdk_device_t * xd) clib_mem_free ((void *) xd->lockp[q]); vec_free (xd->lockp); xd->lockp = 0; - xd->need_txlock = 0; } static clib_error_t * @@ -251,10 +249,10 @@ dpdk_lib_init (dpdk_main_t * dm) dpdk_device_t *xd; vlib_pci_addr_t last_pci_addr; u32 last_pci_addr_port = 0; - vlib_thread_registration_t *tr; - uword *p; + vlib_thread_registration_t *tr, *tr_hqos; + uword *p, *p_hqos; - u32 next_cpu = 0; + u32 next_cpu = 0, next_hqos_cpu = 0; u8 af_packet_port_id = 0; last_pci_addr.as_u32 = ~0; @@ -280,6 +278,30 @@ dpdk_lib_init (dpdk_main_t * dm) vec_validate_aligned (dm->workers, tm->n_vlib_mains - 1, CLIB_CACHE_LINE_BYTES); + dm->hqos_cpu_first_index = 0; + dm->hqos_cpu_count = 0; + + /* find out which cpus will be used for I/O TX */ + p_hqos = hash_get_mem (tm->thread_registrations_by_name, "hqos-threads"); + tr_hqos = p_hqos ? 
(vlib_thread_registration_t *) p_hqos[0] : 0; + + if (tr_hqos && tr_hqos->count > 0) + { + dm->hqos_cpu_first_index = tr_hqos->first_index; + dm->hqos_cpu_count = tr_hqos->count; + } + + vec_validate_aligned (dm->devices_by_hqos_cpu, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + vec_validate_aligned (dm->hqos_threads, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + +#ifdef NETMAP + if (rte_netmap_probe () < 0) + return clib_error_return (0, "rte netmap probe failed"); +#endif + nports = rte_eth_dev_count (); if (nports < 1) { @@ -405,7 +427,7 @@ dpdk_lib_init (dpdk_main_t * dm) else xd->rx_q_used = 1; - xd->dev_type = VNET_DPDK_DEV_ETH; + xd->flags |= DPDK_DEVICE_FLAG_PMD; /* workaround for drivers not setting driver_name */ if ((!dev_info.driver_name) && (dev_info.pci_dev)) @@ -586,17 +608,6 @@ dpdk_lib_init (dpdk_main_t * dm) } } -#if RTE_VERSION < RTE_VERSION_NUM(16, 4, 0, 0) - /* - * Older VMXNET3 driver doesn't support jumbo / multi-buffer pkts - */ - if (xd->pmd == VNET_DPDK_PMD_VMXNET3) - { - xd->port_conf.rxmode.max_rx_pkt_len = 1518; - xd->port_conf.rxmode.jumbo_frame = 0; - } -#endif - if (xd->pmd == VNET_DPDK_PMD_AF_PACKET) { f64 now = vlib_time_now (vm); @@ -628,7 +639,7 @@ dpdk_lib_init (dpdk_main_t * dm) /* *INDENT-OFF* */ clib_bitmap_foreach (i, devconf->workers, ({ int cpu = dm->input_cpu_first_index + i; - unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id; + unsigned lcore = vlib_worker_threads[cpu].lcore_id; vec_validate(xd->cpu_socket_id_by_queue, q); xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore); vec_add2(dm->devices_by_cpu[cpu], dq, 1); @@ -641,7 +652,7 @@ dpdk_lib_init (dpdk_main_t * dm) for (q = 0; q < xd->rx_q_used; q++) { int cpu = dm->input_cpu_first_index + next_cpu; - unsigned lcore = vlib_worker_threads[cpu].dpdk_lcore_id; + unsigned lcore = vlib_worker_threads[cpu].lcore_id; /* * numa node for worker thread handling this queue @@ -662,11 +673,47 @@ dpdk_lib_init (dpdk_main_t * dm) next_cpu = 0; } + + if 
(devconf->hqos_enabled) + { + xd->flags |= DPDK_DEVICE_FLAG_HQOS; + + if (devconf->hqos.hqos_thread_valid) + { + int cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread; + + if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count) + return clib_error_return (0, "invalid HQoS thread index"); + + vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = 0; + } + else + { + int cpu = dm->hqos_cpu_first_index + next_hqos_cpu; + + if (dm->hqos_cpu_count == 0) + return clib_error_return (0, "no HQoS threads available"); + + vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = 0; + + next_hqos_cpu++; + if (next_hqos_cpu == dm->hqos_cpu_count) + next_hqos_cpu = 0; + + devconf->hqos.hqos_thread_valid = 1; + devconf->hqos.hqos_thread = cpu; + } + } + vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, CLIB_CACHE_LINE_BYTES); for (j = 0; j < tm->n_vlib_mains; j++) { - vec_validate_ha (xd->tx_vectors[j], DPDK_TX_RING_SIZE, + vec_validate_ha (xd->tx_vectors[j], xd->nb_tx_desc, sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); vec_reset_length (xd->tx_vectors[j]); } @@ -685,6 +732,13 @@ dpdk_lib_init (dpdk_main_t * dm) if (rv) return rv; + if (devconf->hqos_enabled) + { + rv = dpdk_port_setup_hqos (xd, &devconf->hqos); + if (rv < 0) + return rv; + } + /* count the number of descriptors used for this device */ nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used; @@ -729,101 +783,16 @@ dpdk_lib_init (dpdk_main_t * dm) clib_warning ("VLAN strip cannot be supported by interface\n"); } -#if RTE_VERSION < RTE_VERSION_NUM(16, 4, 0, 0) - /* - * Older VMXNET3 driver doesn't support jumbo / multi-buffer pkts - */ - else if (xd->pmd == VNET_DPDK_PMD_VMXNET3) - hi->max_packet_bytes = 1518; -#endif - hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t); rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); } 
-#ifdef RTE_LIBRTE_KNI - if (dm->conf->num_kni) - { - clib_warning ("Initializing KNI interfaces..."); - rte_kni_init (dm->conf->num_kni); - for (i = 0; i < dm->conf->num_kni; i++) - { - u8 addr[6]; - int j; - - /* Create vnet interface */ - vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); - xd->dev_type = VNET_DPDK_DEV_KNI; - - xd->device_index = xd - dm->devices; - ASSERT (nports + i == xd->device_index); - xd->per_interface_next_index = ~0; - xd->kni_port_id = i; - xd->cpu_socket = -1; - hash_set (dm->dpdk_device_by_kni_port_id, i, xd - dm->devices); - xd->rx_q_used = 1; - - /* assign interface to input thread */ - dpdk_device_and_queue_t *dq; - vec_add2 (dm->devices_by_cpu[dm->input_cpu_first_index], dq, 1); - dq->device = xd->device_index; - dq->queue_id = 0; - - vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, - CLIB_CACHE_LINE_BYTES); - for (j = 0; j < tm->n_vlib_mains; j++) - { - vec_validate_ha (xd->tx_vectors[j], DPDK_TX_RING_SIZE, - sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->tx_vectors[j]); - } - - vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, - CLIB_CACHE_LINE_BYTES); - for (j = 0; j < xd->rx_q_used; j++) - { - vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1, - CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->rx_vectors[j]); - } - - /* FIXME Set up one TX-queue per worker thread */ - - { - f64 now = vlib_time_now (vm); - u32 rnd; - rnd = (u32) (now * 1e6); - rnd = random_u32 (&rnd); - - clib_memcpy (addr + 2, &rnd, sizeof (rnd)); - addr[0] = 2; - addr[1] = 0xfe; - } - - error = ethernet_register_interface - (dm->vnet_main, dpdk_device_class.index, xd->device_index, - /* ethernet address */ addr, - &xd->vlib_hw_if_index, dpdk_flag_change); - - if (error) - return error; - - sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index); - xd->vlib_sw_if_index = sw->sw_if_index; - hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index); - } - } -#endif - if (nb_desc > 
dm->conf->num_mbufs) clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n", dm->conf->num_mbufs, nb_desc); - /* init next vhost-user if index */ - dm->next_vu_if_id = 0; - return 0; } @@ -927,6 +896,8 @@ dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, } devconf->pci_addr.as_u32 = pci_addr.as_u32; + devconf->hqos_enabled = 0; + dpdk_device_config_hqos_default (&devconf->hqos); if (!input) return 0; @@ -956,6 +927,19 @@ dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF; else if (unformat (input, "vlan-strip-offload on")) devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON; + else + if (unformat + (input, "hqos %U", unformat_vlib_cli_sub_input, &sub_input)) + { + devconf->hqos_enabled = 1; + error = unformat_hqos (&sub_input, &devconf->hqos); + if (error) + break; + } + else if (unformat (input, "hqos")) + { + devconf->hqos_enabled = 1; + } else { error = clib_error_return (0, "unknown input `%U'", @@ -1004,9 +988,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword)); - // MATT-FIXME: inverted virtio-vhost logic to use virtio by default - conf->use_virtio_vhost = 1; - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { /* Prime the pump */ @@ -1064,18 +1045,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) ; else if (unformat (input, "socket-mem %s", &socket_mem)) ; - else - if (unformat - (input, "vhost-user-coalesce-frames %d", - &conf->vhost_coalesce_frames)) - ; - else - if (unformat - (input, "vhost-user-coalesce-time %f", - &conf->vhost_coalesce_time)) - ; - else if (unformat (input, "enable-vhost-user")) - conf->use_virtio_vhost = 0; else if (unformat (input, "no-pci")) { no_pci = 1; @@ -1131,7 +1100,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "default")) ; - else if (unformat (input, " ")) + 
else if (unformat_skip_white_space (input)) ; else { @@ -1199,57 +1168,22 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) /* *INDENT-OFF* */ clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( { - u32 pages_avail, page_size, mem; - u8 *s = 0; - u8 *p = 0; - char * numa_path = "/sys/devices/system/node/node%u/"; - char * nonnuma_path = "/sys/kernel/mm/"; - char * suffix = "hugepages/hugepages-%ukB/free_hugepages%c"; - char * path = NULL; - struct stat sb_numa, sb_nonnuma; - - p = format(p, numa_path, c); - if (stat(numa_path, &sb_numa) < 0) - sb_numa.st_mode = 0; - - if (stat(nonnuma_path, &sb_nonnuma) < 0) - sb_nonnuma.st_mode = 0; - - if (S_ISDIR(sb_numa.st_mode)) { - path = (char*)format((u8*)path, "%s%s", p, suffix); - } else if (S_ISDIR(sb_nonnuma.st_mode)) { - path = (char*)format((u8*)path, "%s%s", nonnuma_path, suffix); - } else { - use_1g = 0; - use_2m = 0; - vec_free(p); - break; - } + int pages_avail, page_size, mem; vec_validate(mem_by_socket, c); mem = mem_by_socket[c]; page_size = 1024; - pages_avail = 0; - s = format (s, path, page_size * 1024, 0); - vlib_sysfs_read ((char *) s, "%u", &pages_avail); - vec_reset_length (s); + pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); - if (page_size * pages_avail < mem) + if (pages_avail < 0 || page_size * pages_avail < mem) use_1g = 0; page_size = 2; - pages_avail = 0; - s = format (s, path, page_size * 1024, 0); - vlib_sysfs_read ((char *) s, "%u", &pages_avail); - vec_reset_length (s); + pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); - if (page_size * pages_avail < mem) + if (pages_avail < 0 || page_size * pages_avail < mem) use_2m = 0; - - vec_free(s); - vec_free(p); - vec_free(path); })); /* *INDENT-ON* */ @@ -1458,7 +1392,7 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now) u8 hw_flags_chg = 0; /* only update link state for PMD interfaces */ - if (xd->dev_type != VNET_DPDK_DEV_ETH) + if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) return; 
xd->time_last_link_update = now ? now : xd->time_last_link_update; @@ -1512,7 +1446,6 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now) break; } } -#if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0) if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed)) { hw_flags_chg = 1; @@ -1540,35 +1473,6 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now) break; } } -#else - if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed)) - { - hw_flags_chg = 1; - switch (xd->link.link_speed) - { - case ETH_LINK_SPEED_10: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M; - break; - case ETH_LINK_SPEED_100: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M; - break; - case ETH_LINK_SPEED_1000: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G; - break; - case ETH_LINK_SPEED_10000: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G; - break; - case ETH_LINK_SPEED_40G: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G; - break; - case 0: - break; - default: - clib_warning ("unknown link speed %d", xd->link.link_speed); - break; - } - } -#endif if (hw_flags_chg) { if (LINK_STATE_ELOGS) @@ -1603,9 +1507,6 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) ethernet_main_t *em = &ethernet_main; dpdk_device_t *xd; vlib_thread_main_t *tm = vlib_get_thread_main (); -#if DPDK_VHOST_USER - void *vu_state; -#endif int i; error = dpdk_lib_init (dm); @@ -1630,10 +1531,6 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) if (error) clib_error_report (error); -#if DPDK_VHOST_USER - dpdk_vhost_user_process_init (&vu_state); -#endif - tm->worker_thread_release = 1; f64 now = vlib_time_now (vm); @@ -1756,18 +1653,9 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval) dpdk_update_link_state (xd, now); -#if DPDK_VHOST_USER - if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER) - if (dpdk_vhost_user_process_if (vm, xd, vu_state) != 0) - continue; 
-#endif } } -#if DPDK_VHOST_USER - dpdk_vhost_user_process_cleanup (vu_state); -#endif - return 0; } @@ -1844,10 +1732,6 @@ dpdk_init (vlib_main_t * vm) dm->efd.consec_full_frames_hi_thresh = DPDK_EFD_DEFAULT_CONSEC_FULL_FRAMES_HI_THRESH; - /* vhost-user coalescence frames defaults */ - dm->conf->vhost_coalesce_frames = 32; - dm->conf->vhost_coalesce_time = 1e-3; - /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */ dm->buffer_flags_template = (VLIB_BUFFER_TOTAL_LENGTH_VALID