X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Fdpdk%2Fdevice%2Fcommon.c;h=dc6b0c1c9529049f45f56326c87795226fa7530d;hb=006c071b0a5f0bfc1302bad17cf018792ea2f712;hp=b3de5cf652cbdedaa0bdb6a0ebda95d85003c701;hpb=70e808b1bfc25d1664ddb917457040b419e123c8;p=vpp.git diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c index b3de5cf652c..dc6b0c1c952 100644 --- a/src/plugins/dpdk/device/common.c +++ b/src/plugins/dpdk/device/common.c @@ -29,11 +29,26 @@ #include #include +/* DPDK TX offload to vnet hw interface caps mapppings */ +static struct +{ + u64 offload; + vnet_hw_if_caps_t caps; +} tx_off_caps_map[] = { + { RTE_ETH_TX_OFFLOAD_IPV4_CKSUM, VNET_HW_IF_CAP_TX_IP4_CKSUM }, + { RTE_ETH_TX_OFFLOAD_TCP_CKSUM, VNET_HW_IF_CAP_TX_TCP_CKSUM }, + { RTE_ETH_TX_OFFLOAD_UDP_CKSUM, VNET_HW_IF_CAP_TX_UDP_CKSUM }, + { RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM, VNET_HW_IF_CAP_TX_IP4_OUTER_CKSUM }, + { RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM, VNET_HW_IF_CAP_TX_UDP_OUTER_CKSUM }, + { RTE_ETH_TX_OFFLOAD_TCP_TSO, VNET_HW_IF_CAP_TCP_GSO }, + { RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO, VNET_HW_IF_CAP_VXLAN_TNL_GSO } +}; + void dpdk_device_error (dpdk_device_t * xd, char *str, int rv) { - dpdk_log_err ("Interface %U error %d: %s", - format_dpdk_device_name, xd->port_id, rv, rte_strerror (rv)); + dpdk_log_err ("Interface %U error %d: %s", format_dpdk_device_name, + xd->device_index, rv, rte_strerror (rv)); xd->errors = clib_error_return (xd->errors, "%s[port:%d, errno:%d]: %s", str, xd->port_id, rv, rte_strerror (rv)); } @@ -43,11 +58,14 @@ dpdk_device_setup (dpdk_device_t * xd) { vlib_main_t *vm = vlib_get_main (); vnet_main_t *vnm = vnet_get_main (); - vlib_thread_main_t *tm = vlib_get_thread_main (); vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->sw_if_index); vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->hw_if_index); + u16 buf_sz = vlib_buffer_get_default_data_size (vm); + vnet_hw_if_caps_change_t caps = {}; struct rte_eth_dev_info dev_info; - u64 bitmap; + struct rte_eth_conf conf = {}; + u64 rxo, txo; + u32 max_frame_size; int rv; int j; @@ -62,66 +80,148 @@ dpdk_device_setup (dpdk_device_t * xd) dpdk_device_stop (xd); } - /* Enable flow director when flows exist */ - if (xd->pmd == VNET_DPDK_PMD_I40E) + rte_eth_dev_info_get (xd->port_id, &dev_info); + + dpdk_log_debug ("[%u] configuring device %U", xd->port_id, + format_dpdk_rte_device, dev_info.device); + + /* create rx and tx offload wishlist */ + rxo = RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; + txo = 0; + + if (xd->conf.enable_tcp_udp_checksum) + rxo |= RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM; + + if (xd->conf.disable_tx_checksum_offload == 0 && + xd->conf.enable_outer_checksum_offload) + txo |= + RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM; + + if (xd->conf.disable_tx_checksum_offload == 0) + txo |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM | + RTE_ETH_TX_OFFLOAD_UDP_CKSUM; + + if (xd->conf.disable_multi_seg == 0) { - if ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) != 0) - xd->port_conf.fdir_conf.mode = RTE_FDIR_MODE_PERFECT; - else - xd->port_conf.fdir_conf.mode = RTE_FDIR_MODE_NONE; + txo |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS; + rxo |= RTE_ETH_RX_OFFLOAD_SCATTER; +#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0) + rxo |= DEV_RX_OFFLOAD_JUMBO_FRAME; +#endif } - rte_eth_dev_info_get (xd->port_id, &dev_info); - - bitmap = xd->port_conf.txmode.offloads & ~dev_info.tx_offload_capa; - if (bitmap) + if (xd->conf.enable_lro) + rxo |= RTE_ETH_RX_OFFLOAD_TCP_LRO; + + /* per-device offload config */ + if (xd->conf.enable_tso) + txo |= RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_TSO | + RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO; + + if (xd->conf.disable_rx_scatter) + rxo &= ~RTE_ETH_RX_OFFLOAD_SCATTER; + + /* mask unsupported offloads */ + rxo &= dev_info.rx_offload_capa; + txo &= dev_info.tx_offload_capa; + + dpdk_log_debug ("[%u] Supported RX offloads: %U", xd->port_id, + format_dpdk_rx_offload_caps, dev_info.rx_offload_capa); + dpdk_log_debug ("[%u] Configured RX offloads: %U", xd->port_id, + format_dpdk_rx_offload_caps, rxo); + dpdk_log_debug ("[%u] Supported TX offloads: %U", xd->port_id, + format_dpdk_tx_offload_caps, dev_info.tx_offload_capa); + dpdk_log_debug ("[%u] Configured TX offloads: %U", xd->port_id, + format_dpdk_tx_offload_caps, txo); + + /* finalize configuration */ + conf.rxmode.offloads = rxo; + conf.txmode.offloads = txo; + if (rxo & RTE_ETH_RX_OFFLOAD_TCP_LRO) + conf.rxmode.max_lro_pkt_size = xd->conf.max_lro_pkt_size; + + if (xd->conf.enable_lsc_int) + conf.intr_conf.lsc = 1; + if (xd->conf.enable_rxq_int) + conf.intr_conf.rxq = 1; + + conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE; + if (xd->conf.n_rx_queues > 1) { - dpdk_log_warn ("unsupported tx offloads requested on port %u: %U", - xd->port_id, format_dpdk_tx_offload_caps, bitmap); - xd->port_conf.txmode.offloads ^= bitmap; + if (xd->conf.disable_rss == 0) + { + conf.rxmode.mq_mode = RTE_ETH_MQ_RX_RSS; + conf.rx_adv_conf.rss_conf.rss_hf = xd->conf.rss_hf; + } } - bitmap = xd->port_conf.rxmode.offloads & ~dev_info.rx_offload_capa; - if (bitmap) +#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0) + if (rxo & DEV_RX_OFFLOAD_JUMBO_FRAME) + { + conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen; + xd->max_supported_frame_size = dev_info.max_rx_pktlen; + } + else { - dpdk_log_warn ("unsupported rx offloads requested on port %u: %U", - xd->port_id, format_dpdk_rx_offload_caps, bitmap); - xd->port_conf.rxmode.offloads ^= bitmap; + xd->max_supported_frame_size = + clib_min (1500 + xd->driver_frame_overhead, buf_sz); } +#else + if (xd->conf.disable_multi_seg) + xd->max_supported_frame_size = clib_min (dev_info.max_rx_pktlen, buf_sz); + else + xd->max_supported_frame_size = dev_info.max_rx_pktlen; +#endif + + max_frame_size = clib_min (xd->max_supported_frame_size, + ethernet_main.default_mtu + hi->frame_overhead); - rv = rte_eth_dev_configure (xd->port_id, xd->rx_q_used, - xd->tx_q_used, &xd->port_conf); +#if RTE_VERSION >= RTE_VERSION_NUM(21, 11, 0, 0) + conf.rxmode.mtu = max_frame_size - xd->driver_frame_overhead; +#endif - if (rv < 0) +retry: + rv = rte_eth_dev_configure (xd->port_id, xd->conf.n_rx_queues, + xd->conf.n_tx_queues, &conf); + if (rv < 0 && conf.intr_conf.rxq) { - dpdk_device_error (xd, "rte_eth_dev_configure", rv); - goto error; + conf.intr_conf.rxq = 0; + goto retry; } - vec_validate_aligned (xd->tx_queues, xd->tx_q_used - 1, +#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0) + rte_eth_dev_set_mtu (xd->port_id, + max_frame_size - xd->driver_frame_overhead); +#endif + + hi->max_frame_size = 0; + vnet_hw_interface_set_max_frame_size (vnm, xd->hw_if_index, max_frame_size); + dpdk_log_debug ("[%u] max_frame_size %u max max_frame_size %u " + "driver_frame_overhead %u", + xd->port_id, hi->max_frame_size, + xd->max_supported_frame_size, xd->driver_frame_overhead); + + vec_validate_aligned (xd->tx_queues, xd->conf.n_tx_queues - 1, CLIB_CACHE_LINE_BYTES); - for (j = 0; j < xd->tx_q_used; j++) + for (j = 0; j < xd->conf.n_tx_queues; j++) { - rv = - rte_eth_tx_queue_setup (xd->port_id, j, xd->nb_tx_desc, - xd->cpu_socket, &xd->tx_conf); + rv = rte_eth_tx_queue_setup (xd->port_id, j, xd->conf.n_tx_desc, + xd->cpu_socket, 0); /* retry with any other CPU socket */ if (rv < 0) - rv = - rte_eth_tx_queue_setup (xd->port_id, j, - xd->nb_tx_desc, SOCKET_ID_ANY, - &xd->tx_conf); + rv = rte_eth_tx_queue_setup (xd->port_id, j, xd->conf.n_tx_desc, + SOCKET_ID_ANY, 0); if (rv < 0) dpdk_device_error (xd, "rte_eth_tx_queue_setup", rv); - if (xd->tx_q_used < tm->n_vlib_mains) - clib_spinlock_init (&vec_elt (xd->tx_queues, j).lock); + clib_spinlock_init (&vec_elt (xd->tx_queues, j).lock); } - vec_validate_aligned (xd->rx_queues, xd->rx_q_used - 1, + vec_validate_aligned (xd->rx_queues, xd->conf.n_rx_queues - 1, CLIB_CACHE_LINE_BYTES); - for (j = 0; j < xd->rx_q_used; j++) + + for (j = 0; j < xd->conf.n_rx_queues; j++) { dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, j); u8 bpidx = vlib_buffer_pool_get_default_for_numa ( @@ -129,12 +229,12 @@ dpdk_device_setup (dpdk_device_t * xd) vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, bpidx); struct rte_mempool *mp = dpdk_mempool_by_buffer_pool_index[bpidx]; - rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->nb_rx_desc, + rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->conf.n_rx_desc, xd->cpu_socket, 0, mp); /* retry with any other CPU socket */ if (rv < 0) - rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->nb_rx_desc, + rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->conf.n_rx_desc, SOCKET_ID_ANY, 0, mp); rxq->buffer_pool_index = bp->index; @@ -146,7 +246,40 @@ dpdk_device_setup (dpdk_device_t * xd) if (vec_len (xd->errors)) goto error; - rte_eth_dev_set_mtu (xd->port_id, hi->max_packet_bytes); + xd->buffer_flags = + (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID); + + if ((rxo & (RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_UDP_CKSUM)) == + (RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_UDP_CKSUM)) + xd->buffer_flags |= + (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | VNET_BUFFER_F_L4_CHECKSUM_CORRECT); + + dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_RX_IP4_CKSUM, + rxo & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM); + dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_MAYBE_MULTISEG, + rxo & RTE_ETH_RX_OFFLOAD_SCATTER); + dpdk_device_flag_set ( + xd, DPDK_DEVICE_FLAG_TX_OFFLOAD, + (txo & (RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) == + (RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_UDP_CKSUM)); + + /* unconditionally set mac filtering cap */ + caps.val = caps.mask = VNET_HW_IF_CAP_MAC_FILTER; + + ethernet_set_flags (vnm, xd->hw_if_index, + ETHERNET_INTERFACE_FLAG_DEFAULT_L3); + + for (int i = 0; i < ARRAY_LEN (tx_off_caps_map); i++) + { + __typeof__ (tx_off_caps_map[0]) *v = tx_off_caps_map + i; + caps.mask |= v->caps; + if ((v->offload & txo) == v->offload) + caps.val |= v->caps; + } + + vnet_hw_if_change_caps (vnm, xd->hw_if_index, &caps); + xd->enabled_rx_off = rxo; + xd->enabled_tx_off = txo; if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) dpdk_device_start (xd); @@ -186,17 +319,18 @@ dpdk_setup_interrupts (dpdk_device_t *xd) { vnet_main_t *vnm = vnet_get_main (); vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->hw_if_index); + int int_mode = 0; if (!hi) return; - if (!xd->port_conf.intr_conf.rxq) + if (!xd->conf.enable_rxq_int) return; /* Probe for interrupt support */ if (rte_eth_dev_rx_intr_enable (xd->port_id, 0)) { dpdk_log_info ("probe for interrupt mode for device %U. Failed.\n", - format_dpdk_device_name, xd->port_id); + format_dpdk_device_name, xd->device_index); } else { @@ -204,13 +338,13 @@ dpdk_setup_interrupts (dpdk_device_t *xd) if (!(xd->flags & DPDK_DEVICE_FLAG_INT_UNMASKABLE)) rte_eth_dev_rx_intr_disable (xd->port_id, 0); dpdk_log_info ("Probe for interrupt mode for device %U. Success.\n", - format_dpdk_device_name, xd->port_id); + format_dpdk_device_name, xd->device_index); } if (xd->flags & DPDK_DEVICE_FLAG_INT_SUPPORTED) { - hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE; - for (int q = 0; q < xd->rx_q_used; q++) + int_mode = 1; + for (int q = 0; q < xd->conf.n_rx_queues; q++) { dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, q); clib_file_t f = { 0 }; @@ -218,15 +352,15 @@ dpdk_setup_interrupts (dpdk_device_t *xd) if (rxq->efd < 0) { xd->flags &= ~DPDK_DEVICE_FLAG_INT_SUPPORTED; - hi->caps &= ~VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE; + int_mode = 0; break; } f.read_function = dpdk_rx_read_ready; f.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED; f.file_descriptor = rxq->efd; f.private_data = rxq->queue_index; - f.description = - format (0, "%U queue %u", format_dpdk_device_name, xd->port_id, q); + f.description = format (0, "%U queue %u", format_dpdk_device_name, + xd->device_index, q); rxq->clib_file_index = clib_file_add (&file_main, &f); vnet_hw_if_set_rx_queue_file_index (vnm, rxq->queue_index, rxq->clib_file_index); @@ -239,6 +373,11 @@ dpdk_setup_interrupts (dpdk_device_t *xd) } } } + + if (int_mode) + vnet_hw_if_set_caps (vnm, hi->hw_if_index, VNET_HW_IF_CAP_INT_MODE); + else + vnet_hw_if_unset_caps (vnm, hi->hw_if_index, VNET_HW_IF_CAP_INT_MODE); vnet_hw_if_update_runtime_data (vnm, xd->hw_if_index); } @@ -258,6 +397,11 @@ dpdk_device_start (dpdk_device_t * xd) return; } + dpdk_log_debug ("[%u] RX burst function: %U", xd->port_id, + format_dpdk_burst_fn, xd, VLIB_RX); + dpdk_log_debug ("[%u] TX burst function: %U", xd->port_id, + format_dpdk_burst_fn, xd, VLIB_TX); + dpdk_setup_interrupts (xd); if (xd->default_mac_address) @@ -274,8 +418,8 @@ dpdk_device_start (dpdk_device_t * xd) rte_eth_allmulticast_enable (xd->port_id); - dpdk_log_info ("Interface %U started", - format_dpdk_device_name, xd->port_id); + dpdk_log_info ("Interface %U started", format_dpdk_device_name, + xd->device_index); } void @@ -288,8 +432,8 @@ dpdk_device_stop (dpdk_device_t * xd) rte_eth_dev_stop (xd->port_id); clib_memset (&xd->link, 0, sizeof (struct rte_eth_link)); - dpdk_log_info ("Interface %U stopped", - format_dpdk_device_name, xd->port_id); + dpdk_log_info ("Interface %U stopped", format_dpdk_device_name, + xd->device_index); } void vl_api_force_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); @@ -310,10 +454,11 @@ dpdk_port_state_callback_inline (dpdk_portid_t port_id, rte_eth_link_get_nowait (port_id, &link); u8 link_up = link.link_status; if (link_up) - dpdk_log_info ("Port %d Link Up - speed %u Mbps - %s", - port_id, (unsigned) link.link_speed, - (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? - "full-duplex" : "half-duplex"); + dpdk_log_info ("Port %d Link Up - speed %u Mbps - %s", port_id, + (unsigned) link.link_speed, + (link.link_duplex == RTE_ETH_LINK_FULL_DUPLEX) ? + "full-duplex" : + "half-duplex"); else dpdk_log_info ("Port %d Link Down\n\n", port_id); @@ -336,7 +481,11 @@ dpdk_get_pci_device (const struct rte_eth_dev_info *info) const struct rte_bus *bus; bus = rte_bus_find_by_device (info->device); +#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0) + if (bus && !strcmp (rte_bus_name (bus), "pci")) +#else if (bus && !strcmp (bus->name, "pci")) +#endif return RTE_DEV_TO_PCI (info->device); else return NULL; @@ -349,7 +498,11 @@ dpdk_get_vmbus_device (const struct rte_eth_dev_info *info) const struct rte_bus *bus; bus = rte_bus_find_by_device (info->device); +#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0) + if (bus && !strcmp (rte_bus_name (bus), "vmbus")) +#else if (bus && !strcmp (bus->name, "vmbus")) +#endif return container_of (info->device, struct rte_vmbus_device, device); else return NULL;