#include <vnet/vnet.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
#include <dpdk/buffer.h>
#include <dpdk/device/dpdk.h>
#include <dpdk/cryptodev/cryptodev.h>
const char *pfx;
} if_name_prefixes[] = {
/* sorted, higher speed first */
- { ETH_LINK_SPEED_200G, "TwoHundredGigabitEthernet" },
- { ETH_LINK_SPEED_100G, "HundredGigabitEthernet" },
- { ETH_LINK_SPEED_56G, "FiftySixGigabitEthernet" },
- { ETH_LINK_SPEED_50G, "FiftyGigabitEthernet" },
- { ETH_LINK_SPEED_40G, "FortyGigabitEthernet" },
- { ETH_LINK_SPEED_25G, "TwentyFiveGigabitEthernet" },
- { ETH_LINK_SPEED_20G, "TwentyGigabitEthernet" },
- { ETH_LINK_SPEED_10G, "TenGigabitEthernet" },
- { ETH_LINK_SPEED_5G, "FiveGigabitEthernet" },
- { ETH_LINK_SPEED_2_5G, "TwoDotFiveGigabitEthernet" },
- { ETH_LINK_SPEED_1G, "GigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_200G, "TwoHundredGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_100G, "HundredGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_56G, "FiftySixGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_50G, "FiftyGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_40G, "FortyGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_25G, "TwentyFiveGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_20G, "TwentyGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_10G, "TenGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_5G, "FiveGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_2_5G, "TwoDotFiveGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_1G, "GigabitEthernet" },
};
static clib_error_t *
{
dpdk_main_t *dm = &dpdk_main;
struct rte_pci_device *pci_dev;
- struct rte_vmbus_device *vmbus_dev;
vlib_pci_addr_t pci_addr;
+#ifdef __linux__
+ struct rte_vmbus_device *vmbus_dev;
vlib_vmbus_addr_t vmbus_addr;
+#endif /* __linux__ */
uword *p = 0;
if ((pci_dev = dpdk_get_pci_device (di)))
hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
}
+#ifdef __linux__
if ((vmbus_dev = dpdk_get_vmbus_device (di)))
{
unformat_input_t input_vmbus;
- unformat_init_string (&input_vmbus, di->device->name,
- strlen (di->device->name));
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ const char *dev_name = rte_dev_name (di->device);
+#else
+ const char *dev_name = di->device->name;
+#endif
+ unformat_init_string (&input_vmbus, dev_name, strlen (dev_name));
if (unformat (&input_vmbus, "%U", unformat_vlib_vmbus_addr, &vmbus_addr))
p = mhash_get (&dm->conf->device_config_index_by_vmbus_addr,
&vmbus_addr);
unformat_free (&input_vmbus);
}
+#endif /* __linux__ */
if (p)
return pool_elt_at_index (dm->conf->dev_confs, p[0]);
dm->default_port_conf.n_tx_desc = DPDK_NB_TX_DESC_DEFAULT;
dm->default_port_conf.n_rx_queues = 1;
dm->default_port_conf.n_tx_queues = tm->n_vlib_mains;
- dm->default_port_conf.rss_hf = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP;
+ dm->default_port_conf.rss_hf =
+ RTE_ETH_RSS_IP | RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP;
dm->default_port_conf.max_lro_pkt_size = DPDK_MAX_LRO_SIZE_DEFAULT;
if ((clib_mem_get_default_hugepage_size () == 2 << 20) &&
dpdk_device_config_t *devconf = 0;
vnet_eth_interface_registration_t eir = {};
dpdk_driver_t *dr;
+ i8 numa_node;
if (!rte_eth_dev_is_valid_port (port_id))
continue;
pci_dev->addr.devid, pci_dev->addr.function);
else
xd->name = format (xd->name, "%u", port_id);
- }
- /* Handle representor devices that share the same PCI ID */
- if ((di.switch_info.domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) &&
- (di.switch_info.port_id != (uint16_t) -1))
- xd->name = format (xd->name, "/%d", di.switch_info.port_id);
+ /* Handle representor devices that share the same PCI ID */
+ if ((di.switch_info.domain_id !=
+ RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) &&
+ (di.switch_info.port_id != (uint16_t) -1))
+ xd->name = format (xd->name, "/%d", di.switch_info.port_id);
+ }
/* number of RX and TX queues */
if (devconf->num_tx_queues > 0)
q = di.max_rx_pktlen - di.max_mtu;
/* attempt to protect from bogus value provided by pmd */
- if (q < (2 * xd->driver_frame_overhead) && q > 0)
+ if (q < (2 * xd->driver_frame_overhead) && q > 0 &&
+ di.max_mtu != UINT16_MAX)
xd->driver_frame_overhead = q;
dpdk_log_debug ("[%u] min_mtu: %u, max_mtu: %u, min_rx_bufsize: %u, "
"max_rx_pktlen: %u, max_lro_pkt_size: %u",
vec_validate_aligned (xd->rx_queues, xd->conf.n_rx_queues - 1,
CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (xd->tx_queues, xd->conf.n_tx_queues - 1,
+ CLIB_CACHE_LINE_BYTES);
rte_eth_macaddr_get (port_id, (void *) addr);
eir.cb.set_max_frame_size = dpdk_set_max_frame_size;
xd->hw_if_index = vnet_eth_register_interface (vnm, &eir);
hi = vnet_get_hw_interface (vnm, xd->hw_if_index);
- hi->numa_node = xd->cpu_socket = (i8) rte_eth_dev_socket_id (port_id);
+ numa_node = (i8) rte_eth_dev_socket_id (port_id);
+ if (numa_node == SOCKET_ID_ANY)
+ /* numa_node is not set, default to 0 */
+ hi->numa_node = xd->cpu_socket = 0;
+ else
+ hi->numa_node = xd->cpu_socket = numa_node;
sw = vnet_get_hw_sw_interface (vnm, xd->hw_if_index);
xd->sw_if_index = sw->sw_if_index;
- dpdk_log_debug ("[%u] interface %s created", port_id, hi->name);
+ dpdk_log_debug ("[%u] interface %v created", port_id, hi->name);
+
+ if (devconf->tag)
+ vnet_set_sw_interface_tag (vnm, devconf->tag, sw->sw_if_index);
ethernet_set_flags (vnm, xd->hw_if_index,
ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
vnm, xd->hw_if_index, q, VNET_HW_IF_RXQ_THREAD_ANY);
}
+ for (q = 0; q < xd->conf.n_tx_queues; q++)
+ {
+ dpdk_tx_queue_t *txq = vec_elt_at_index (xd->tx_queues, q);
+ txq->queue_index =
+ vnet_hw_if_register_tx_queue (vnm, xd->hw_if_index, q);
+ }
+
+ for (q = 0; q < tm->n_vlib_mains; q++)
+ {
+ u32 qi = xd->tx_queues[q % xd->conf.n_tx_queues].queue_index;
+ vnet_hw_if_tx_queue_assign_thread (vnm, qi, q);
+ }
+
if (devconf->tso == DPDK_DEVICE_TSO_ON)
{
/*tcp_udp checksum must be enabled*/
if (xd->conf.enable_tcp_udp_checksum == 0)
dpdk_log_warn ("[%u] TCP/UDP checksum offload must be enabled",
xd->port_id);
- else if ((di.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) == 0)
+ else if ((di.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) == 0)
dpdk_log_warn ("[%u] TSO not supported by device", xd->port_id);
else
xd->conf.enable_tso = 1;
int i;
addrs = vlib_pci_get_all_dev_addrs ();
- /* *INDENT-OFF* */
vec_foreach (addr, addrs)
{
dpdk_device_config_t * devconf = 0;
else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_NETWORK_ETHERNET)
;
/* all Intel QAT devices VFs */
- else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_PROCESSOR_CO &&
- (d->device_id == 0x0443 || d->device_id == 0x18a1 || d->device_id == 0x19e3 ||
- d->device_id == 0x37c9 || d->device_id == 0x6f55))
+ else if (d->vendor_id == 0x8086 &&
+ d->device_class == PCI_CLASS_PROCESSOR_CO &&
+ (d->device_id == 0x0443 || d->device_id == 0x18a1 ||
+ d->device_id == 0x19e3 || d->device_id == 0x37c9 ||
+ d->device_id == 0x6f55 || d->device_id == 0x18ef ||
+ d->device_id == 0x4941 || d->device_id == 0x4943 ||
+ d->device_id == 0x4945))
;
/* Cisco VIC */
else if (d->vendor_id == 0x1137 &&
{
continue;
}
- /* Mellanox CX6, CX6VF, CX6DX, CX6DXVF */
- else if (d->vendor_id == 0x15b3 && d->device_id >= 0x101b && d->device_id <= 0x101e)
+ /* Mellanox CX6, CX6VF, CX6DX, CX6DXVF, CX6LX */
+ else if (d->vendor_id == 0x15b3 &&
+ (d->device_id >= 0x101b && d->device_id <= 0x101f))
{
- continue;
+ continue;
+ }
+ /* Mellanox CX7 */
+ else if (d->vendor_id == 0x15b3 && d->device_id == 0x1021)
+ {
+ continue;
+ }
+ /* Mellanox BF, BFVF */
+ else if (d->vendor_id == 0x15b3 &&
+ (d->device_id >= 0xa2d2 && d->device_id <= 0Xa2d3))
+ {
+ continue;
+ }
+ /* Mellanox BF2, BF3 */
+ else if (d->vendor_id == 0x15b3 &&
+ (d->device_id == 0xa2d6 || d->device_id == 0xa2dc))
+ {
+ continue;
}
/* Broadcom NetXtreme S, and E series only */
else if (d->vendor_id == 0x14e4 &&
d->device_id == 0x1614 || d->device_id == 0x1606 ||
d->device_id == 0x1609 || d->device_id == 0x1614)))
;
+ /* Google vNIC */
+ else if (d->vendor_id == 0x1ae0 && d->device_id == 0x0042)
+ ;
else
{
dpdk_log_warn ("Unsupported PCI device 0x%04x:0x%04x found "
continue;
}
- error = vlib_pci_bind_to_uio (vm, addr, (char *) conf->uio_driver_name);
+ error = vlib_pci_bind_to_uio (vm, addr, (char *) conf->uio_driver_name,
+ conf->uio_bind_force);
if (error)
{
clib_error_report (error);
}
}
- /* *INDENT-ON* */
vec_free (pci_addr);
vlib_pci_free_device_info (d);
}
addrs = vlib_vmbus_get_all_dev_addrs ();
- /* *INDENT-OFF* */
vec_foreach (addr, addrs)
{
dpdk_device_config_t *devconf = 0;
clib_error_report (error);
}
}
- /* *INDENT-ON* */
}
uword
;
else if (unformat (input, "name %v", &devconf->name))
;
+ else if (unformat (input, "tag %s", &devconf->tag))
+ ;
else if (unformat (input, "workers %U", unformat_bitmap_list,
&devconf->workers))
;
return 0;
}
+static clib_error_t * /* 0 on success, otherwise a clib error object */
+dpdk_set_stat_poll_interval (f64 interval)
+{ /* Validate `interval` and store it in dpdk_main.stat_poll_interval. */
+ if (interval < DPDK_MIN_STATS_POLL_INTERVAL) /* below the allowed minimum: reject, leave the stored value untouched */
+ return clib_error_return (0, "wrong stats-poll-interval value");
+
+ dpdk_main.stat_poll_interval = interval; /* accepted: record the new interval */
+ return 0;
+}
+
+static clib_error_t * /* 0 on success, otherwise a clib error object */
+dpdk_set_link_state_poll_interval (f64 interval)
+{ /* Validate `interval` and store it in dpdk_main.link_state_poll_interval. */
+ if (interval < DPDK_MIN_LINK_POLL_INTERVAL) /* below the allowed minimum: reject, leave the stored value untouched */
+ return clib_error_return (0, "wrong link-state-poll-interval value");
+
+ dpdk_main.link_state_poll_interval = interval; /* accepted: record the new interval */
+ return 0;
+}
+
static clib_error_t *
dpdk_config (vlib_main_t * vm, unformat_input_t * input)
{
u32 vendor, device, domain, bus, func;
void *fmt_func;
void *fmt_addr;
+ f64 poll_interval;
huge_dir_path =
format (0, "%s/hugepages%c", vlib_unix_get_runtime_dir (), 0);
else if (unformat (input, "max-simd-bitwidth %U",
unformat_max_simd_bitwidth, &conf->max_simd_bitwidth))
;
+ else if (unformat (input, "link-state-poll-interval %f", &poll_interval))
+ {
+ error = dpdk_set_link_state_poll_interval (poll_interval);
+ if (error != 0)
+ return error;
+ }
+ else if (unformat (input, "stats-poll-interval %f", &poll_interval))
+ {
+ error = dpdk_set_stat_poll_interval (poll_interval);
+ if (error != 0)
+ return error;
+ }
else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input,
&sub_input))
{
}
else if (unformat (input, "uio-driver %s", &conf->uio_driver_name))
;
+ else if (unformat (input, "uio-bind-force"))
+ conf->uio_bind_force = 1;
else if (unformat (input, "socket-mem %s", &socket_mem))
;
else if (unformat (input, "no-pci"))
default_hugepage_sz = clib_mem_get_default_hugepage_size ();
- /* *INDENT-OFF* */
clib_bitmap_foreach (x, tm->cpu_socket_bitmap)
{
clib_error_t *e;
if ((e = clib_sysfs_prealloc_hugepages(x, 0, n_pages)))
clib_error_report (e);
}
- /* *INDENT-ON* */
}
/* on/off dpdk's telemetry thread */
ed->new_link_state = (u8) xd->link.link_status;
}
- if ((xd->link.link_duplex != prev_link.link_duplex))
+ hw_flags_chg = ((xd->link.link_duplex != prev_link.link_duplex) ||
+ (xd->link.link_status != prev_link.link_status));
+
+ if (xd->link.link_speed != prev_link.link_speed)
+ vnet_hw_interface_set_link_speed (vnm, xd->hw_if_index,
+ (xd->link.link_speed == UINT32_MAX) ?
+ UINT32_MAX :
+ xd->link.link_speed * 1000);
+
+ if (hw_flags_chg)
{
- hw_flags_chg = 1;
+ if (xd->link.link_status)
+ hw_flags |= VNET_HW_INTERFACE_FLAG_LINK_UP;
+
switch (xd->link.link_duplex)
{
- case ETH_LINK_HALF_DUPLEX:
+ case RTE_ETH_LINK_HALF_DUPLEX:
hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX;
break;
- case ETH_LINK_FULL_DUPLEX:
+ case RTE_ETH_LINK_FULL_DUPLEX:
hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX;
break;
default:
break;
}
- }
- if (xd->link.link_speed != prev_link.link_speed)
- vnet_hw_interface_set_link_speed (vnm, xd->hw_if_index,
- xd->link.link_speed * 1000);
- if (xd->link.link_status != prev_link.link_status)
- {
- hw_flags_chg = 1;
-
- if (xd->link.link_status)
- hw_flags |= VNET_HW_INTERFACE_FLAG_LINK_UP;
- }
-
- if (hw_flags_chg)
- {
if (LINK_STATE_ELOGS)
{
ELOG_TYPE_DECLARE (e) =
dpdk_device_t *xd;
vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_worker_thread_barrier_sync (vm);
error = dpdk_lib_init (dm);
if (error)
}
}
+ vlib_worker_thread_barrier_release (vm);
tm->worker_thread_release = 1;
f64 now = vlib_time_now (vm);
dpdk_update_link_state (xd, now);
}
+ f64 timeout =
+ clib_min (dm->link_state_poll_interval, dm->stat_poll_interval);
+
while (1)
{
- /*
- * check each time through the loop in case intervals are changed
- */
- f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ?
- dm->link_state_poll_interval : dm->stat_poll_interval;
-
+ f64 min_wait = clib_max (timeout, DPDK_MIN_POLL_INTERVAL);
vlib_process_wait_for_event_or_clock (vm, min_wait);
+ timeout =
+ clib_min (dm->link_state_poll_interval, dm->stat_poll_interval);
+
if (dm->admin_up_down_in_progress)
/* skip the poll if an admin up down is in progress (on any interface) */
continue;
dpdk_update_link_state (xd, now);
}
- }
+ now = vlib_time_now (vm);
+ vec_foreach (xd, dm->devices)
+ {
+ timeout = clib_min (timeout, xd->time_last_stats_update +
+ dm->stat_poll_interval - now);
+ timeout = clib_min (timeout, xd->time_last_link_update +
+ dm->link_state_poll_interval - now);
+ }
+ }
return 0;
}
-/* *INDENT-OFF* */
/* Node registration for dpdk_process: a process-type node named "dpdk-process". */
VLIB_REGISTER_NODE (dpdk_process_node,static) = {
.function = dpdk_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "dpdk-process",
.process_log2_n_stack_bytes = 17, /* NOTE(review): presumably log2 of the process stack size (2^17 bytes) - confirm against vlib */
};
-/* *INDENT-ON* */
static clib_error_t *
dpdk_init (vlib_main_t * vm)