extern int rte_netmap_probe(void);
#endif
+/*
+ * Allocate and zero one cache-line-aligned TX lock per used TX queue
+ * (vector xd->lockp, xd->tx_q_used entries) and mark the device as
+ * needing TX locking.  Invoked when the device has fewer TX queues
+ * than vlib threads, so threads must share queues under a lock.
+ */
+void
+dpdk_device_lock_init(dpdk_device_t * xd)
+{
+ int q;
+ vec_validate(xd->lockp, xd->tx_q_used - 1);
+ for (q = 0; q < xd->tx_q_used; q++)
+ {
+ /* One full cache line per lock to avoid false sharing between queues. */
+ xd->lockp[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
+ CLIB_CACHE_LINE_BYTES);
+ memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES);
+ }
+ xd->need_txlock = 1;
+}
+
+/*
+ * Release every per-queue TX lock allocated by dpdk_device_lock_init(),
+ * free the lockp vector itself, and clear the need_txlock flag so the
+ * device no longer takes TX locks.
+ */
+void
+dpdk_device_lock_free(dpdk_device_t * xd)
+{
+ int q;
+
+ for (q = 0; q < vec_len(xd->lockp); q++)
+ clib_mem_free((void *) xd->lockp[q]);
+ vec_free(xd->lockp);
+ /* Drop the stale pointer so later checks see "no locks". */
+ xd->lockp = 0;
+ xd->need_txlock = 0;
+}
+
static clib_error_t *
dpdk_lib_init (dpdk_main_t * dm)
{
memcpy(&xd->port_conf, &port_conf_template, sizeof(struct rte_eth_conf));
- xd->tx_q_used = dev_info.max_tx_queues < tm->n_vlib_mains ?
- 1 : tm->n_vlib_mains;
+ xd->tx_q_used = clib_min(dev_info.max_tx_queues, tm->n_vlib_mains);
+
+ if (dm->max_tx_queues)
+ xd->tx_q_used = clib_min(xd->tx_q_used, dm->max_tx_queues);
if (dm->use_rss > 1 && dev_info.max_rx_queues >= dm->use_rss)
{
/* Cisco VIC */
case VNET_DPDK_PMD_VICE:
case VNET_DPDK_PMD_ENIC:
- rte_eth_link_get_nowait(xd->device_index, &l);
+ rte_eth_link_get_nowait(i, &l);
if (l.link_speed == 40000)
{
xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G;
xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G;
break;
case I40E_DEV_ID_VF:
- rte_eth_link_get_nowait(xd->device_index, &l);
+ rte_eth_link_get_nowait(i, &l);
xd->port_type = l.link_speed == 10000 ?
VNET_DPDK_PORT_TYPE_ETH_10G : VNET_DPDK_PORT_TYPE_ETH_40G;
break;
xd->af_packet_port_id = af_packet_port_id++;
break;
+ case VNET_DPDK_PMD_BOND:
+ xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND;
+ break;
+
default:
xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
}
rte_eth_macaddr_get(i,(struct ether_addr *)addr);
if (xd->tx_q_used < tm->n_vlib_mains)
- {
- xd->lockp = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
- CLIB_CACHE_LINE_BYTES);
- memset ((void *) xd->lockp, 0, CLIB_CACHE_LINE_BYTES);
- }
+ dpdk_device_lock_init(xd);
xd->device_index = xd - dm->devices;
ASSERT(i == xd->device_index);
rte_eth_dev_set_mtu(xd->device_index, hi->max_packet_bytes);
}
+#ifdef RTE_LIBRTE_KNI
if (dm->num_kni) {
clib_warning("Initializing KNI interfaces...");
rte_kni_init(dm->num_kni);
hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index);
}
}
+#endif
if (nb_desc > dm->num_mbufs)
clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n",
return 0;
}
-/*
- * Tell the vlib physical memory allocator that we've handled
- * the initialization. We don't actually do so until
- * vlib_main(...) callls the dpdk config function.
- */
-int vlib_app_physmem_init (vlib_main_t * vm, physmem_main_t * pm,
- int physmem_required)
-{
- return 1;
-}
-
static clib_error_t *
write_sys_fs (char * file_name, char * fmt, ...)
{
*/
if (bind_uio)
{
- int pci_vendor_id = strtol((char *) pci_vid, NULL, 16);
- int pci_device_id = strtol((char *) pci_did, NULL, 16);
-
- /*
- * Set PCI ID to ".../virtio-pci/new_id" for Intel fortvile adapaters
- */
- if (pci_vendor_id == 0x8086 &&
- (pci_device_id == I40E_DEV_ID_10G_BASE_T ||
- pci_device_id == I40E_DEV_ID_SFP_XL710 ||
- pci_device_id == I40E_DEV_ID_QSFP_A ||
- pci_device_id == I40E_DEV_ID_QSFP_B ||
- pci_device_id == I40E_DEV_ID_QSFP_C))
- {
- _vec_len (path) = 0;
- path = format (path, "/sys/bus/pci/drivers/%s/new_id%c", driver_name, 0);
- error = write_sys_fs ((char *) path, "%s %s", pci_vid, pci_did);
- if (error)
- continue;
- }
+ _vec_len (path) = 0;
+ path = format (path, "/sys/bus/pci/drivers/%s/new_id%c", driver_name, 0);
+ error = write_sys_fs ((char *) path, "%s %s", pci_vid, pci_did);
+ if (error)
+ continue;
_vec_len (path) = 0;
path = format (path, "/sys/bus/pci/drivers/%s/bind%c", driver_name, 0);
return error;
}
-static uword
-unformat_socket_mem (unformat_input_t * input, va_list * va)
-{
- uword ** r = va_arg (* va, uword **);
- int i = 0;
- u32 mem;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, ","))
- hash_set (*r, i, 1024);
- else if (unformat (input, "%u,", &mem))
- hash_set (*r, i, mem);
- else if (unformat (input, "%u", &mem))
- hash_set (*r, i, mem);
- else
- {
- unformat_put_input (input);
- goto done;
- }
- i++;
- }
-
-done:
- return 1;
-}
-
static u32
get_node_free_hugepages_num (u32 node, u32 page_size)
{
no_huge = 1;
}
+ else if (unformat (input, "enable-tcp-udp-checksum"))
+ {
+ dm->buffer_flags_template &=
+ ~(IP_BUFFER_L4_CHECKSUM_CORRECT | IP_BUFFER_L4_CHECKSUM_COMPUTED);
+ }
+
else if (unformat (input, "decimal-interface-names"))
dm->interface_name_format_decimal = 1;
else if (unformat (input, "num-mbufs %d", &dm->num_mbufs))
;
+ else if (unformat (input, "max-tx-queues %d", &dm->max_tx_queues))
+ ;
else if (unformat (input, "kni %d", &dm->num_kni))
;
else if (unformat (input, "uio-driver %s", &dm->uio_driver_name))
if (!dm->coremask_set_manually)
{
vlib_thread_registration_t * tr;
- uword coremask;
+ uword * coremask = 0;
int i;
/* main thread core */
- coremask = 1 << tm->main_lcore;
+ coremask = clib_bitmap_set(coremask, tm->main_lcore, 1);
for (i = 0; i < vec_len (tm->registrations); i++)
{
tr = tm->registrations[i];
- if (clib_bitmap_is_zero(tr->coremask))
- continue;
- coremask |= tr->coremask[0];
+ coremask = clib_bitmap_or(coremask, tr->coremask);
}
vec_insert (dm->eal_init_args, 2, 1);
dm->eal_init_args[1] = (u8 *) "-c";
- tmp = format (0, "%x%c", coremask, 0);
+ tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0);
dm->eal_init_args[2] = tmp;
+ clib_bitmap_free(coremask);
}
if (!dm->nchannels_set_manually)
vm = dm->vlib_main;
+ /* make copy of args as rte_eal_init tends to mess up with arg array */
+ for (i = 1; i < vec_len(dm->eal_init_args); i++)
+ dm->eal_init_args_str = format(dm->eal_init_args_str, "%s ",
+ dm->eal_init_args[i]);
+
ret = rte_eal_init(vec_len(dm->eal_init_args), (char **) dm->eal_init_args);
/* lazy umount hugepages */
if (ret < 0)
return clib_error_return (0, "rte_eal_init returned %d", ret);
+ /* Dump the physical memory layout prior to creating the mbuf_pool */
+ fprintf(stdout, "DPDK physical memory layout:\n");
+ rte_dump_physmem_layout(stdout);
+
/* main thread 1st */
error = vlib_buffer_pool_create(vm, dm->num_mbufs, MBUF_SIZE, rte_socket_id());
if (error)
vlib_frame_t * f)
{
clib_error_t * error;
+ vnet_main_t * vnm = vnet_get_main();
dpdk_main_t * dm = &dpdk_main;
+ ethernet_main_t * em = ðernet_main;
dpdk_device_t * xd;
vlib_thread_main_t * tm = vlib_get_thread_main();
void *vu_state;
dpdk_update_link_state (xd, now);
}
+{ // Setup MACs for bond interfaces and their links which was initialized in
+ // dpdk_port_setup() but needs to be done again here to take effect.
+ int nports = rte_eth_dev_count();
+ if (nports > 0) {
+ for (i = 0; i < nports; i++) {
+ struct rte_eth_dev_info dev_info;
+ rte_eth_dev_info_get(i, &dev_info);
+ if (!dev_info.driver_name)
+ dev_info.driver_name = dev_info.pci_dev->driver->name;
+ ASSERT(dev_info.driver_name);
+ if (strncmp(dev_info.driver_name, "rte_bond_pmd", 12) == 0) {
+ u8 addr[6];
+ u8 slink[16];
+ int nlink = rte_eth_bond_slaves_get(i, slink, 16);
+ if (nlink > 0) {
+ vnet_hw_interface_t * hi;
+ ethernet_interface_t * ei;
+ /* Get MAC of 1st slave link */
+ rte_eth_macaddr_get(slink[0], (struct ether_addr *)addr);
+ /* Set MAC of bounded interface to that of 1st slave link */
+ rte_eth_bond_mac_address_set(i, (struct ether_addr *)addr);
+ /* Populate MAC of bonded interface in VPP hw tables */
+ hi = vnet_get_hw_interface (
+ vnm, dm->devices[i].vlib_hw_if_index);
+ ei = pool_elt_at_index (em->interfaces, hi->hw_instance);
+ memcpy (hi->hw_address, addr, 6);
+ memcpy (ei->address, addr, 6);
+ /* Add MAC to other slave links */
+ while (nlink > 1) {
+ nlink--;
+ rte_eth_dev_mac_addr_add(
+ slink[nlink], (struct ether_addr *)addr, 0);
+ }
+ }
+ }
+ }
+ }
+}
+
while (1)
{
- vlib_process_wait_for_event_or_clock (vm, 5.0);
+ /*
+ * check each time through the loop in case intervals are changed
+ */
+ f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ?
+ dm->link_state_poll_interval : dm->stat_poll_interval;
+
+ vlib_process_wait_for_event_or_clock (vm, min_wait);
if (dpdk_get_admin_up_down_in_progress())
/* skip the poll if an admin up down is in progress (on any interface) */
vec_foreach (xd, dm->devices)
{
f64 now = vlib_time_now (vm);
- if ((now - xd->time_last_stats_update) >= DPDK_STATS_POLL_INTERVAL)
+ if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval)
dpdk_update_counters (xd, now);
- if ((now - xd->time_last_link_update) >= DPDK_LINK_POLL_INTERVAL)
+ if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval)
dpdk_update_link_state (xd, now);
if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER)
.process_log2_n_stack_bytes = 17,
};
+/*
+ * Set the interval (seconds) at which the dpdk process loop refreshes
+ * per-device counters via dpdk_update_counters().
+ * Returns VNET_API_ERROR_INVALID_VALUE if interval is below
+ * DPDK_MIN_STATS_POLL_INTERVAL, 0 on success.
+ */
+int dpdk_set_stat_poll_interval (f64 interval)
+{
+ if (interval < DPDK_MIN_STATS_POLL_INTERVAL)
+ return (VNET_API_ERROR_INVALID_VALUE);
+
+ dpdk_main.stat_poll_interval = interval;
+
+ return 0;
+}
+
+/*
+ * Set the interval (seconds) at which the dpdk process loop refreshes
+ * link state via dpdk_update_link_state().
+ * Returns VNET_API_ERROR_INVALID_VALUE if interval is below
+ * DPDK_MIN_LINK_POLL_INTERVAL, 0 on success.
+ */
+int dpdk_set_link_state_poll_interval (f64 interval)
+{
+ if (interval < DPDK_MIN_LINK_POLL_INTERVAL)
+ return (VNET_API_ERROR_INVALID_VALUE);
+
+ dpdk_main.link_state_poll_interval = interval;
+
+ return 0;
+}
+
clib_error_t *
dpdk_init (vlib_main_t * vm)
{
_(rte_cxgbe_driver)
#endif
+#ifdef RTE_LIBRTE_PMD_BOND
+ _(bond_drv)
+#endif
+
#undef _
/*
dm->vhost_coalesce_frames = 32;
dm->vhost_coalesce_time = 1e-3;
+ /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */
+ dm->buffer_flags_template =
+ (VLIB_BUFFER_TOTAL_LENGTH_VALID
+ | IP_BUFFER_L4_CHECKSUM_COMPUTED
+ | IP_BUFFER_L4_CHECKSUM_CORRECT);
+
+ dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL;
+ dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL;
+
/* init CLI */
if ((error = vlib_call_init_function (vm, dpdk_cli_init)))
return error;