#include <vlib/log.h>
#include <vnet/ethernet/ethernet.h>
+#include <dpdk/buffer.h>
#include <dpdk/device/dpdk.h>
#include <vlib/pci/pci.h>
#include <vlib/vmbus/vmbus.h>
#include <sys/mount.h>
#include <string.h>
#include <fcntl.h>
+#include <dirent.h>
#include <dpdk/device/dpdk_priv.h>
}
}
-static struct rte_mempool_ops *
-get_ops_by_name (char *ops_name)
+/*
+ * Returns 1 when DEV_RX_OFFLOAD_KEEP_CRC is not set in the device's
+ * configured RX offloads, and 0 when it is set.
+ */
+static int
+dpdk_port_crc_strip_enabled (dpdk_device_t * xd)
{
- u32 i;
-
- for (i = 0; i < rte_mempool_ops_table.num_ops; i++)
- {
- if (!strcmp (ops_name, rte_mempool_ops_table.ops[i].name))
- return &rte_mempool_ops_table.ops[i];
- }
-
- return 0;
+  return (xd->port_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC) == 0;
}
+/*
+ * check_l3cache: report whether CPU 0 exposes a level 3 cache in sysfs.
+ *
+ * Walks the subdirectories of /sys/devices/system/cpu/cpu0/cache and reads
+ * the "level" file of each one.
+ *
+ * return value 1: a level 3 cache exists.
+ * return value 0: no level 3 cache was found.
+ * return value -1: the sysfs cache directory could not be opened.
+ */
static int
-dpdk_ring_alloc (struct rte_mempool *mp)
+check_l3cache (void)
{
- u32 rg_flags = 0, count;
- i32 ret;
- char rg_name[RTE_RING_NAMESIZE];
- struct rte_ring *r;
-
- ret = snprintf (rg_name, sizeof (rg_name), RTE_MEMPOOL_MZ_FORMAT, mp->name);
- if (ret < 0 || ret >= (i32) sizeof (rg_name))
- return -ENAMETOOLONG;
-
- /* ring flags */
- if (mp->flags & MEMPOOL_F_SP_PUT)
- rg_flags |= RING_F_SP_ENQ;
- if (mp->flags & MEMPOOL_F_SC_GET)
- rg_flags |= RING_F_SC_DEQ;
-
- count = rte_align32pow2 (mp->size + 1);
- /*
- * Allocate the ring that will be used to store objects.
- * Ring functions will return appropriate errors if we are
- * running as a secondary process etc., so no checks made
- * in this function for that condition.
- */
- /* XXX can we get memory from the right socket? */
- r = clib_mem_alloc_aligned (rte_ring_get_memsize (count),
- CLIB_CACHE_LINE_BYTES);
-
- /* XXX rte_ring_lookup will not work */
-
- ret = rte_ring_init (r, rg_name, count, rg_flags);
- if (ret)
- return ret;
-
- mp->pool_data = r;
- return 0;
-}
+  struct dirent *dp;
+  clib_error_t *err;
+  const char *sys_cache_dir = "/sys/devices/system/cpu/cpu0/cache";
+  DIR *dir_cache = opendir (sys_cache_dir);
+  int found = 0;
-static int
-dpdk_port_crc_strip_enabled (dpdk_device_t * xd)
-{
-#if RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0)
- return ! !(xd->port_conf.rxmode.offloads & DEV_RX_OFFLOAD_CRC_STRIP);
-#else
- return !(xd->port_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC);
-#endif
+  if (dir_cache == NULL)
+    return -1;
+
+  while (!found && (dp = readdir (dir_cache)) != NULL)
+    {
+      u8 *p = NULL;
+      int level_cache = -1;
+
+      if (dp->d_type != DT_DIR)
+        continue;
+
+      /* The path vector is handed to clib_sysfs_read as a C string, so
+         append an explicit NUL terminator with "%c", 0. */
+      p = format (p, "%s/%s/%s%c", sys_cache_dir, dp->d_name, "level", 0);
+      if ((err = clib_sysfs_read ((char *) p, "%d", &level_cache)))
+        clib_error_free (err);
+      /* Release the path vector on every iteration to avoid leaking it. */
+      vec_free (p);
+
+      found = (level_cache == 3);
+    }
+
+  closedir (dir_cache);
+
+  return found;
}
static clib_error_t *
{
u32 nports;
u32 mtu, max_rx_frame;
- u32 nb_desc = 0;
int i;
clib_error_t *error;
vlib_main_t *vm = vlib_get_main ();
struct rte_eth_dev_info dev_info;
struct rte_pci_device *pci_dev;
struct rte_eth_link l;
+ dpdk_portid_t next_port_id;
dpdk_device_config_t *devconf = 0;
vlib_pci_addr_t pci_addr;
uword *p = 0;
devconf = &dm->conf->default_devconf;
/* Handle interface naming for devices with multiple ports sharing same PCI ID */
- if (pci_dev)
+ if (pci_dev &&
+ ((next_port_id = rte_eth_find_next (i + 1)) != RTE_MAX_ETHPORTS))
{
struct rte_eth_dev_info di = { 0 };
struct rte_pci_device *next_pci_dev;
- rte_eth_dev_info_get (i + 1, &di);
+ rte_eth_dev_info_get (next_port_id, &di);
next_pci_dev = di.device ? RTE_DEV_TO_PCI (di.device) : 0;
- if (pci_dev && next_pci_dev &&
+ if (next_pci_dev &&
pci_addr.as_u32 != last_pci_addr.as_u32 &&
memcmp (&pci_dev->addr, &next_pci_dev->addr,
sizeof (struct rte_pci_addr)) == 0)
case VNET_DPDK_PMD_IGB:
case VNET_DPDK_PMD_IXGBE:
case VNET_DPDK_PMD_I40E:
+ case VNET_DPDK_PMD_ICE:
xd->port_type = port_type_from_speed_capa (&dev_info);
xd->supported_flow_actions = VNET_FLOW_ACTION_MARK |
VNET_FLOW_ACTION_REDIRECT_TO_NODE |
+ VNET_FLOW_ACTION_REDIRECT_TO_QUEUE |
VNET_FLOW_ACTION_BUFFER_ADVANCE |
VNET_FLOW_ACTION_COUNT | VNET_FLOW_ACTION_DROP;
case VNET_DPDK_PMD_IXGBEVF:
case VNET_DPDK_PMD_I40EVF:
xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
-#if RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0)
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
-#endif
break;
case VNET_DPDK_PMD_THUNDERX:
xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
-#if RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0)
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
-#endif
if (dm->conf->no_tx_checksum_offload == 0)
{
/* Cisco VIC */
case VNET_DPDK_PMD_ENIC:
- if (l.link_speed == 40000)
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G;
- else
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G;
+ xd->port_type = port_type_from_link_speed (l.link_speed);
break;
/* Intel Red Rock Canyon */
case VNET_DPDK_PMD_FM10K:
xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH;
-#if RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0)
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
-#endif
break;
/* virtio */
if (devconf->num_rx_desc)
xd->nb_rx_desc = devconf->num_rx_desc;
+ else {
+
+ /* If the user did not specify num_rx_desc, the default hugepage size is
+ 2M, and the CPU has no L3 cache, lower the default num_rx_desc from
+ 1024 to 512 to help reduce TLB misses.
+ */
+ if ((clib_mem_get_default_hugepage_size () == 2 << 20)
+ && check_l3cache() == 0)
+ xd->nb_rx_desc = 512;
+ }
if (devconf->num_tx_desc)
xd->nb_tx_desc = devconf->num_tx_desc;
- }
+ else {
+
+ /* If the user did not specify num_tx_desc, the default hugepage size is
+ 2M, and the CPU has no L3 cache, lower the default num_tx_desc from
+ 1024 to 512 to help reduce TLB misses.
+ */
+ if ((clib_mem_get_default_hugepage_size () == 2 << 20)
+ && check_l3cache() == 0)
+ xd->nb_tx_desc = 512;
+ }
+ }
if (xd->pmd == VNET_DPDK_PMD_AF_PACKET)
{
dq->queue_id = 0;
}
- /* count the number of descriptors used for this device */
- nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used;
-
error = ethernet_register_interface
(dm->vnet_main, dpdk_device_class.index, xd->device_index,
/* ethernet address */ addr,
}
/*
- * For cisco VIC vNIC, set default to VLAN strip enabled, unless
- * specified otherwise in the startup config.
- * For other NICs default to VLAN strip disabled, unless specified
+ * A note on Cisco VIC (PMD_ENIC) and VLAN:
+ *
+ * With Cisco VIC vNIC, every ingress packet is tagged. On a
+ * trunk vNIC (C series "standalone" server), packets on no VLAN
+ * are tagged with vlan 0. On an access vNIC (standalone or B
+ * series "blade" server), packets on the default/native VLAN
+ * are tagged with that vNIC's VLAN. VPP expects these packets
+ * to be untagged, and previously enabled VLAN strip on VIC by
+ * default. But it also broke vlan sub-interfaces.
+ *
+ * The VIC adapter has "untag default vlan" ingress VLAN rewrite
+ * mode, which removes tags from these packets. VPP now includes
+ * a local patch for the enic driver to use this untag mode, so
+ * enabling vlan stripping is no longer needed. In future, the
+ * driver + dpdk will have an API to set the mode after
+ * rte_eal_init. Then, this note and local patch will be
+ * removed.
+ */
+
+ /*
+ * VLAN stripping: default to VLAN strip disabled, unless specified
* otherwise in the startup config.
*/
- if (xd->pmd == VNET_DPDK_PMD_ENIC)
- {
- if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF)
- vlan_strip = 1; /* remove vlan tag from VIC port by default */
- else
- dpdk_log_warn ("VLAN strip disabled for interface\n");
- }
- else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON)
+ if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON)
vlan_strip = 1;
if (vlan_strip)
}
/* *INDENT-ON* */
- if (nb_desc > dm->conf->num_mbufs)
- dpdk_log_err ("%d mbufs allocated but total rx/tx ring size is %d\n",
- dm->conf->num_mbufs, nb_desc);
-
return 0;
}
(d->device_id == 0x0443 || d->device_id == 0x37c9 || d->device_id == 0x19e3))
;
/* Cisco VIC */
- else if (d->vendor_id == 0x1137 && d->device_id == 0x0043)
+ else if (d->vendor_id == 0x1137 &&
+ (d->device_id == 0x0043 || d->device_id == 0x0071))
;
/* Chelsio T4/T5 */
else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000)
devconf->pci_addr.as_u32 = pci_addr.as_u32;
devconf->hqos_enabled = 0;
+#if 0
dpdk_device_config_hqos_default (&devconf->hqos);
+#endif
if (!input)
return 0;
unformat_input_t sub_input;
uword default_hugepage_sz, x;
u8 *s, *tmp = 0;
- u32 log_level;
int ret, i;
int num_whitelisted = 0;
u8 no_pci = 0;
format (0, "%s/hugepages%c", vlib_unix_get_runtime_dir (), 0);
conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword));
- log_level = RTE_LOG_NOTICE;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
else if (unformat (input, "decimal-interface-names"))
conf->interface_name_format_decimal = 1;
- else if (unformat (input, "log-level %U", unformat_dpdk_log_level, &x))
- log_level = x;
-
else if (unformat (input, "no-multi-seg"))
conf->no_multi_seg = 1;
}
else if (unformat (input, "num-mem-channels %d", &conf->nchannels))
conf->nchannels_set_manually = 0;
- else if (unformat (input, "num-mbufs %d", &conf->num_mbufs))
+ else if (unformat (input, "num-crypto-mbufs %d",
+ &conf->num_crypto_mbufs))
;
else if (unformat (input, "uio-driver %s", &conf->uio_driver_name))
;
/* Set up DPDK eal and packet mbuf pool early. */
- rte_log_set_global_level (log_level);
int log_fds[2] = { 0 };
if (pipe (log_fds) == 0)
{
if (ret < 0)
return clib_error_return (0, "rte_eal_init returned %d", ret);
- /* set custom ring memory allocator */
- {
- struct rte_mempool_ops *ops = NULL;
-
- ops = get_ops_by_name ("ring_sp_sc");
- ops->alloc = dpdk_ring_alloc;
-
- ops = get_ops_by_name ("ring_mp_sc");
- ops->alloc = dpdk_ring_alloc;
-
- ops = get_ops_by_name ("ring_sp_mc");
- ops->alloc = dpdk_ring_alloc;
-
- ops = get_ops_by_name ("ring_mp_mc");
- ops->alloc = dpdk_ring_alloc;
- }
-
/* main thread 1st */
- error = dpdk_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ());
- if (error)
+ if ((error = dpdk_buffer_pools_create (vm)))
return error;
- for (i = 0; i < RTE_MAX_LCORE; i++)
- {
- error = dpdk_buffer_pool_create (vm, conf->num_mbufs,
- rte_lcore_to_socket_id (i));
- if (error)
- return error;
- }
-
done:
return error;
}
dm->conf = &dpdk_config_main;
dm->conf->nchannels = 4;
- dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF;
vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet");
vec_add1 (dm->conf->eal_init_args, (u8 *) "--in-memory");