mlx5 from dpdk16_11 29/5229/1
author    Hanoh Haim <[email protected]>
          Sun, 8 Jan 2017 09:20:53 +0000 (11:20 +0200)
committer Hanoh Haim <[email protected]>
          Sun, 8 Jan 2017 09:20:53 +0000 (11:20 +0200)
Signed-off-by: Hanoh Haim <[email protected]>
17 files changed:
src/dpdk/drivers/net/mlx5/mlx5.c
src/dpdk/drivers/net/mlx5/mlx5.h
src/dpdk/drivers/net/mlx5/mlx5_defs.h
src/dpdk/drivers/net/mlx5/mlx5_ethdev.c
src/dpdk/drivers/net/mlx5/mlx5_fdir.c
src/dpdk/drivers/net/mlx5/mlx5_mac.c
src/dpdk/drivers/net/mlx5/mlx5_mr.c
src/dpdk/drivers/net/mlx5/mlx5_prm.h
src/dpdk/drivers/net/mlx5/mlx5_rss.c
src/dpdk/drivers/net/mlx5/mlx5_rxmode.c
src/dpdk/drivers/net/mlx5/mlx5_rxq.c
src/dpdk/drivers/net/mlx5/mlx5_rxtx.c
src/dpdk/drivers/net/mlx5/mlx5_rxtx.h
src/dpdk/drivers/net/mlx5/mlx5_stats.c
src/dpdk/drivers/net/mlx5/mlx5_trigger.c
src/dpdk/drivers/net/mlx5/mlx5_txq.c
src/dpdk/drivers/net/mlx5/mlx5_vlan.c

diff --git a/src/dpdk/drivers/net/mlx5/mlx5.c b/src/dpdk/drivers/net/mlx5/mlx5.c
index 303b917..7fc6ccf 100644
--- a/src/dpdk/drivers/net/mlx5/mlx5.c
+++ b/src/dpdk/drivers/net/mlx5/mlx5.c
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <infiniband/verbs.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_malloc.h>
 #include <rte_ethdev.h>
@@ -60,7 +60,7 @@
 #include <rte_common.h>
 #include <rte_kvargs.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5.h"
@@ -181,9 +181,6 @@ mlx5_dev_close(struct rte_eth_dev *dev)
        }
        if (priv->reta_idx != NULL)
                rte_free(priv->reta_idx);
-
-    mlx5_stats_free(dev);
-
        priv_unlock(priv);
        memset(priv, 0, sizeof(*priv));
 }
@@ -257,6 +254,7 @@ mlx5_dev_idx(struct rte_pci_addr *pci_addr)
        return ret;
 }
 
+
 /**
  * Verify and store value for device argument.
  *
@@ -297,6 +295,8 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
        return 0;
 }
 
+
+
 /**
  * Parse device parameters.
  *
@@ -341,6 +341,8 @@ mlx5_args(struct priv *priv, struct rte_devargs *devargs)
        return 0;
 }
 
+
+
 static struct eth_driver mlx5_driver;
 
 /**
@@ -527,7 +529,7 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 
                /* TREX PATCH */
                /* set for maximum performance default */
-               priv->txq_inline  =128;
+               priv->txq_inline  =64;
                priv->txqs_inline =4;
 
 
@@ -781,3 +783,516 @@ static struct rte_driver rte_mlx5_driver = {
 
 PMD_REGISTER_DRIVER(rte_mlx5_driver, mlx5);
 DRIVER_REGISTER_PCI_TABLE(mlx5, mlx5_pci_id_map);
+
+
+
+
+
+
+#if 0
+/**
+ * Verify and store value for device argument.
+ *
+ * @param[in] key
+ *   Key argument to verify.
+ * @param[in] val
+ *   Value associated with key.
+ * @param opaque
+ *   User data.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+static int
+mlx5_args_check(const char *key, const char *val, void *opaque)
+{
+       struct priv *priv = opaque;
+       unsigned long tmp;
+
+       errno = 0;
+       tmp = strtoul(val, NULL, 0);
+       if (errno) {
+               WARN("%s: \"%s\" is not a valid integer", key, val);
+               return errno;
+       }
+       if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) {
+               priv->cqe_comp = !!tmp;
+       } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) {
+               priv->txq_inline = tmp;
+       } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
+               priv->txqs_inline = tmp;
+       } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
+               priv->mps = !!tmp;
+       } else {
+               WARN("%s: unknown parameter", key);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+/**
+ * Parse device parameters.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param devargs
+ *   Device arguments structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+mlx5_args(struct priv *priv, struct rte_devargs *devargs)
+{
+       const char **params = (const char *[]){
+               MLX5_RXQ_CQE_COMP_EN,
+               MLX5_TXQ_INLINE,
+               MLX5_TXQS_MIN_INLINE,
+               MLX5_TXQ_MPW_EN,
+               NULL,
+       };
+       struct rte_kvargs *kvlist;
+       int ret = 0;
+       int i;
+
+       if (devargs == NULL)
+               return 0;
+       /* Following UGLY cast is done to pass checkpatch. */
+       kvlist = rte_kvargs_parse(devargs->args, params);
+       if (kvlist == NULL)
+               return 0;
+       /* Process parameters. */
+       for (i = 0; (params[i] != NULL); ++i) {
+               if (rte_kvargs_count(kvlist, params[i])) {
+                       ret = rte_kvargs_process(kvlist, params[i],
+                                                mlx5_args_check, priv);
+                       if (ret != 0)
+                               return ret;
+               }
+       }
+       rte_kvargs_free(kvlist);
+       return 0;
+}
+
+static struct eth_driver mlx5_driver;
+
+/**
+ * DPDK callback to register a PCI device.
+ *
+ * This function creates an Ethernet device for each port of a given
+ * PCI device.
+ *
+ * @param[in] pci_drv
+ *   PCI driver structure (mlx5_driver).
+ * @param[in] pci_dev
+ *   PCI device information.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+static int
+mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
+{
+       struct ibv_device **list;
+       struct ibv_device *ibv_dev;
+       int err = 0;
+       struct ibv_context *attr_ctx = NULL;
+       struct ibv_device_attr device_attr;
+       unsigned int sriov;
+       unsigned int mps;
+       int idx;
+       int i;
+
+       (void)pci_drv;
+       assert(pci_drv == &mlx5_driver.pci_drv);
+       /* Get mlx5_dev[] index. */
+       idx = mlx5_dev_idx(&pci_dev->addr);
+       if (idx == -1) {
+               ERROR("this driver cannot support any more adapters");
+               return -ENOMEM;
+       }
+       DEBUG("using driver device index %d", idx);
+
+       /* Save PCI address. */
+       mlx5_dev[idx].pci_addr = pci_dev->addr;
+       list = ibv_get_device_list(&i);
+       if (list == NULL) {
+               assert(errno);
+               if (errno == ENOSYS) {
+                       WARN("cannot list devices, is ib_uverbs loaded?");
+                       return 0;
+               }
+               return -errno;
+       }
+       assert(i >= 0);
+       /*
+        * For each listed device, check related sysfs entry against
+        * the provided PCI ID.
+        */
+       while (i != 0) {
+               struct rte_pci_addr pci_addr;
+
+               --i;
+               DEBUG("checking device \"%s\"", list[i]->name);
+               if (mlx5_ibv_device_to_pci_addr(list[i], &pci_addr))
+                       continue;
+               if ((pci_dev->addr.domain != pci_addr.domain) ||
+                   (pci_dev->addr.bus != pci_addr.bus) ||
+                   (pci_dev->addr.devid != pci_addr.devid) ||
+                   (pci_dev->addr.function != pci_addr.function))
+                       continue;
+               sriov = ((pci_dev->id.device_id ==
+                      PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) ||
+                     (pci_dev->id.device_id ==
+                      PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF));
+               /* Multi-packet send is only supported by ConnectX-4 Lx PF. */
+               mps = (pci_dev->id.device_id ==
+                      PCI_DEVICE_ID_MELLANOX_CONNECTX4LX);
+               INFO("PCI information matches, using device \"%s\""
+                    " (SR-IOV: %s, MPS: %s)",
+                    list[i]->name,
+                    sriov ? "true" : "false",
+                    mps ? "true" : "false");
+               attr_ctx = ibv_open_device(list[i]);
+               err = errno;
+               break;
+       }
+       if (attr_ctx == NULL) {
+               ibv_free_device_list(list);
+               switch (err) {
+               case 0:
+                       WARN("cannot access device, is mlx5_ib loaded?");
+                       return 0;
+               case EINVAL:
+                       WARN("cannot use device, are drivers up to date?");
+                       return 0;
+               }
+               assert(err > 0);
+               return -err;
+       }
+       ibv_dev = list[i];
+
+       DEBUG("device opened");
+       if (ibv_query_device(attr_ctx, &device_attr))
+               goto error;
+       INFO("%u port(s) detected", device_attr.phys_port_cnt);
+
+       for (i = 0; i < device_attr.phys_port_cnt; i++) {
+               uint32_t port = i + 1; /* ports are indexed from one */
+               uint32_t test = (1 << i);
+               struct ibv_context *ctx = NULL;
+               struct ibv_port_attr port_attr;
+               struct ibv_pd *pd = NULL;
+               struct priv *priv = NULL;
+               struct rte_eth_dev *eth_dev;
+               struct ibv_exp_device_attr exp_device_attr;
+               struct ether_addr mac;
+               uint16_t num_vfs = 0;
+
+               exp_device_attr.comp_mask =
+                       IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS |
+                       IBV_EXP_DEVICE_ATTR_RX_HASH |
+                       IBV_EXP_DEVICE_ATTR_VLAN_OFFLOADS |
+                       IBV_EXP_DEVICE_ATTR_RX_PAD_END_ALIGN |
+                       0;
+
+               DEBUG("using port %u (%08" PRIx32 ")", port, test);
+
+               ctx = ibv_open_device(ibv_dev);
+               if (ctx == NULL)
+                       goto port_error;
+
+               /* Check port status. */
+               err = ibv_query_port(ctx, port, &port_attr);
+               if (err) {
+                       ERROR("port query failed: %s", strerror(err));
+                       goto port_error;
+               }
+
+               if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
+                       ERROR("port %d is not configured in Ethernet mode",
+                             port);
+                       goto port_error;
+               }
+
+               if (port_attr.state != IBV_PORT_ACTIVE)
+                       DEBUG("port %d is not active: \"%s\" (%d)",
+                             port, ibv_port_state_str(port_attr.state),
+                             port_attr.state);
+
+               /* Allocate protection domain. */
+               pd = ibv_alloc_pd(ctx);
+               if (pd == NULL) {
+                       ERROR("PD allocation failure");
+                       err = ENOMEM;
+                       goto port_error;
+               }
+
+               mlx5_dev[idx].ports |= test;
+
+               /* from rte_ethdev.c */
+               priv = rte_zmalloc("ethdev private structure",
+                                  sizeof(*priv),
+                                  RTE_CACHE_LINE_SIZE);
+               if (priv == NULL) {
+                       ERROR("priv allocation failure");
+                       err = ENOMEM;
+                       goto port_error;
+               }
+
+               priv->ctx = ctx;
+               priv->device_attr = device_attr;
+               priv->port = port;
+               priv->pd = pd;
+               priv->mtu = ETHER_MTU;
+               priv->mps = mps; /* Enable MPW by default if supported. */
+               priv->cqe_comp = 1; /* Enable compression by default. */
+               err = mlx5_args(priv, pci_dev->device.devargs);
+               if (err) {
+                       ERROR("failed to process device arguments: %s",
+                             strerror(err));
+                       goto port_error;
+               }
+               if (ibv_exp_query_device(ctx, &exp_device_attr)) {
+                       ERROR("ibv_exp_query_device() failed");
+                       goto port_error;
+               }
+
+               priv->hw_csum =
+                       ((exp_device_attr.exp_device_cap_flags &
+                         IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT) &&
+                        (exp_device_attr.exp_device_cap_flags &
+                         IBV_EXP_DEVICE_RX_CSUM_IP_PKT));
+               DEBUG("checksum offloading is %ssupported",
+                     (priv->hw_csum ? "" : "not "));
+
+               priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags &
+                                        IBV_EXP_DEVICE_VXLAN_SUPPORT);
+               DEBUG("L2 tunnel checksum offloads are %ssupported",
+                     (priv->hw_csum_l2tun ? "" : "not "));
+
+               priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size;
+               /* Remove this check once DPDK supports larger/variable
+                * indirection tables. */
+               if (priv->ind_table_max_size > (unsigned int)RSS_INDIRECTION_TABLE_SIZE)
+                       priv->ind_table_max_size = RSS_INDIRECTION_TABLE_SIZE;
+               DEBUG("maximum RX indirection table size is %u",
+                     priv->ind_table_max_size);
+               priv->hw_vlan_strip = !!(exp_device_attr.wq_vlan_offloads_cap &
+                                        IBV_EXP_RECEIVE_WQ_CVLAN_STRIP);
+               DEBUG("VLAN stripping is %ssupported",
+                     (priv->hw_vlan_strip ? "" : "not "));
+
+               priv->hw_fcs_strip = !!(exp_device_attr.exp_device_cap_flags &
+                                       IBV_EXP_DEVICE_SCATTER_FCS);
+               DEBUG("FCS stripping configuration is %ssupported",
+                     (priv->hw_fcs_strip ? "" : "not "));
+
+               priv->hw_padding = !!exp_device_attr.rx_pad_end_addr_align;
+               DEBUG("hardware RX end alignment padding is %ssupported",
+                     (priv->hw_padding ? "" : "not "));
+
+               priv_get_num_vfs(priv, &num_vfs);
+               priv->sriov = (num_vfs || sriov);
+               if (priv->mps && !mps) {
+                       ERROR("multi-packet send not supported on this device"
+                             " (" MLX5_TXQ_MPW_EN ")");
+                       err = ENOTSUP;
+                       goto port_error;
+               }
+               /* Allocate and register default RSS hash keys. */
+               priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n,
+                                           sizeof((*priv->rss_conf)[0]), 0);
+               if (priv->rss_conf == NULL) {
+                       err = ENOMEM;
+                       goto port_error;
+               }
+               err = rss_hash_rss_conf_new_key(priv,
+                                               rss_hash_default_key,
+                                               rss_hash_default_key_len,
+                                               ETH_RSS_PROTO_MASK);
+               if (err)
+                       goto port_error;
+               /* Configure the first MAC address by default. */
+               if (priv_get_mac(priv, &mac.addr_bytes)) {
+                       ERROR("cannot get MAC address, is mlx5_en loaded?"
+                             " (errno: %s)", strerror(errno));
+                       goto port_error;
+               }
+               INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
+                    priv->port,
+                    mac.addr_bytes[0], mac.addr_bytes[1],
+                    mac.addr_bytes[2], mac.addr_bytes[3],
+                    mac.addr_bytes[4], mac.addr_bytes[5]);
+               /* Register MAC address. */
+               claim_zero(priv_mac_addr_add(priv, 0,
+                                            (const uint8_t (*)[ETHER_ADDR_LEN])
+                                            mac.addr_bytes));
+               /* Initialize FD filters list. */
+               err = fdir_init_filters_list(priv);
+               if (err)
+                       goto port_error;
+#ifndef NDEBUG
+               {
+                       char ifname[IF_NAMESIZE];
+
+                       if (priv_get_ifname(priv, &ifname) == 0)
+                               DEBUG("port %u ifname is \"%s\"",
+                                     priv->port, ifname);
+                       else
+                               DEBUG("port %u ifname is unknown", priv->port);
+               }
+#endif
+               /* Get actual MTU if possible. */
+               priv_get_mtu(priv, &priv->mtu);
+               DEBUG("port %u MTU is %u", priv->port, priv->mtu);
+
+               /* from rte_ethdev.c */
+               {
+                       char name[RTE_ETH_NAME_MAX_LEN];
+
+                       snprintf(name, sizeof(name), "%s port %u",
+                                ibv_get_device_name(ibv_dev), port);
+                       eth_dev = rte_eth_dev_allocate(name);
+               }
+               if (eth_dev == NULL) {
+                       ERROR("can not allocate rte ethdev");
+                       err = ENOMEM;
+                       goto port_error;
+               }
+
+               /* Secondary processes have to use local storage for their
+                * private data as well as a copy of eth_dev->data, but this
+                * pointer must not be modified before burst functions are
+                * actually called. */
+               if (mlx5_is_secondary()) {
+                       struct mlx5_secondary_data *sd =
+                               &mlx5_secondary_data[eth_dev->data->port_id];
+                       sd->primary_priv = eth_dev->data->dev_private;
+                       if (sd->primary_priv == NULL) {
+                               ERROR("no private data for port %u",
+                                               eth_dev->data->port_id);
+                               err = EINVAL;
+                               goto port_error;
+                       }
+                       sd->shared_dev_data = eth_dev->data;
+                       rte_spinlock_init(&sd->lock);
+                       memcpy(sd->data.name, sd->shared_dev_data->name,
+                                  sizeof(sd->data.name));
+                       sd->data.dev_private = priv;
+                       sd->data.rx_mbuf_alloc_failed = 0;
+                       sd->data.mtu = ETHER_MTU;
+                       sd->data.port_id = sd->shared_dev_data->port_id;
+                       sd->data.mac_addrs = priv->mac;
+                       eth_dev->tx_pkt_burst = mlx5_tx_burst_secondary_setup;
+                       eth_dev->rx_pkt_burst = mlx5_rx_burst_secondary_setup;
+               } else {
+                       eth_dev->data->dev_private = priv;
+                       eth_dev->data->rx_mbuf_alloc_failed = 0;
+                       eth_dev->data->mtu = ETHER_MTU;
+                       eth_dev->data->mac_addrs = priv->mac;
+               }
+
+               eth_dev->pci_dev = pci_dev;
+               rte_eth_copy_pci_info(eth_dev, pci_dev);
+               eth_dev->driver = &mlx5_driver;
+               priv->dev = eth_dev;
+               eth_dev->dev_ops = &mlx5_dev_ops;
+
+               TAILQ_INIT(&eth_dev->link_intr_cbs);
+
+               /* Bring Ethernet device up. */
+               DEBUG("forcing Ethernet interface up");
+               priv_set_flags(priv, ~IFF_UP, IFF_UP);
+               mlx5_link_update_unlocked(priv->dev, 1);
+               continue;
+
+port_error:
+               if (priv) {
+                       rte_free(priv->rss_conf);
+                       rte_free(priv);
+               }
+               if (pd)
+                       claim_zero(ibv_dealloc_pd(pd));
+               if (ctx)
+                       claim_zero(ibv_close_device(ctx));
+               break;
+       }
+
+       /*
+        * XXX if something went wrong in the loop above, there is a resource
+        * leak (ctx, pd, priv, dpdk ethdev) but we can do nothing about it as
+        * long as the dpdk does not provide a way to deallocate a ethdev and a
+        * way to enumerate the registered ethdevs to free the previous ones.
+        */
+
+       /* no port found, complain */
+       if (!mlx5_dev[idx].ports) {
+               err = ENODEV;
+               goto error;
+       }
+
+error:
+       if (attr_ctx)
+               claim_zero(ibv_close_device(attr_ctx));
+       if (list)
+               ibv_free_device_list(list);
+       assert(err >= 0);
+       return -err;
+}
+
+static const struct rte_pci_id mlx5_pci_id_map[] = {
+       {
+               RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+                              PCI_DEVICE_ID_MELLANOX_CONNECTX4)
+       },
+       {
+               RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+                              PCI_DEVICE_ID_MELLANOX_CONNECTX4VF)
+       },
+       {
+               RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+                              PCI_DEVICE_ID_MELLANOX_CONNECTX4LX)
+       },
+       {
+               RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+                              PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF)
+       },
+       {
+               .vendor_id = 0
+       }
+};
+
+static struct eth_driver mlx5_driver = {
+       .pci_drv = {
+               .driver = {
+                       .name = MLX5_DRIVER_NAME
+               },
+               .id_table = mlx5_pci_id_map,
+               .probe = mlx5_pci_probe,
+               .drv_flags = RTE_PCI_DRV_INTR_LSC,
+       },
+       .dev_private_size = sizeof(struct priv)
+};
+
+/**
+ * Driver initialization routine.
+ */
+RTE_INIT(rte_mlx5_pmd_init);
+static void
+rte_mlx5_pmd_init(void)
+{
+       /*
+        * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
+        * huge pages. Calling ibv_fork_init() during init allows
+        * applications to use fork() safely for purposes other than
+        * using this PMD, which is not supported in forked processes.
+        */
+       setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
+       ibv_fork_init();
+       rte_eal_pci_register(&mlx5_driver.pci_drv);
+}
+
+RTE_PMD_EXPORT_NAME(net_mlx5, __COUNTER__);
+RTE_PMD_REGISTER_PCI_TABLE(net_mlx5, mlx5_pci_id_map);
+#endif
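
For reference, the txq_inline / txqs_min_inline defaults patched above remain overridable per device: the values reach mlx5_args_check() as "key=value" device arguments parsed with rte_kvargs. Below is a minimal sketch of that round-trip, not part of the patch; the key name and the rte_kvargs calls are real, while the handler and helper are hypothetical.

    #include <stdlib.h>
    #include <rte_kvargs.h>

    /* Hypothetical handler: store the parsed integer value of one key. */
    static int
    sketch_handler(const char *key, const char *val, void *opaque)
    {
            unsigned long *out = opaque;

            (void)key;
            *out = strtoul(val, NULL, 0); /* e.g. "txq_inline=64" -> 64 */
            return 0;
    }

    /* Hypothetical helper: extract txq_inline from a devargs string. */
    static unsigned long
    sketch_parse_txq_inline(const char *args)
    {
            const char **keys = (const char *[]){ "txq_inline", NULL };
            struct rte_kvargs *kvlist = rte_kvargs_parse(args, keys);
            unsigned long value = 64; /* default set by the TREX PATCH above */

            if (kvlist == NULL)
                    return value;
            if (rte_kvargs_count(kvlist, "txq_inline"))
                    rte_kvargs_process(kvlist, "txq_inline",
                                       sketch_handler, &value);
            rte_kvargs_free(kvlist);
            return value;
    }

On a DPDK 16.11 command line this corresponds to a whitelist entry such as -w 0000:03:00.0,txq_inline=64,txqs_min_inline=4.
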
diff --git a/src/dpdk/drivers/net/mlx5/mlx5.h b/src/dpdk/drivers/net/mlx5/mlx5.h
index 68bad90..79b7a60 100644
--- a/src/dpdk/drivers/net/mlx5/mlx5.h
+++ b/src/dpdk/drivers/net/mlx5/mlx5.h
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <infiniband/verbs.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_ether.h>
 #include <rte_ethdev.h>
@@ -60,7 +60,7 @@
 #include <rte_interrupts.h>
 #include <rte_errno.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5_utils.h"
@@ -84,34 +84,6 @@ enum {
        PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF = 0x1016,
 };
 
-struct mlx5_stats_priv {
-
-    struct rte_eth_stats m_shadow;
-    uint32_t      n_stats; /* number of counters */
-
-    void    *  et_stats  ;/* point to ethtool counter struct ethtool_stats*/
-
-    /* index into ethtool */
-    uint16_t inx_rx_vport_unicast_bytes;
-    uint16_t inx_rx_vport_multicast_bytes;
-    uint16_t inx_rx_vport_broadcast_bytes;
-    uint16_t inx_rx_vport_unicast_packets;
-    uint16_t inx_rx_vport_multicast_packets;
-    uint16_t inx_rx_vport_broadcast_packets;
-    uint16_t inx_tx_vport_unicast_bytes;
-    uint16_t inx_tx_vport_multicast_bytes;
-    uint16_t inx_tx_vport_broadcast_bytes;
-    uint16_t inx_tx_vport_unicast_packets;
-    uint16_t inx_tx_vport_multicast_packets;
-    uint16_t inx_tx_vport_broadcast_packets;
-    uint16_t inx_rx_wqe_err;
-    uint16_t inx_rx_crc_errors_phy;
-    uint16_t inx_rx_in_range_len_errors_phy;
-    uint16_t inx_rx_symbol_err_phy;
-    uint16_t inx_tx_errors_phy;
-};
-
-
 struct priv {
        struct rte_eth_dev *dev; /* Ethernet device. */
        struct ibv_context *ctx; /* Verbs context. */
@@ -162,8 +134,9 @@ struct priv {
        unsigned int (*reta_idx)[]; /* RETA index table. */
        unsigned int reta_idx_n; /* RETA index size. */
        struct fdir_filter_list *fdir_filter_list; /* Flow director rules. */
+       struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */
+       uint32_t link_speed_capa; /* Link speed capabilities. */
        rte_spinlock_t lock; /* Lock for control functions. */
-    struct mlx5_stats_priv m_stats;
 };
 
 /* Local storage for secondary process data. */
@@ -215,6 +188,7 @@ int priv_set_flags(struct priv *, unsigned int, unsigned int);
 int mlx5_dev_configure(struct rte_eth_dev *);
 void mlx5_dev_infos_get(struct rte_eth_dev *, struct rte_eth_dev_info *);
 const uint32_t *mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev);
+int mlx5_link_update_unlocked(struct rte_eth_dev *, int);
 int mlx5_link_update(struct rte_eth_dev *, int);
 int mlx5_dev_set_mtu(struct rte_eth_dev *, uint16_t);
 int mlx5_dev_get_flow_ctrl(struct rte_eth_dev *, struct rte_eth_fc_conf *);
@@ -272,8 +246,6 @@ void mlx5_allmulticast_disable(struct rte_eth_dev *);
 
 void mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *);
 void mlx5_stats_reset(struct rte_eth_dev *);
-void mlx5_stats_free(struct rte_eth_dev *dev);
-
 
 /* mlx5_vlan.c */
 
@@ -288,6 +260,7 @@ void mlx5_dev_stop(struct rte_eth_dev *);
 
 /* mlx5_fdir.c */
 
+void priv_fdir_queue_destroy(struct priv *, struct fdir_queue *);
 int fdir_init_filters_list(struct priv *);
 void priv_fdir_delete_filters_list(struct priv *);
 void priv_fdir_disable(struct priv *);
diff --git a/src/dpdk/drivers/net/mlx5/mlx5_defs.h b/src/dpdk/drivers/net/mlx5/mlx5_defs.h
index cc2a6f3..b32816e 100644
--- a/src/dpdk/drivers/net/mlx5/mlx5_defs.h
+++ b/src/dpdk/drivers/net/mlx5/mlx5_defs.h
@@ -37,7 +37,7 @@
 #include "mlx5_autoconf.h"
 
 /* Reported driver name. */
-#define MLX5_DRIVER_NAME "librte_pmd_mlx5"
+#define MLX5_DRIVER_NAME "net_mlx5"
 
 /* Maximum number of simultaneous MAC addresses. */
 #define MLX5_MAX_MAC_ADDRESSES 128
diff --git a/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c b/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c
index 130e15d..85b8136 100644
--- a/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c
+++ b/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c
@@ -50,7 +50,7 @@
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_atomic.h>
 #include <rte_ethdev.h>
@@ -60,7 +60,7 @@
 #include <rte_alarm.h>
 #include <rte_malloc.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5.h"
@@ -583,7 +583,8 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
                 (DEV_RX_OFFLOAD_IPV4_CKSUM |
                  DEV_RX_OFFLOAD_UDP_CKSUM |
                  DEV_RX_OFFLOAD_TCP_CKSUM) :
-                0);
+                0) |
+               (priv->hw_vlan_strip ? DEV_RX_OFFLOAD_VLAN_STRIP : 0);
        if (!priv->mps)
                info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
        if (priv->hw_csum)
@@ -599,15 +600,10 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
         * size if it is not fixed.
         * The API should be updated to solve this problem. */
        info->reta_size = priv->ind_table_max_size;
-       info->speed_capa =
-                       ETH_LINK_SPEED_1G |
-                       ETH_LINK_SPEED_10G |
-                       ETH_LINK_SPEED_20G |
-                       ETH_LINK_SPEED_25G |
-                       ETH_LINK_SPEED_40G |
-                       ETH_LINK_SPEED_50G |
-                       ETH_LINK_SPEED_56G |
-                       ETH_LINK_SPEED_100G;
+       info->hash_key_size = ((*priv->rss_conf) ?
+                              (*priv->rss_conf)[0]->rss_key_len :
+                              0);
+       info->speed_capa = priv->link_speed_capa;
        priv_unlock(priv);
 }
 
@@ -630,7 +626,7 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 }
 
 /**
- * DPDK callback to retrieve physical link information (unlocked version).
+ * Retrieve physical link information (unlocked version using legacy ioctl).
  *
  * @param dev
  *   Pointer to Ethernet device structure.
@@ -638,11 +634,11 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
  *   Wait for request completion (ignored).
  */
 static int
-mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete)
+mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
 {
        struct priv *priv = mlx5_get_priv(dev);
        struct ethtool_cmd edata = {
-               .cmd = ETHTOOL_GSET
+               .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */
        };
        struct ifreq ifr;
        struct rte_eth_link dev_link;
@@ -667,6 +663,19 @@ mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete)
                dev_link.link_speed = 0;
        else
                dev_link.link_speed = link_speed;
+       priv->link_speed_capa = 0;
+       if (edata.supported & SUPPORTED_Autoneg)
+               priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
+       if (edata.supported & (SUPPORTED_1000baseT_Full |
+                              SUPPORTED_1000baseKX_Full))
+               priv->link_speed_capa |= ETH_LINK_SPEED_1G;
+       if (edata.supported & SUPPORTED_10000baseKR_Full)
+               priv->link_speed_capa |= ETH_LINK_SPEED_10G;
+       if (edata.supported & (SUPPORTED_40000baseKR4_Full |
+                              SUPPORTED_40000baseCR4_Full |
+                              SUPPORTED_40000baseSR4_Full |
+                              SUPPORTED_40000baseLR4_Full))
+               priv->link_speed_capa |= ETH_LINK_SPEED_40G;
        dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
                                ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
        dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
@@ -680,6 +689,123 @@ mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete)
        return -1;
 }
 
+/**
+ * Retrieve physical link information (unlocked version using new ioctl from
+ * Linux 4.5).
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param wait_to_complete
+ *   Wait for request completion (ignored).
+ */
+static int
+mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
+{
+#ifdef ETHTOOL_GLINKSETTINGS
+       struct priv *priv = mlx5_get_priv(dev);
+       struct ethtool_link_settings edata = {
+               .cmd = ETHTOOL_GLINKSETTINGS,
+       };
+       struct ifreq ifr;
+       struct rte_eth_link dev_link;
+       uint64_t sc;
+
+       (void)wait_to_complete;
+       if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
+               WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
+               return -1;
+       }
+       memset(&dev_link, 0, sizeof(dev_link));
+       dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
+                               (ifr.ifr_flags & IFF_RUNNING));
+       ifr.ifr_data = (void *)&edata;
+       if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
+               DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s",
+                     strerror(errno));
+               return -1;
+       }
+       dev_link.link_speed = edata.speed;
+       sc = edata.link_mode_masks[0] |
+               ((uint64_t)edata.link_mode_masks[1] << 32);
+       priv->link_speed_capa = 0;
+       /* Link speeds available in kernel v4.5. */
+       if (sc & ETHTOOL_LINK_MODE_Autoneg_BIT)
+               priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
+       if (sc & (ETHTOOL_LINK_MODE_1000baseT_Full_BIT |
+                 ETHTOOL_LINK_MODE_1000baseKX_Full_BIT))
+               priv->link_speed_capa |= ETH_LINK_SPEED_1G;
+       if (sc & (ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT |
+                 ETHTOOL_LINK_MODE_10000baseKR_Full_BIT |
+                 ETHTOOL_LINK_MODE_10000baseR_FEC_BIT))
+               priv->link_speed_capa |= ETH_LINK_SPEED_10G;
+       if (sc & (ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT |
+                 ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT))
+               priv->link_speed_capa |= ETH_LINK_SPEED_20G;
+       if (sc & (ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT |
+                 ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT |
+                 ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT |
+                 ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT))
+               priv->link_speed_capa |= ETH_LINK_SPEED_40G;
+       if (sc & (ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT |
+                 ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT |
+                 ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT |
+                 ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT))
+               priv->link_speed_capa |= ETH_LINK_SPEED_56G;
+       /* Link speeds available in kernel v4.6. */
+#ifdef HAVE_ETHTOOL_LINK_MODE_25G
+       if (sc & (ETHTOOL_LINK_MODE_25000baseCR_Full_BIT |
+                 ETHTOOL_LINK_MODE_25000baseKR_Full_BIT |
+                 ETHTOOL_LINK_MODE_25000baseSR_Full_BIT))
+               priv->link_speed_capa |= ETH_LINK_SPEED_25G;
+#endif
+#ifdef HAVE_ETHTOOL_LINK_MODE_50G
+       if (sc & (ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT |
+                 ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT))
+               priv->link_speed_capa |= ETH_LINK_SPEED_50G;
+#endif
+#ifdef HAVE_ETHTOOL_LINK_MODE_100G
+       if (sc & (ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT |
+                 ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT |
+                 ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT |
+                 ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT))
+               priv->link_speed_capa |= ETH_LINK_SPEED_100G;
+#endif
+       dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
+                               ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
+       dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
+                                 ETH_LINK_SPEED_FIXED);
+       if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) {
+               /* Link status changed. */
+               dev->data->dev_link = dev_link;
+               return 0;
+       }
+#else
+       (void)dev;
+       (void)wait_to_complete;
+#endif
+       /* Link status is still the same. */
+       return -1;
+}
+
+/**
+ * DPDK callback to retrieve physical link information (unlocked version).
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param wait_to_complete
+ *   Wait for request completion (ignored).
+ */
+int
+mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete)
+{
+       int ret;
+
+       ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete);
+       if (ret < 0)
+               ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete);
+       return ret;
+}
+
 /**
  * DPDK callback to retrieve physical link information.
  *
@@ -807,7 +933,7 @@ recover:
                if (rehash)
                        ret = rxq_rehash(dev, rxq_ctrl);
                else
-                       ret = rxq_ctrl_setup(dev, rxq_ctrl, rxq->elts_n,
+                       ret = rxq_ctrl_setup(dev, rxq_ctrl, 1 << rxq->elts_n,
                                             rxq_ctrl->socket, NULL, rxq->mp);
                if (!ret)
                        continue;
@@ -1067,8 +1193,8 @@ mlx5_dev_link_status_handler(void *arg)
        assert(priv->pending_alarm == 1);
        ret = priv_dev_link_status_handler(priv, dev);
        priv_unlock(priv);
-       if (ret)
-               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC);
+       //if (ret)
+       //      _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
 }
 
 /**
@@ -1090,8 +1216,8 @@ mlx5_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *cb_arg)
        priv_lock(priv);
        ret = priv_dev_link_status_handler(priv, dev);
        priv_unlock(priv);
-       if (ret)
-               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC);
+       //if (ret)
+       //      _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
 }
 
 /**
@@ -1308,12 +1434,14 @@ mlx5_secondary_data_setup(struct priv *priv)
                        continue;
                primary_txq_ctrl = container_of(primary_txq,
                                                struct txq_ctrl, txq);
-               txq_ctrl = rte_calloc_socket("TXQ", 1, sizeof(*txq_ctrl), 0,
+               txq_ctrl = rte_calloc_socket("TXQ", 1, sizeof(*txq_ctrl) +
+                                            (1 << primary_txq->elts_n) *
+                                            sizeof(struct rte_mbuf *), 0,
                                             primary_txq_ctrl->socket);
                if (txq_ctrl != NULL) {
                        if (txq_ctrl_setup(priv->dev,
-                                          primary_txq_ctrl,
-                                          primary_txq->elts_n,
+                                          txq_ctrl,
+                                          1 << primary_txq->elts_n,
                                           primary_txq_ctrl->socket,
                                           NULL) == 0) {
                                txq_ctrl->txq.stats.idx =
@@ -1397,10 +1525,6 @@ priv_select_tx_function(struct priv *priv)
        } else if ((priv->sriov == 0) && priv->mps) {
                priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw;
                DEBUG("selected MPW TX function");
-       } else if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
-               priv->dev->tx_pkt_burst = mlx5_tx_burst_inline;
-               DEBUG("selected inline TX function (%u >= %u queues)",
-                     priv->txqs_n, priv->txqs_inline);
        }
 }
 
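
The reworked link handling above queries the kernel two ways: mlx5_link_update_unlocked() first tries the ETHTOOL_GLINKSETTINGS ioctl (Linux 4.5+), which also fills priv->link_speed_capa, and falls back to the deprecated ETHTOOL_GSET path on older kernels. A standalone sketch of the legacy query follows; the helper is hypothetical, while the ioctl, struct, and accessor are the standard Linux ethtool ABI (the driver itself funnels this through priv_ifreq() rather than a raw socket).

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <net/if.h>
    #include <linux/ethtool.h>
    #include <linux/sockios.h>

    /* Hypothetical helper: query link speed the way the GSET fallback does.
     * Assumes an already-open AF_INET socket and a known interface name. */
    static uint32_t
    sketch_link_speed(int sock, const char *ifname)
    {
            struct ethtool_cmd edata = { .cmd = ETHTOOL_GSET };
            struct ifreq ifr;

            memset(&ifr, 0, sizeof(ifr));
            strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name) - 1);
            ifr.ifr_data = (void *)&edata;
            if (ioctl(sock, SIOCETHTOOL, &ifr) == -1)
                    return 0;                     /* speed unknown */
            return ethtool_cmd_speed(&edata);     /* Mb/s */
    }
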
diff --git a/src/dpdk/drivers/net/mlx5/mlx5_fdir.c b/src/dpdk/drivers/net/mlx5/mlx5_fdir.c
index 4ba3bb9..1acf682 100644
--- a/src/dpdk/drivers/net/mlx5/mlx5_fdir.c
+++ b/src/dpdk/drivers/net/mlx5/mlx5_fdir.c
 #include <string.h>
 #include <errno.h>
 
-#define TREX_PATCH
-
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
-#include <infiniband/verbs_exp.h>
+#include <infiniband/verbs.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_ether.h>
 #include <rte_malloc.h>
 #include <rte_ethdev.h>
 #include <rte_common.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5.h"
@@ -69,10 +67,6 @@ struct fdir_flow_desc {
        uint16_t src_port;
        uint32_t src_ip[4];
        uint32_t dst_ip[4];
-    uint8_t  tos;
-    uint8_t  ip_id;
-    uint8_t  proto;
-
        uint8_t mac[6];
        uint16_t vlan_tag;
        enum hash_rxq_type type;
@@ -81,6 +75,7 @@ struct fdir_flow_desc {
 struct mlx5_fdir_filter {
        LIST_ENTRY(mlx5_fdir_filter) next;
        uint16_t queue; /* Queue assigned to if FDIR match. */
+       enum rte_eth_fdir_behavior behavior;
        struct fdir_flow_desc desc;
        struct ibv_exp_flow *flow;
 };
@@ -108,7 +103,6 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter,
        desc->vlan_tag = fdir_filter->input.flow_ext.vlan_tci;
 
        /* Set MAC address. */
-#ifndef TREX_PATCH
        if (mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
                rte_memcpy(desc->mac,
                           fdir_filter->input.flow.mac_vlan_flow.mac_addr.
@@ -117,13 +111,6 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter,
                desc->type = HASH_RXQ_ETH;
                return;
        }
-#else
-    if (fdir_filter->input.flow.ip4_flow.ip_id == 2) {
-               desc->type = HASH_RXQ_ETH;
-        desc->ip_id = fdir_filter->input.flow.ip4_flow.ip_id;
-               return;
-    }
-#endif
 
        /* Set mode */
        switch (fdir_filter->input.flow_type) {
@@ -155,13 +142,9 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter,
        case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
                desc->src_port = fdir_filter->input.flow.udp4_flow.src_port;
                desc->dst_port = fdir_filter->input.flow.udp4_flow.dst_port;
-
        case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
                desc->src_ip[0] = fdir_filter->input.flow.ip4_flow.src_ip;
                desc->dst_ip[0] = fdir_filter->input.flow.ip4_flow.dst_ip;
-        desc->tos       = fdir_filter->input.flow.ip4_flow.ttl; /* TTL is mapped to TOS TREX_PATCH */
-        desc->ip_id     = fdir_filter->input.flow.ip4_flow.ip_id;
-        desc->proto     = fdir_filter->input.flow.ip4_flow.proto;
                break;
        case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
        case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
@@ -175,17 +158,12 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter,
                rte_memcpy(desc->dst_ip,
                           fdir_filter->input.flow.ipv6_flow.dst_ip,
                           sizeof(desc->dst_ip));
-        desc->tos       = (uint8_t)fdir_filter->input.flow.ipv6_flow.hop_limits;  /* TTL is mapped to TOS - TREX_PATCH */
-        desc->ip_id     = (uint8_t)fdir_filter->input.flow.ipv6_flow.flow_label;
-        desc->proto     = fdir_filter->input.flow.ipv6_flow.proto;
-
                break;
        default:
                break;
        }
 }
 
-
 /**
  * Check if two flow descriptors overlap according to configured mask.
  *
@@ -220,12 +198,6 @@ priv_fdir_overlap(const struct priv *priv,
            ((desc1->dst_port & mask->dst_port_mask) !=
             (desc2->dst_port & mask->dst_port_mask)))
                return 0;
-
-    if  ( (desc1->tos    != desc2->tos)  ||
-          (desc1->ip_id  != desc2->ip_id) ||
-          (desc1->proto  != desc2->proto) ) 
-        return 0;
-
        switch (desc1->type) {
        case HASH_RXQ_IPV4:
        case HASH_RXQ_UDPV4:
@@ -233,9 +205,8 @@ priv_fdir_overlap(const struct priv *priv,
                if (((desc1->src_ip[0] & mask->ipv4_mask.src_ip) !=
                     (desc2->src_ip[0] & mask->ipv4_mask.src_ip)) ||
                    ((desc1->dst_ip[0] & mask->ipv4_mask.dst_ip) !=
-                    (desc2->dst_ip[0] & mask->ipv4_mask.dst_ip))) 
+                    (desc2->dst_ip[0] & mask->ipv4_mask.dst_ip)))
                        return 0;
-
                break;
        case HASH_RXQ_IPV6:
        case HASH_RXQ_UDPV6:
@@ -281,8 +252,8 @@ priv_fdir_flow_add(struct priv *priv,
        struct ibv_exp_flow_attr *attr = &data->attr;
        uintptr_t spec_offset = (uintptr_t)&data->spec;
        struct ibv_exp_flow_spec_eth *spec_eth;
-       struct ibv_exp_flow_spec_ipv4_ext *spec_ipv4;
-       struct ibv_exp_flow_spec_ipv6_ext *spec_ipv6;
+       struct ibv_exp_flow_spec_ipv4 *spec_ipv4;
+       struct ibv_exp_flow_spec_ipv6 *spec_ipv6;
        struct ibv_exp_flow_spec_tcp_udp *spec_tcp_udp;
        struct mlx5_fdir_filter *iter_fdir_filter;
        unsigned int i;
@@ -294,10 +265,8 @@ priv_fdir_flow_add(struct priv *priv,
                    (iter_fdir_filter->flow != NULL) &&
                    (priv_fdir_overlap(priv,
                                       &mlx5_fdir_filter->desc,
-                                      &iter_fdir_filter->desc))){
-            ERROR("overlap rules, please check your rules");
-            return EEXIST;
-        }
+                                      &iter_fdir_filter->desc)))
+                       return EEXIST;
 
        /*
         * No padding must be inserted by the compiler between attr and spec.
@@ -320,7 +289,6 @@ priv_fdir_flow_add(struct priv *priv,
        /* Update priority */
        attr->priority = 2;
 
-#ifndef TREX_PATCH
        if (fdir_mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
                /* MAC Address */
                for (i = 0; i != RTE_DIM(spec_eth->mask.dst_mac); ++i) {
@@ -330,14 +298,6 @@ priv_fdir_flow_add(struct priv *priv,
                }
                goto create_flow;
        }
-#else
-    // empty mask means "match everything". This rule will match all packets, no matter what is the ether type
-    if (desc->ip_id == 2) {
-        spec_eth->val.ether_type = 0x0806;
-        spec_eth->mask.ether_type = 0x0000;
-        goto create_flow;
-    }
-#endif
 
        switch (desc->type) {
        case HASH_RXQ_IPV4:
@@ -346,10 +306,10 @@ priv_fdir_flow_add(struct priv *priv,
                spec_offset += spec_eth->size;
 
                /* Set IP spec */
-               spec_ipv4 = (struct ibv_exp_flow_spec_ipv4_ext *)spec_offset;
+               spec_ipv4 = (struct ibv_exp_flow_spec_ipv4 *)spec_offset;
 
                /* The second specification must be IP. */
-               assert(spec_ipv4->type == IBV_EXP_FLOW_SPEC_IPV4_EXT);
+               assert(spec_ipv4->type == IBV_EXP_FLOW_SPEC_IPV4);
                assert(spec_ipv4->size == sizeof(*spec_ipv4));
 
                spec_ipv4->val.src_ip =
@@ -359,21 +319,6 @@ priv_fdir_flow_add(struct priv *priv,
                spec_ipv4->mask.src_ip = mask->ipv4_mask.src_ip;
                spec_ipv4->mask.dst_ip = mask->ipv4_mask.dst_ip;
 
-        /* PROTO */
-        spec_ipv4->val.proto  = desc->proto & mask->ipv4_mask.proto;
-        spec_ipv4->mask.proto = mask->ipv4_mask.proto;
-
-#ifdef TREX_PATCH
-        /* TOS */
-        if (desc->ip_id == 1) {
-            spec_ipv4->mask.tos = 0x1;
-            spec_ipv4->val.tos = 0x1;
-        } else {
-            spec_ipv4->mask.tos = 0x0;
-            spec_ipv4->val.tos = 0x0;
-        }
-        //        spec_ipv4->val.tos = desc->tos & spec_ipv4->mask.tos;// & mask->ipv4_mask.tos;
-#endif
                /* Update priority */
                attr->priority = 1;
 
@@ -388,10 +333,10 @@ priv_fdir_flow_add(struct priv *priv,
                spec_offset += spec_eth->size;
 
                /* Set IP spec */
-               spec_ipv6 = (struct ibv_exp_flow_spec_ipv6_ext *)spec_offset;
+               spec_ipv6 = (struct ibv_exp_flow_spec_ipv6 *)spec_offset;
 
                /* The second specification must be IP. */
-               assert(spec_ipv6->type == IBV_EXP_FLOW_SPEC_IPV6_EXT);
+               assert(spec_ipv6->type == IBV_EXP_FLOW_SPEC_IPV6);
                assert(spec_ipv6->size == sizeof(*spec_ipv6));
 
                for (i = 0; i != RTE_DIM(desc->src_ip); ++i) {
@@ -407,20 +352,6 @@ priv_fdir_flow_add(struct priv *priv,
                           mask->ipv6_mask.dst_ip,
                           sizeof(spec_ipv6->mask.dst_ip));
 
-        spec_ipv6->val.next_hdr  = desc->proto & mask->ipv6_mask.proto;
-        spec_ipv6->mask.next_hdr = mask->ipv6_mask.proto;
-
-#ifdef TREX_PATCH
-        /* TOS */
-        if (desc->ip_id == 1) {
-            spec_ipv6->mask.traffic_class = 0x1;
-            spec_ipv6->val.traffic_class = 0x1;
-        } else {
-            spec_ipv6->mask.traffic_class = 0;
-            spec_ipv6->val.traffic_class = 0;
-        }
-#endif
-
                /* Update priority */
                attr->priority = 1;
 
@@ -469,6 +400,145 @@ create_flow:
        return 0;
 }
 
+/**
+ * Destroy a flow director queue.
+ *
+ * @param priv
+ *   Private structure.
+ * @param fdir_queue
+ *   Flow director queue to be destroyed.
+ */
+void
+priv_fdir_queue_destroy(struct priv *priv, struct fdir_queue *fdir_queue)
+{
+       struct mlx5_fdir_filter *fdir_filter;
+
+       /* Disable filter flows still applying to this queue. */
+       LIST_FOREACH(fdir_filter, priv->fdir_filter_list, next) {
+               unsigned int idx = fdir_filter->queue;
+               struct rxq_ctrl *rxq_ctrl =
+                       container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);
+
+               assert(idx < priv->rxqs_n);
+               if (fdir_queue == rxq_ctrl->fdir_queue &&
+                   fdir_filter->flow != NULL) {
+                       claim_zero(ibv_exp_destroy_flow(fdir_filter->flow));
+                       fdir_filter->flow = NULL;
+               }
+       }
+       assert(fdir_queue->qp);
+       claim_zero(ibv_destroy_qp(fdir_queue->qp));
+       assert(fdir_queue->ind_table);
+       claim_zero(ibv_exp_destroy_rwq_ind_table(fdir_queue->ind_table));
+       if (fdir_queue->wq)
+               claim_zero(ibv_exp_destroy_wq(fdir_queue->wq));
+       if (fdir_queue->cq)
+               claim_zero(ibv_destroy_cq(fdir_queue->cq));
+#ifndef NDEBUG
+       memset(fdir_queue, 0x2a, sizeof(*fdir_queue));
+#endif
+       rte_free(fdir_queue);
+}
+
+/**
+ * Create a flow director queue.
+ *
+ * @param priv
+ *   Private structure.
+ * @param wq
+ *   Work queue to route matched packets to, NULL if one needs to
+ *   be created.
+ *
+ * @return
+ *   Related flow director queue on success, NULL otherwise.
+ */
+static struct fdir_queue *
+priv_fdir_queue_create(struct priv *priv, struct ibv_exp_wq *wq,
+                      unsigned int socket)
+{
+       struct fdir_queue *fdir_queue;
+
+       fdir_queue = rte_calloc_socket(__func__, 1, sizeof(*fdir_queue),
+                                      0, socket);
+       if (!fdir_queue) {
+               ERROR("cannot allocate flow director queue");
+               return NULL;
+       }
+       assert(priv->pd);
+       assert(priv->ctx);
+       if (!wq) {
+               fdir_queue->cq = ibv_exp_create_cq(
+                       priv->ctx, 1, NULL, NULL, 0,
+                       &(struct ibv_exp_cq_init_attr){
+                               .comp_mask = 0,
+                       });
+               if (!fdir_queue->cq) {
+                       ERROR("cannot create flow director CQ");
+                       goto error;
+               }
+               fdir_queue->wq = ibv_exp_create_wq(
+                       priv->ctx,
+                       &(struct ibv_exp_wq_init_attr){
+                               .wq_type = IBV_EXP_WQT_RQ,
+                               .max_recv_wr = 1,
+                               .max_recv_sge = 1,
+                               .pd = priv->pd,
+                               .cq = fdir_queue->cq,
+                       });
+               if (!fdir_queue->wq) {
+                       ERROR("cannot create flow director WQ");
+                       goto error;
+               }
+               wq = fdir_queue->wq;
+       }
+       fdir_queue->ind_table = ibv_exp_create_rwq_ind_table(
+               priv->ctx,
+               &(struct ibv_exp_rwq_ind_table_init_attr){
+                       .pd = priv->pd,
+                       .log_ind_tbl_size = 0,
+                       .ind_tbl = &wq,
+                       .comp_mask = 0,
+               });
+       if (!fdir_queue->ind_table) {
+               ERROR("cannot create flow director indirection table");
+               goto error;
+       }
+       fdir_queue->qp = ibv_exp_create_qp(
+               priv->ctx,
+               &(struct ibv_exp_qp_init_attr){
+                       .qp_type = IBV_QPT_RAW_PACKET,
+                       .comp_mask =
+                               IBV_EXP_QP_INIT_ATTR_PD |
+                               IBV_EXP_QP_INIT_ATTR_PORT |
+                               IBV_EXP_QP_INIT_ATTR_RX_HASH,
+                       .pd = priv->pd,
+                       .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
+                               .rx_hash_function =
+                                       IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
+                               .rx_hash_key_len = rss_hash_default_key_len,
+                               .rx_hash_key = rss_hash_default_key,
+                               .rx_hash_fields_mask = 0,
+                               .rwq_ind_tbl = fdir_queue->ind_table,
+                       },
+                       .port_num = priv->port,
+               });
+       if (!fdir_queue->qp) {
+               ERROR("cannot create flow director hash RX QP");
+               goto error;
+       }
+       return fdir_queue;
+error:
+       assert(fdir_queue);
+       assert(!fdir_queue->qp);
+       if (fdir_queue->ind_table)
+               claim_zero(ibv_exp_destroy_rwq_ind_table
+                          (fdir_queue->ind_table));
+       if (fdir_queue->wq)
+               claim_zero(ibv_exp_destroy_wq(fdir_queue->wq));
+       if (fdir_queue->cq)
+               claim_zero(ibv_destroy_cq(fdir_queue->cq));
+       rte_free(fdir_queue);
+       return NULL;
+}
+
 /**
  * Get flow director queue for a specific RX queue, create it in case
  * it does not exist.
@@ -486,74 +556,42 @@ priv_get_fdir_queue(struct priv *priv, uint16_t idx)
 {
        struct rxq_ctrl *rxq_ctrl =
                container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);
-       struct fdir_queue *fdir_queue = &rxq_ctrl->fdir_queue;
-       struct ibv_exp_rwq_ind_table *ind_table = NULL;
-       struct ibv_qp *qp = NULL;
-       struct ibv_exp_rwq_ind_table_init_attr ind_init_attr;
-       struct ibv_exp_rx_hash_conf hash_conf;
-       struct ibv_exp_qp_init_attr qp_init_attr;
-       int err = 0;
-
-       /* Return immediately if it has already been created. */
-       if (fdir_queue->qp != NULL)
-               return fdir_queue;
-
-       ind_init_attr = (struct ibv_exp_rwq_ind_table_init_attr){
-               .pd = priv->pd,
-               .log_ind_tbl_size = 0,
-               .ind_tbl = &rxq_ctrl->wq,
-               .comp_mask = 0,
-       };
+       struct fdir_queue *fdir_queue = rxq_ctrl->fdir_queue;
 
-       errno = 0;
-       ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
-                                                &ind_init_attr);
-       if (ind_table == NULL) {
-               /* Not clear whether errno is set. */
-               err = (errno ? errno : EINVAL);
-               ERROR("RX indirection table creation failed with error %d: %s",
-                     err, strerror(err));
-               goto error;
-       }
-
-       /* Create fdir_queue qp. */
-       hash_conf = (struct ibv_exp_rx_hash_conf){
-               .rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
-               .rx_hash_key_len = rss_hash_default_key_len,
-               .rx_hash_key = rss_hash_default_key,
-               .rx_hash_fields_mask = 0,
-               .rwq_ind_tbl = ind_table,
-       };
-       qp_init_attr = (struct ibv_exp_qp_init_attr){
-               .max_inl_recv = 0, /* Currently not supported. */
-               .qp_type = IBV_QPT_RAW_PACKET,
-               .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
-                             IBV_EXP_QP_INIT_ATTR_RX_HASH),
-               .pd = priv->pd,
-               .rx_hash_conf = &hash_conf,
-               .port_num = priv->port,
-       };
-
-       qp = ibv_exp_create_qp(priv->ctx, &qp_init_attr);
-       if (qp == NULL) {
-               err = (errno ? errno : EINVAL);
-               ERROR("hash RX QP creation failure: %s", strerror(err));
-               goto error;
+       assert(rxq_ctrl->wq);
+       if (fdir_queue == NULL) {
+               fdir_queue = priv_fdir_queue_create(priv, rxq_ctrl->wq,
+                                                   rxq_ctrl->socket);
+               rxq_ctrl->fdir_queue = fdir_queue;
        }
-
-       fdir_queue->ind_table = ind_table;
-       fdir_queue->qp = qp;
-
        return fdir_queue;
+}
 
-error:
-       if (qp != NULL)
-               claim_zero(ibv_destroy_qp(qp));
-
-       if (ind_table != NULL)
-               claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table));
+/**
+ * Get flow director drop queue. Create it if it does not exist.
+ *
+ * @param priv
+ *   Private structure.
+ *
+ * @return
+ *   Flow director drop queue on success, NULL otherwise.
+ */
+static struct fdir_queue *
+priv_get_fdir_drop_queue(struct priv *priv)
+{
+       struct fdir_queue *fdir_queue = priv->fdir_drop_queue;
 
-       return NULL;
+       if (fdir_queue == NULL) {
+               unsigned int socket = SOCKET_ID_ANY;
+
+               /* Select a known NUMA socket if possible. */
+               if (priv->rxqs_n && (*priv->rxqs)[0])
+                       socket = container_of((*priv->rxqs)[0],
+                                             struct rxq_ctrl, rxq)->socket;
+               fdir_queue = priv_fdir_queue_create(priv, NULL, socket);
+               priv->fdir_drop_queue = fdir_queue;
+       }
+       return fdir_queue;
 }
 
 /**
@@ -578,7 +616,11 @@ priv_fdir_filter_enable(struct priv *priv,
                return 0;
 
        /* Get fdir_queue for specific queue. */
-       fdir_queue = priv_get_fdir_queue(priv, mlx5_fdir_filter->queue);
+       if (mlx5_fdir_filter->behavior == RTE_ETH_FDIR_REJECT)
+               fdir_queue = priv_get_fdir_drop_queue(priv);
+       else
+               fdir_queue = priv_get_fdir_queue(priv,
+                                                mlx5_fdir_filter->queue);
 
        if (fdir_queue == NULL) {
                ERROR("failed to create flow director rxq for queue %d",
@@ -671,7 +713,6 @@ priv_fdir_disable(struct priv *priv)
 {
        unsigned int i;
        struct mlx5_fdir_filter *mlx5_fdir_filter;
-       struct fdir_queue *fdir_queue;
 
        /* Run on every flow director filter and destroy flow handle. */
        LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
@@ -688,23 +729,19 @@ priv_fdir_disable(struct priv *priv)
                }
        }
 
-       /* Run on every RX queue to destroy related flow director QP and
-        * indirection table. */
+       /* Destroy flow director context in each RX queue. */
        for (i = 0; (i != priv->rxqs_n); i++) {
                struct rxq_ctrl *rxq_ctrl =
                        container_of((*priv->rxqs)[i], struct rxq_ctrl, rxq);
 
-               fdir_queue = &rxq_ctrl->fdir_queue;
-               if (fdir_queue->qp != NULL) {
-                       claim_zero(ibv_destroy_qp(fdir_queue->qp));
-                       fdir_queue->qp = NULL;
-               }
-
-               if (fdir_queue->ind_table != NULL) {
-                       claim_zero(ibv_exp_destroy_rwq_ind_table
-                                  (fdir_queue->ind_table));
-                       fdir_queue->ind_table = NULL;
-               }
+               if (!rxq_ctrl->fdir_queue)
+                       continue;
+               priv_fdir_queue_destroy(priv, rxq_ctrl->fdir_queue);
+               rxq_ctrl->fdir_queue = NULL;
+       }
+       if (priv->fdir_drop_queue) {
+               priv_fdir_queue_destroy(priv, priv->fdir_drop_queue);
+               priv->fdir_drop_queue = NULL;
        }
 }
 
@@ -792,10 +829,8 @@ priv_fdir_filter_add(struct priv *priv,
        /* Duplicate filters are currently unsupported. */
        mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
        if (mlx5_fdir_filter != NULL) {
-#ifndef TREX_PATCH
                ERROR("filter already exists");
-#endif
-               return EEXIST;
+               return EINVAL;
        }
 
        /* Create new flow director filter. */
@@ -808,8 +843,9 @@ priv_fdir_filter_add(struct priv *priv,
                return err;
        }
 
-       /* Set queue. */
+       /* Set action parameters. */
        mlx5_fdir_filter->queue = fdir_filter->action.rx_queue;
+       mlx5_fdir_filter->behavior = fdir_filter->action.behavior;
 
        /* Convert to mlx5 filter descriptor. */
        fdir_filter_to_flow_desc(fdir_filter,
@@ -919,11 +955,9 @@ priv_fdir_filter_delete(struct priv *priv,
                return 0;
        }
 
-#ifndef TREX_PATCH
        ERROR("%p: flow director delete failed, cannot find filter",
              (void *)priv);
-#endif
-       return ENOENT;
+       return EINVAL;
 }
 
 /**
@@ -1029,7 +1063,7 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
                     enum rte_filter_op filter_op,
                     void *arg)
 {
-       int ret = -EINVAL;
+       int ret = EINVAL;
        struct priv *priv = dev->data->dev_private;
 
        switch (filter_type) {
@@ -1044,5 +1078,5 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
                break;
        }
 
-       return ret;
+       return -ret;
 }
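
The sign change above matches the DPDK convention: internal mlx5 helpers return positive errno values, and the eth_dev entry point negates exactly once so callers see 0 or a negative errno. A tiny sketch of that convention with hypothetical function names:

#include <errno.h>
#include <stdio.h>

/* Hypothetical internal helper: positive errno on failure, 0 on success. */
static int do_filter_op(int supported)
{
	return supported ? 0 : EINVAL;
}

/* Hypothetical entry point: negate once at the API boundary. */
static int dev_filter_ctrl(int supported)
{
	int ret = do_filter_op(supported);

	return -ret;
}

int main(void)
{
	/* Prints "0 -22" on Linux, where EINVAL is 22. */
	printf("%d %d\n", dev_filter_ctrl(1), dev_filter_ctrl(0));
	return 0;
}
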
index f6b27bb..4fcfd3b 100644 (file)
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <infiniband/verbs.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_ether.h>
 #include <rte_ethdev.h>
 #include <rte_common.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5.h"
index 67dfefa..0a36384 100644 (file)
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <infiniband/verbs.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_mempool.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5.h"
index 5db219b..8426adb 100644 (file)
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <infiniband/mlx5_hw.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
+#include "mlx5_autoconf.h"
+
 /* Get CQE owner bit. */
 #define MLX5_CQE_OWNER(op_own) ((op_own) & MLX5_CQE_OWNER_MASK)
 
 /* Maximum number of packets a multi-packet WQE can handle. */
 #define MLX5_MPW_DSEG_MAX 5
 
-/* Room for inline data in regular work queue element. */
-#define MLX5_WQE64_INL_DATA 12
+/* WQE DWORD size. */
+#define MLX5_WQE_DWORD_SIZE 16
+
+/* WQE size. */
+#define MLX5_WQE_SIZE (4 * MLX5_WQE_DWORD_SIZE)
+
+/* Compute the number of DS. */
+#define MLX5_WQE_DS(n) \
+       (((n) + MLX5_WQE_DWORD_SIZE - 1) / MLX5_WQE_DWORD_SIZE)
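
MLX5_WQE_DS() is a ceiling division by the 16-byte DWORD size, producing the DS (data segment count) value written into the WQE control segment. A few worked values, assuming only the two macros above:

#include <assert.h>

#define MLX5_WQE_DWORD_SIZE 16
#define MLX5_WQE_DS(n) \
	(((n) + MLX5_WQE_DWORD_SIZE - 1) / MLX5_WQE_DWORD_SIZE)

int main(void)
{
	/* Ceiling division by the 16-byte DWORD size. */
	assert(MLX5_WQE_DS(1) == 1);
	assert(MLX5_WQE_DS(16) == 1);
	assert(MLX5_WQE_DS(17) == 2);
	assert(MLX5_WQE_DS(64) == 4); /* exactly one 64-byte WQE */
	return 0;
}
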
 
 /* Room for inline data in multi-packet WQE. */
 #define MLX5_MWQE64_INL_DATA 28
 
+//#ifndef HAVE_VERBS_MLX5_OPCODE_TSO
+//#define MLX5_OPCODE_TSO MLX5_OPCODE_LSO_MPW /* Compat with OFED 3.3. */
+//#endif
+
+/* IPv4 packet. */
+#define MLX5_CQE_RX_IPV4_PACKET (1u << 2)
+
+/* IPv6 packet. */
+#define MLX5_CQE_RX_IPV6_PACKET (1u << 3)
+
+/* Outer IPv4 packet. */
+#define MLX5_CQE_RX_OUTER_IPV4_PACKET (1u << 7)
+
+/* Outer IPv6 packet. */
+#define MLX5_CQE_RX_OUTER_IPV6_PACKET (1u << 8)
+
+/* Tunnel packet bit in the CQE. */
+#define MLX5_CQE_RX_TUNNEL_PACKET (1u << 4)
+
+/* Outer IP checksum OK. */
+#define MLX5_CQE_RX_OUTER_IP_CSUM_OK (1u << 5)
+
+/* Outer UDP header and checksum OK. */
+#define MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK (1u << 6)
+
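
These new CQE bits are meant to be tested with plain mask operations against the completion-entry flag fields. An illustrative check; the `flags` variable is a stand-in for the actual CQE member(s) carrying these bits:

#include <stdint.h>
#include <stdio.h>

#define MLX5_CQE_RX_IPV4_PACKET   (1u << 2)
#define MLX5_CQE_RX_TUNNEL_PACKET (1u << 4)

int main(void)
{
	/* "flags" stands in for the CQE field(s) carrying these bits. */
	uint8_t flags = MLX5_CQE_RX_IPV4_PACKET | MLX5_CQE_RX_TUNNEL_PACKET;

	if (flags & MLX5_CQE_RX_TUNNEL_PACKET)
		printf("tunnel packet, inner IPv4: %d\n",
		       !!(flags & MLX5_CQE_RX_IPV4_PACKET));
	return 0;
}
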
 /* Subset of struct mlx5_wqe_eth_seg. */
 struct mlx5_wqe_eth_seg_small {
        uint32_t rsvd0;
@@ -79,59 +113,26 @@ struct mlx5_wqe_eth_seg_small {
        uint16_t mss;
        uint32_t rsvd2;
        uint16_t inline_hdr_sz;
+       uint8_t inline_hdr[2];
 };
 
-/* Regular WQE. */
-struct mlx5_wqe_regular {
-       union {
-               struct mlx5_wqe_ctrl_seg ctrl;
-               uint32_t data[4];
-       } ctrl;
-       struct mlx5_wqe_eth_seg eseg;
-       struct mlx5_wqe_data_seg dseg;
-} __rte_aligned(64);
-
-/* Inline WQE. */
-struct mlx5_wqe_inl {
-       union {
-               struct mlx5_wqe_ctrl_seg ctrl;
-               uint32_t data[4];
-       } ctrl;
-       struct mlx5_wqe_eth_seg eseg;
+struct mlx5_wqe_inl_small {
        uint32_t byte_cnt;
-       uint8_t data[MLX5_WQE64_INL_DATA];
-} __rte_aligned(64);
+       uint8_t raw;
+};
 
-/* Multi-packet WQE. */
-struct mlx5_wqe_mpw {
-       union {
-               struct mlx5_wqe_ctrl_seg ctrl;
-               uint32_t data[4];
-       } ctrl;
+/* Small common part of the WQE. */
+struct mlx5_wqe {
+       uint32_t ctrl[4];
        struct mlx5_wqe_eth_seg_small eseg;
-       struct mlx5_wqe_data_seg dseg[2];
-} __rte_aligned(64);
+};
 
-/* Multi-packet WQE with inline. */
-struct mlx5_wqe_mpw_inl {
-       union {
-               struct mlx5_wqe_ctrl_seg ctrl;
-               uint32_t data[4];
-       } ctrl;
-       struct mlx5_wqe_eth_seg_small eseg;
-       uint32_t byte_cnt;
-       uint8_t data[MLX5_MWQE64_INL_DATA];
+/* WQE. */
+struct mlx5_wqe64 {
+       struct mlx5_wqe hdr;
+       uint8_t raw[32];
 } __rte_aligned(64);
 
-/* Union of all WQE types. */
-union mlx5_wqe {
-       struct mlx5_wqe_regular wqe;
-       struct mlx5_wqe_inl inl;
-       struct mlx5_wqe_mpw mpw;
-       struct mlx5_wqe_mpw_inl mpw_inl;
-       uint8_t data[64];
-};
-
 /* MPW session status. */
 enum mlx5_mpw_state {
        MLX5_MPW_STATE_OPENED,
@@ -145,7 +146,7 @@ struct mlx5_mpw {
        unsigned int pkts_n;
        unsigned int len;
        unsigned int total_len;
-       volatile union mlx5_wqe *wqe;
+       volatile struct mlx5_wqe *wqe;
        union {
                volatile struct mlx5_wqe_data_seg *dseg[MLX5_MPW_DSEG_MAX];
                volatile uint8_t *raw;
@@ -157,7 +158,21 @@ struct mlx5_cqe {
 #if (RTE_CACHE_LINE_SIZE == 128)
        uint8_t padding[64];
 #endif
-       struct mlx5_cqe64 cqe64;
+       uint8_t pkt_info;
+       uint8_t rsvd0[11];
+       uint32_t rx_hash_res;
+       uint8_t rx_hash_type;
+       uint8_t rsvd1[11];
+       uint8_t hds_ip_ext;
+       uint8_t l4_hdr_type_etc;
+       uint16_t vlan_info;
+       uint8_t rsvd2[12];
+       uint32_t byte_cnt;
+       uint64_t timestamp;
+       uint8_t rsvd3[4];
+       uint16_t wqe_counter;
+       uint8_t rsvd4;
+       uint8_t op_own;
 };
 
 #endif /* RTE_PMD_MLX5_PRM_H_ */
index 639e935..0bed74e 100644 (file)
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <infiniband/verbs.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_malloc.h>
 #include <rte_ethdev.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5.h"
index 8b58555..173e6e8 100644 (file)
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <infiniband/verbs.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_ethdev.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5.h"
index 6be01d3..28e93d3 100644 (file)
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <infiniband/verbs.h>
 #include <infiniband/arch.h>
 #include <infiniband/mlx5_hw.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_mbuf.h>
 #include <rte_malloc.h>
 #include <rte_ethdev.h>
 #include <rte_common.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5.h"
@@ -102,7 +102,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
                                ETH_RSS_FRAG_IPV4),
                .flow_priority = 1,
                .flow_spec.ipv4 = {
-                       .type = IBV_EXP_FLOW_SPEC_IPV4_EXT,
+                       .type = IBV_EXP_FLOW_SPEC_IPV4,
                        .size = sizeof(hash_rxq_init[0].flow_spec.ipv4),
                },
                .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
@@ -140,7 +140,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
                                ETH_RSS_FRAG_IPV6),
                .flow_priority = 1,
                .flow_spec.ipv6 = {
-                       .type = IBV_EXP_FLOW_SPEC_IPV6_EXT,
+                       .type = IBV_EXP_FLOW_SPEC_IPV6,
                        .size = sizeof(hash_rxq_init[0].flow_spec.ipv6),
                },
                .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
@@ -723,7 +723,7 @@ rxq_free_elts(struct rxq_ctrl *rxq_ctrl)
        if (rxq_ctrl->rxq.elts == NULL)
                return;
 
-       for (i = 0; (i != rxq_ctrl->rxq.elts_n); ++i) {
+       for (i = 0; (i != (1u << rxq_ctrl->rxq.elts_n)); ++i) {
                if ((*rxq_ctrl->rxq.elts)[i] != NULL)
                        rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
                (*rxq_ctrl->rxq.elts)[i] = NULL;
@@ -745,6 +745,8 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
 
        DEBUG("cleaning up %p", (void *)rxq_ctrl);
        rxq_free_elts(rxq_ctrl);
+       if (rxq_ctrl->fdir_queue != NULL)
+               priv_fdir_queue_destroy(rxq_ctrl->priv, rxq_ctrl->fdir_queue);
        if (rxq_ctrl->if_wq != NULL) {
                assert(rxq_ctrl->priv != NULL);
                assert(rxq_ctrl->priv->ctx != NULL);
@@ -805,7 +807,7 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
 int
 rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
 {
-       unsigned int elts_n = rxq_ctrl->rxq.elts_n;
+       unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
        unsigned int i;
        struct ibv_exp_wq_attr mod;
        int err;
@@ -868,7 +870,7 @@ rxq_setup(struct rxq_ctrl *tmpl)
        struct ibv_cq *ibcq = tmpl->cq;
        struct mlx5_cq *cq = to_mxxx(cq, cq);
        struct mlx5_rwq *rwq = container_of(tmpl->wq, struct mlx5_rwq, wq);
-       struct rte_mbuf *(*elts)[tmpl->rxq.elts_n] =
+       struct rte_mbuf *(*elts)[1 << tmpl->rxq.elts_n] =
                rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, tmpl->socket);
 
        if (cq->cqe_sz != RTE_CACHE_LINE_SIZE) {
@@ -879,7 +881,7 @@ rxq_setup(struct rxq_ctrl *tmpl)
        if (elts == NULL)
                return ENOMEM;
        tmpl->rxq.rq_db = rwq->rq.db;
-       tmpl->rxq.cqe_n = ibcq->cqe + 1;
+       tmpl->rxq.cqe_n = log2above(ibcq->cqe);
        tmpl->rxq.cq_ci = 0;
        tmpl->rxq.rq_ci = 0;
        tmpl->rxq.cq_db = cq->dbrec;
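
This hunk is part of a wider change: elts_n and cqe_n now store the log2 of the ring size, so wrap masks become `(1 << n) - 1` throughout the datapath. A sketch of log2above() semantics, assuming the helper returns the smallest n with `(1u << n) >= v` (as the mlx5_utils.h version of that era does):

#include <assert.h>

/* Assumed behavior of log2above(): smallest n with (1u << n) >= v. */
static unsigned int log2above(unsigned int v)
{
	unsigned int l;
	unsigned int r;

	for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
		r |= (v & 1);
	return l + r;
}

int main(void)
{
	/* Log2-sized rings make wrap masks cheap: (1u << n) - 1. */
	assert(log2above(512) == 9);
	assert(log2above(513) == 10);
	assert(((1u << log2above(512)) - 1) == 511);
	return 0;
}
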
@@ -922,8 +924,9 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
                .priv = priv,
                .socket = socket,
                .rxq = {
-                       .elts_n = desc,
+                       .elts_n = log2above(desc),
                        .mp = mp,
+                       .rss_hash = priv->rxqs_n > 1,
                },
        };
        struct ibv_exp_wq_attr mod;
@@ -943,6 +946,11 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
        (void)conf; /* Thresholds configuration (ignored). */
        /* Enable scattered packets support for this queue if necessary. */
        assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+       /* If smaller than MRU, multi-segment support must be enabled. */
+       if (mb_len < (priv->mtu > dev->data->dev_conf.rxmode.max_rx_pkt_len ?
+                    dev->data->dev_conf.rxmode.max_rx_pkt_len :
+                    priv->mtu))
+               dev->data->dev_conf.rxmode.jumbo_frame = 1;
        if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
            (dev->data->dev_conf.rxmode.max_rx_pkt_len >
             (mb_len - RTE_PKTMBUF_HEADROOM))) {
@@ -1146,7 +1154,7 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
        }
        /* Reuse buffers from original queue if possible. */
        if (rxq_ctrl->rxq.elts_n) {
-               assert(rxq_ctrl->rxq.elts_n == desc);
+               assert(1 << rxq_ctrl->rxq.elts_n == desc);
                assert(rxq_ctrl->rxq.elts != tmpl.rxq.elts);
                ret = rxq_alloc_elts(&tmpl, desc, rxq_ctrl->rxq.elts);
        } else
@@ -1259,7 +1267,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
                      (void *)dev, (void *)rxq_ctrl);
                (*priv->rxqs)[idx] = &rxq_ctrl->rxq;
                /* Update receive callback. */
-               dev->rx_pkt_burst = mlx5_rx_burst;
+               priv_select_rx_function(priv);
        }
        priv_unlock(priv);
        return -ret;
index c0bcfd0..b56c0a1 100644 (file)
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <infiniband/verbs.h>
 #include <infiniband/mlx5_hw.h>
 #include <infiniband/arch.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_mbuf.h>
 #include <rte_mempool.h>
@@ -59,7 +59,7 @@
 #include <rte_branch_prediction.h>
 #include <rte_ether.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5.h"
@@ -69,6 +69,8 @@
 #include "mlx5_defs.h"
 #include "mlx5_prm.h"
 
+//#define MLX5_OPCODE_TSO 0xe
+
 #ifndef NDEBUG
 
 /**
  *   0 the first time.
  */
 static inline int
-check_cqe64_seen(volatile struct mlx5_cqe64 *cqe)
+check_cqe_seen(volatile struct mlx5_cqe *cqe)
 {
        static const uint8_t magic[] = "seen";
-       volatile uint8_t (*buf)[sizeof(cqe->rsvd40)] = &cqe->rsvd40;
+       volatile uint8_t (*buf)[sizeof(cqe->rsvd3)] = &cqe->rsvd3;
        int ret = 1;
        unsigned int i;
 
@@ -99,9 +101,9 @@ check_cqe64_seen(volatile struct mlx5_cqe64 *cqe)
 #endif /* NDEBUG */
 
 static inline int
-check_cqe64(volatile struct mlx5_cqe64 *cqe,
-           unsigned int cqes_n, const uint16_t ci)
-           __attribute__((always_inline));
+check_cqe(volatile struct mlx5_cqe *cqe,
+         unsigned int cqes_n, const uint16_t ci)
+         __attribute__((always_inline));
 
 /**
  * Check whether CQE is valid.
@@ -117,8 +119,8 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe,
  *   0 on success, 1 on failure.
  */
 static inline int
-check_cqe64(volatile struct mlx5_cqe64 *cqe,
-               unsigned int cqes_n, const uint16_t ci)
+check_cqe(volatile struct mlx5_cqe *cqe,
+         unsigned int cqes_n, const uint16_t ci)
 {
        uint16_t idx = ci & cqes_n;
        uint8_t op_own = cqe->op_own;
@@ -136,14 +138,14 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe,
                if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) ||
                    (syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR))
                        return 0;
-               if (!check_cqe64_seen(cqe))
+               if (!check_cqe_seen(cqe))
                        ERROR("unexpected CQE error %u (0x%02x)"
                              " syndrome 0x%02x",
                              op_code, op_code, syndrome);
                return 1;
        } else if ((op_code != MLX5_CQE_RESP_SEND) &&
                   (op_code != MLX5_CQE_REQ)) {
-               if (!check_cqe64_seen(cqe))
+               if (!check_cqe_seen(cqe))
                        ERROR("unexpected CQE opcode %u (0x%02x)",
                              op_code, op_code);
                return 1;
@@ -152,6 +154,9 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe,
        return 0;
 }
 
+static inline void
+txq_complete(struct txq *txq) __attribute__((always_inline));
+
 /**
  * Manage TX completions.
  *
@@ -160,34 +165,34 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe,
  * @param txq
  *   Pointer to TX queue structure.
  */
-static void
+static inline void
 txq_complete(struct txq *txq)
 {
-       const unsigned int elts_n = txq->elts_n;
-       const unsigned int cqe_n = txq->cqe_n;
+       const unsigned int elts_n = 1 << txq->elts_n;
+       const unsigned int cqe_n = 1 << txq->cqe_n;
        const unsigned int cqe_cnt = cqe_n - 1;
        uint16_t elts_free = txq->elts_tail;
        uint16_t elts_tail;
        uint16_t cq_ci = txq->cq_ci;
-       volatile struct mlx5_cqe64 *cqe = NULL;
-       volatile union mlx5_wqe *wqe;
+       volatile struct mlx5_cqe *cqe = NULL;
+       volatile struct mlx5_wqe *wqe;
 
        do {
-               volatile struct mlx5_cqe64 *tmp;
+               volatile struct mlx5_cqe *tmp;
 
-               tmp = &(*txq->cqes)[cq_ci & cqe_cnt].cqe64;
-               if (check_cqe64(tmp, cqe_n, cq_ci))
+               tmp = &(*txq->cqes)[cq_ci & cqe_cnt];
+               if (check_cqe(tmp, cqe_n, cq_ci))
                        break;
                cqe = tmp;
 #ifndef NDEBUG
                if (MLX5_CQE_FORMAT(cqe->op_own) == MLX5_COMPRESSED) {
-                       if (!check_cqe64_seen(cqe))
+                       if (!check_cqe_seen(cqe))
                                ERROR("unexpected compressed CQE, TX stopped");
                        return;
                }
                if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) ||
                    (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) {
-                       if (!check_cqe64_seen(cqe))
+                       if (!check_cqe_seen(cqe))
                                ERROR("unexpected error CQE, TX stopped");
                        return;
                }
@@ -196,9 +201,10 @@ txq_complete(struct txq *txq)
        } while (1);
        if (unlikely(cqe == NULL))
                return;
-       wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)];
-       elts_tail = wqe->wqe.ctrl.data[3];
-       assert(elts_tail < txq->wqe_n);
+       wqe = &(*txq->wqes)[htons(cqe->wqe_counter) &
+                           ((1 << txq->wqe_n) - 1)].hdr;
+       elts_tail = wqe->ctrl[3];
+       assert(elts_tail < (1 << txq->wqe_n));
        /* Free buffers. */
        while (elts_free != elts_tail) {
                struct rte_mbuf *elt = (*txq->elts)[elts_free];
@@ -283,235 +289,6 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
        return lkey;
 }
 
-/**
- * Write a regular WQE.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param wqe
- *   Pointer to the WQE to fill.
- * @param addr
- *   Buffer data address.
- * @param length
- *   Packet length.
- * @param lkey
- *   Memory region lkey.
- */
-static inline void
-mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe,
-              uintptr_t addr, uint32_t length, uint32_t lkey)
-{
-       wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
-       wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
-       wqe->wqe.ctrl.data[2] = 0;
-       wqe->wqe.ctrl.data[3] = 0;
-       wqe->inl.eseg.rsvd0 = 0;
-       wqe->inl.eseg.rsvd1 = 0;
-       wqe->inl.eseg.mss = 0;
-       wqe->inl.eseg.rsvd2 = 0;
-       wqe->wqe.eseg.inline_hdr_sz = htons(MLX5_ETH_INLINE_HEADER_SIZE);
-       /* Copy the first 16 bytes into inline header. */
-       rte_memcpy((uint8_t *)(uintptr_t)wqe->wqe.eseg.inline_hdr_start,
-                  (uint8_t *)(uintptr_t)addr,
-                  MLX5_ETH_INLINE_HEADER_SIZE);
-       addr += MLX5_ETH_INLINE_HEADER_SIZE;
-       length -= MLX5_ETH_INLINE_HEADER_SIZE;
-       /* Store remaining data in data segment. */
-       wqe->wqe.dseg.byte_count = htonl(length);
-       wqe->wqe.dseg.lkey = lkey;
-       wqe->wqe.dseg.addr = htonll(addr);
-       /* Increment consumer index. */
-       ++txq->wqe_ci;
-}
-
-/**
- * Write a regular WQE with VLAN.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param wqe
- *   Pointer to the WQE to fill.
- * @param addr
- *   Buffer data address.
- * @param length
- *   Packet length.
- * @param lkey
- *   Memory region lkey.
- * @param vlan_tci
- *   VLAN field to insert in packet.
- */
-static inline void
-mlx5_wqe_write_vlan(struct txq *txq, volatile union mlx5_wqe *wqe,
-                   uintptr_t addr, uint32_t length, uint32_t lkey,
-                   uint16_t vlan_tci)
-{
-       uint32_t vlan = htonl(0x81000000 | vlan_tci);
-
-       wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
-       wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4);
-       wqe->wqe.ctrl.data[2] = 0;
-       wqe->wqe.ctrl.data[3] = 0;
-       wqe->inl.eseg.rsvd0 = 0;
-       wqe->inl.eseg.rsvd1 = 0;
-       wqe->inl.eseg.mss = 0;
-       wqe->inl.eseg.rsvd2 = 0;
-       wqe->wqe.eseg.inline_hdr_sz = htons(MLX5_ETH_VLAN_INLINE_HEADER_SIZE);
-       /*
-        * Copy 12 bytes of source & destination MAC address.
-        * Copy 4 bytes of VLAN.
-        * Copy 2 bytes of Ether type.
-        */
-       rte_memcpy((uint8_t *)(uintptr_t)wqe->wqe.eseg.inline_hdr_start,
-                  (uint8_t *)(uintptr_t)addr, 12);
-       rte_memcpy((uint8_t *)((uintptr_t)wqe->wqe.eseg.inline_hdr_start + 12),
-                  &vlan, sizeof(vlan));
-       rte_memcpy((uint8_t *)((uintptr_t)wqe->wqe.eseg.inline_hdr_start + 16),
-                  (uint8_t *)((uintptr_t)addr + 12), 2);
-       addr += MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan);
-       length -= MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan);
-       /* Store remaining data in data segment. */
-       wqe->wqe.dseg.byte_count = htonl(length);
-       wqe->wqe.dseg.lkey = lkey;
-       wqe->wqe.dseg.addr = htonll(addr);
-       /* Increment consumer index. */
-       ++txq->wqe_ci;
-}
-
-/**
- * Write a inline WQE.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param wqe
- *   Pointer to the WQE to fill.
- * @param addr
- *   Buffer data address.
- * @param length
- *   Packet length.
- * @param lkey
- *   Memory region lkey.
- */
-static inline void
-mlx5_wqe_write_inline(struct txq *txq, volatile union mlx5_wqe *wqe,
-                     uintptr_t addr, uint32_t length)
-{
-       uint32_t size;
-       uint16_t wqe_cnt = txq->wqe_n - 1;
-       uint16_t wqe_ci = txq->wqe_ci + 1;
-
-       /* Copy the first 16 bytes into inline header. */
-       rte_memcpy((void *)(uintptr_t)wqe->inl.eseg.inline_hdr_start,
-                  (void *)(uintptr_t)addr,
-                  MLX5_ETH_INLINE_HEADER_SIZE);
-       addr += MLX5_ETH_INLINE_HEADER_SIZE;
-       length -= MLX5_ETH_INLINE_HEADER_SIZE;
-       size = 3 + ((4 + length + 15) / 16);
-       wqe->inl.byte_cnt = htonl(length | MLX5_INLINE_SEG);
-       rte_memcpy((void *)(uintptr_t)&wqe->inl.data[0],
-                  (void *)addr, MLX5_WQE64_INL_DATA);
-       addr += MLX5_WQE64_INL_DATA;
-       length -= MLX5_WQE64_INL_DATA;
-       while (length) {
-               volatile union mlx5_wqe *wqe_next =
-                       &(*txq->wqes)[wqe_ci & wqe_cnt];
-               uint32_t copy_bytes = (length > sizeof(*wqe)) ?
-                                     sizeof(*wqe) :
-                                     length;
-
-               rte_mov64((uint8_t *)(uintptr_t)&wqe_next->data[0],
-                         (uint8_t *)addr);
-               addr += copy_bytes;
-               length -= copy_bytes;
-               ++wqe_ci;
-       }
-       assert(size < 64);
-       wqe->inl.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
-       wqe->inl.ctrl.data[1] = htonl(txq->qp_num_8s | size);
-       wqe->inl.ctrl.data[2] = 0;
-       wqe->inl.ctrl.data[3] = 0;
-       wqe->inl.eseg.rsvd0 = 0;
-       wqe->inl.eseg.rsvd1 = 0;
-       wqe->inl.eseg.mss = 0;
-       wqe->inl.eseg.rsvd2 = 0;
-       wqe->inl.eseg.inline_hdr_sz = htons(MLX5_ETH_INLINE_HEADER_SIZE);
-       /* Increment consumer index. */
-       txq->wqe_ci = wqe_ci;
-}
-
-/**
- * Write a inline WQE with VLAN.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param wqe
- *   Pointer to the WQE to fill.
- * @param addr
- *   Buffer data address.
- * @param length
- *   Packet length.
- * @param lkey
- *   Memory region lkey.
- * @param vlan_tci
- *   VLAN field to insert in packet.
- */
-static inline void
-mlx5_wqe_write_inline_vlan(struct txq *txq, volatile union mlx5_wqe *wqe,
-                          uintptr_t addr, uint32_t length, uint16_t vlan_tci)
-{
-       uint32_t size;
-       uint32_t wqe_cnt = txq->wqe_n - 1;
-       uint16_t wqe_ci = txq->wqe_ci + 1;
-       uint32_t vlan = htonl(0x81000000 | vlan_tci);
-
-       /*
-        * Copy 12 bytes of source & destination MAC address.
-        * Copy 4 bytes of VLAN.
-        * Copy 2 bytes of Ether type.
-        */
-       rte_memcpy((uint8_t *)(uintptr_t)wqe->inl.eseg.inline_hdr_start,
-                  (uint8_t *)addr, 12);
-       rte_memcpy((uint8_t *)(uintptr_t)wqe->inl.eseg.inline_hdr_start + 12,
-                  &vlan, sizeof(vlan));
-       rte_memcpy((uint8_t *)((uintptr_t)wqe->inl.eseg.inline_hdr_start + 16),
-                  (uint8_t *)(addr + 12), 2);
-       addr += MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan);
-       length -= MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan);
-       size = (sizeof(wqe->inl.ctrl.ctrl) +
-               sizeof(wqe->inl.eseg) +
-               sizeof(wqe->inl.byte_cnt) +
-               length + 15) / 16;
-       wqe->inl.byte_cnt = htonl(length | MLX5_INLINE_SEG);
-       rte_memcpy((void *)(uintptr_t)&wqe->inl.data[0],
-                  (void *)addr, MLX5_WQE64_INL_DATA);
-       addr += MLX5_WQE64_INL_DATA;
-       length -= MLX5_WQE64_INL_DATA;
-       while (length) {
-               volatile union mlx5_wqe *wqe_next =
-                       &(*txq->wqes)[wqe_ci & wqe_cnt];
-               uint32_t copy_bytes = (length > sizeof(*wqe)) ?
-                                     sizeof(*wqe) :
-                                     length;
-
-               rte_mov64((uint8_t *)(uintptr_t)&wqe_next->data[0],
-                         (uint8_t *)addr);
-               addr += copy_bytes;
-               length -= copy_bytes;
-               ++wqe_ci;
-       }
-       assert(size < 64);
-       wqe->inl.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
-       wqe->inl.ctrl.data[1] = htonl(txq->qp_num_8s | size);
-       wqe->inl.ctrl.data[2] = 0;
-       wqe->inl.ctrl.data[3] = 0;
-       wqe->inl.eseg.rsvd0 = 0;
-       wqe->inl.eseg.rsvd1 = 0;
-       wqe->inl.eseg.mss = 0;
-       wqe->inl.eseg.rsvd2 = 0;
-       wqe->inl.eseg.inline_hdr_sz = htons(MLX5_ETH_VLAN_INLINE_HEADER_SIZE);
-       /* Increment consumer index. */
-       txq->wqe_ci = wqe_ci;
-}
-
 /**
  * Ring TX queue doorbell.
  *
@@ -532,8 +309,8 @@ mlx5_tx_dbrec(struct txq *txq)
        *txq->qp_db = htonl(txq->wqe_ci);
        /* Ensure ordering between DB record and BF copy. */
        rte_wmb();
-       rte_mov16(dst, (uint8_t *)data);
-       txq->bf_offset ^= txq->bf_buf_size;
+       memcpy(dst, (uint8_t *)data, 16);
+       txq->bf_offset ^= (1 << txq->bf_buf_size);
 }
 
 /**
@@ -547,9 +324,9 @@ mlx5_tx_dbrec(struct txq *txq)
 static inline void
 tx_prefetch_cqe(struct txq *txq, uint16_t ci)
 {
-       volatile struct mlx5_cqe64 *cqe;
+       volatile struct mlx5_cqe *cqe;
 
-       cqe = &(*txq->cqes)[ci & (txq->cqe_n - 1)].cqe64;
+       cqe = &(*txq->cqes)[ci & ((1 << txq->cqe_n) - 1)];
        rte_prefetch0(cqe);
 }
 
@@ -564,9 +341,9 @@ tx_prefetch_cqe(struct txq *txq, uint16_t ci)
 static inline void
 tx_prefetch_wqe(struct txq *txq, uint16_t ci)
 {
-       volatile union mlx5_wqe *wqe;
+       volatile struct mlx5_wqe64 *wqe;
 
-       wqe = &(*txq->wqes)[ci & (txq->wqe_n - 1)];
+       wqe = &(*txq->wqes)[ci & ((1 << txq->wqe_n) - 1)];
        rte_prefetch0(wqe);
 }
 
@@ -588,12 +365,15 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
        struct txq *txq = (struct txq *)dpdk_txq;
        uint16_t elts_head = txq->elts_head;
-       const unsigned int elts_n = txq->elts_n;
+       const unsigned int elts_n = 1 << txq->elts_n;
        unsigned int i = 0;
        unsigned int j = 0;
        unsigned int max;
        unsigned int comp;
-       volatile union mlx5_wqe *wqe = NULL;
+       volatile struct mlx5_wqe *wqe = NULL;
+       unsigned int segs_n = 0;
+       struct rte_mbuf *buf = NULL;
+       uint8_t *raw;
 
        if (unlikely(!pkts_n))
                return 0;
@@ -607,15 +387,17 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
        if (max > elts_n)
                max -= elts_n;
        do {
-               struct rte_mbuf *buf = *(pkts++);
-               unsigned int elts_head_next;
-               uintptr_t addr;
+               volatile struct mlx5_wqe_data_seg *dseg = NULL;
                uint32_t length;
-               uint32_t lkey;
-               unsigned int segs_n = buf->nb_segs;
-               volatile struct mlx5_wqe_data_seg *dseg;
-               unsigned int ds = sizeof(*wqe) / 16;
+               unsigned int ds = 0;
+               uintptr_t addr;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+               uint32_t total_length = 0;
+#endif
 
+               /* first_seg */
+               buf = *(pkts++);
+               segs_n = buf->nb_segs;
                /*
                 * Make sure there is enough room to store this packet and
                 * that one ring entry remains unused.
@@ -624,235 +406,180 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                if (max < segs_n + 1)
                        break;
                max -= segs_n;
-               --pkts_n;
-               elts_head_next = (elts_head + 1) & (elts_n - 1);
-               wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)];
-               dseg = &wqe->wqe.dseg;
-               rte_prefetch0(wqe);
-               if (pkts_n)
+               --segs_n;
+               if (!segs_n)
+                       --pkts_n;
+               wqe = &(*txq->wqes)[txq->wqe_ci &
+                                   ((1 << txq->wqe_n) - 1)].hdr;
+               tx_prefetch_wqe(txq, txq->wqe_ci + 1);
+               if (pkts_n > 1)
                        rte_prefetch0(*pkts);
-               /* Retrieve buffer information. */
                addr = rte_pktmbuf_mtod(buf, uintptr_t);
                length = DATA_LEN(buf);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+               total_length = length;
+#endif
+               assert(length >= MLX5_WQE_DWORD_SIZE);
                /* Update element. */
                (*txq->elts)[elts_head] = buf;
+               elts_head = (elts_head + 1) & (elts_n - 1);
                /* Prefetch next buffer data. */
-               if (pkts_n)
-                       rte_prefetch0(rte_pktmbuf_mtod(*pkts,
-                                                      volatile void *));
-               /* Retrieve Memory Region key for this memory pool. */
-               lkey = txq_mp2mr(txq, txq_mb2mp(buf));
-               if (buf->ol_flags & PKT_TX_VLAN_PKT)
-                       mlx5_wqe_write_vlan(txq, wqe, addr, length, lkey,
-                                           buf->vlan_tci);
-               else
-                       mlx5_wqe_write(txq, wqe, addr, length, lkey);
+               if (pkts_n > 1) {
+                       volatile void *pkt_addr;
+
+                       pkt_addr = rte_pktmbuf_mtod(*pkts, volatile void *);
+                       rte_prefetch0(pkt_addr);
+               }
                /* Should we enable HW CKSUM offload */
                if (buf->ol_flags &
                    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
-                       wqe->wqe.eseg.cs_flags =
+                       wqe->eseg.cs_flags =
                                MLX5_ETH_WQE_L3_CSUM |
                                MLX5_ETH_WQE_L4_CSUM;
                } else {
-                       wqe->wqe.eseg.cs_flags = 0;
+                       wqe->eseg.cs_flags = 0;
+               }
+               raw = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0];
+               /* Start the known and common part of the WQE structure. */
+               wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
+               wqe->ctrl[2] = 0;
+               wqe->ctrl[3] = 0;
+               wqe->eseg.rsvd0 = 0;
+               wqe->eseg.rsvd1 = 0;
+               wqe->eseg.mss = 0;
+               wqe->eseg.rsvd2 = 0;
+               /* Start by copying the Ethernet Header. */
+               memcpy((uint8_t *)raw, ((uint8_t *)addr), 16);
+               length -= MLX5_WQE_DWORD_SIZE;
+               addr += MLX5_WQE_DWORD_SIZE;
+               /* Replace the Ethernet type by the VLAN if necessary. */
+               if (buf->ol_flags & PKT_TX_VLAN_PKT) {
+                       uint32_t vlan = htonl(0x81000000 | buf->vlan_tci);
+
+                       memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE -
+                                          sizeof(vlan)),
+                              &vlan, sizeof(vlan));
+                       addr -= sizeof(vlan);
+                       length += sizeof(vlan);
                }
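
The VLAN path above splices `htonl(0x81000000 | vlan_tci)` in after the two MAC addresses, which yields TPID 0x8100 followed by the big-endian TCI on the wire. A small standalone check of that byte layout:

#include <arpa/inet.h>
#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
	uint16_t vlan_tci = 0x0123;
	uint32_t vlan = htonl(0x81000000 | vlan_tci);
	uint8_t bytes[4];

	/* TPID 0x8100 followed by the TCI, both in network byte order. */
	memcpy(bytes, &vlan, sizeof(vlan));
	assert(bytes[0] == 0x81 && bytes[1] == 0x00);
	assert(bytes[2] == 0x01 && bytes[3] == 0x23);
	return 0;
}
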
-               while (--segs_n) {
+               /* Inline if enough room. */
+               if (txq->max_inline != 0) {
+                       uintptr_t end =
+                               (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n];
+                       uint16_t max_inline =
+                               txq->max_inline * RTE_CACHE_LINE_SIZE;
+                       uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
+                       uint16_t room;
+
+                       raw += MLX5_WQE_DWORD_SIZE;
+                       room = end - (uintptr_t)raw;
+                       if (room > max_inline) {
+                               uintptr_t addr_end = (addr + max_inline) &
+                                       ~(RTE_CACHE_LINE_SIZE - 1);
+                               uint16_t copy_b = ((addr_end - addr) > length) ?
+                                                 length :
+                                                 (addr_end - addr);
+
+                               rte_memcpy((void *)raw, (void *)addr, copy_b);
+                               addr += copy_b;
+                               length -= copy_b;
+                               pkt_inline_sz += copy_b;
+                               /* Sanity check. */
+                               assert(addr <= addr_end);
+                       }
+                       /* Store the inlined packet size in the WQE. */
+                       wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz);
+                       /*
+                        * 2 DWORDs consumed by the WQE header + 1 DSEG +
+                        * the size of the inline part of the packet.
+                        */
+                       ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
+                       if (length > 0) {
+                               dseg = (struct mlx5_wqe_data_seg *)
+                                       ((uintptr_t)wqe +
+                                        (ds * MLX5_WQE_DWORD_SIZE));
+                               if ((uintptr_t)dseg >= end)
+                                       dseg = (struct mlx5_wqe_data_seg *)
+                                               ((uintptr_t)&(*txq->wqes)[0]);
+                               goto use_dseg;
+                       } else if (!segs_n) {
+                               goto next_pkt;
+                       } else {
+                               goto next_seg;
+                       }
+               } else {
                        /*
-                        * Spill on next WQE when the current one does not have
-                        * enough room left. Size of WQE must a be a multiple
-                        * of data segment size.
+                        * No inline has been done in the packet, only the
+                        * Ethernet header has been stored.
                         */
-                       assert(!(sizeof(*wqe) % sizeof(*dseg)));
-                       if (!(ds % (sizeof(*wqe) / 16)))
-                               dseg = (volatile void *)
-                                       &(*txq->wqes)[txq->wqe_ci++ &
-                                                     (txq->wqe_n - 1)];
-                       else
-                               ++dseg;
+                       wqe->eseg.inline_hdr_sz = htons(MLX5_WQE_DWORD_SIZE);
+                       dseg = (struct mlx5_wqe_data_seg *)
+                               ((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE));
+                       ds = 3;
+use_dseg:
+                       /* Add the remaining packet as a simple ds. */
+                       *dseg = (struct mlx5_wqe_data_seg) {
+                               .addr = htonll(addr),
+                               .byte_count = htonl(length),
+                               .lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+                       };
                        ++ds;
-                       buf = buf->next;
-                       assert(buf);
-                       /* Store segment information. */
-                       dseg->byte_count = htonl(DATA_LEN(buf));
-                       dseg->lkey = txq_mp2mr(txq, txq_mb2mp(buf));
-                       dseg->addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t));
-                       (*txq->elts)[elts_head_next] = buf;
-                       elts_head_next = (elts_head_next + 1) & (elts_n - 1);
-#ifdef MLX5_PMD_SOFT_COUNTERS
-                       length += DATA_LEN(buf);
-#endif
-                       ++j;
+                       if (!segs_n)
+                               goto next_pkt;
                }
-               /* Update DS field in WQE. */
-               wqe->wqe.ctrl.data[1] &= htonl(0xffffffc0);
-               wqe->wqe.ctrl.data[1] |= htonl(ds & 0x3f);
-               elts_head = elts_head_next;
-#ifdef MLX5_PMD_SOFT_COUNTERS
-               /* Increment sent bytes counter. */
-               txq->stats.obytes += length;
-#endif
-               elts_head = elts_head_next;
-               ++i;
-       } while (pkts_n);
-       /* Take a shortcut if nothing must be sent. */
-       if (unlikely(i == 0))
-               return 0;
-       /* Check whether completion threshold has been reached. */
-       comp = txq->elts_comp + i + j;
-       if (comp >= MLX5_TX_COMP_THRESH) {
-               /* Request completion on last WQE. */
-               wqe->wqe.ctrl.data[2] = htonl(8);
-               /* Save elts_head in unused "immediate" field of WQE. */
-               wqe->wqe.ctrl.data[3] = elts_head;
-               txq->elts_comp = 0;
-       } else {
-               txq->elts_comp = comp;
-       }
-#ifdef MLX5_PMD_SOFT_COUNTERS
-       /* Increment sent packets counter. */
-       txq->stats.opackets += i;
-#endif
-       /* Ring QP doorbell. */
-       mlx5_tx_dbrec(txq);
-       txq->elts_head = elts_head;
-       return i;
-}
-
-/**
- * DPDK callback for TX with inline support.
- *
- * @param dpdk_txq
- *   Generic pointer to TX queue structure.
- * @param[in] pkts
- *   Packets to transmit.
- * @param pkts_n
- *   Number of packets in array.
- *
- * @return
- *   Number of packets successfully transmitted (<= pkts_n).
- */
-uint16_t
-mlx5_tx_burst_inline(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
-{
-       struct txq *txq = (struct txq *)dpdk_txq;
-       uint16_t elts_head = txq->elts_head;
-       const unsigned int elts_n = txq->elts_n;
-       unsigned int i = 0;
-       unsigned int j = 0;
-       unsigned int max;
-       unsigned int comp;
-       volatile union mlx5_wqe *wqe = NULL;
-       unsigned int max_inline = txq->max_inline;
-
-       if (unlikely(!pkts_n))
-               return 0;
-       /* Prefetch first packet cacheline. */
-       tx_prefetch_cqe(txq, txq->cq_ci);
-       tx_prefetch_cqe(txq, txq->cq_ci + 1);
-       rte_prefetch0(*pkts);
-       /* Start processing. */
-       txq_complete(txq);
-       max = (elts_n - (elts_head - txq->elts_tail));
-       if (max > elts_n)
-               max -= elts_n;
-       do {
-               struct rte_mbuf *buf = *(pkts++);
-               unsigned int elts_head_next;
-               uintptr_t addr;
-               uint32_t length;
-               uint32_t lkey;
-               unsigned int segs_n = buf->nb_segs;
-               volatile struct mlx5_wqe_data_seg *dseg;
-               unsigned int ds = sizeof(*wqe) / 16;
-
+next_seg:
+               assert(buf);
+               assert(ds);
+               assert(wqe);
                /*
-                * Make sure there is enough room to store this packet and
-                * that one ring entry remains unused.
+                * Spill on next WQE when the current one does not have
+                        * enough room left. Size of WQE must be a multiple
+                * of data segment size.
                 */
-               assert(segs_n);
-               if (max < segs_n + 1)
-                       break;
-               max -= segs_n;
-               --pkts_n;
-               elts_head_next = (elts_head + 1) & (elts_n - 1);
-               wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)];
-               dseg = &wqe->wqe.dseg;
-               tx_prefetch_wqe(txq, txq->wqe_ci);
-               tx_prefetch_wqe(txq, txq->wqe_ci + 1);
-               if (pkts_n)
-                       rte_prefetch0(*pkts);
-               /* Should we enable HW CKSUM offload */
-               if (buf->ol_flags &
-                   (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
-                       wqe->inl.eseg.cs_flags =
-                               MLX5_ETH_WQE_L3_CSUM |
-                               MLX5_ETH_WQE_L4_CSUM;
-               } else {
-                       wqe->inl.eseg.cs_flags = 0;
+               assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE));
+               if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) {
+                       unsigned int n = (txq->wqe_ci + ((ds + 3) / 4)) &
+                               ((1 << txq->wqe_n) - 1);
+
+                       dseg = (struct mlx5_wqe_data_seg *)
+                               ((uintptr_t)&(*txq->wqes)[n]);
+                       tx_prefetch_wqe(txq, n + 1);
+               } else if (!dseg) {
+                       dseg = (struct mlx5_wqe_data_seg *)
+                               ((uintptr_t)wqe +
+                                (ds * MLX5_WQE_DWORD_SIZE));
+               } else {
+                       ++dseg;
                }
-               /* Retrieve buffer information. */
-               addr = rte_pktmbuf_mtod(buf, uintptr_t);
+               ++ds;
+               buf = buf->next;
+               assert(buf);
                length = DATA_LEN(buf);
-               /* Update element. */
-               (*txq->elts)[elts_head] = buf;
-               /* Prefetch next buffer data. */
-               if (pkts_n)
-                       rte_prefetch0(rte_pktmbuf_mtod(*pkts,
-                                                      volatile void *));
-               if ((length <= max_inline) && (segs_n == 1)) {
-                       if (buf->ol_flags & PKT_TX_VLAN_PKT)
-                               mlx5_wqe_write_inline_vlan(txq, wqe,
-                                                          addr, length,
-                                                          buf->vlan_tci);
-                       else
-                               mlx5_wqe_write_inline(txq, wqe, addr, length);
-                       goto skip_segs;
-               } else {
-                       /* Retrieve Memory Region key for this memory pool. */
-                       lkey = txq_mp2mr(txq, txq_mb2mp(buf));
-                       if (buf->ol_flags & PKT_TX_VLAN_PKT)
-                               mlx5_wqe_write_vlan(txq, wqe, addr, length,
-                                                   lkey, buf->vlan_tci);
-                       else
-                               mlx5_wqe_write(txq, wqe, addr, length, lkey);
-               }
-               while (--segs_n) {
-                       /*
-                        * Spill on next WQE when the current one does not have
-                        * enough room left. Size of WQE must a be a multiple
-                        * of data segment size.
-                        */
-                       assert(!(sizeof(*wqe) % sizeof(*dseg)));
-                       if (!(ds % (sizeof(*wqe) / 16)))
-                               dseg = (volatile void *)
-                                       &(*txq->wqes)[txq->wqe_ci++ &
-                                                     (txq->wqe_n - 1)];
-                       else
-                               ++dseg;
-                       ++ds;
-                       buf = buf->next;
-                       assert(buf);
-                       /* Store segment information. */
-                       dseg->byte_count = htonl(DATA_LEN(buf));
-                       dseg->lkey = txq_mp2mr(txq, txq_mb2mp(buf));
-                       dseg->addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t));
-                       (*txq->elts)[elts_head_next] = buf;
-                       elts_head_next = (elts_head_next + 1) & (elts_n - 1);
 #ifdef MLX5_PMD_SOFT_COUNTERS
-                       length += DATA_LEN(buf);
+               total_length += length;
 #endif
-                       ++j;
-               }
-               /* Update DS field in WQE. */
-               wqe->inl.ctrl.data[1] &= htonl(0xffffffc0);
-               wqe->inl.ctrl.data[1] |= htonl(ds & 0x3f);
-skip_segs:
-               elts_head = elts_head_next;
+               /* Store segment information. */
+               *dseg = (struct mlx5_wqe_data_seg) {
+                       .addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)),
+                       .byte_count = htonl(length),
+                       .lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+               };
+               (*txq->elts)[elts_head] = buf;
+               elts_head = (elts_head + 1) & (elts_n - 1);
+               ++j;
+               --segs_n;
+               if (segs_n)
+                       goto next_seg;
+               else
+                       --pkts_n;
+next_pkt:
+               ++i;
+               wqe->ctrl[1] = htonl(txq->qp_num_8s | ds);
+               txq->wqe_ci += (ds + 3) / 4;
 #ifdef MLX5_PMD_SOFT_COUNTERS
                /* Increment sent bytes counter. */
-               txq->stats.obytes += length;
+               txq->stats.obytes += total_length;
 #endif
-               ++i;
        } while (pkts_n);
        /* Take a shortcut if nothing must be sent. */
        if (unlikely(i == 0))
@@ -861,9 +588,9 @@ skip_segs:
        comp = txq->elts_comp + i + j;
        if (comp >= MLX5_TX_COMP_THRESH) {
                /* Request completion on last WQE. */
-               wqe->inl.ctrl.data[2] = htonl(8);
+               wqe->ctrl[2] = htonl(8);
                /* Save elts_head in unused "immediate" field of WQE. */
-               wqe->inl.ctrl.data[3] = elts_head;
+               wqe->ctrl[3] = elts_head;
                txq->elts_comp = 0;
        } else {
                txq->elts_comp = comp;
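
Completion requests stay batched: a burst only sets the completion bit in ctrl[2] and stashes elts_head in ctrl[3] once the running count crosses MLX5_TX_COMP_THRESH; txq_complete() later reads that stashed value back as elts_tail. A schematic of the batching, assuming a threshold of 32 (the value in mlx5_defs.h at the time):

#include <stdio.h>

#define MLX5_TX_COMP_THRESH 32 /* assumed; matches mlx5_defs.h of the era */

int main(void)
{
	unsigned int elts_comp = 0;
	unsigned int burst;

	/* Five bursts of 10 packets: only the burst that crosses the
	 * threshold requests a completion and resets the counter. */
	for (burst = 0; burst < 5; ++burst) {
		unsigned int comp = elts_comp + 10;

		if (comp >= MLX5_TX_COMP_THRESH) {
			printf("burst %u: request completion\n", burst);
			elts_comp = 0;
		} else {
			elts_comp = comp;
		}
	}
	return 0; /* prints once, at burst 3 */
}
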
@@ -891,28 +618,29 @@ skip_segs:
 static inline void
 mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
 {
-       uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
+       uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
        volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
                (volatile struct mlx5_wqe_data_seg (*)[])
-               (uintptr_t)&(*txq->wqes)[(idx + 1) & (txq->wqe_n - 1)];
+               (uintptr_t)&(*txq->wqes)[(idx + 1) & ((1 << txq->wqe_n) - 1)];
 
        mpw->state = MLX5_MPW_STATE_OPENED;
        mpw->pkts_n = 0;
        mpw->len = length;
        mpw->total_len = 0;
-       mpw->wqe = &(*txq->wqes)[idx];
-       mpw->wqe->mpw.eseg.mss = htons(length);
-       mpw->wqe->mpw.eseg.inline_hdr_sz = 0;
-       mpw->wqe->mpw.eseg.rsvd0 = 0;
-       mpw->wqe->mpw.eseg.rsvd1 = 0;
-       mpw->wqe->mpw.eseg.rsvd2 = 0;
-       mpw->wqe->mpw.ctrl.data[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
-                                          (txq->wqe_ci << 8) |
-                                          MLX5_OPCODE_TSO);
-       mpw->wqe->mpw.ctrl.data[2] = 0;
-       mpw->wqe->mpw.ctrl.data[3] = 0;
-       mpw->data.dseg[0] = &mpw->wqe->mpw.dseg[0];
-       mpw->data.dseg[1] = &mpw->wqe->mpw.dseg[1];
+       mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr;
+       mpw->wqe->eseg.mss = htons(length);
+       mpw->wqe->eseg.inline_hdr_sz = 0;
+       mpw->wqe->eseg.rsvd0 = 0;
+       mpw->wqe->eseg.rsvd1 = 0;
+       mpw->wqe->eseg.rsvd2 = 0;
+       mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
+                                 (txq->wqe_ci << 8) | MLX5_OPCODE_TSO);
+       mpw->wqe->ctrl[2] = 0;
+       mpw->wqe->ctrl[3] = 0;
+       mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *)
+               (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
+       mpw->data.dseg[1] = (volatile struct mlx5_wqe_data_seg *)
+               (((uintptr_t)mpw->wqe) + (3 * MLX5_WQE_DWORD_SIZE));
        mpw->data.dseg[2] = &(*dseg)[0];
        mpw->data.dseg[3] = &(*dseg)[1];
        mpw->data.dseg[4] = &(*dseg)[2];
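
The five MPW data segments straddle two 64-byte WQEs: the control and Ethernet segments take the first two DWORDs, dseg[0] and dseg[1] fill the rest of the first WQE, and dseg[2..4] live in the next ring slot. The offset arithmetic, using a hypothetical base address:

#include <assert.h>
#include <stdint.h>

#define MLX5_WQE_DWORD_SIZE 16
#define MLX5_WQE_SIZE (4 * MLX5_WQE_DWORD_SIZE)

int main(void)
{
	uintptr_t wqe = 0x1000; /* hypothetical ring slot address */

	/* dseg[0..1] share the first 64-byte WQE with ctrl + eseg. */
	assert(wqe + 2 * MLX5_WQE_DWORD_SIZE == 0x1020); /* dseg[0] */
	assert(wqe + 3 * MLX5_WQE_DWORD_SIZE == 0x1030); /* dseg[1] */
	/* dseg[2..4] start at the next 64-byte ring slot. */
	assert(wqe + MLX5_WQE_SIZE == 0x1040);
	return 0;
}
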
@@ -935,7 +663,7 @@ mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
         * Store size in multiple of 16 bytes. Control and Ethernet segments
         * count as 2.
         */
-       mpw->wqe->mpw.ctrl.data[1] = htonl(txq->qp_num_8s | (2 + num));
+       mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | (2 + num));
        mpw->state = MLX5_MPW_STATE_CLOSED;
        if (num < 3)
                ++txq->wqe_ci;
@@ -963,7 +691,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
        struct txq *txq = (struct txq *)dpdk_txq;
        uint16_t elts_head = txq->elts_head;
-       const unsigned int elts_n = txq->elts_n;
+       const unsigned int elts_n = 1 << txq->elts_n;
        unsigned int i = 0;
        unsigned int j = 0;
        unsigned int max;
@@ -1013,11 +741,11 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
                    ((mpw.len != length) ||
                     (segs_n != 1) ||
-                    (mpw.wqe->mpw.eseg.cs_flags != cs_flags)))
+                    (mpw.wqe->eseg.cs_flags != cs_flags)))
                        mlx5_mpw_close(txq, &mpw);
                if (mpw.state == MLX5_MPW_STATE_CLOSED) {
                        mlx5_mpw_new(txq, &mpw, length);
-                       mpw.wqe->mpw.eseg.cs_flags = cs_flags;
+                       mpw.wqe->eseg.cs_flags = cs_flags;
                }
                /* Multi-segment packets must be alone in their MPW. */
                assert((segs_n == 1) || (mpw.pkts_n == 0));
@@ -1063,12 +791,12 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
        /* "j" includes both packets and segments. */
        comp = txq->elts_comp + j;
        if (comp >= MLX5_TX_COMP_THRESH) {
-               volatile union mlx5_wqe *wqe = mpw.wqe;
+               volatile struct mlx5_wqe *wqe = mpw.wqe;
 
                /* Request completion on last WQE. */
-               wqe->mpw.ctrl.data[2] = htonl(8);
+               wqe->ctrl[2] = htonl(8);
                /* Save elts_head in unused "immediate" field of WQE. */
-               wqe->mpw.ctrl.data[3] = elts_head;
+               wqe->ctrl[3] = elts_head;
                txq->elts_comp = 0;
        } else {
                txq->elts_comp = comp;
@@ -1098,25 +826,28 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 static inline void
 mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
 {
-       uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
+       uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
+       struct mlx5_wqe_inl_small *inl;
 
        mpw->state = MLX5_MPW_INL_STATE_OPENED;
        mpw->pkts_n = 0;
        mpw->len = length;
        mpw->total_len = 0;
-       mpw->wqe = &(*txq->wqes)[idx];
-       mpw->wqe->mpw_inl.ctrl.data[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
-                                              (txq->wqe_ci << 8) |
-                                              MLX5_OPCODE_TSO);
-       mpw->wqe->mpw_inl.ctrl.data[2] = 0;
-       mpw->wqe->mpw_inl.ctrl.data[3] = 0;
-       mpw->wqe->mpw_inl.eseg.mss = htons(length);
-       mpw->wqe->mpw_inl.eseg.inline_hdr_sz = 0;
-       mpw->wqe->mpw_inl.eseg.cs_flags = 0;
-       mpw->wqe->mpw_inl.eseg.rsvd0 = 0;
-       mpw->wqe->mpw_inl.eseg.rsvd1 = 0;
-       mpw->wqe->mpw_inl.eseg.rsvd2 = 0;
-       mpw->data.raw = &mpw->wqe->mpw_inl.data[0];
+       mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr;
+       mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
+                                 (txq->wqe_ci << 8) |
+                                 MLX5_OPCODE_TSO);
+       mpw->wqe->ctrl[2] = 0;
+       mpw->wqe->ctrl[3] = 0;
+       mpw->wqe->eseg.mss = htons(length);
+       mpw->wqe->eseg.inline_hdr_sz = 0;
+       mpw->wqe->eseg.cs_flags = 0;
+       mpw->wqe->eseg.rsvd0 = 0;
+       mpw->wqe->eseg.rsvd1 = 0;
+       mpw->wqe->eseg.rsvd2 = 0;
+       inl = (struct mlx5_wqe_inl_small *)
+               (((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
+       mpw->data.raw = (uint8_t *)&inl->raw;
 }
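
The new code addresses the inline segment by offset instead of through the removed mpw_inl union: it sits two 16-byte dwords past the WQE start, right behind the control and Ethernet segments. A minimal sketch of the pointer arithmetic (struct layout abridged; MLX5_WQE_DWORD_SIZE assumed to be 16):

#include <stdint.h>

#define WQE_DWORD_SIZE 16 /* Stand-in for MLX5_WQE_DWORD_SIZE. */

struct wqe_inl_small {
        uint32_t byte_cnt; /* Inline length, MLX5_INLINE_SEG set on close. */
        uint8_t raw[];     /* Inline packet bytes start here. */
};

/* The inline segment sits two dwords past the WQE start, immediately
 * after the control and Ethernet segments. */
static uint8_t *
inline_data_ptr(volatile void *wqe)
{
        struct wqe_inl_small *inl = (struct wqe_inl_small *)
                ((uintptr_t)wqe + 2 * WQE_DWORD_SIZE);
        return inl->raw;
}
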
 
 /**
@@ -1131,17 +862,18 @@ static inline void
 mlx5_mpw_inline_close(struct txq *txq, struct mlx5_mpw *mpw)
 {
        unsigned int size;
+       struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *)
+               (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
 
-       size = sizeof(*mpw->wqe) - MLX5_MWQE64_INL_DATA + mpw->total_len;
+       size = MLX5_WQE_SIZE - MLX5_MWQE64_INL_DATA + mpw->total_len;
        /*
         * Store size in multiple of 16 bytes. Control and Ethernet segments
         * count as 2.
         */
-       mpw->wqe->mpw_inl.ctrl.data[1] =
-               htonl(txq->qp_num_8s | ((size + 15) / 16));
+       mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | MLX5_WQE_DS(size));
        mpw->state = MLX5_MPW_STATE_CLOSED;
-       mpw->wqe->mpw_inl.byte_cnt = htonl(mpw->total_len | MLX5_INLINE_SEG);
-       txq->wqe_ci += (size + (sizeof(*mpw->wqe) - 1)) / sizeof(*mpw->wqe);
+       inl->byte_cnt = htonl(mpw->total_len | MLX5_INLINE_SEG);
+       txq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
 }
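
Per the replaced expression, MLX5_WQE_DS() is the 16-byte round-up previously spelled out as ((size + 15) / 16), and wqe_ci advances by however many 64-byte WQEs the inline data occupied. A worked example (MLX5_WQE_SIZE assumed to be 64):

#include <stdio.h>

#define WQE_SIZE 64 /* Stand-in for MLX5_WQE_SIZE. */

/* 16-byte descriptor units: the (size + 15) / 16 the patch replaces
 * with MLX5_WQE_DS(). */
static unsigned int
wqe_ds(unsigned int size)
{
        return (size + 15) / 16;
}

int
main(void)
{
        unsigned int size = 150; /* Hypothetical ctrl + eseg + inline bytes. */

        printf("ds units: %u\n", wqe_ds(size));                           /* 10 */
        printf("wqe_ci advance: %u\n", (size + WQE_SIZE - 1) / WQE_SIZE); /* 3 */
        return 0;
}
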
 
 /**
@@ -1163,12 +895,12 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 {
        struct txq *txq = (struct txq *)dpdk_txq;
        uint16_t elts_head = txq->elts_head;
-       const unsigned int elts_n = txq->elts_n;
+       const unsigned int elts_n = 1 << txq->elts_n;
        unsigned int i = 0;
        unsigned int j = 0;
        unsigned int max;
        unsigned int comp;
-       unsigned int inline_room = txq->max_inline;
+       unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE;
        struct mlx5_mpw mpw = {
                .state = MLX5_MPW_STATE_CLOSED,
        };
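
Because txq->max_inline now counts cache lines rather than bytes (see the struct txq change below), the byte budget for inlining has to be rescaled wherever it is consumed. Sketch, assuming the usual 64-byte RTE_CACHE_LINE_SIZE:

#include <stdint.h>

#define CACHE_LINE_SIZE 64 /* Typical RTE_CACHE_LINE_SIZE value. */

/* txq->max_inline counts cache lines; the burst loop needs bytes. */
static unsigned int
inline_room_bytes(uint16_t max_inline)
{
        return (unsigned int)max_inline * CACHE_LINE_SIZE;
}
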
@@ -1214,31 +946,33 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
                if (mpw.state == MLX5_MPW_STATE_OPENED) {
                        if ((mpw.len != length) ||
                            (segs_n != 1) ||
-                           (mpw.wqe->mpw.eseg.cs_flags != cs_flags))
+                           (mpw.wqe->eseg.cs_flags != cs_flags))
                                mlx5_mpw_close(txq, &mpw);
                } else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
                        if ((mpw.len != length) ||
                            (segs_n != 1) ||
                            (length > inline_room) ||
-                           (mpw.wqe->mpw_inl.eseg.cs_flags != cs_flags)) {
+                           (mpw.wqe->eseg.cs_flags != cs_flags)) {
                                mlx5_mpw_inline_close(txq, &mpw);
-                               inline_room = txq->max_inline;
+                               inline_room =
+                                       txq->max_inline * RTE_CACHE_LINE_SIZE;
                        }
                }
                if (mpw.state == MLX5_MPW_STATE_CLOSED) {
                        if ((segs_n != 1) ||
                            (length > inline_room)) {
                                mlx5_mpw_new(txq, &mpw, length);
-                               mpw.wqe->mpw.eseg.cs_flags = cs_flags;
+                               mpw.wqe->eseg.cs_flags = cs_flags;
                        } else {
                                mlx5_mpw_inline_new(txq, &mpw, length);
-                               mpw.wqe->mpw_inl.eseg.cs_flags = cs_flags;
+                               mpw.wqe->eseg.cs_flags = cs_flags;
                        }
                }
                /* Multi-segment packets must be alone in their MPW. */
                assert((segs_n == 1) || (mpw.pkts_n == 0));
                if (mpw.state == MLX5_MPW_STATE_OPENED) {
-                       assert(inline_room == txq->max_inline);
+                       assert(inline_room ==
+                              txq->max_inline * RTE_CACHE_LINE_SIZE);
 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
                        length = 0;
 #endif
@@ -1277,7 +1011,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
                        addr = rte_pktmbuf_mtod(buf, uintptr_t);
                        (*txq->elts)[elts_head] = buf;
                        /* Maximum number of bytes before wrapping. */
-                       max = ((uintptr_t)&(*txq->wqes)[txq->wqe_n] -
+                       max = ((uintptr_t)&(*txq->wqes)[1 << txq->wqe_n] -
                               (uintptr_t)mpw.data.raw);
                        if (length > max) {
                                rte_memcpy((void *)(uintptr_t)mpw.data.raw,
@@ -1296,14 +1030,15 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
                                mpw.data.raw += length;
                        }
                        if ((uintptr_t)mpw.data.raw ==
-                           (uintptr_t)&(*txq->wqes)[txq->wqe_n])
+                           (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n])
                                mpw.data.raw =
                                        (volatile void *)&(*txq->wqes)[0];
                        ++mpw.pkts_n;
                        ++j;
                        if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) {
                                mlx5_mpw_inline_close(txq, &mpw);
-                               inline_room = txq->max_inline;
+                               inline_room =
+                                       txq->max_inline * RTE_CACHE_LINE_SIZE;
                        } else {
                                inline_room -= length;
                        }
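
The copy above treats the work queue as a byte ring: a packet that would run past the end of (*txq->wqes)[] is split into a tail copy plus a wrap to the first WQE, and a pointer that lands exactly on the end is wrapped back to the start. The same idea as a standalone sketch (plain memcpy standing in for rte_memcpy):

#include <stdint.h>
#include <string.h>

/* Copy len bytes into a byte ring at *posp, splitting at the wrap
 * point, the same way the inline MPW path fills (*txq->wqes)[]. */
static void
ring_copy(uint8_t *ring, size_t ring_size, size_t *posp,
          const uint8_t *src, size_t len)
{
        size_t pos = *posp;
        size_t room = ring_size - pos; /* Bytes before wrapping. */

        if (len > room) {
                memcpy(ring + pos, src, room);
                memcpy(ring, src + room, len - room);
                pos = len - room;
        } else {
                memcpy(ring + pos, src, len);
                pos += len;
        }
        if (pos == ring_size) /* Landed exactly on the end: wrap. */
                pos = 0;
        *posp = pos;
}
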
@@ -1323,12 +1058,12 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
        /* "j" includes both packets and segments. */
        comp = txq->elts_comp + j;
        if (comp >= MLX5_TX_COMP_THRESH) {
-               volatile union mlx5_wqe *wqe = mpw.wqe;
+               volatile struct mlx5_wqe *wqe = mpw.wqe;
 
                /* Request completion on last WQE. */
-               wqe->mpw_inl.ctrl.data[2] = htonl(8);
+               wqe->ctrl[2] = htonl(8);
                /* Save elts_head in unused "immediate" field of WQE. */
-               wqe->mpw_inl.ctrl.data[3] = elts_head;
+               wqe->ctrl[3] = elts_head;
                txq->elts_comp = 0;
        } else {
                txq->elts_comp = comp;
@@ -1359,25 +1094,24 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
  *   Packet type for struct rte_mbuf.
  */
 static inline uint32_t
-rxq_cq_to_pkt_type(volatile struct mlx5_cqe64 *cqe)
+rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
 {
        uint32_t pkt_type;
        uint8_t flags = cqe->l4_hdr_type_etc;
-       uint8_t info = cqe->rsvd0[0];
 
-       if (info & IBV_EXP_CQ_RX_TUNNEL_PACKET)
+       if (cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET)
                pkt_type =
                        TRANSPOSE(flags,
-                                 IBV_EXP_CQ_RX_OUTER_IPV4_PACKET,
+                                 MLX5_CQE_RX_OUTER_IPV4_PACKET,
                                  RTE_PTYPE_L3_IPV4) |
                        TRANSPOSE(flags,
-                                 IBV_EXP_CQ_RX_OUTER_IPV6_PACKET,
+                                 MLX5_CQE_RX_OUTER_IPV6_PACKET,
                                  RTE_PTYPE_L3_IPV6) |
                        TRANSPOSE(flags,
-                                 IBV_EXP_CQ_RX_IPV4_PACKET,
+                                 MLX5_CQE_RX_IPV4_PACKET,
                                  RTE_PTYPE_INNER_L3_IPV4) |
                        TRANSPOSE(flags,
-                                 IBV_EXP_CQ_RX_IPV6_PACKET,
+                                 MLX5_CQE_RX_IPV6_PACKET,
                                  RTE_PTYPE_INNER_L3_IPV6);
        else
                pkt_type =
@@ -1399,14 +1133,16 @@ rxq_cq_to_pkt_type(volatile struct mlx5_cqe64 *cqe)
  *   Pointer to RX queue.
  * @param cqe
  *   CQE to process.
+ * @param[out] rss_hash
+ *   Packet RSS Hash result.
  *
  * @return
  *   Packet size in bytes (0 if there is none), -1 in case of completion
  *   with error.
  */
 static inline int
-mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe,
-                uint16_t cqe_cnt)
+mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
+                uint16_t cqe_cnt, uint32_t *rss_hash)
 {
        struct rxq_zip *zip = &rxq->zip;
        uint16_t cqe_n = cqe_cnt + 1;
@@ -1416,9 +1152,10 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe,
        if (zip->ai) {
                volatile struct mlx5_mini_cqe8 (*mc)[8] =
                        (volatile struct mlx5_mini_cqe8 (*)[8])
-                       (uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].cqe64);
+                       (uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt]);
 
                len = ntohl((*mc)[zip->ai & 7].byte_cnt);
+               *rss_hash = ntohl((*mc)[zip->ai & 7].rx_hash_result);
                if ((++zip->ai & 7) == 0) {
                        /*
                         * Increment consumer index to skip the number of
@@ -1433,7 +1170,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe,
                        uint16_t end = zip->cq_ci;
 
                        while (idx != end) {
-                               (*rxq->cqes)[idx & cqe_cnt].cqe64.op_own =
+                               (*rxq->cqes)[idx & cqe_cnt].op_own =
                                        MLX5_CQE_INVALIDATE;
                                ++idx;
                        }
@@ -1445,7 +1182,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe,
                int ret;
                int8_t op_own;
 
-               ret = check_cqe64(cqe, cqe_n, rxq->cq_ci);
+               ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
                if (unlikely(ret == 1))
                        return 0;
                ++rxq->cq_ci;
@@ -1454,7 +1191,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe,
                        volatile struct mlx5_mini_cqe8 (*mc)[8] =
                                (volatile struct mlx5_mini_cqe8 (*)[8])
                                (uintptr_t)(&(*rxq->cqes)[rxq->cq_ci &
-                                                         cqe_cnt].cqe64);
+                                                         cqe_cnt]);
 
                        /* Fix endianness. */
                        zip->cqe_cnt = ntohl(cqe->byte_cnt);
@@ -1473,9 +1210,11 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe,
                        zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
                        /* Get packet size to return. */
                        len = ntohl((*mc)[0].byte_cnt);
+                       *rss_hash = ntohl((*mc)[0].rx_hash_result);
                        zip->ai = 1;
                } else {
                        len = ntohl(cqe->byte_cnt);
+                       *rss_hash = ntohl(cqe->rx_hash_res);
                }
                /* Error while receiving packet. */
                if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR))
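
For orientation: a compressed session starts with a title CQE announcing zip->cqe_cnt packets whose per-packet data lives in arrays of eight mini-CQEs, and this patch additionally pulls the RSS hash result out of each mini-CQE. A simplified sketch of the per-entry read (field layout abridged to the two members read here, so treat it as illustrative):

#include <arpa/inet.h>
#include <stdint.h>

/* Abridged mini-CQE holding only the two members read here; the real
 * struct mlx5_mini_cqe8 has more fields. */
struct mini_cqe8 {
        uint32_t rx_hash_result;
        uint32_t byte_cnt;
};

/* Entry `ai` of a compressed session: mini-CQEs are packed eight per
 * CQE slot, hence the "& 7" indexing used above. */
static int
mini_cqe_read(volatile struct mini_cqe8 (*mc)[8], unsigned int ai,
              uint32_t *rss_hash)
{
        *rss_hash = ntohl((*mc)[ai & 7].rx_hash_result);
        return (int)ntohl((*mc)[ai & 7].byte_cnt);
}
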
@@ -1496,38 +1235,32 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe,
  *   Offload flags (ol_flags) for struct rte_mbuf.
  */
 static inline uint32_t
-rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe)
+rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe)
 {
        uint32_t ol_flags = 0;
        uint8_t l3_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L3_HDR_TYPE_MASK;
        uint8_t l4_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L4_HDR_TYPE_MASK;
-       uint8_t info = cqe->rsvd0[0];
 
        if ((l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV4) ||
            (l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV6))
-               ol_flags |=
-                       (!(cqe->hds_ip_ext & MLX5_CQE_L3_OK) *
-                        PKT_RX_IP_CKSUM_BAD);
+               ol_flags |= TRANSPOSE(cqe->hds_ip_ext,
+                                     MLX5_CQE_L3_OK,
+                                     PKT_RX_IP_CKSUM_GOOD);
        if ((l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP) ||
            (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_EMP_ACK) ||
            (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_ACK) ||
            (l4_hdr == MLX5_CQE_L4_HDR_TYPE_UDP))
+               ol_flags |= TRANSPOSE(cqe->hds_ip_ext,
+                                     MLX5_CQE_L4_OK,
+                                     PKT_RX_L4_CKSUM_GOOD);
+       if ((cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
                ol_flags |=
-                       (!(cqe->hds_ip_ext & MLX5_CQE_L4_OK) *
-                        PKT_RX_L4_CKSUM_BAD);
-       /*
-        * PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD are used in place
-        * of PKT_RX_EIP_CKSUM_BAD because the latter is not functional
-        * (its value is 0).
-        */
-       if ((info & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
-               ol_flags |=
-                       TRANSPOSE(~cqe->l4_hdr_type_etc,
-                                 IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK,
-                                 PKT_RX_IP_CKSUM_BAD) |
-                       TRANSPOSE(~cqe->l4_hdr_type_etc,
-                                 IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK,
-                                 PKT_RX_L4_CKSUM_BAD);
+                       TRANSPOSE(cqe->l4_hdr_type_etc,
+                                 MLX5_CQE_RX_OUTER_IP_CSUM_OK,
+                                 PKT_RX_IP_CKSUM_GOOD) |
+                       TRANSPOSE(cqe->l4_hdr_type_etc,
+                                 MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK,
+                                 PKT_RX_L4_CKSUM_GOOD);
        return ol_flags;
 }
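
Both rxq_cq_to_pkt_type() and rxq_cq_to_ol_flags() rely on the TRANSPOSE() helper defined near the top of mlx5_rxtx.c, which masks a flag out of one word and rescales it into the bit position a different consumer expects. Quoted and exercised below (the example masks are hypothetical):

#include <stdint.h>

/* Flag-relocation helper as defined near the top of mlx5_rxtx.c:
 * mask `val` with `from`, then scale the surviving bit into the
 * position of `to`. */
#define TRANSPOSE(val, from, to) \
        (((from) >= (to)) ? \
         (((val) & (from)) / ((from) / (to))) : \
         (((val) & (from)) * ((to) / (from))))

/* Hypothetical use: move bit 4 of a status byte to bit 1 of a flags
 * word; a set bit yields 16 / (16 / 2) == 2. */
static uint32_t
example(uint8_t status)
{
        return TRANSPOSE(status, UINT8_C(1) << 4, UINT32_C(1) << 1);
}
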
 
@@ -1548,21 +1281,22 @@ uint16_t
 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
        struct rxq *rxq = dpdk_rxq;
-       const unsigned int wqe_cnt = rxq->elts_n - 1;
-       const unsigned int cqe_cnt = rxq->cqe_n - 1;
+       const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
+       const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
        const unsigned int sges_n = rxq->sges_n;
        struct rte_mbuf *pkt = NULL;
        struct rte_mbuf *seg = NULL;
-       volatile struct mlx5_cqe64 *cqe =
-               &(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64;
+       volatile struct mlx5_cqe *cqe =
+               &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
        unsigned int i = 0;
        unsigned int rq_ci = rxq->rq_ci << sges_n;
-       int len;
+       int len; /* Keep its value across iterations. */
 
        while (pkts_n) {
                unsigned int idx = rq_ci & wqe_cnt;
                volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx];
                struct rte_mbuf *rep = (*rxq->elts)[idx];
+               uint32_t rss_hash_res = 0;
 
                if (pkt)
                        NEXT(seg) = rep;
@@ -1572,6 +1306,14 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                rte_prefetch0(wqe);
                rep = rte_mbuf_raw_alloc(rxq->mp);
                if (unlikely(rep == NULL)) {
+                       ++rxq->stats.rx_nombuf;
+                       if (!pkt) {
+                               /*
+                                * No buffers before we even started,
+                                * bail out silently.
+                                */
+                               break;
+                       }
                        while (pkt != seg) {
                                assert(pkt != (*rxq->elts)[idx]);
                                seg = NEXT(pkt);
@@ -1579,13 +1321,13 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                __rte_mbuf_raw_free(pkt);
                                pkt = seg;
                        }
-                       ++rxq->stats.rx_nombuf;
                        break;
                }
                if (!pkt) {
-                       cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64;
-                       len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt);
-                       if (len == 0) {
+                       cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
+                       len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt,
+                                              &rss_hash_res);
+                       if (!len) {
                                rte_mbuf_refcnt_set(rep, 0);
                                __rte_mbuf_raw_free(rep);
                                break;
@@ -1602,12 +1344,16 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        /* Update packet information. */
                        pkt->packet_type = 0;
                        pkt->ol_flags = 0;
+                       if (rxq->rss_hash) {
+                               pkt->hash.rss = rss_hash_res;
+                               pkt->ol_flags = PKT_RX_RSS_HASH;
+                       }
                        if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip |
                            rxq->crc_present) {
                                if (rxq->csum) {
                                        pkt->packet_type =
                                                rxq_cq_to_pkt_type(cqe);
-                                       pkt->ol_flags =
+                                       pkt->ol_flags |=
                                                rxq_cq_to_ol_flags(rxq, cqe);
                                }
                                if (cqe->l4_hdr_type_etc &
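
Note the switch from "=" to "|=" above: pkt->ol_flags may already hold PKT_RX_RSS_HASH from the block just before, so the checksum flags must be OR-ed in rather than assigned. A tiny sketch of the accumulation (flag values are illustrative; the real ones live in rte_mbuf.h):

#include <stdint.h>

#define PKT_RX_RSS_HASH      (1ULL << 1) /* Illustrative values; the    */
#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7) /* real ones are in rte_mbuf.h. */

/* Accumulate offload flags: plain assignment would clobber the RSS
 * flag set first, which is exactly what the "=" -> "|=" change avoids. */
static uint64_t
rx_ol_flags(int rss_enabled, uint64_t cksum_flags)
{
        uint64_t ol = 0;

        if (rss_enabled)
                ol = PKT_RX_RSS_HASH;
        ol |= cksum_flags;
        return ol;
}
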
diff --git a/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h b/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h
index d87dd19..5579f89 100644 (file)
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <infiniband/verbs.h>
 #include <infiniband/mlx5_hw.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_mbuf.h>
 #include <rte_mempool.h>
+#include <rte_common.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5_utils.h"
@@ -87,6 +88,8 @@ struct mlx5_txq_stats {
 struct fdir_queue {
        struct ibv_qp *qp; /* Associated RX QP. */
        struct ibv_exp_rwq_ind_table *ind_table; /* Indirection table. */
+       struct ibv_exp_wq *wq; /* Work queue. */
+       struct ibv_cq *cq; /* Completion queue. */
 };
 
 struct priv;
@@ -107,16 +110,18 @@ struct rxq {
        unsigned int vlan_strip:1; /* Enable VLAN stripping. */
        unsigned int crc_present:1; /* CRC must be subtracted. */
        unsigned int sges_n:2; /* Log 2 of SGEs (max buffers per packet). */
+       unsigned int cqe_n:4; /* Log 2 of CQ elements. */
+       unsigned int elts_n:4; /* Log 2 of Mbufs. */
+       unsigned int port_id:8;
+       unsigned int rss_hash:1; /* RSS hash result is enabled. */
+       unsigned int :9; /* Remaining bits. */
+       volatile uint32_t *rq_db;
+       volatile uint32_t *cq_db;
        uint16_t rq_ci;
        uint16_t cq_ci;
-       uint16_t elts_n;
-       uint16_t cqe_n; /* Number of CQ elements. */
-       uint16_t port_id;
        volatile struct mlx5_wqe_data_seg(*wqes)[];
        volatile struct mlx5_cqe(*cqes)[];
        struct rxq_zip zip; /* Compressed context. */
-       volatile uint32_t *rq_db;
-       volatile uint32_t *cq_db;
        struct rte_mbuf *(*elts)[];
        struct rte_mempool *mp;
        struct mlx5_rxq_stats stats;
@@ -128,7 +133,7 @@ struct rxq_ctrl {
        struct ibv_cq *cq; /* Completion Queue. */
        struct ibv_exp_wq *wq; /* Work Queue. */
        struct ibv_exp_res_domain *rd; /* Resource Domain. */
-       struct fdir_queue fdir_queue; /* Flow director queue. */
+       struct fdir_queue *fdir_queue; /* Flow director queue. */
        struct ibv_mr *mr; /* Memory Region (for mp). */
        struct ibv_exp_wq_family *if_wq; /* WQ burst interface. */
        struct ibv_exp_cq_family_v1 *if_cq; /* CQ interface. */
@@ -173,8 +178,8 @@ struct hash_rxq_init {
                        uint16_t size;
                } hdr;
                struct ibv_exp_flow_spec_tcp_udp tcp_udp;
-               struct ibv_exp_flow_spec_ipv4_ext ipv4;
-               struct ibv_exp_flow_spec_ipv6_ext ipv6;
+               struct ibv_exp_flow_spec_ipv4 ipv4;
+               struct ibv_exp_flow_spec_ipv6 ipv6;
                struct ibv_exp_flow_spec_eth eth;
        } flow_spec; /* Flow specification template. */
        const struct hash_rxq_init *underlayer; /* Pointer to underlayer. */
@@ -235,22 +240,30 @@ struct hash_rxq {
                [MLX5_MAX_SPECIAL_FLOWS][MLX5_MAX_VLAN_IDS];
 };
 
+/** C extension macro for environments lacking C11 features. */
+#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L
+#define RTE_STD_C11 __extension__
+#else
+#define RTE_STD_C11
+#endif
+
 /* TX queue descriptor. */
+RTE_STD_C11
 struct txq {
        uint16_t elts_head; /* Current index in (*elts)[]. */
        uint16_t elts_tail; /* First element awaiting completion. */
        uint16_t elts_comp; /* Counter since last completion request. */
-       uint16_t elts_n; /* (*elts)[] length. */
        uint16_t cq_ci; /* Consumer index for completion queue. */
-       uint16_t cqe_n; /* Number of CQ elements. */
        uint16_t wqe_ci; /* Consumer index for work queue. */
-       uint16_t wqe_n; /* Number of WQ elements. */
+       uint16_t elts_n:4; /* (*elts)[] length (in log2). */
+       uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
+       uint16_t wqe_n:4; /* Number of WQ elements (in log2). */
+       uint16_t bf_buf_size:4; /* Blueflame size (in log2). */
        uint16_t bf_offset; /* Blueflame offset. */
-       uint16_t bf_buf_size; /* Blueflame size. */
-       uint16_t max_inline; /* Maximum size to inline in a WQE. */
+       uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
        uint32_t qp_num_8s; /* QP number shifted by 8. */
        volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
-       volatile union mlx5_wqe (*wqes)[]; /* Work queue. */
+       volatile struct mlx5_wqe64 (*wqes)[]; /* Work queue. */
        volatile uint32_t *qp_db; /* Work queue doorbell. */
        volatile uint32_t *cq_db; /* Completion queue doorbell. */
        volatile void *bf_reg; /* Blueflame register. */
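
Queue sizes in both descriptors are now stored as log2 exponents in 4-bit fields, which is why the masks and sizes elsewhere in this patch became ((1 << n) - 1) and (1 << n); sizes are rounded up with log2above() when stored. A sketch of that encode/decode round trip (the helper mirrors the log2above() implementation in mlx5_utils.h that this patch relies on):

#include <assert.h>

/* Round up to the next power-of-two exponent; mirrors log2above()
 * from mlx5_utils.h. */
static unsigned int
log2above(unsigned int v)
{
        unsigned int l;
        unsigned int r;

        for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
                r |= (v & 1);
        return l + r;
}

int
main(void)
{
        unsigned int elts_n = log2above(512); /* 9, fits a 4-bit field. */

        assert((1u << elts_n) == 512);       /* Recovered queue size. */
        assert(((1u << elts_n) - 1) == 511); /* Index mask. */
        assert(log2above(513) == 10);        /* Non-powers round up. */
        return 0;
}
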
@@ -312,7 +325,6 @@ uint16_t mlx5_tx_burst_secondary_setup(void *, struct rte_mbuf **, uint16_t);
 /* mlx5_rxtx.c */
 
 uint16_t mlx5_tx_burst(void *, struct rte_mbuf **, uint16_t);
-uint16_t mlx5_tx_burst_inline(void *, struct rte_mbuf **, uint16_t);
 uint16_t mlx5_tx_burst_mpw(void *, struct rte_mbuf **, uint16_t);
 uint16_t mlx5_tx_burst_mpw_inline(void *, struct rte_mbuf **, uint16_t);
 uint16_t mlx5_rx_burst(void *, struct rte_mbuf **, uint16_t);
diff --git a/src/dpdk/drivers/net/mlx5/mlx5_stats.c b/src/dpdk/drivers/net/mlx5/mlx5_stats.c
index 788ef93..f2b5781 100644 (file)
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_ethdev.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5.h"
 #include "mlx5_rxtx.h"
 #include "mlx5_defs.h"
 
-
-#include <linux/ethtool.h>
-#include <linux/sockios.h>
-
 /**
  * DPDK callback to get device statistics.
  *
  * @param[out] stats
  *   Stats structure output buffer.
  */
-
-
-static void
-mlx5_stats_read_hw(struct rte_eth_dev *dev,
-                struct rte_eth_stats *stats){
-    struct priv *priv = mlx5_get_priv(dev);
-    struct mlx5_stats_priv * lps = &priv->m_stats;
-    unsigned int i;
-
-    struct rte_eth_stats tmp = {0};
-    struct ethtool_stats    *et_stats   = (struct ethtool_stats    *)lps->et_stats;
-    struct ifreq ifr;
-
-    et_stats->cmd = ETHTOOL_GSTATS;
-    et_stats->n_stats = lps->n_stats;
-
-    ifr.ifr_data = (caddr_t) et_stats;
-
-    if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { 
-        WARN("unable to get statistic values for mlnx5 "); 
-    }
-
-    tmp.ibytes += et_stats->data[lps->inx_rx_vport_unicast_bytes] +
-                  et_stats->data[lps->inx_rx_vport_multicast_bytes] +
-                  et_stats->data[lps->inx_rx_vport_broadcast_bytes];
-
-    tmp.ipackets += et_stats->data[lps->inx_rx_vport_unicast_packets] +
-                et_stats->data[lps->inx_rx_vport_multicast_packets] +
-                et_stats->data[lps->inx_rx_vport_broadcast_packets];
-
-    tmp.ierrors +=     (et_stats->data[lps->inx_rx_wqe_err] +
-                    et_stats->data[lps->inx_rx_crc_errors_phy] +
-                    et_stats->data[lps->inx_rx_in_range_len_errors_phy] +
-                    et_stats->data[lps->inx_rx_symbol_err_phy]);
-
-    tmp.obytes += et_stats->data[lps->inx_tx_vport_unicast_bytes] +
-                  et_stats->data[lps->inx_tx_vport_multicast_bytes] +
-                  et_stats->data[lps->inx_tx_vport_broadcast_bytes];
-
-    tmp.opackets += (et_stats->data[lps->inx_tx_vport_unicast_packets] +
-                     et_stats->data[lps->inx_tx_vport_multicast_packets] +
-                     et_stats->data[lps->inx_tx_vport_broadcast_packets]);
-
-    tmp.oerrors += et_stats->data[lps->inx_tx_errors_phy];
-
-    /* SW Rx */
-    for (i = 0; (i != priv->rxqs_n); ++i) {
-        struct rxq *rxq = (*priv->rxqs)[i];
-        if (rxq) {
-            tmp.imissed += rxq->stats.idropped;
-            tmp.rx_nombuf += rxq->stats.rx_nombuf;
-        }
-    }
-
-    /*SW Tx */
-    for (i = 0; (i != priv->txqs_n); ++i) {
-        struct txq *txq = (*priv->txqs)[i];
-        if (txq) {
-            tmp.oerrors += txq->stats.odropped;
-        }
-    }
-
-    *stats =tmp;
-}
-
-void
-mlx5_stats_free(struct rte_eth_dev *dev)
-{
-    struct priv *priv = mlx5_get_priv(dev);
-    struct mlx5_stats_priv * lps = &priv->m_stats;
-
-    if ( lps->et_stats ){
-        free(lps->et_stats);
-        lps->et_stats=0;
-    }
-}
-
-
-static void
-mlx5_stats_init(struct rte_eth_dev *dev)
-{
-    struct priv *priv = mlx5_get_priv(dev);
-    struct mlx5_stats_priv * lps = &priv->m_stats;
-    struct rte_eth_stats tmp = {0};
-
-    unsigned int i;
-    unsigned int idx;
-    char ifname[IF_NAMESIZE];
-    struct ifreq ifr;
-
-    struct ethtool_stats    *et_stats   = NULL;
-    struct ethtool_drvinfo drvinfo;
-    struct ethtool_gstrings *strings = NULL;
-    unsigned int n_stats, sz_str, sz_stats;
-
-    if (priv_get_ifname(priv, &ifname)) {
-            WARN("unable to get interface name");
-            return;
-    }
-    /* How many statistics are available ? */
-    drvinfo.cmd = ETHTOOL_GDRVINFO;
-    ifr.ifr_data = (caddr_t) &drvinfo;
-    if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
-            WARN("unable to get driver info for %s", ifname);
-            return;
-    }
-
-    n_stats = drvinfo.n_stats;
-    if (n_stats < 1) {
-            WARN("no statistics available for %s", ifname);
-            return;
-    }
-    lps->n_stats = n_stats;
-
-    /* Allocate memory to grab stat names and values */ 
-    sz_str = n_stats * ETH_GSTRING_LEN; 
-    sz_stats = n_stats * sizeof(uint64_t); 
-    strings = calloc(1, sz_str + sizeof(struct ethtool_gstrings)); 
-    if (!strings) { 
-        WARN("unable to allocate memory for strings"); 
-        return;
-    } 
-
-    et_stats = calloc(1, sz_stats + sizeof(struct ethtool_stats)); 
-    if (!et_stats) { 
-        free(strings);
-        WARN("unable to allocate memory for stats"); 
-    } 
-
-    strings->cmd = ETHTOOL_GSTRINGS; 
-    strings->string_set = ETH_SS_STATS; 
-    strings->len = n_stats; 
-    ifr.ifr_data = (caddr_t) strings; 
-    if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { 
-        WARN("unable to get statistic names for %s", ifname); 
-        free(strings);
-        free(et_stats);
-        return;
-    } 
-
-    for (i = 0; (i != n_stats); ++i) {
-
-            const char * curr_string = (const char*) &(strings->data[i * ETH_GSTRING_LEN]);
-
-            if (!strcmp("rx_vport_unicast_bytes", curr_string)) lps->inx_rx_vport_unicast_bytes = i;
-            if (!strcmp("rx_vport_multicast_bytes", curr_string)) lps->inx_rx_vport_multicast_bytes = i;
-            if (!strcmp("rx_vport_broadcast_bytes", curr_string)) lps->inx_rx_vport_broadcast_bytes = i;
-
-            if (!strcmp("rx_vport_unicast_packets", curr_string)) lps->inx_rx_vport_unicast_packets = i;
-            if (!strcmp("rx_vport_multicast_packets", curr_string)) lps->inx_rx_vport_multicast_packets = i;
-            if (!strcmp("rx_vport_broadcast_packets", curr_string)) lps->inx_rx_vport_broadcast_packets = i;
-
-            if (!strcmp("tx_vport_unicast_bytes", curr_string)) lps->inx_tx_vport_unicast_bytes = i;
-            if (!strcmp("tx_vport_multicast_bytes", curr_string)) lps->inx_tx_vport_multicast_bytes = i;
-            if (!strcmp("tx_vport_broadcast_bytes", curr_string)) lps->inx_tx_vport_broadcast_bytes = i;
-
-            if (!strcmp("tx_vport_unicast_packets", curr_string)) lps->inx_tx_vport_unicast_packets = i;
-            if (!strcmp("tx_vport_multicast_packets", curr_string)) lps->inx_tx_vport_multicast_packets = i;
-            if (!strcmp("tx_vport_broadcast_packets", curr_string)) lps->inx_tx_vport_broadcast_packets = i;
-
-            if (!strcmp("rx_wqe_err", curr_string)) lps->inx_rx_wqe_err = i;
-            if (!strcmp("rx_crc_errors_phy", curr_string)) lps->inx_rx_crc_errors_phy = i;
-            if (!strcmp("rx_in_range_len_errors_phy", curr_string)) lps->inx_rx_in_range_len_errors_phy = i;
-            if (!strcmp("rx_symbol_err_phy", curr_string)) lps->inx_rx_symbol_err_phy = i;
-
-            if (!strcmp("tx_errors_phy", curr_string)) lps->inx_tx_errors_phy = i;
-    }
-
-    lps->et_stats =(void *)et_stats;
-
-    if (!lps->inx_rx_vport_unicast_bytes ||
-    !lps->inx_rx_vport_multicast_bytes ||
-    !lps->inx_rx_vport_broadcast_bytes || 
-    !lps->inx_rx_vport_unicast_packets ||
-    !lps->inx_rx_vport_multicast_packets ||
-    !lps->inx_rx_vport_broadcast_packets ||
-    !lps->inx_tx_vport_unicast_bytes || 
-    !lps->inx_tx_vport_multicast_bytes ||
-    !lps->inx_tx_vport_broadcast_bytes ||
-    !lps->inx_tx_vport_unicast_packets ||
-    !lps->inx_tx_vport_multicast_packets ||
-    !lps->inx_tx_vport_broadcast_packets ||
-    !lps->inx_rx_wqe_err ||
-    !lps->inx_rx_crc_errors_phy ||
-    !lps->inx_rx_in_range_len_errors_phy) {
-        WARN("Counters are not recognized %s", ifname);
-        return;
-    }
-
-    mlx5_stats_read_hw(dev,&tmp);
-
-    /* copy yo shadow at first time */
-    lps->m_shadow = tmp;
-
-    free(strings);
-}
-
-
-static void
-mlx5_stats_diff(struct rte_eth_stats *a,
-                struct rte_eth_stats *b,
-                struct rte_eth_stats *c){
-    #define MLX5_DIFF(cnt) { a->cnt = (b->cnt - c->cnt);  }
-
-    MLX5_DIFF(ipackets);
-    MLX5_DIFF(opackets); 
-    MLX5_DIFF(ibytes); 
-    MLX5_DIFF(obytes);
-    MLX5_DIFF(imissed);
-
-    MLX5_DIFF(ierrors); 
-    MLX5_DIFF(oerrors); 
-    MLX5_DIFF(rx_nombuf);
-}
-
 void
 mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
        struct priv *priv = mlx5_get_priv(dev);
-
-    struct mlx5_stats_priv * lps = &priv->m_stats;
-    priv_lock(priv);
-
-    if (lps->et_stats == NULL) {
-        mlx5_stats_init(dev);
-    }
-    struct rte_eth_stats tmp = {0};
-
-    mlx5_stats_read_hw(dev,&tmp);
-
-    mlx5_stats_diff(stats,
-                    &tmp,
-                    &lps->m_shadow);
-
+       struct rte_eth_stats tmp = {0};
+       unsigned int i;
+       unsigned int idx;
+
+       priv_lock(priv);
+       /* Add software counters. */
+       for (i = 0; (i != priv->rxqs_n); ++i) {
+               struct rxq *rxq = (*priv->rxqs)[i];
+
+               if (rxq == NULL)
+                       continue;
+               idx = rxq->stats.idx;
+               if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+#ifdef MLX5_PMD_SOFT_COUNTERS
+                       tmp.q_ipackets[idx] += rxq->stats.ipackets;
+                       tmp.q_ibytes[idx] += rxq->stats.ibytes;
+#endif
+                       tmp.q_errors[idx] += (rxq->stats.idropped +
+                                             rxq->stats.rx_nombuf);
+               }
+#ifdef MLX5_PMD_SOFT_COUNTERS
+               tmp.ipackets += rxq->stats.ipackets;
+               tmp.ibytes += rxq->stats.ibytes;
+#endif
+               tmp.ierrors += rxq->stats.idropped;
+               tmp.rx_nombuf += rxq->stats.rx_nombuf;
+       }
+       for (i = 0; (i != priv->txqs_n); ++i) {
+               struct txq *txq = (*priv->txqs)[i];
+
+               if (txq == NULL)
+                       continue;
+               idx = txq->stats.idx;
+               if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+#ifdef MLX5_PMD_SOFT_COUNTERS
+                       tmp.q_opackets[idx] += txq->stats.opackets;
+                       tmp.q_obytes[idx] += txq->stats.obytes;
+#endif
+                       tmp.q_errors[idx] += txq->stats.odropped;
+               }
+#ifdef MLX5_PMD_SOFT_COUNTERS
+               tmp.opackets += txq->stats.opackets;
+               tmp.obytes += txq->stats.obytes;
+#endif
+               tmp.oerrors += txq->stats.odropped;
+       }
+#ifndef MLX5_PMD_SOFT_COUNTERS
+       /* FIXME: retrieve and add hardware counters. */
+#endif
+       *stats = tmp;
        priv_unlock(priv);
 }
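
The rewritten mlx5_stats_get() drops the TRex ethtool path entirely and sums the PMD software counters instead, filling per-queue slots only for the first RTE_ETHDEV_QUEUE_STAT_CNTRS queues. A condensed sketch of the aggregation pattern (counter names and the slot count below are stand-ins):

#include <stdint.h>

#define QUEUE_STAT_CNTRS 16 /* Stand-in for RTE_ETHDEV_QUEUE_STAT_CNTRS. */

struct q_stats { unsigned int idx; uint64_t pkts, bytes, dropped; };

struct dev_stats {
        uint64_t ipackets, ibytes, ierrors;
        uint64_t q_ipackets[QUEUE_STAT_CNTRS];
};

/* Fold one RX queue into the device totals; per-queue slots exist
 * only for the first QUEUE_STAT_CNTRS queues, hence the idx check. */
static void
add_rxq(struct dev_stats *d, const struct q_stats *q)
{
        if (q->idx < QUEUE_STAT_CNTRS)
                d->q_ipackets[q->idx] += q->pkts;
        d->ipackets += q->pkts;
        d->ibytes += q->bytes;
        d->ierrors += q->dropped;
}
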
 
@@ -304,20 +119,26 @@ void
 mlx5_stats_reset(struct rte_eth_dev *dev)
 {
        struct priv *priv = dev->data->dev_private;
-    struct mlx5_stats_priv * lps = &priv->m_stats;
-
-    priv_lock(priv);
-
-    if (lps->et_stats == NULL) {
-        mlx5_stats_init(dev);
-    }
-    struct rte_eth_stats tmp = {0};
-
-
-    mlx5_stats_read_hw(dev,&tmp);
-
-    /* copy to shadow */
-    lps->m_shadow = tmp;
-
+       unsigned int i;
+       unsigned int idx;
+
+       priv_lock(priv);
+       for (i = 0; (i != priv->rxqs_n); ++i) {
+               if ((*priv->rxqs)[i] == NULL)
+                       continue;
+               idx = (*priv->rxqs)[i]->stats.idx;
+               (*priv->rxqs)[i]->stats =
+                       (struct mlx5_rxq_stats){ .idx = idx };
+       }
+       for (i = 0; (i != priv->txqs_n); ++i) {
+               if ((*priv->txqs)[i] == NULL)
+                       continue;
+               idx = (*priv->txqs)[i]->stats.idx;
+               (*priv->txqs)[i]->stats =
+                       (struct mlx5_txq_stats){ .idx = idx };
+       }
+#ifndef MLX5_PMD_SOFT_COUNTERS
+       /* FIXME: reset hardware counters. */
+#endif
        priv_unlock(priv);
 }
diff --git a/src/dpdk/drivers/net/mlx5/mlx5_trigger.c b/src/dpdk/drivers/net/mlx5/mlx5_trigger.c
index e9b9a29..d4dccd8 100644 (file)
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_ether.h>
 #include <rte_ethdev.h>
 #include <rte_interrupts.h>
 #include <rte_alarm.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5.h"
diff --git a/src/dpdk/drivers/net/mlx5/mlx5_txq.c b/src/dpdk/drivers/net/mlx5/mlx5_txq.c
index 6fe61c4..053665d 100644 (file)
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <infiniband/verbs.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_mbuf.h>
 #include <rte_malloc.h>
 #include <rte_ethdev.h>
 #include <rte_common.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5_utils.h"
@@ -81,8 +81,8 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
 
        for (i = 0; (i != elts_n); ++i)
                (*txq_ctrl->txq.elts)[i] = NULL;
-       for (i = 0; (i != txq_ctrl->txq.wqe_n); ++i) {
-               volatile union mlx5_wqe *wqe = &(*txq_ctrl->txq.wqes)[i];
+       for (i = 0; (i != (1u << txq_ctrl->txq.wqe_n)); ++i) {
+               volatile struct mlx5_wqe64 *wqe = &(*txq_ctrl->txq.wqes)[i];
 
                memset((void *)(uintptr_t)wqe, 0x0, sizeof(*wqe));
        }
@@ -101,7 +101,7 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
 static void
 txq_free_elts(struct txq_ctrl *txq_ctrl)
 {
-       unsigned int elts_n = txq_ctrl->txq.elts_n;
+       unsigned int elts_n = 1 << txq_ctrl->txq.elts_n;
        unsigned int elts_head = txq_ctrl->txq.elts_head;
        unsigned int elts_tail = txq_ctrl->txq.elts_tail;
        struct rte_mbuf *(*elts)[elts_n] = txq_ctrl->txq.elts;
@@ -212,22 +212,22 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
                      "it should be set to %u", RTE_CACHE_LINE_SIZE);
                return EINVAL;
        }
-       tmpl->txq.cqe_n = ibcq->cqe + 1;
+       tmpl->txq.cqe_n = log2above(ibcq->cqe);
        tmpl->txq.qp_num_8s = qp->ctrl_seg.qp_num << 8;
        tmpl->txq.wqes =
-               (volatile union mlx5_wqe (*)[])
+               (volatile struct mlx5_wqe64 (*)[])
                (uintptr_t)qp->gen_data.sqstart;
-       tmpl->txq.wqe_n = qp->sq.wqe_cnt;
+       tmpl->txq.wqe_n = log2above(qp->sq.wqe_cnt);
        tmpl->txq.qp_db = &qp->gen_data.db[MLX5_SND_DBR];
        tmpl->txq.bf_reg = qp->gen_data.bf->reg;
        tmpl->txq.bf_offset = qp->gen_data.bf->offset;
-       tmpl->txq.bf_buf_size = qp->gen_data.bf->buf_size;
+       tmpl->txq.bf_buf_size = log2above(qp->gen_data.bf->buf_size);
        tmpl->txq.cq_db = cq->dbrec;
        tmpl->txq.cqes =
                (volatile struct mlx5_cqe (*)[])
                (uintptr_t)cq->active_buf->buf;
        tmpl->txq.elts =
-               (struct rte_mbuf *(*)[tmpl->txq.elts_n])
+               (struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])
                ((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
        return 0;
 }
@@ -277,7 +277,7 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
        }
        (void)conf; /* Thresholds configuration (ignored). */
        assert(desc > MLX5_TX_COMP_THRESH);
-       tmpl.txq.elts_n = desc;
+       tmpl.txq.elts_n = log2above(desc);
        /* MRs will be registered in mp2mr[] later. */
        attr.rd = (struct ibv_exp_res_domain_init_attr){
                .comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
@@ -338,9 +338,12 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
                .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
                              IBV_EXP_QP_INIT_ATTR_RES_DOMAIN),
        };
-       if (priv->txq_inline && priv->txqs_n >= priv->txqs_inline) {
-               tmpl.txq.max_inline = priv->txq_inline;
-               attr.init.cap.max_inline_data = tmpl.txq.max_inline;
+       if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+               tmpl.txq.max_inline =
+                       ((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
+                        RTE_CACHE_LINE_SIZE);
+               attr.init.cap.max_inline_data =
+                       tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
        }
        tmpl.qp = ibv_exp_create_qp(priv->ctx, &attr.init);
        if (tmpl.qp == NULL) {
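
Since max_inline is now a cache-line count, the byte value supplied via txq_inline (set to 64 for TRex earlier in this patch) is rounded up to whole cache lines and then re-expanded for the QP's max_inline_data attribute. Sketch, again assuming a 64-byte cache line:

#include <stdint.h>

#define CACHE_LINE_SIZE 64 /* Typical RTE_CACHE_LINE_SIZE value. */

/* Round the requested inline byte budget up to whole cache lines,
 * then re-expand it for the QP's max_inline_data attribute. */
static void
inline_setup(unsigned int txq_inline, uint16_t *max_inline,
             uint32_t *max_inline_data)
{
        *max_inline = (txq_inline + CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE;
        *max_inline_data = (uint32_t)*max_inline * CACHE_LINE_SIZE;
}
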
diff --git a/src/dpdk/drivers/net/mlx5/mlx5_vlan.c b/src/dpdk/drivers/net/mlx5/mlx5_vlan.c
index 4719e69..1b0fa40 100644 (file)
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_ethdev.h>
 #include <rte_common.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5_utils.h"
@@ -87,7 +87,8 @@ vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
                --priv->vlan_filter_n;
                memmove(&priv->vlan_filter[i],
                        &priv->vlan_filter[i + 1],
-                       priv->vlan_filter_n - i);
+                       sizeof(priv->vlan_filter[i]) *
+                       (priv->vlan_filter_n - i));
                priv->vlan_filter[priv->vlan_filter_n] = 0;
        } else {
                assert(i == priv->vlan_filter_n);
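
The memmove fix above deserves a note: the old call moved only (vlan_filter_n - i) bytes, but vlan_filter[] holds 16-bit VLAN IDs, so the shifted tail lost half of every entry; scaling by the element size moves whole entries. A minimal demonstration of the corrected pattern (the uint16_t element type is an assumption matching the 0..4095 VLAN ID range):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Drop entry i from a packed uint16_t table of n entries; the byte
 * count passed to memmove must be scaled by the element size, which
 * is exactly what the patch fixes. */
static unsigned int
filter_remove(uint16_t *tab, unsigned int n, unsigned int i)
{
        --n;
        memmove(&tab[i], &tab[i + 1], sizeof(tab[i]) * (n - i));
        tab[n] = 0;
        return n;
}

int
main(void)
{
        uint16_t vlans[4] = { 100, 200, 300, 400 };
        unsigned int n = filter_remove(vlans, 4, 1);

        assert(n == 3 && vlans[1] == 300 && vlans[2] == 400 && vlans[3] == 0);
        return 0;
}
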