Add support for DPDK 18.05 (Gerrit change 12924, patch set 4)
Author:     Damjan Marion <damarion@cisco.com>
AuthorDate: Wed, 6 Jun 2018 12:23:42 +0000 (14:23 +0200)
Commit:     Marco Varlese <marco.varlese@suse.de>
CommitDate: Thu, 7 Jun 2018 05:48:49 +0000 (05:48 +0000)
Change-Id: I205932bc727c990011bbbe1dc6c0cf5349d19806
Signed-off-by: Damjan Marion <damarion@cisco.com>
20 files changed:
dpdk/Makefile
dpdk/deb/debian/rules
dpdk/dpdk-17.02_patches/0001-dpdk-dev-net-mlx5-add-out-of-buffer-counter-to-extended-statistic.patch [deleted file]
dpdk/dpdk-17.02_patches/0002-dpdk-dev-1-2-net-mlx5-remove-unused-interface-name-query.patch [deleted file]
dpdk/dpdk-17.02_patches/0003-dpdk-dev-2-2-net-mlx5-fix-extended-statistics-wrong-number.patch [deleted file]
dpdk/dpdk-17.02_patches/0004-dpdk-dev-net-mlx5-fix-extended-statistics-counters-identification.patch [deleted file]
dpdk/dpdk-17.02_patches/0005-net-mlx5-fix-startup-when-flow-cannot-be-applied.patch [deleted file]
dpdk/dpdk-17.02_patches/0006-net-mlx5-add-hardware-TSO-support.patch [deleted file]
dpdk/dpdk-17.02_patches/0007-add-hardware-checksum-offload-for-tunnel-pa.patch [deleted file]
dpdk/dpdk-17.02_patches/0008-net-mlx5-add-enhanced-multi-packet-send-for-ConnectX.patch [deleted file]
dpdk/dpdk-17.05_patches/0001-Revert-net-virtio-remove-redundant-MSI-X-detection.patch [deleted file]
dpdk/dpdk-17.08_patches/0001-net-bonding-support-for-mlx.patch [deleted file]
dpdk/dpdk-17.08_patches/0002-crypto-qat-align-capabilities.patch [deleted file]
dpdk/dpdk-17.11_patches/0001-ena-pmd-no-tx-flags.patch [deleted file]
dpdk/dpdk-18.05_patches/0001-i40evf-don-t-reset-device_info-data.patch [new file with mode: 0644]
src/plugins/dpdk/buffer.c
src/plugins/dpdk/device/cli.c
src/plugins/dpdk/device/format.c
src/plugins/dpdk/device/init.c
src/plugins/dpdk/ipsec/ipsec.c

diff --git a/dpdk/Makefile b/dpdk/Makefile
index 7b70346..c5d2a79 100644
--- a/dpdk/Makefile
+++ b/dpdk/Makefile
@@ -32,21 +32,27 @@ PKG_SUFFIX ?= vpp1
 DPDK_BASE_URL ?= http://fast.dpdk.org/rel
 DPDK_TARBALL := dpdk-$(DPDK_VERSION).tar.xz
 DPDK_TAR_URL := $(DPDK_BASE_URL)/$(DPDK_TARBALL)
-DPDK_17.11_TARBALL_MD5_CKSUM := 53ee9e054a8797c9e67ffa0eb5d0c701
 DPDK_18.02.1_TARBALL_MD5_CKSUM := 3bbb5468f662e1f7472d4abc5c4cf08e
-DPDK_SOURCE := $(B)/dpdk-stable-$(DPDK_VERSION)
+DPDK_18.05_TARBALL_MD5_CKSUM := 9fc86367cd9407ff6a8dfea56c4eddc4
 MACHINE=$(shell uname -m)
 
+# replace dot with space, and if 3rd word exists we deal with stable dpdk rel
+ifeq ($(word 3,$(subst ., ,$(DPDK_VERSION))),)
+DPDK_SOURCE := $(B)/dpdk-$(DPDK_VERSION)
+else
+DPDK_SOURCE := $(B)/dpdk-stable-$(DPDK_VERSION)
+endif
+
 NASM_BASE_URL := http://www.nasm.us/pub/nasm/releasebuilds
 NASM_VER := 2.13.01
 NASM_TARBALL := nasm-$(NASM_VER).tar.xz
 NASM_TAR_URL := $(NASM_BASE_URL)/$(NASM_VER)/$(NASM_TARBALL)
 NASM_SOURCE := $(B)/nasm-$(NASM_VER)
 
-ifeq ($(DPDK_VERSION),17.11)
-IPSEC_MB_VER ?= 0.47
-else
+ifneq (,$(findstring 18.02,$(DPDK_VERSION)))
 IPSEC_MB_VER ?= 0.48
+else
+IPSEC_MB_VER ?= 0.49
 endif
 
 ifeq ($(MACHINE),$(filter $(MACHINE),x86_64))
@@ -158,6 +164,11 @@ else
 DPDK_EXTRA_CFLAGS := -g -O0
 endif
 
+ifeq ($(AESNI),y)
+IPSEC_MB_BUILD_PATH := $(B)/intel-ipsec-mb-$(IPSEC_MB_VER)
+DPDK_EXTRA_CFLAGS += -L$(IPSEC_MB_BUILD_PATH) -I$(IPSEC_MB_BUILD_PATH)
+endif
+
 DPDK_MAKE_EXTRA_ARGS += AESNI_MULTI_BUFFER_LIB_PATH=$(AESNIMB_LIB_SOURCE)
 
 # assemble DPDK make arguments
@@ -210,7 +221,6 @@ $(B)/custom-config: $(B)/.patch.ok Makefile
        $(call set,RTE_LIBRTE_PMD_TAP,$(DPDK_TAP_PMD))
        $(call set,RTE_LIBRTE_PMD_FAILSAFE,$(DPDK_FAILSAFE_PMD))
        @# not needed
-       $(call set,RTE_LIBRTE_TIMER,n)
        $(call set,RTE_LIBRTE_CFGFILE,n)
        $(call set,RTE_LIBRTE_LPM,n)
        $(call set,RTE_LIBRTE_ACL,n)
@@ -308,7 +318,7 @@ build-nasm:
 build-ipsec-mb:
        mkdir -p $(I)/lib/
        # Do not build GCM stuff if we are building ISA_L
-       make -C $(AESNIMB_LIB_SOURCE) -j \
+       make -C $(AESNIMB_LIB_SOURCE) -j SHARED=n \
          EXTRA_CFLAGS=-fPIC NASM=$(NASM_SOURCE)/nasm
        cp $(AESNIMB_LIB_SOURCE)/libIPSec_MB.a $(I)/lib/
 
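The stable-release detection added to the Makefile above hinges on GNU Make's subst/word functions: splitting DPDK_VERSION on dots yields a third word only for stable point releases such as 18.02.1. A minimal stand-alone sketch of the same logic (the B default and the all target are illustrative placeholders, not part of this change):

    B ?= build
    DPDK_VERSION ?= 18.05
    # "18.05" splits into two dot-separated words, "18.02.1" into three;
    # a non-empty 3rd word therefore marks a stable release tree.
    ifeq ($(word 3,$(subst ., ,$(DPDK_VERSION))),)
    DPDK_SOURCE := $(B)/dpdk-$(DPDK_VERSION)
    else
    DPDK_SOURCE := $(B)/dpdk-stable-$(DPDK_VERSION)
    endif
    $(info $(DPDK_VERSION) -> $(DPDK_SOURCE))
    all: ;

Invoking "make DPDK_VERSION=18.02.1" on this fragment prints "18.02.1 -> build/dpdk-stable-18.02.1", while the default 18.05 resolves to build/dpdk-18.05, matching the dpdk-$(DPDK_VERSION) naming used by the main-release tarballs.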
diff --git a/dpdk/deb/debian/rules b/dpdk/deb/debian/rules
index b9a5863..7ef6ba1 100755
--- a/dpdk/deb/debian/rules
+++ b/dpdk/deb/debian/rules
@@ -28,8 +28,10 @@ override_dh_install:
        dh_install -p$(PKG)-dkms \
          $(SRC)/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h \
          $(SRC)/lib/librte_eal/common/include/rte_pci_dev_features.h     \
-         $(SRC)/lib/librte_eal/linuxapp/igb_uio/igb_uio.c                \
-         $(SRC)/lib/librte_eal/linuxapp/igb_uio/compat.h                 \
+         $(wildcard $(SRC)/*/*/*/igb_uio/igb_uio.c)                      \
+         $(wildcard $(SRC)/*/*/*/igb_uio/compat.h)                       \
+         $(wildcard $(SRC)/*/*/igb_uio/igb_uio.c)                        \
+         $(wildcard $(SRC)/*/*/igb_uio/compat.h)                         \
          debian/dkms/Makefile                                            \
          /usr/src/$(PKG)-dkms-$(VERSION)
        dh_install -p$(PKG)-dev --autodest /usr
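The wildcard change above is what lets the dkms package build against both source layouts: up to DPDK 18.02 igb_uio sits three directory levels below $(SRC), while 18.05 moved the kernel modules under kernel/linux/. A sketch of the paths each pattern is meant to catch (illustrative, based on the respective release trees):

    # <= 18.02: $(SRC)/lib/librte_eal/linuxapp/igb_uio/igb_uio.c   -> $(SRC)/*/*/*/igb_uio/...
    # 18.05:    $(SRC)/kernel/linux/igb_uio/igb_uio.c              -> $(SRC)/*/*/igb_uio/...

Because $(wildcard ...) expands to the empty string when nothing matches, the pattern pair for the absent layout simply drops out of the dh_install argument list.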
diff --git a/dpdk/dpdk-17.02_patches/0001-dpdk-dev-net-mlx5-add-out-of-buffer-counter-to-extended-statistic.patch b/dpdk/dpdk-17.02_patches/0001-dpdk-dev-net-mlx5-add-out-of-buffer-counter-to-extended-statistic.patch
deleted file mode 100644
index 3ebf5e8..0000000
--- a/dpdk/dpdk-17.02_patches/0001-dpdk-dev-net-mlx5-add-out-of-buffer-counter-to-extended-statistic.patch
+++ /dev/null
@@ -1,135 +0,0 @@
-diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
-index 879da5e..2b4345a 100644
---- a/drivers/net/mlx5/mlx5.h
-+++ b/drivers/net/mlx5/mlx5.h
-@@ -197,6 +197,8 @@ struct mlx5_secondary_data {
- int mlx5_is_secondary(void);
- int priv_get_ifname(const struct priv *, char (*)[IF_NAMESIZE]);
- int priv_ifreq(const struct priv *, int req, struct ifreq *);
-+int priv_is_ib_cntr(const char *);
-+int priv_get_cntr_sysfs(struct priv *, const char *, uint64_t *);
- int priv_get_num_vfs(struct priv *, uint16_t *);
- int priv_get_mtu(struct priv *, uint16_t *);
- int priv_set_flags(struct priv *, unsigned int, unsigned int);
-diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
-index 2145965..6b64f44 100644
---- a/drivers/net/mlx5/mlx5_ethdev.c
-+++ b/drivers/net/mlx5/mlx5_ethdev.c
-@@ -234,6 +234,23 @@ struct priv *
- }
- /**
-+ * Check if the counter is located on ib counters file.
-+ *
-+ * @param[in] cntr
-+ *   Counter name.
-+ *
-+ * @return
-+ *   1 if counter is located on ib counters file , 0 otherwise.
-+ */
-+int
-+priv_is_ib_cntr(const char *cntr)
-+{
-+      if (!strcmp(cntr, "out_of_buffer"))
-+              return 1;
-+      return 0;
-+}
-+
-+/**
-  * Read from sysfs entry.
-  *
-  * @param[in] priv
-@@ -260,10 +277,15 @@ struct priv *
-       if (priv_get_ifname(priv, &ifname))
-               return -1;
--      MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
--            ifname, entry);
--
--      file = fopen(path, "rb");
-+      if (priv_is_ib_cntr(entry)) {
-+              MKSTR(path, "%s/ports/1/hw_counters/%s",
-+                    priv->ctx->device->ibdev_path, entry);
-+              file = fopen(path, "rb");
-+      } else {
-+              MKSTR(path, "%s/device/net/%s/%s",
-+                    priv->ctx->device->ibdev_path, ifname, entry);
-+              file = fopen(path, "rb");
-+      }
-       if (file == NULL)
-               return -1;
-       ret = fread(buf, 1, size, file);
-@@ -469,6 +491,30 @@ struct priv *
- }
- /**
-+ * Read device counter from sysfs.
-+ *
-+ * @param priv
-+ *   Pointer to private structure.
-+ * @param name
-+ *   Counter name.
-+ * @param[out] cntr
-+ *   Counter output buffer.
-+ *
-+ * @return
-+ *   0 on success, -1 on failure and errno is set.
-+ */
-+int
-+priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr)
-+{
-+      unsigned long ulong_ctr;
-+
-+      if (priv_get_sysfs_ulong(priv, name, &ulong_ctr) == -1)
-+              return -1;
-+      *cntr = ulong_ctr;
-+      return 0;
-+}
-+
-+/**
-  * Set device MTU.
-  *
-  * @param priv
-diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
-index 20c957e..a48ebea 100644
---- a/drivers/net/mlx5/mlx5_stats.c
-+++ b/drivers/net/mlx5/mlx5_stats.c
-@@ -125,6 +125,10 @@ struct mlx5_counter_ctrl {
-               .dpdk_name = "tx_errors_phy",
-               .ctr_name = "tx_errors_phy",
-       },
-+      {
-+              .dpdk_name = "rx_out_of_buffer",
-+              .ctr_name = "out_of_buffer",
-+      },
- };
- static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init);
-@@ -159,9 +163,15 @@ struct mlx5_counter_ctrl {
-               WARN("unable to read statistic values from device");
-               return -1;
-       }
--      for (i = 0; i != xstats_n; ++i)
--              stats[i] = (uint64_t)
--                         et_stats->data[xstats_ctrl->dev_table_idx[i]];
-+      for (i = 0; i != xstats_n; ++i) {
-+              if (priv_is_ib_cntr(mlx5_counters_init[i].ctr_name))
-+                      priv_get_cntr_sysfs(priv,
-+                                          mlx5_counters_init[i].ctr_name,
-+                                          &stats[i]);
-+              else
-+                      stats[i] = (uint64_t)
-+                              et_stats->data[xstats_ctrl->dev_table_idx[i]];
-+      }
-       return 0;
- }
-@@ -233,6 +243,8 @@ struct mlx5_counter_ctrl {
-               }
-       }
-       for (j = 0; j != xstats_n; ++j) {
-+              if (priv_is_ib_cntr(mlx5_counters_init[i].ctr_name))
-+                      continue;
-               if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) {
-                       WARN("counter \"%s\" is not recognized",
-                            mlx5_counters_init[j].dpdk_name);
diff --git a/dpdk/dpdk-17.02_patches/0002-dpdk-dev-1-2-net-mlx5-remove-unused-interface-name-query.patch b/dpdk/dpdk-17.02_patches/0002-dpdk-dev-1-2-net-mlx5-remove-unused-interface-name-query.patch
deleted file mode 100644
index aa03639..0000000
--- a/dpdk/dpdk-17.02_patches/0002-dpdk-dev-1-2-net-mlx5-remove-unused-interface-name-query.patch
+++ /dev/null
@@ -1,22 +0,0 @@
-diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
-index 20c957e..0c80e4f 100644
---- a/drivers/net/mlx5/mlx5_stats.c
-+++ b/drivers/net/mlx5/mlx5_stats.c
-@@ -177,17 +177,12 @@ struct mlx5_counter_ctrl {
-       struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
-       unsigned int i;
-       unsigned int j;
--      char ifname[IF_NAMESIZE];
-       struct ifreq ifr;
-       struct ethtool_drvinfo drvinfo;
-       struct ethtool_gstrings *strings = NULL;
-       unsigned int dev_stats_n;
-       unsigned int str_sz;
--      if (priv_get_ifname(priv, &ifname)) {
--              WARN("unable to get interface name");
--              return;
--      }
-       /* How many statistics are available. */
-       drvinfo.cmd = ETHTOOL_GDRVINFO;
-       ifr.ifr_data = (caddr_t)&drvinfo;
diff --git a/dpdk/dpdk-17.02_patches/0003-dpdk-dev-2-2-net-mlx5-fix-extended-statistics-wrong-number.patch b/dpdk/dpdk-17.02_patches/0003-dpdk-dev-2-2-net-mlx5-fix-extended-statistics-wrong-number.patch
deleted file mode 100644
index 05c2e8d..0000000
--- a/dpdk/dpdk-17.02_patches/0003-dpdk-dev-2-2-net-mlx5-fix-extended-statistics-wrong-number.patch
+++ /dev/null
@@ -1,87 +0,0 @@
-diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
-index 0c80e4f..60ffbaa 100644
---- a/drivers/net/mlx5/mlx5_stats.c
-+++ b/drivers/net/mlx5/mlx5_stats.c
-@@ -166,6 +166,29 @@ struct mlx5_counter_ctrl {
- }
- /**
-+ * Query the number of statistics provided by ETHTOOL.
-+ *
-+ * @param priv
-+ *   Pointer to private structure.
-+ *
-+ * @return
-+ *   Number of statistics on success, -1 on error.
-+ */
-+static int
-+priv_ethtool_get_stats_n(struct priv *priv) {
-+      struct ethtool_drvinfo drvinfo;
-+      struct ifreq ifr;
-+
-+      drvinfo.cmd = ETHTOOL_GDRVINFO;
-+      ifr.ifr_data = (caddr_t)&drvinfo;
-+      if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
-+              WARN("unable to query number of statistics");
-+              return -1;
-+      }
-+      return drvinfo.n_stats;
-+}
-+
-+/**
-  * Init the structures to read device counters.
-  *
-  * @param priv
-@@ -178,19 +201,11 @@ struct mlx5_counter_ctrl {
-       unsigned int i;
-       unsigned int j;
-       struct ifreq ifr;
--      struct ethtool_drvinfo drvinfo;
-       struct ethtool_gstrings *strings = NULL;
-       unsigned int dev_stats_n;
-       unsigned int str_sz;
--      /* How many statistics are available. */
--      drvinfo.cmd = ETHTOOL_GDRVINFO;
--      ifr.ifr_data = (caddr_t)&drvinfo;
--      if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
--              WARN("unable to get driver info");
--              return;
--      }
--      dev_stats_n = drvinfo.n_stats;
-+      dev_stats_n = priv_ethtool_get_stats_n(priv);
-       if (dev_stats_n < 1) {
-               WARN("no extended statistics available");
-               return;
-@@ -410,7 +425,15 @@ struct mlx5_counter_ctrl {
-       int ret = xstats_n;
-       if (n >= xstats_n && stats) {
-+              struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
-+              int stats_n;
-+
-               priv_lock(priv);
-+              stats_n = priv_ethtool_get_stats_n(priv);
-+              if (stats_n < 0)
-+                      return -1;
-+              if (xstats_ctrl->stats_n != stats_n)
-+                      priv_xstats_init(priv);
-               ret = priv_xstats_get(priv, stats);
-               priv_unlock(priv);
-       }
-@@ -427,8 +450,15 @@ struct mlx5_counter_ctrl {
- mlx5_xstats_reset(struct rte_eth_dev *dev)
- {
-       struct priv *priv = mlx5_get_priv(dev);
-+      struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
-+      int stats_n;
-       priv_lock(priv);
-+      stats_n = priv_ethtool_get_stats_n(priv);
-+      if (stats_n < 0)
-+              return;
-+      if (xstats_ctrl->stats_n != stats_n)
-+              priv_xstats_init(priv);
-       priv_xstats_reset(priv);
-       priv_unlock(priv);
- }
diff --git a/dpdk/dpdk-17.02_patches/0004-dpdk-dev-net-mlx5-fix-extended-statistics-counters-identification.patch b/dpdk/dpdk-17.02_patches/0004-dpdk-dev-net-mlx5-fix-extended-statistics-counters-identification.patch
deleted file mode 100644
index 8c066ad..0000000
--- a/dpdk/dpdk-17.02_patches/0004-dpdk-dev-net-mlx5-fix-extended-statistics-counters-identification.patch
+++ /dev/null
@@ -1,13 +0,0 @@
-diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
-index 1953293..703f48c 100644
---- a/drivers/net/mlx5/mlx5_stats.c
-+++ b/drivers/net/mlx5/mlx5_stats.c
-@@ -253,7 +253,7 @@ struct mlx5_counter_ctrl {
-               }
-       }
-       for (j = 0; j != xstats_n; ++j) {
--              if (priv_is_ib_cntr(mlx5_counters_init[i].ctr_name))
-+              if (priv_is_ib_cntr(mlx5_counters_init[j].ctr_name))
-                       continue;
-               if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) {
-                       WARN("counter \"%s\" is not recognized",
diff --git a/dpdk/dpdk-17.02_patches/0005-net-mlx5-fix-startup-when-flow-cannot-be-applied.patch b/dpdk/dpdk-17.02_patches/0005-net-mlx5-fix-startup-when-flow-cannot-be-applied.patch
deleted file mode 100644
index af928bb..0000000
--- a/dpdk/dpdk-17.02_patches/0005-net-mlx5-fix-startup-when-flow-cannot-be-applied.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-From 0866d640e42d6c54b2b3f15ebde9930e756ba4d5 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?N=C3=A9lio=20Laranjeiro?= <nelio.laranjeiro@6wind.com>
-Date: Wed, 22 Feb 2017 10:57:52 +0100
-Subject: [PATCH] net/mlx5: fix startup when flow cannot be applied
-
-When flows cannot be re-applied due to configuration modifications, the
-start function should rollback the configuration done.
-
-Fixes: 2097d0d1e2cc ("net/mlx5: support basic flow items and actions")
-Cc: stable@dpdk.org
-
-Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
----
- drivers/net/mlx5/mlx5_trigger.c | 21 ++++++++++++++++-----
- 1 file changed, 16 insertions(+), 5 deletions(-)
-
-diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
-index 30addd2..0acbf28 100644
---- a/drivers/net/mlx5/mlx5_trigger.c
-+++ b/drivers/net/mlx5/mlx5_trigger.c
-@@ -82,17 +82,28 @@ mlx5_dev_start(struct rte_eth_dev *dev)
-               ERROR("%p: an error occurred while configuring hash RX queues:"
-                     " %s",
-                     (void *)priv, strerror(err));
--              /* Rollback. */
--              priv_special_flow_disable_all(priv);
--              priv_mac_addrs_disable(priv);
--              priv_destroy_hash_rxqs(priv);
-+              goto error;
-       }
-       if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_NONE)
-               priv_fdir_enable(priv);
--      priv_dev_interrupt_handler_install(priv, dev);
-       err = priv_flow_start(priv);
-+      if (err) {
-+              priv->started = 0;
-+              ERROR("%p: an error occurred while configuring flows:"
-+                    " %s",
-+                    (void *)priv, strerror(err));
-+              goto error;
-+      }
-+      priv_dev_interrupt_handler_install(priv, dev);
-       priv_xstats_init(priv);
-       priv_unlock(priv);
-+      return 0;
-+error:
-+      /* Rollback. */
-+      priv_special_flow_disable_all(priv);
-+      priv_mac_addrs_disable(priv);
-+      priv_destroy_hash_rxqs(priv);
-+      priv_flow_stop(priv);
-       return -err;
- }
--- 
-2.7.4
-
diff --git a/dpdk/dpdk-17.02_patches/0006-net-mlx5-add-hardware-TSO-support.patch b/dpdk/dpdk-17.02_patches/0006-net-mlx5-add-hardware-TSO-support.patch
deleted file mode 100644
index 929a613..0000000
--- a/dpdk/dpdk-17.02_patches/0006-net-mlx5-add-hardware-TSO-support.patch
+++ /dev/null
@@ -1,385 +0,0 @@
-From e25bad4a287924d26627ffe307f8a12824b87054 Mon Sep 17 00:00:00 2001
-From: Shahaf Shuler <shahafs@mellanox.com>
-Date: Thu, 2 Mar 2017 11:01:31 +0200
-Subject: [PATCH] net/mlx5: add hardware TSO support
-
-Implement support for hardware TSO.
-
-Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
-Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
----
- doc/guides/nics/features/mlx5.ini |   1 +
- doc/guides/nics/mlx5.rst          |  12 ++++
- drivers/net/mlx5/mlx5.c           |  18 ++++++
- drivers/net/mlx5/mlx5.h           |   2 +
- drivers/net/mlx5/mlx5_defs.h      |   3 +
- drivers/net/mlx5/mlx5_ethdev.c    |   2 +
- drivers/net/mlx5/mlx5_rxtx.c      | 123 +++++++++++++++++++++++++++++++++-----
- drivers/net/mlx5/mlx5_rxtx.h      |   2 +
- drivers/net/mlx5/mlx5_txq.c       |  13 ++++
- 9 files changed, 160 insertions(+), 16 deletions(-)
-
-diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
-index f20d214..8df25ce 100644
---- a/doc/guides/nics/features/mlx5.ini
-+++ b/doc/guides/nics/features/mlx5.ini
-@@ -11,6 +11,7 @@ Queue start/stop     = Y
- MTU update           = Y
- Jumbo frame          = Y
- Scattered Rx         = Y
-+TSO                  = Y
- Promiscuous mode     = Y
- Allmulticast mode    = Y
- Unicast MAC filter   = Y
-diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
-index 5f6e594..9b0ba29 100644
---- a/doc/guides/nics/mlx5.rst
-+++ b/doc/guides/nics/mlx5.rst
-@@ -90,6 +90,7 @@ Features
- - Secondary process TX is supported.
- - KVM and VMware ESX SR-IOV modes are supported.
- - RSS hash result is supported.
-+- Hardware TSO.
- Limitations
- -----------
-@@ -186,9 +187,20 @@ Run-time configuration
-   save PCI bandwidth and improve performance at the cost of a slightly
-   higher CPU usage.
-+  This option cannot be used in conjunction with ``tso`` below. When ``tso``
-+  is set, ``txq_mpw_en`` is disabled.
-+
-   It is currently only supported on the ConnectX-4 Lx and ConnectX-5
-   families of adapters. Enabled by default.
-+- ``tso`` parameter [int]
-+
-+  A nonzero value enables hardware TSO.
-+  When hardware TSO is enabled, packets marked with TCP segmentation
-+  offload will be divided into segments by the hardware.
-+
-+  Disabled by default.
-+
- Prerequisites
- -------------
-diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
-index d4bd469..03ed3b3 100644
---- a/drivers/net/mlx5/mlx5.c
-+++ b/drivers/net/mlx5/mlx5.c
-@@ -84,6 +84,9 @@
- /* Device parameter to enable multi-packet send WQEs. */
- #define MLX5_TXQ_MPW_EN "txq_mpw_en"
-+/* Device parameter to enable hardware TSO offload. */
-+#define MLX5_TSO "tso"
-+
- /**
-  * Retrieve integer value from environment variable.
-  *
-@@ -290,6 +293,8 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
-               priv->txqs_inline = tmp;
-       } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
-               priv->mps &= !!tmp; /* Enable MPW only if HW supports */
-+      } else if (strcmp(MLX5_TSO, key) == 0) {
-+              priv->tso = !!tmp;
-       } else {
-               WARN("%s: unknown parameter", key);
-               return -EINVAL;
-@@ -316,6 +321,7 @@ mlx5_args(struct priv *priv, struct rte_devargs *devargs)
-               MLX5_TXQ_INLINE,
-               MLX5_TXQS_MIN_INLINE,
-               MLX5_TXQ_MPW_EN,
-+              MLX5_TSO,
-               NULL,
-       };
-       struct rte_kvargs *kvlist;
-@@ -479,6 +485,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
-                       IBV_EXP_DEVICE_ATTR_RX_HASH |
-                       IBV_EXP_DEVICE_ATTR_VLAN_OFFLOADS |
-                       IBV_EXP_DEVICE_ATTR_RX_PAD_END_ALIGN |
-+                      IBV_EXP_DEVICE_ATTR_TSO_CAPS |
-                       0;
-               DEBUG("using port %u (%08" PRIx32 ")", port, test);
-@@ -580,11 +587,22 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
-               priv_get_num_vfs(priv, &num_vfs);
-               priv->sriov = (num_vfs || sriov);
-+              priv->tso = ((priv->tso) &&
-+                          (exp_device_attr.tso_caps.max_tso > 0) &&
-+                          (exp_device_attr.tso_caps.supported_qpts &
-+                          (1 << IBV_QPT_RAW_ETH)));
-+              if (priv->tso)
-+                      priv->max_tso_payload_sz =
-+                              exp_device_attr.tso_caps.max_tso;
-               if (priv->mps && !mps) {
-                       ERROR("multi-packet send not supported on this device"
-                             " (" MLX5_TXQ_MPW_EN ")");
-                       err = ENOTSUP;
-                       goto port_error;
-+              } else if (priv->mps && priv->tso) {
-+                      WARN("multi-packet send not supported in conjunction "
-+                            "with TSO. MPS disabled");
-+                      priv->mps = 0;
-               }
-               /* Allocate and register default RSS hash keys. */
-               priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n,
-diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
-index 4c4b9d4..93f129b 100644
---- a/drivers/net/mlx5/mlx5.h
-+++ b/drivers/net/mlx5/mlx5.h
-@@ -126,6 +126,8 @@ struct priv {
-       unsigned int mps:1; /* Whether multi-packet send is supported. */
-       unsigned int cqe_comp:1; /* Whether CQE compression is enabled. */
-       unsigned int pending_alarm:1; /* An alarm is pending. */
-+      unsigned int tso:1; /* Whether TSO is supported. */
-+      unsigned int max_tso_payload_sz; /* Maximum TCP payload for TSO. */
-       unsigned int txq_inline; /* Maximum packet size for inlining. */
-       unsigned int txqs_inline; /* Queue number threshold for inlining. */
-       /* RX/TX queues. */
-diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
-index e91d245..eecb908 100644
---- a/drivers/net/mlx5/mlx5_defs.h
-+++ b/drivers/net/mlx5/mlx5_defs.h
-@@ -79,4 +79,7 @@
- /* Maximum number of extended statistics counters. */
- #define MLX5_MAX_XSTATS 32
-+/* Maximum Packet headers size (L2+L3+L4) for TSO. */
-+#define MLX5_MAX_TSO_HEADER 128
-+
- #endif /* RTE_PMD_MLX5_DEFS_H_ */
-diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
-index 5677f03..5542193 100644
---- a/drivers/net/mlx5/mlx5_ethdev.c
-+++ b/drivers/net/mlx5/mlx5_ethdev.c
-@@ -693,6 +693,8 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
-                       (DEV_TX_OFFLOAD_IPV4_CKSUM |
-                        DEV_TX_OFFLOAD_UDP_CKSUM |
-                        DEV_TX_OFFLOAD_TCP_CKSUM);
-+      if (priv->tso)
-+              info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
-       if (priv_get_ifname(priv, &ifname) == 0)
-               info->if_index = if_nametoindex(ifname);
-       /* FIXME: RETA update/query API expects the callee to know the size of
-diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
-index 4d5455b..98889f6 100644
---- a/drivers/net/mlx5/mlx5_rxtx.c
-+++ b/drivers/net/mlx5/mlx5_rxtx.c
-@@ -365,6 +365,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
-       const unsigned int elts_n = 1 << txq->elts_n;
-       unsigned int i = 0;
-       unsigned int j = 0;
-+      unsigned int k = 0;
-       unsigned int max;
-       uint16_t max_wqe;
-       unsigned int comp;
-@@ -392,8 +393,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
-               uintptr_t addr;
-               uint64_t naddr;
-               uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2;
-+              uint16_t tso_header_sz = 0;
-               uint16_t ehdr;
-               uint8_t cs_flags = 0;
-+              uint64_t tso = 0;
- #ifdef MLX5_PMD_SOFT_COUNTERS
-               uint32_t total_length = 0;
- #endif
-@@ -465,14 +468,74 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
-                       length -= pkt_inline_sz;
-                       addr += pkt_inline_sz;
-               }
-+              if (txq->tso_en) {
-+                      tso = buf->ol_flags & PKT_TX_TCP_SEG;
-+                      if (tso) {
-+                              uintptr_t end = (uintptr_t)
-+                                              (((uintptr_t)txq->wqes) +
-+                                              (1 << txq->wqe_n) *
-+                                              MLX5_WQE_SIZE);
-+                              unsigned int copy_b;
-+                              uint8_t vlan_sz = (buf->ol_flags &
-+                                                PKT_TX_VLAN_PKT) ? 4 : 0;
-+
-+                              tso_header_sz = buf->l2_len + vlan_sz +
-+                                              buf->l3_len + buf->l4_len;
-+
-+                              if (unlikely(tso_header_sz >
-+                                           MLX5_MAX_TSO_HEADER))
-+                                      break;
-+                              copy_b = tso_header_sz - pkt_inline_sz;
-+                              /* First seg must contain all headers. */
-+                              assert(copy_b <= length);
-+                              raw += MLX5_WQE_DWORD_SIZE;
-+                              if (copy_b &&
-+                                 ((end - (uintptr_t)raw) > copy_b)) {
-+                                      uint16_t n = (MLX5_WQE_DS(copy_b) -
-+                                                    1 + 3) / 4;
-+
-+                                      if (unlikely(max_wqe < n))
-+                                              break;
-+                                      max_wqe -= n;
-+                                      rte_memcpy((void *)raw,
-+                                                 (void *)addr, copy_b);
-+                                      addr += copy_b;
-+                                      length -= copy_b;
-+                                      pkt_inline_sz += copy_b;
-+                                      /*
-+                                       * Another DWORD will be added
-+                                       * in the inline part.
-+                                       */
-+                                      raw += MLX5_WQE_DS(copy_b) *
-+                                             MLX5_WQE_DWORD_SIZE -
-+                                             MLX5_WQE_DWORD_SIZE;
-+                              } else {
-+                                      /* NOP WQE. */
-+                                      wqe->ctrl = (rte_v128u32_t){
-+                                                   htonl(txq->wqe_ci << 8),
-+                                                   htonl(txq->qp_num_8s | 1),
-+                                                   0,
-+                                                   0,
-+                                      };
-+                                      ds = 1;
-+                                      total_length = 0;
-+                                      pkts--;
-+                                      pkts_n++;
-+                                      elts_head = (elts_head - 1) &
-+                                                  (elts_n - 1);
-+                                      k++;
-+                                      goto next_wqe;
-+                              }
-+                      }
-+              }
-               /* Inline if enough room. */
--              if (txq->max_inline) {
-+              if (txq->inline_en || tso) {
-                       uintptr_t end = (uintptr_t)
-                               (((uintptr_t)txq->wqes) +
-                                (1 << txq->wqe_n) * MLX5_WQE_SIZE);
-                       unsigned int max_inline = txq->max_inline *
-                                                 RTE_CACHE_LINE_SIZE -
--                                                MLX5_WQE_DWORD_SIZE;
-+                                                (pkt_inline_sz - 2);
-                       uintptr_t addr_end = (addr + max_inline) &
-                                            ~(RTE_CACHE_LINE_SIZE - 1);
-                       unsigned int copy_b = (addr_end > addr) ?
-@@ -491,6 +554,18 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
-                               if (unlikely(max_wqe < n))
-                                       break;
-                               max_wqe -= n;
-+                              if (tso) {
-+                                      uint32_t inl =
-+                                              htonl(copy_b | MLX5_INLINE_SEG);
-+
-+                                      pkt_inline_sz =
-+                                              MLX5_WQE_DS(tso_header_sz) *
-+                                              MLX5_WQE_DWORD_SIZE;
-+                                      rte_memcpy((void *)raw,
-+                                                 (void *)&inl, sizeof(inl));
-+                                      raw += sizeof(inl);
-+                                      pkt_inline_sz += sizeof(inl);
-+                              }
-                               rte_memcpy((void *)raw, (void *)addr, copy_b);
-                               addr += copy_b;
-                               length -= copy_b;
-@@ -591,18 +666,34 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
- next_pkt:
-               ++i;
-               /* Initialize known and common part of the WQE structure. */
--              wqe->ctrl = (rte_v128u32_t){
--                      htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND),
--                      htonl(txq->qp_num_8s | ds),
--                      0,
--                      0,
--              };
--              wqe->eseg = (rte_v128u32_t){
--                      0,
--                      cs_flags,
--                      0,
--                      (ehdr << 16) | htons(pkt_inline_sz),
--              };
-+              if (tso) {
-+                      wqe->ctrl = (rte_v128u32_t){
-+                              htonl((txq->wqe_ci << 8) | MLX5_OPCODE_TSO),
-+                              htonl(txq->qp_num_8s | ds),
-+                              0,
-+                              0,
-+                      };
-+                      wqe->eseg = (rte_v128u32_t){
-+                              0,
-+                              cs_flags | (htons(buf->tso_segsz) << 16),
-+                              0,
-+                              (ehdr << 16) | htons(tso_header_sz),
-+                      };
-+              } else {
-+                      wqe->ctrl = (rte_v128u32_t){
-+                              htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND),
-+                              htonl(txq->qp_num_8s | ds),
-+                              0,
-+                              0,
-+                      };
-+                      wqe->eseg = (rte_v128u32_t){
-+                              0,
-+                              cs_flags,
-+                              0,
-+                              (ehdr << 16) | htons(pkt_inline_sz),
-+                      };
-+              }
-+next_wqe:
-               txq->wqe_ci += (ds + 3) / 4;
- #ifdef MLX5_PMD_SOFT_COUNTERS
-               /* Increment sent bytes counter. */
-@@ -610,10 +701,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
- #endif
-       } while (pkts_n);
-       /* Take a shortcut if nothing must be sent. */
--      if (unlikely(i == 0))
-+      if (unlikely((i + k) == 0))
-               return 0;
-       /* Check whether completion threshold has been reached. */
--      comp = txq->elts_comp + i + j;
-+      comp = txq->elts_comp + i + j + k;
-       if (comp >= MLX5_TX_COMP_THRESH) {
-               volatile struct mlx5_wqe_ctrl *w =
-                       (volatile struct mlx5_wqe_ctrl *)wqe;
-diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
-index 41a34d7..6b328cf 100644
---- a/drivers/net/mlx5/mlx5_rxtx.h
-+++ b/drivers/net/mlx5/mlx5_rxtx.h
-@@ -254,6 +254,8 @@ struct txq {
-       uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
-       uint16_t wqe_n:4; /* Number of of WQ elements (in log2). */
-       uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
-+      uint16_t inline_en:1; /* When set inline is enabled. */
-+      uint16_t tso_en:1; /* When set hardware TSO is enabled. */
-       uint32_t qp_num_8s; /* QP number shifted by 8. */
-       volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
-       volatile void *wqes; /* Work queue (use volatile to write into). */
-diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
-index 949035b..995b763 100644
---- a/drivers/net/mlx5/mlx5_txq.c
-+++ b/drivers/net/mlx5/mlx5_txq.c
-@@ -342,6 +342,19 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
-                        RTE_CACHE_LINE_SIZE);
-               attr.init.cap.max_inline_data =
-                       tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
-+              tmpl.txq.inline_en = 1;
-+      }
-+      if (priv->tso) {
-+              uint16_t max_tso_inline = ((MLX5_MAX_TSO_HEADER +
-+                                         (RTE_CACHE_LINE_SIZE - 1)) /
-+                                          RTE_CACHE_LINE_SIZE);
-+
-+              attr.init.max_tso_header =
-+                      max_tso_inline * RTE_CACHE_LINE_SIZE;
-+              attr.init.comp_mask |= IBV_EXP_QP_INIT_ATTR_MAX_TSO_HEADER;
-+              tmpl.txq.max_inline = RTE_MAX(tmpl.txq.max_inline,
-+                                            max_tso_inline);
-+              tmpl.txq.tso_en = 1;
-       }
-       tmpl.qp = ibv_exp_create_qp(priv->ctx, &attr.init);
-       if (tmpl.qp == NULL) {
--- 
-2.7.4
-
diff --git a/dpdk/dpdk-17.02_patches/0007-add-hardware-checksum-offload-for-tunnel-pa.patch b/dpdk/dpdk-17.02_patches/0007-add-hardware-checksum-offload-for-tunnel-pa.patch
deleted file mode 100644
index bbcce48..0000000
--- a/dpdk/dpdk-17.02_patches/0007-add-hardware-checksum-offload-for-tunnel-pa.patch
+++ /dev/null
@@ -1,194 +0,0 @@
-From f0dda2ab16635894b1e3836d0b960b9270a3b491 Mon Sep 17 00:00:00 2001
-From: Shahaf Shuler <shahafs@mellanox.com>
-Date: Thu, 2 Mar 2017 11:05:44 +0200
-Subject: [PATCH] net/mlx5: add hardware checksum offload for tunnel packets
-
-Prior to this commit Tx checksum offload was supported only for the
-inner headers.
-This commit adds support for the hardware to compute the checksum for the
-outer headers as well.
-
-The support is for tunneling protocols GRE and VXLAN.
-
-Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
-Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
----
- doc/guides/nics/features/mlx5.ini |  2 ++
- doc/guides/nics/mlx5.rst          |  3 ++-
- drivers/net/mlx5/mlx5.c           |  7 +++++++
- drivers/net/mlx5/mlx5.h           |  2 ++
- drivers/net/mlx5/mlx5_ethdev.c    |  2 ++
- drivers/net/mlx5/mlx5_prm.h       |  6 ++++++
- drivers/net/mlx5/mlx5_rxtx.c      | 14 +++++++++++++-
- drivers/net/mlx5/mlx5_rxtx.h      |  2 ++
- drivers/net/mlx5/mlx5_txq.c       |  2 ++
- 9 files changed, 38 insertions(+), 2 deletions(-)
-
-diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
-index 8df25ce..1814f82 100644
---- a/doc/guides/nics/features/mlx5.ini
-+++ b/doc/guides/nics/features/mlx5.ini
-@@ -27,6 +27,8 @@ CRC offload          = Y
- VLAN offload         = Y
- L3 checksum offload  = Y
- L4 checksum offload  = Y
-+Inner L3 checksum    = Y
-+Inner L4 checksum    = Y
- Packet type parsing  = Y
- Basic stats          = Y
- Stats per queue      = Y
-diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
-index 9b0ba29..41f3a47 100644
---- a/doc/guides/nics/mlx5.rst
-+++ b/doc/guides/nics/mlx5.rst
-@@ -91,13 +91,14 @@ Features
- - KVM and VMware ESX SR-IOV modes are supported.
- - RSS hash result is supported.
- - Hardware TSO.
-+- Hardware checksum TX offload for VXLAN and GRE.
- Limitations
- -----------
- - Inner RSS for VXLAN frames is not supported yet.
- - Port statistics through software counters only.
--- Hardware checksum offloads for VXLAN inner header are not supported yet.
-+- Hardware checksum RX offloads for VXLAN inner header are not supported yet.
- - Secondary process RX is not supported.
- Configuration
-diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
-index 03ed3b3..6f42948 100644
---- a/drivers/net/mlx5/mlx5.c
-+++ b/drivers/net/mlx5/mlx5.c
-@@ -375,6 +375,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
-       struct ibv_device_attr device_attr;
-       unsigned int sriov;
-       unsigned int mps;
-+      unsigned int tunnel_en;
-       int idx;
-       int i;
-@@ -429,12 +430,17 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
-                * as all ConnectX-5 devices.
-                */
-               switch (pci_dev->id.device_id) {
-+              case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
-+                      tunnel_en = 1;
-+                      mps = 0;
-+                      break;
-               case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
-               case PCI_DEVICE_ID_MELLANOX_CONNECTX5:
-               case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
-               case PCI_DEVICE_ID_MELLANOX_CONNECTX5EX:
-               case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
-                       mps = 1;
-+                      tunnel_en = 1;
-                       break;
-               default:
-                       mps = 0;
-@@ -539,6 +545,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
-               priv->mtu = ETHER_MTU;
-               priv->mps = mps; /* Enable MPW by default if supported. */
-               priv->cqe_comp = 1; /* Enable compression by default. */
-+              priv->tunnel_en = tunnel_en;
-               err = mlx5_args(priv, pci_dev->device.devargs);
-               if (err) {
-                       ERROR("failed to process device arguments: %s",
-diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
-index 93f129b..870e01f 100644
---- a/drivers/net/mlx5/mlx5.h
-+++ b/drivers/net/mlx5/mlx5.h
-@@ -127,6 +127,8 @@ struct priv {
-       unsigned int cqe_comp:1; /* Whether CQE compression is enabled. */
-       unsigned int pending_alarm:1; /* An alarm is pending. */
-       unsigned int tso:1; /* Whether TSO is supported. */
-+      unsigned int tunnel_en:1;
-+      /* Whether Tx offloads for tunneled packets are supported. */
-       unsigned int max_tso_payload_sz; /* Maximum TCP payload for TSO. */
-       unsigned int txq_inline; /* Maximum packet size for inlining. */
-       unsigned int txqs_inline; /* Queue number threshold for inlining. */
-diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
-index 5542193..8be9e77 100644
---- a/drivers/net/mlx5/mlx5_ethdev.c
-+++ b/drivers/net/mlx5/mlx5_ethdev.c
-@@ -695,6 +695,8 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
-                        DEV_TX_OFFLOAD_TCP_CKSUM);
-       if (priv->tso)
-               info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
-+      if (priv->tunnel_en)
-+              info->tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
-       if (priv_get_ifname(priv, &ifname) == 0)
-               info->if_index = if_nametoindex(ifname);
-       /* FIXME: RETA update/query API expects the callee to know the size of
-diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
-index 3318668..0a77f5b 100644
---- a/drivers/net/mlx5/mlx5_prm.h
-+++ b/drivers/net/mlx5/mlx5_prm.h
-@@ -120,6 +120,12 @@
- /* Tunnel packet bit in the CQE. */
- #define MLX5_CQE_RX_TUNNEL_PACKET (1u << 0)
-+/* Inner L3 checksum offload (Tunneled packets only). */
-+#define MLX5_ETH_WQE_L3_INNER_CSUM (1u << 4)
-+
-+/* Inner L4 checksum offload (Tunneled packets only). */
-+#define MLX5_ETH_WQE_L4_INNER_CSUM (1u << 5)
-+
- /* INVALID is used by packets matching no flow rules. */
- #define MLX5_FLOW_MARK_INVALID 0
-diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
-index 98889f6..c2eb891 100644
---- a/drivers/net/mlx5/mlx5_rxtx.c
-+++ b/drivers/net/mlx5/mlx5_rxtx.c
-@@ -443,7 +443,19 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
-               /* Should we enable HW CKSUM offload */
-               if (buf->ol_flags &
-                   (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
--                      cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
-+                      const uint64_t is_tunneled = buf->ol_flags &
-+                                                   (PKT_TX_TUNNEL_GRE |
-+                                                    PKT_TX_TUNNEL_VXLAN);
-+
-+                      if (is_tunneled && txq->tunnel_en) {
-+                              cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
-+                                         MLX5_ETH_WQE_L4_INNER_CSUM;
-+                              if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM)
-+                                      cs_flags |= MLX5_ETH_WQE_L3_CSUM;
-+                      } else {
-+                              cs_flags = MLX5_ETH_WQE_L3_CSUM |
-+                                         MLX5_ETH_WQE_L4_CSUM;
-+                      }
-               }
-               raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
-               /* Replace the Ethernet type by the VLAN if necessary. */
-diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
-index 6b328cf..9669564 100644
---- a/drivers/net/mlx5/mlx5_rxtx.h
-+++ b/drivers/net/mlx5/mlx5_rxtx.h
-@@ -256,6 +256,8 @@ struct txq {
-       uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
-       uint16_t inline_en:1; /* When set inline is enabled. */
-       uint16_t tso_en:1; /* When set hardware TSO is enabled. */
-+      uint16_t tunnel_en:1;
-+      /* When set TX offload for tunneled packets are supported. */
-       uint32_t qp_num_8s; /* QP number shifted by 8. */
-       volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
-       volatile void *wqes; /* Work queue (use volatile to write into). */
-diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
-index 995b763..9d0c00f 100644
---- a/drivers/net/mlx5/mlx5_txq.c
-+++ b/drivers/net/mlx5/mlx5_txq.c
-@@ -356,6 +356,8 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
-                                             max_tso_inline);
-               tmpl.txq.tso_en = 1;
-       }
-+      if (priv->tunnel_en)
-+              tmpl.txq.tunnel_en = 1;
-       tmpl.qp = ibv_exp_create_qp(priv->ctx, &attr.init);
-       if (tmpl.qp == NULL) {
-               ret = (errno ? errno : EINVAL);
--- 
-2.7.4
-
diff --git a/dpdk/dpdk-17.02_patches/0008-net-mlx5-add-enhanced-multi-packet-send-for-ConnectX.patch b/dpdk/dpdk-17.02_patches/0008-net-mlx5-add-enhanced-multi-packet-send-for-ConnectX.patch
deleted file mode 100644
index 6ff076c..0000000
--- a/dpdk/dpdk-17.02_patches/0008-net-mlx5-add-enhanced-multi-packet-send-for-ConnectX.patch
+++ /dev/null
@@ -1,809 +0,0 @@
-From 7ca5c8de65acabe4cb60960adcfa9247efdd2a5c Mon Sep 17 00:00:00 2001
-From: Yongseok Koh <yskoh@mellanox.com>
-Date: Wed, 15 Mar 2017 16:55:44 -0700
-Subject: [PATCH] net/mlx5: add enhanced multi-packet send for ConnectX-5
-
-ConnectX-5 supports enhanced version of multi-packet send (MPS). An MPS Tx
-descriptor can carry multiple packets either by including pointers of
-packets or by inlining packets. Inlining packet data can be helpful to
-better utilize PCIe bandwidth. In addition, Enhanced MPS supports hybrid
-mode - mixing inlined packets and pointers in a descriptor. This feature is
-enabled by default if supported by HW.
-
-Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
----
- doc/guides/nics/mlx5.rst       |  31 +++-
- drivers/net/mlx5/mlx5.c        |  37 +++-
- drivers/net/mlx5/mlx5.h        |   4 +-
- drivers/net/mlx5/mlx5_defs.h   |   7 +
- drivers/net/mlx5/mlx5_ethdev.c |   6 +-
- drivers/net/mlx5/mlx5_prm.h    |  20 ++
- drivers/net/mlx5/mlx5_rxtx.c   | 410 +++++++++++++++++++++++++++++++++++++++++
- drivers/net/mlx5/mlx5_rxtx.h   |   7 +-
- drivers/net/mlx5/mlx5_txq.c    |  29 ++-
- 9 files changed, 534 insertions(+), 17 deletions(-)
-
-diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
-index 41f3a47..0783aeb 100644
---- a/doc/guides/nics/mlx5.rst
-+++ b/doc/guides/nics/mlx5.rst
-@@ -183,10 +183,17 @@ Run-time configuration
- - ``txq_mpw_en`` parameter [int]
--  A nonzero value enables multi-packet send. This feature allows the TX
--  burst function to pack up to five packets in two descriptors in order to
--  save PCI bandwidth and improve performance at the cost of a slightly
--  higher CPU usage.
-+  A nonzero value enables multi-packet send (MPS) for ConnectX-4 Lx and
-+  enhanced multi-packet send (Enhanced MPS) for ConnectX-5. MPS allows the
-+  TX burst function to pack up multiple packets in a single descriptor
-+  session in order to save PCI bandwidth and improve performance at the
-+  cost of a slightly higher CPU usage. When ``txq_inline`` is set along
-+  with ``txq_mpw_en``, TX burst function tries to copy entire packet data
-+  on to TX descriptor instead of including pointer of packet only if there
-+  is enough room remained in the descriptor. ``txq_inline`` sets
-+  per-descriptor space for either pointers or inlined packets. In addition,
-+  Enhanced MPS supports hybrid mode - mixing inlined packets and pointers
-+  in the same descriptor.
-   This option cannot be used in conjunction with ``tso`` below. When ``tso``
-   is set, ``txq_mpw_en`` is disabled.
-@@ -194,6 +201,22 @@ Run-time configuration
-   It is currently only supported on the ConnectX-4 Lx and ConnectX-5
-   families of adapters. Enabled by default.
-+- ``txq_mpw_hdr_dseg_en`` parameter [int]
-+
-+  A nonzero value enables including two pointers in the first block of TX
-+  descriptor. This can be used to lessen CPU load for memory copy.
-+
-+  Effective only when Enhanced MPS is supported. Disabled by default.
-+
-+- ``txq_max_inline_len`` parameter [int]
-+
-+  Maximum size of packet to be inlined. This limits the size of packet to
-+  be inlined. If the size of a packet is larger than configured value, the
-+  packet isn't inlined even though there's enough space remained in the
-+  descriptor. Instead, the packet is included with pointer.
-+
-+  Effective only when Enhanced MPS is supported. The default value is 256.
-+
- - ``tso`` parameter [int]
-   A nonzero value enables hardware TSO.
-diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
-index ebc7984..bc6a34f 100644
---- a/drivers/net/mlx5/mlx5.c
-+++ b/drivers/net/mlx5/mlx5.c
-@@ -84,6 +84,12 @@
- /* Device parameter to enable multi-packet send WQEs. */
- #define MLX5_TXQ_MPW_EN "txq_mpw_en"
-+/* Device parameter to include 2 dsegs in the title WQEBB. */
-+#define MLX5_TXQ_MPW_HDR_DSEG_EN "txq_mpw_hdr_dseg_en"
-+
-+/* Device parameter to limit the size of inlining packet. */
-+#define MLX5_TXQ_MAX_INLINE_LEN "txq_max_inline_len"
-+
- /* Device parameter to enable hardware TSO offload. */
- #define MLX5_TSO "tso"
-@@ -294,7 +300,11 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
-       } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
-               priv->txqs_inline = tmp;
-       } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
--              priv->mps &= !!tmp; /* Enable MPW only if HW supports */
-+              priv->mps = !!tmp ? priv->mps : MLX5_MPW_DISABLED;
-+      } else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) {
-+              priv->mpw_hdr_dseg = !!tmp;
-+      } else if (strcmp(MLX5_TXQ_MAX_INLINE_LEN, key) == 0) {
-+              priv->inline_max_packet_sz = tmp;
-       } else if (strcmp(MLX5_TSO, key) == 0) {
-               priv->tso = !!tmp;
-       } else {
-@@ -323,6 +333,8 @@ mlx5_args(struct priv *priv, struct rte_devargs *devargs)
-               MLX5_TXQ_INLINE,
-               MLX5_TXQS_MIN_INLINE,
-               MLX5_TXQ_MPW_EN,
-+              MLX5_TXQ_MPW_HDR_DSEG_EN,
-+              MLX5_TXQ_MAX_INLINE_LEN,
-               MLX5_TSO,
-               NULL,
-       };
-@@ -434,24 +446,27 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
-               switch (pci_dev->id.device_id) {
-               case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
-                       tunnel_en = 1;
--                      mps = 0;
-+                      mps = MLX5_MPW_DISABLED;
-                       break;
-               case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
-+                      mps = MLX5_MPW;
-+                      break;
-               case PCI_DEVICE_ID_MELLANOX_CONNECTX5:
-               case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
-               case PCI_DEVICE_ID_MELLANOX_CONNECTX5EX:
-               case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
--                      mps = 1;
-                       tunnel_en = 1;
-+                      mps = MLX5_MPW_ENHANCED;
-                       break;
-               default:
--                      mps = 0;
-+                      mps = MLX5_MPW_DISABLED;
-               }
-               INFO("PCI information matches, using device \"%s\""
--                   " (SR-IOV: %s, MPS: %s)",
-+                   " (SR-IOV: %s, %sMPS: %s)",
-                    list[i]->name,
-                    sriov ? "true" : "false",
--                   mps ? "true" : "false");
-+                   mps == MLX5_MPW_ENHANCED ? "Enhanced " : "",
-+                   mps != MLX5_MPW_DISABLED ? "true" : "false");
-               attr_ctx = ibv_open_device(list[i]);
-               err = errno;
-               break;
-@@ -546,6 +561,13 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
-               priv->pd = pd;
-               priv->mtu = ETHER_MTU;
-               priv->mps = mps; /* Enable MPW by default if supported. */
-+              /* Set default values for Enhanced MPW, a.k.a MPWv2. */
-+              if (mps == MLX5_MPW_ENHANCED) {
-+                      priv->mpw_hdr_dseg = 0;
-+                      priv->txqs_inline = MLX5_EMPW_MIN_TXQS;
-+                      priv->inline_max_packet_sz = MLX5_EMPW_MAX_INLINE_LEN;
-+                      priv->txq_inline = MLX5_WQE_SIZE_MAX - MLX5_WQE_SIZE;
-+              }
-               priv->cqe_comp = 1; /* Enable compression by default. */
-               priv->tunnel_en = tunnel_en;
-               err = mlx5_args(priv, pci_dev->device.devargs);
-@@ -613,6 +635,9 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
-                             "with TSO. MPS disabled");
-                       priv->mps = 0;
-               }
-+              INFO("%sMPS is %s",
-+                   priv->mps == MLX5_MPW_ENHANCED ? "Enhanced " : "",
-+                   priv->mps != MLX5_MPW_DISABLED ? "enabled" : "disabled");
-               /* Allocate and register default RSS hash keys. */
-               priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n,
-                                           sizeof((*priv->rss_conf)[0]), 0);
-diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
-index 870e01f..d26d465 100644
---- a/drivers/net/mlx5/mlx5.h
-+++ b/drivers/net/mlx5/mlx5.h
-@@ -123,7 +123,8 @@ struct priv {
-       unsigned int hw_fcs_strip:1; /* FCS stripping is supported. */
-       unsigned int hw_padding:1; /* End alignment padding is supported. */
-       unsigned int sriov:1; /* This is a VF or PF with VF devices. */
--      unsigned int mps:1; /* Whether multi-packet send is supported. */
-+      unsigned int mps:2; /* Multi-packet send mode (0: disabled). */
-+      unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
-       unsigned int cqe_comp:1; /* Whether CQE compression is enabled. */
-       unsigned int pending_alarm:1; /* An alarm is pending. */
-       unsigned int tso:1; /* Whether TSO is supported. */
-@@ -132,6 +133,7 @@ struct priv {
-       unsigned int max_tso_payload_sz; /* Maximum TCP payload for TSO. */
-       unsigned int txq_inline; /* Maximum packet size for inlining. */
-       unsigned int txqs_inline; /* Queue number threshold for inlining. */
-+      unsigned int inline_max_packet_sz; /* Max packet size for inlining. */
-       /* RX/TX queues. */
-       unsigned int rxqs_n; /* RX queues array size. */
-       unsigned int txqs_n; /* TX queues array size. */
-diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
-index eecb908..201bb33 100644
---- a/drivers/net/mlx5/mlx5_defs.h
-+++ b/drivers/net/mlx5/mlx5_defs.h
-@@ -55,6 +55,13 @@
- #define MLX5_TX_COMP_THRESH 32
- /*
-+ * Request TX completion every time the total number of WQEBBs used for inlining
-+ * packets exceeds the size of WQ divided by this divisor. Better to be power of
-+ * two for performance.
-+ */
-+#define MLX5_TX_COMP_THRESH_INLINE_DIV (1 << 3)
-+
-+/*
-  * Maximum number of cached Memory Pools (MPs) per TX queue. Each RTE MP
-  * from which buffers are to be transmitted will have to be mapped by this
-  * driver to their own Memory Region (MR). This is a slow operation.
-diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
-index 5deb6e8..dd5fe5c 100644
---- a/drivers/net/mlx5/mlx5_ethdev.c
-+++ b/drivers/net/mlx5/mlx5_ethdev.c
-@@ -1590,7 +1590,11 @@ priv_select_tx_function(struct priv *priv)
- {
-       priv->dev->tx_pkt_burst = mlx5_tx_burst;
-       /* Select appropriate TX function. */
--      if (priv->mps && priv->txq_inline) {
-+      if (priv->mps == MLX5_MPW_ENHANCED) {
-+              priv->dev->tx_pkt_burst =
-+                      mlx5_tx_burst_empw;
-+              DEBUG("selected Enhanced MPW TX function");
-+      } else if (priv->mps && priv->txq_inline) {
-               priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
-               DEBUG("selected MPW inline TX function");
-       } else if (priv->mps) {
-diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
-index 0a77f5b..155bdba 100644
---- a/drivers/net/mlx5/mlx5_prm.h
-+++ b/drivers/net/mlx5/mlx5_prm.h
-@@ -73,6 +73,9 @@
- /* WQE size */
- #define MLX5_WQE_SIZE (4 * MLX5_WQE_DWORD_SIZE)
-+/* Max size of a WQE session. */
-+#define MLX5_WQE_SIZE_MAX 960U
-+
- /* Compute the number of DS. */
- #define MLX5_WQE_DS(n) \
-       (((n) + MLX5_WQE_DWORD_SIZE - 1) / MLX5_WQE_DWORD_SIZE)
-@@ -80,10 +83,19 @@
- /* Room for inline data in multi-packet WQE. */
- #define MLX5_MWQE64_INL_DATA 28
-+/* Default minimum number of Tx queues for inlining packets. */
-+#define MLX5_EMPW_MIN_TXQS 8
-+
-+/* Default max packet length to be inlined. */
-+#define MLX5_EMPW_MAX_INLINE_LEN (4U * MLX5_WQE_SIZE)
-+
- #ifndef HAVE_VERBS_MLX5_OPCODE_TSO
- #define MLX5_OPCODE_TSO MLX5_OPCODE_LSO_MPW /* Compat with OFED 3.3. */
- #endif
-+#define MLX5_OPC_MOD_ENHANCED_MPSW 0
-+#define MLX5_OPCODE_ENHANCED_MPSW 0x29
-+
- /* CQE value to inform that VLAN is stripped. */
- #define MLX5_CQE_VLAN_STRIPPED (1u << 0)
-@@ -176,10 +188,18 @@ struct mlx5_wqe64 {
-       uint8_t raw[32];
- } __rte_aligned(MLX5_WQE_SIZE);
-+/* MPW mode. */
-+enum mlx5_mpw_mode {
-+      MLX5_MPW_DISABLED,
-+      MLX5_MPW,
-+      MLX5_MPW_ENHANCED, /* Enhanced Multi-Packet Send WQE, a.k.a MPWv2. */
-+};
-+
- /* MPW session status. */
- enum mlx5_mpw_state {
-       MLX5_MPW_STATE_OPENED,
-       MLX5_MPW_INL_STATE_OPENED,
-+      MLX5_MPW_ENHANCED_STATE_OPENED,
-       MLX5_MPW_STATE_CLOSED,
- };
-diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
-index 9fc433e..a1dd84a 100644
---- a/drivers/net/mlx5/mlx5_rxtx.c
-+++ b/drivers/net/mlx5/mlx5_rxtx.c
-@@ -195,6 +195,62 @@ tx_mlx5_wqe(struct txq *txq, uint16_t ci)
- }
- /**
-+ * Return the size of tailroom of WQ.
-+ *
-+ * @param txq
-+ *   Pointer to TX queue structure.
-+ * @param addr
-+ *   Pointer to tail of WQ.
-+ *
-+ * @return
-+ *   Size of tailroom.
-+ */
-+static inline size_t
-+tx_mlx5_wq_tailroom(struct txq *txq, void *addr)
-+{
-+      size_t tailroom;
-+      tailroom = (uintptr_t)(txq->wqes) +
-+                 (1 << txq->wqe_n) * MLX5_WQE_SIZE -
-+                 (uintptr_t)addr;
-+      return tailroom;
-+}
-+
-+/**
-+ * Copy data to tailroom of circular queue.
-+ *
-+ * @param dst
-+ *   Pointer to destination.
-+ * @param src
-+ *   Pointer to source.
-+ * @param n
-+ *   Number of bytes to copy.
-+ * @param base
-+ *   Pointer to head of queue.
-+ * @param tailroom
-+ *   Size of tailroom from dst.
-+ *
-+ * @return
-+ *   Pointer after copied data.
-+ */
-+static inline void *
-+mlx5_copy_to_wq(void *dst, const void *src, size_t n,
-+              void *base, size_t tailroom)
-+{
-+      void *ret;
-+
-+      if (n > tailroom) {
-+              rte_memcpy(dst, src, tailroom);
-+              rte_memcpy(base, (void *)((uintptr_t)src + tailroom),
-+                         n - tailroom);
-+              ret = (uint8_t *)base + n - tailroom;
-+      } else {
-+              rte_memcpy(dst, src, n);
-+              ret = (n == tailroom) ? base : (uint8_t *)dst + n;
-+      }
-+      return ret;
-+}
-+
-+/**
-  * Manage TX completions.
-  *
-  * When sending a burst, mlx5_tx_burst() posts several WRs.
-@@ -1269,6 +1325,360 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
- }
- /**
-+ * Open an Enhanced MPW session.
-+ *
-+ * @param txq
-+ *   Pointer to TX queue structure.
-+ * @param mpw
-+ *   Pointer to MPW session structure.
-+ * @param length
-+ *   Packet length.
-+ */
-+static inline void
-+mlx5_empw_new(struct txq *txq, struct mlx5_mpw *mpw, int padding)
-+{
-+      uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
-+
-+      mpw->state = MLX5_MPW_ENHANCED_STATE_OPENED;
-+      mpw->pkts_n = 0;
-+      mpw->total_len = sizeof(struct mlx5_wqe);
-+      mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
-+      mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_ENHANCED_MPSW << 24) |
-+                                (txq->wqe_ci << 8) |
-+                                MLX5_OPCODE_ENHANCED_MPSW);
-+      mpw->wqe->ctrl[2] = 0;
-+      mpw->wqe->ctrl[3] = 0;
-+      memset((void *)(uintptr_t)&mpw->wqe->eseg, 0, MLX5_WQE_DWORD_SIZE);
-+      if (unlikely(padding)) {
-+              uintptr_t addr = (uintptr_t)(mpw->wqe + 1);
-+
-+              /* Pad the first 2 DWORDs with zero-length inline header. */
-+              *(volatile uint32_t *)addr = htonl(MLX5_INLINE_SEG);
-+              *(volatile uint32_t *)(addr + MLX5_WQE_DWORD_SIZE) =
-+                      htonl(MLX5_INLINE_SEG);
-+              mpw->total_len += 2 * MLX5_WQE_DWORD_SIZE;
-+              /* Start from the next WQEBB. */
-+              mpw->data.raw = (volatile void *)(tx_mlx5_wqe(txq, idx + 1));
-+      } else {
-+              mpw->data.raw = (volatile void *)(mpw->wqe + 1);
-+      }
-+}
-+
-+/**
-+ * Close an Enhanced MPW session.
-+ *
-+ * @param txq
-+ *   Pointer to TX queue structure.
-+ * @param mpw
-+ *   Pointer to MPW session structure.
-+ *
-+ * @return
-+ *   Number of consumed WQEs.
-+ */
-+static inline uint16_t
-+mlx5_empw_close(struct txq *txq, struct mlx5_mpw *mpw)
-+{
-+      uint16_t ret;
-+
-+      /* Store size in multiple of 16 bytes. Control and Ethernet segments
-+       * count as 2.
-+       */
-+      mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | MLX5_WQE_DS(mpw->total_len));
-+      mpw->state = MLX5_MPW_STATE_CLOSED;
-+      ret = (mpw->total_len + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
-+      txq->wqe_ci += ret;
-+      return ret;
-+}
-+
-+/**
-+ * DPDK callback for TX with Enhanced MPW support.
-+ *
-+ * @param dpdk_txq
-+ *   Generic pointer to TX queue structure.
-+ * @param[in] pkts
-+ *   Packets to transmit.
-+ * @param pkts_n
-+ *   Number of packets in array.
-+ *
-+ * @return
-+ *   Number of packets successfully transmitted (<= pkts_n).
-+ */
-+uint16_t
-+mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
-+{
-+      struct txq *txq = (struct txq *)dpdk_txq;
-+      uint16_t elts_head = txq->elts_head;
-+      const unsigned int elts_n = 1 << txq->elts_n;
-+      unsigned int i = 0;
-+      unsigned int j = 0;
-+      unsigned int max_elts;
-+      uint16_t max_wqe;
-+      unsigned int max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;
-+      unsigned int mpw_room = 0;
-+      unsigned int inl_pad = 0;
-+      uint32_t inl_hdr;
-+      struct mlx5_mpw mpw = {
-+              .state = MLX5_MPW_STATE_CLOSED,
-+      };
-+
-+      if (unlikely(!pkts_n))
-+              return 0;
-+      /* Start processing. */
-+      txq_complete(txq);
-+      max_elts = (elts_n - (elts_head - txq->elts_tail));
-+      if (max_elts > elts_n)
-+              max_elts -= elts_n;
-+      /* A CQE slot must always be available. */
-+      assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
-+      max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
-+      if (unlikely(!max_wqe))
-+              return 0;
-+      do {
-+              struct rte_mbuf *buf = *(pkts++);
-+              unsigned int elts_head_next;
-+              uintptr_t addr;
-+              uint64_t naddr;
-+              unsigned int n;
-+              unsigned int do_inline = 0; /* Whether inline is possible. */
-+              uint32_t length;
-+              unsigned int segs_n = buf->nb_segs;
-+              uint32_t cs_flags = 0;
-+
-+              /*
-+               * Make sure there is enough room to store this packet and
-+               * that one ring entry remains unused.
-+               */
-+              assert(segs_n);
-+              if (max_elts - j < segs_n + 1)
-+                      break;
-+              /* Do not bother with large packets MPW cannot handle. */
-+              if (segs_n > MLX5_MPW_DSEG_MAX)
-+                      break;
-+              /* Should we enable HW CKSUM offload. */
-+              if (buf->ol_flags &
-+                  (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
-+                      cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
-+              /* Retrieve packet information. */
-+              length = PKT_LEN(buf);
-+              /* Start new session if:
-+               * - multi-segment packet
-+               * - no space left even for a dseg
-+               * - next packet can be inlined with a new WQE
-+               * - cs_flag differs
-+               * It can't be MLX5_MPW_STATE_OPENED as always have a single
-+               * segmented packet.
-+               */
-+              if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) {
-+                      if ((segs_n != 1) ||
-+                          (inl_pad + sizeof(struct mlx5_wqe_data_seg) >
-+                            mpw_room) ||
-+                          (length <= txq->inline_max_packet_sz &&
-+                           inl_pad + sizeof(inl_hdr) + length >
-+                            mpw_room) ||
-+                          (mpw.wqe->eseg.cs_flags != cs_flags))
-+                              max_wqe -= mlx5_empw_close(txq, &mpw);
-+              }
-+              if (unlikely(mpw.state == MLX5_MPW_STATE_CLOSED)) {
-+                      if (unlikely(segs_n != 1)) {
-+                              /* Fall back to legacy MPW.
-+                               * A MPW session consumes 2 WQEs at most to
-+                               * include MLX5_MPW_DSEG_MAX pointers.
-+                               */
-+                              if (unlikely(max_wqe < 2))
-+                                      break;
-+                              mlx5_mpw_new(txq, &mpw, length);
-+                      } else {
-+                              /* In Enhanced MPW, inline as much as the budget
-+                               * is allowed. The remaining space is to be
-+                               * filled with dsegs. If the title WQEBB isn't
-+                               * padded, it will have 2 dsegs there.
-+                               */
-+                              mpw_room = RTE_MIN(MLX5_WQE_SIZE_MAX,
-+                                          (max_inline ? max_inline :
-+                                           pkts_n * MLX5_WQE_DWORD_SIZE) +
-+                                          MLX5_WQE_SIZE);
-+                              if (unlikely(max_wqe * MLX5_WQE_SIZE <
-+                                            mpw_room))
-+                                      break;
-+                              /* Don't pad the title WQEBB to not waste WQ. */
-+                              mlx5_empw_new(txq, &mpw, 0);
-+                              mpw_room -= mpw.total_len;
-+                              inl_pad = 0;
-+                              do_inline =
-+                                      length <= txq->inline_max_packet_sz &&
-+                                      sizeof(inl_hdr) + length <= mpw_room &&
-+                                      !txq->mpw_hdr_dseg;
-+                      }
-+                      mpw.wqe->eseg.cs_flags = cs_flags;
-+              } else {
-+                      /* Evaluate whether the next packet can be inlined.
-+                       * Inlininig is possible when:
-+                       * - length is less than configured value
-+                       * - length fits for remaining space
-+                       * - not required to fill the title WQEBB with dsegs
-+                       */
-+                      do_inline =
-+                              length <= txq->inline_max_packet_sz &&
-+                              inl_pad + sizeof(inl_hdr) + length <=
-+                               mpw_room &&
-+                              (!txq->mpw_hdr_dseg ||
-+                               mpw.total_len >= MLX5_WQE_SIZE);
-+              }
-+              /* Multi-segment packets must be alone in their MPW. */
-+              assert((segs_n == 1) || (mpw.pkts_n == 0));
-+              if (unlikely(mpw.state == MLX5_MPW_STATE_OPENED)) {
-+#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
-+                      length = 0;
-+#endif
-+                      do {
-+                              volatile struct mlx5_wqe_data_seg *dseg;
-+
-+                              elts_head_next =
-+                                      (elts_head + 1) & (elts_n - 1);
-+                              assert(buf);
-+                              (*txq->elts)[elts_head] = buf;
-+                              dseg = mpw.data.dseg[mpw.pkts_n];
-+                              addr = rte_pktmbuf_mtod(buf, uintptr_t);
-+                              *dseg = (struct mlx5_wqe_data_seg){
-+                                      .byte_count = htonl(DATA_LEN(buf)),
-+                                      .lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
-+                                      .addr = htonll(addr),
-+                              };
-+                              elts_head = elts_head_next;
-+#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
-+                              length += DATA_LEN(buf);
-+#endif
-+                              buf = buf->next;
-+                              ++j;
-+                              ++mpw.pkts_n;
-+                      } while (--segs_n);
-+                      /* A multi-segmented packet takes one MPW session.
-+                       * TODO: Pack more multi-segmented packets if possible.
-+                       */
-+                      mlx5_mpw_close(txq, &mpw);
-+                      if (mpw.pkts_n < 3)
-+                              max_wqe--;
-+                      else
-+                              max_wqe -= 2;
-+              } else if (do_inline) {
-+                      /* Inline packet into WQE. */
-+                      unsigned int max;
-+
-+                      assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED);
-+                      assert(length == DATA_LEN(buf));
-+                      inl_hdr = htonl(length | MLX5_INLINE_SEG);
-+                      addr = rte_pktmbuf_mtod(buf, uintptr_t);
-+                      mpw.data.raw = (volatile void *)
-+                              ((uintptr_t)mpw.data.raw + inl_pad);
-+                      max = tx_mlx5_wq_tailroom(txq,
-+                                      (void *)(uintptr_t)mpw.data.raw);
-+                      /* Copy inline header. */
-+                      mpw.data.raw = (volatile void *)
-+                              mlx5_copy_to_wq(
-+                                        (void *)(uintptr_t)mpw.data.raw,
-+                                        &inl_hdr,
-+                                        sizeof(inl_hdr),
-+                                        (void *)(uintptr_t)txq->wqes,
-+                                        max);
-+                      max = tx_mlx5_wq_tailroom(txq,
-+                                      (void *)(uintptr_t)mpw.data.raw);
-+                      /* Copy packet data. */
-+                      mpw.data.raw = (volatile void *)
-+                              mlx5_copy_to_wq(
-+                                        (void *)(uintptr_t)mpw.data.raw,
-+                                        (void *)addr,
-+                                        length,
-+                                        (void *)(uintptr_t)txq->wqes,
-+                                        max);
-+                      ++mpw.pkts_n;
-+                      mpw.total_len += (inl_pad + sizeof(inl_hdr) + length);
-+                      /* No need to get completion as the entire packet is
-+                       * copied to WQ. Free the buf right away.
-+                       */
-+                      elts_head_next = elts_head;
-+                      rte_pktmbuf_free_seg(buf);
-+                      mpw_room -= (inl_pad + sizeof(inl_hdr) + length);
-+                      /* Add pad in the next packet if any. */
-+                      inl_pad = (((uintptr_t)mpw.data.raw +
-+                                      (MLX5_WQE_DWORD_SIZE - 1)) &
-+                                      ~(MLX5_WQE_DWORD_SIZE - 1)) -
-+                                (uintptr_t)mpw.data.raw;
-+              } else {
-+                      /* No inline. Load a dseg of packet pointer. */
-+                      volatile rte_v128u32_t *dseg;
-+
-+                      assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED);
-+                      assert((inl_pad + sizeof(*dseg)) <= mpw_room);
-+                      assert(length == DATA_LEN(buf));
-+                      if (!tx_mlx5_wq_tailroom(txq,
-+                                      (void *)((uintptr_t)mpw.data.raw
-+                                              + inl_pad)))
-+                              dseg = (volatile void *)txq->wqes;
-+                      else
-+                              dseg = (volatile void *)
-+                                      ((uintptr_t)mpw.data.raw +
-+                                       inl_pad);
-+                      elts_head_next = (elts_head + 1) & (elts_n - 1);
-+                      (*txq->elts)[elts_head] = buf;
-+                      addr = rte_pktmbuf_mtod(buf, uintptr_t);
-+                      for (n = 0; n * RTE_CACHE_LINE_SIZE < length; n++)
-+                              rte_prefetch2((void *)(addr +
-+                                              n * RTE_CACHE_LINE_SIZE));
-+                      naddr = htonll(addr);
-+                      *dseg = (rte_v128u32_t) {
-+                              htonl(length),
-+                              txq_mp2mr(txq, txq_mb2mp(buf)),
-+                              naddr,
-+                              naddr >> 32,
-+                      };
-+                      mpw.data.raw = (volatile void *)(dseg + 1);
-+                      mpw.total_len += (inl_pad + sizeof(*dseg));
-+                      ++j;
-+                      ++mpw.pkts_n;
-+                      mpw_room -= (inl_pad + sizeof(*dseg));
-+                      inl_pad = 0;
-+              }
-+              elts_head = elts_head_next;
-+#ifdef MLX5_PMD_SOFT_COUNTERS
-+              /* Increment sent bytes counter. */
-+              txq->stats.obytes += length;
-+#endif
-+              ++i;
-+      } while (i < pkts_n);
-+      /* Take a shortcut if nothing must be sent. */
-+      if (unlikely(i == 0))
-+              return 0;
-+      /* Check whether completion threshold has been reached. */
-+      if (txq->elts_comp + j >= MLX5_TX_COMP_THRESH ||
-+                      (uint16_t)(txq->wqe_ci - txq->mpw_comp) >=
-+                       (1 << txq->wqe_n) / MLX5_TX_COMP_THRESH_INLINE_DIV) {
-+              volatile struct mlx5_wqe *wqe = mpw.wqe;
-+
-+              /* Request completion on last WQE. */
-+              wqe->ctrl[2] = htonl(8);
-+              /* Save elts_head in unused "immediate" field of WQE. */
-+              wqe->ctrl[3] = elts_head;
-+              txq->elts_comp = 0;
-+              txq->mpw_comp = txq->wqe_ci;
-+              txq->cq_pi++;
-+      } else {
-+              txq->elts_comp += j;
-+      }
-+#ifdef MLX5_PMD_SOFT_COUNTERS
-+      /* Increment sent packets counter. */
-+      txq->stats.opackets += i;
-+#endif
-+      if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED)
-+              mlx5_empw_close(txq, &mpw);
-+      else if (mpw.state == MLX5_MPW_STATE_OPENED)
-+              mlx5_mpw_close(txq, &mpw);
-+      /* Ring QP doorbell. */
-+      mlx5_tx_dbrec(txq, mpw.wqe);
-+      txq->elts_head = elts_head;
-+      return i;
-+}
-+
-+/**
-  * Translate RX completion flags to packet type.
-  *
-  * @param[in] cqe
-diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
-index 0db810c..4a4bd84 100644
---- a/drivers/net/mlx5/mlx5_rxtx.h
-+++ b/drivers/net/mlx5/mlx5_rxtx.h
-@@ -248,17 +248,21 @@ struct txq {
-       uint16_t elts_head; /* Current index in (*elts)[]. */
-       uint16_t elts_tail; /* First element awaiting completion. */
-       uint16_t elts_comp; /* Counter since last completion request. */
-+      uint16_t mpw_comp; /* WQ index since last completion request. */
-       uint16_t cq_ci; /* Consumer index for completion queue. */
-+      uint16_t cq_pi; /* Producer index for completion queue. */
-       uint16_t wqe_ci; /* Consumer index for work queue. */
-       uint16_t wqe_pi; /* Producer index for work queue. */
-       uint16_t elts_n:4; /* (*elts)[] length (in log2). */
-       uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
-       uint16_t wqe_n:4; /* Number of of WQ elements (in log2). */
--      uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
-       uint16_t inline_en:1; /* When set inline is enabled. */
-       uint16_t tso_en:1; /* When set hardware TSO is enabled. */
-       uint16_t tunnel_en:1;
-       /* When set TX offload for tunneled packets are supported. */
-+      uint16_t mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
-+      uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
-+      uint16_t inline_max_packet_sz; /* Max packet size for inlining. */
-       uint32_t qp_num_8s; /* QP number shifted by 8. */
-       volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
-       volatile void *wqes; /* Work queue (use volatile to write into). */
-@@ -329,6 +333,7 @@ uint16_t mlx5_tx_burst_secondary_setup(void *, struct rte_mbuf **, uint16_t);
- uint16_t mlx5_tx_burst(void *, struct rte_mbuf **, uint16_t);
- uint16_t mlx5_tx_burst_mpw(void *, struct rte_mbuf **, uint16_t);
- uint16_t mlx5_tx_burst_mpw_inline(void *, struct rte_mbuf **, uint16_t);
-+uint16_t mlx5_tx_burst_empw(void *, struct rte_mbuf **, uint16_t);
- uint16_t mlx5_rx_burst(void *, struct rte_mbuf **, uint16_t);
- uint16_t removed_tx_burst(void *, struct rte_mbuf **, uint16_t);
- uint16_t removed_rx_burst(void *, struct rte_mbuf **, uint16_t);
-diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
-index 9d0c00f..bbfce75 100644
---- a/drivers/net/mlx5/mlx5_txq.c
-+++ b/drivers/net/mlx5/mlx5_txq.c
-@@ -266,6 +266,7 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
-               struct ibv_exp_cq_attr cq_attr;
-       } attr;
-       enum ibv_exp_query_intf_status status;
-+      unsigned int cqe_n;
-       int ret = 0;
-       if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
-@@ -276,6 +277,8 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
-       (void)conf; /* Thresholds configuration (ignored). */
-       assert(desc > MLX5_TX_COMP_THRESH);
-       tmpl.txq.elts_n = log2above(desc);
-+      if (priv->mps == MLX5_MPW_ENHANCED)
-+              tmpl.txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
-       /* MRs will be registered in mp2mr[] later. */
-       attr.rd = (struct ibv_exp_res_domain_init_attr){
-               .comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
-@@ -294,9 +297,12 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
-               .comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
-               .res_domain = tmpl.rd,
-       };
-+      cqe_n = ((desc / MLX5_TX_COMP_THRESH) - 1) ?
-+              ((desc / MLX5_TX_COMP_THRESH) - 1) : 1;
-+      if (priv->mps == MLX5_MPW_ENHANCED)
-+              cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV;
-       tmpl.cq = ibv_exp_create_cq(priv->ctx,
--                                  (((desc / MLX5_TX_COMP_THRESH) - 1) ?
--                                   ((desc / MLX5_TX_COMP_THRESH) - 1) : 1),
-+                                  cqe_n,
-                                   NULL, NULL, 0, &attr.cq);
-       if (tmpl.cq == NULL) {
-               ret = ENOMEM;
-@@ -340,9 +346,24 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
-               tmpl.txq.max_inline =
-                       ((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
-                        RTE_CACHE_LINE_SIZE);
--              attr.init.cap.max_inline_data =
--                      tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
-               tmpl.txq.inline_en = 1;
-+              /* TSO and MPS can't be enabled concurrently. */
-+              assert(!priv->tso || !priv->mps);
-+              if (priv->mps == MLX5_MPW_ENHANCED) {
-+                      tmpl.txq.inline_max_packet_sz =
-+                              priv->inline_max_packet_sz;
-+                      /* To minimize the size of data set, avoid requesting
-+                       * too large WQ.
-+                       */
-+                      attr.init.cap.max_inline_data =
-+                              ((RTE_MIN(priv->txq_inline,
-+                                        priv->inline_max_packet_sz) +
-+                                (RTE_CACHE_LINE_SIZE - 1)) /
-+                               RTE_CACHE_LINE_SIZE) * RTE_CACHE_LINE_SIZE;
-+              } else {
-+                      attr.init.cap.max_inline_data =
-+                              tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
-+              }
-       }
-       if (priv->tso) {
-               uint16_t max_tso_inline = ((MLX5_MAX_TSO_HEADER +
--- 
-2.7.4
-
diff --git a/dpdk/dpdk-17.05_patches/0001-Revert-net-virtio-remove-redundant-MSI-X-detection.patch b/dpdk/dpdk-17.05_patches/0001-Revert-net-virtio-remove-redundant-MSI-X-detection.patch
deleted file mode 100644 (file)
index 28dc68d..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-From 3a1470e031ff33ac99da33b41dae0e9082d4da78 Mon Sep 17 00:00:00 2001
-From: Damjan Marion <damarion@cisco.com>
-Date: Mon, 15 May 2017 12:27:37 +0200
-Subject: [PATCH] Revert "net/virtio: remove redundant MSI-X detection"
-
-This reverts commit ee1843bd89076c59e50cadbef5c935613f543765.
----
- drivers/net/virtio/virtio_pci.c | 27 +++++++++++++++++++++++++++
- 1 file changed, 27 insertions(+)
-
-diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
-index b7b3d6157..127f25791 100644
---- a/drivers/net/virtio/virtio_pci.c
-+++ b/drivers/net/virtio/virtio_pci.c
-@@ -274,6 +274,32 @@ legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
-               VIRTIO_PCI_QUEUE_NOTIFY);
- }
-+#ifdef RTE_EXEC_ENV_LINUXAPP
-+static int
-+legacy_virtio_has_msix(const struct rte_pci_addr *loc)
-+{
-+      DIR *d;
-+      char dirname[PATH_MAX];
-+
-+      snprintf(dirname, sizeof(dirname),
-+                   "%s/" PCI_PRI_FMT "/msi_irqs", pci_get_sysfs_path(),
-+                   loc->domain, loc->bus, loc->devid, loc->function);
-+
-+      d = opendir(dirname);
-+      if (d)
-+              closedir(d);
-+
-+      return d != NULL;
-+}
-+#else
-+static int
-+legacy_virtio_has_msix(const struct rte_pci_addr *loc __rte_unused)
-+{
-+      /* nic_uio does not enable interrupts, return 0 (false). */
-+      return 0;
-+}
-+#endif
-+
- const struct virtio_pci_ops legacy_ops = {
-       .read_dev_cfg   = legacy_read_dev_config,
-       .write_dev_cfg  = legacy_write_dev_config,
-@@ -694,6 +720,7 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
-       }
-       virtio_hw_internal[hw->port_id].vtpci_ops = &legacy_ops;
-+      hw->use_msix = legacy_virtio_has_msix(&dev->addr);
-       hw->modern   = 0;
-       return 0;
--- 
-2.11.0
-
diff --git a/dpdk/dpdk-17.08_patches/0001-net-bonding-support-for-mlx.patch b/dpdk/dpdk-17.08_patches/0001-net-bonding-support-for-mlx.patch
deleted file mode 100644 (file)
index 13a2ba6..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-diff --git a/drivers/net/bonding/rte_eth_bond_args.c b/drivers/net/bonding/rte_eth_bond_args.c
-index bb634c6..7c65dda 100644
---- a/drivers/net/bonding/rte_eth_bond_args.c
-+++ b/drivers/net/bonding/rte_eth_bond_args.c
-@@ -61,16 +61,6 @@
-       unsigned i;
-       for (i = 0; i < rte_eth_dev_count(); i++) {
--
--              /* Currently populated by rte_eth_copy_pci_info().
--               *
--               * TODO: Once the PCI bus has arrived we should have a better
--               * way to test for being a PCI device or not.
--               */
--              if (rte_eth_devices[i].data->kdrv == RTE_KDRV_UNKNOWN ||
--                  rte_eth_devices[i].data->kdrv == RTE_KDRV_NONE)
--                      continue;
--
-               pci_dev = RTE_ETH_DEV_TO_PCI(&rte_eth_devices[i]);
-               eth_pci_addr = &pci_dev->addr;
-@@ -98,6 +88,16 @@
-       return -1;
- }
-+static inline int
-+pci_addr_cmp(const struct rte_device *dev, const void *_pci_addr)
-+{
-+      struct rte_pci_device *pdev;
-+      const struct rte_pci_addr *paddr = _pci_addr;
-+
-+      pdev = RTE_DEV_TO_PCI(*(struct rte_device **)(void *)&dev);
-+      return rte_eal_compare_pci_addr(&pdev->addr, paddr);
-+}
-+
- /**
-  * Parses a port identifier string to a port id by pci address, then by name,
-  * and finally port id.
-@@ -106,10 +106,23 @@
- parse_port_id(const char *port_str)
- {
-       struct rte_pci_addr dev_addr;
-+      struct rte_bus *pci_bus;
-+      struct rte_device *dev;
-       int port_id;
-+      pci_bus = rte_bus_find_by_name("pci");
-+      if (pci_bus == NULL) {
-+              RTE_LOG(ERR, PMD, "unable to find PCI bus\n");
-+              return -1;
-+      }
-+
-       /* try parsing as pci address, physical devices */
--      if (eal_parse_pci_DomBDF(port_str, &dev_addr) == 0) {
-+      if (pci_bus->parse(port_str, &dev_addr) == 0) {
-+              dev = pci_bus->find_device(NULL, pci_addr_cmp, &dev_addr);
-+              if (dev == NULL) {
-+                      RTE_LOG(ERR, PMD, "unable to find PCI device\n");
-+                      return -1;
-+              }
-               port_id = find_port_id_by_pci_addr(&dev_addr);
-               if (port_id < 0)
-                       return -1;
diff --git a/dpdk/dpdk-17.08_patches/0002-crypto-qat-align-capabilities.patch b/dpdk/dpdk-17.08_patches/0002-crypto-qat-align-capabilities.patch
deleted file mode 100644 (file)
index 8179e5e..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-From f8184af94214f1c76c0ffda45b9de9243aea287c Mon Sep 17 00:00:00 2001
-From: Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>
-Date: Tue, 17 Oct 2017 20:05:59 +0100
-Subject: [PATCH] crypto/qat: align capabilities
-
-Signed-off-by: Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>
----
- drivers/crypto/qat/qat_crypto_capabilities.h | 28 ++++++++++++++--------------
- 1 file changed, 14 insertions(+), 14 deletions(-)
-
-diff --git a/drivers/crypto/qat/qat_crypto_capabilities.h b/drivers/crypto/qat/qat_crypto_capabilities.h
-index d8d3fa1..00f8056 100644
---- a/drivers/crypto/qat/qat_crypto_capabilities.h
-+++ b/drivers/crypto/qat/qat_crypto_capabilities.h
-@@ -48,9 +48,9 @@
-                                       .increment = 1                  \
-                               },                                      \
-                               .digest_size = {                        \
--                                      .min = 20,                      \
-+                                      .min = 12,                      \
-                                       .max = 20,                      \
--                                      .increment = 0                  \
-+                                      .increment = 1                  \
-                               },                                      \
-                               .iv_size = { 0 }                        \
-                       }, }                                            \
-@@ -69,9 +69,9 @@
-                                       .increment = 1                  \
-                               },                                      \
-                               .digest_size = {                        \
--                                      .min = 28,                      \
-+                                      .min = 14,                      \
-                                       .max = 28,                      \
--                                      .increment = 0                  \
-+                                      .increment = 1                  \
-                               },                                      \
-                               .iv_size = { 0 }                        \
-                       }, }                                            \
-@@ -90,9 +90,9 @@
-                                       .increment = 1                  \
-                               },                                      \
-                               .digest_size = {                        \
--                                      .min = 32,                      \
-+                                      .min = 16,                      \
-                                       .max = 32,                      \
--                                      .increment = 0                  \
-+                                      .increment = 1                  \
-                               },                                      \
-                               .iv_size = { 0 }                        \
-                       }, }                                            \
-@@ -111,9 +111,9 @@
-                                       .increment = 1                  \
-                               },                                      \
-                               .digest_size = {                        \
--                                      .min = 48,                      \
-+                                      .min = 24,                      \
-                                       .max = 48,                      \
--                                      .increment = 0                  \
-+                                      .increment = 1                  \
-                               },                                      \
-                               .iv_size = { 0 }                        \
-                       }, }                                            \
-@@ -132,9 +132,9 @@
-                                       .increment = 1                  \
-                               },                                      \
-                               .digest_size = {                        \
--                                      .min = 64,                      \
-+                                      .min = 32,                      \
-                                       .max = 64,                      \
--                                      .increment = 0                  \
-+                                      .increment = 1                  \
-                               },                                      \
-                               .iv_size = { 0 }                        \
-                       }, }                                            \
-@@ -153,9 +153,9 @@
-                                       .increment = 1                  \
-                               },                                      \
-                               .digest_size = {                        \
--                                      .min = 16,                      \
-+                                      .min = 12,                      \
-                                       .max = 16,                      \
--                                      .increment = 0                  \
-+                                      .increment = 1                  \
-                               },                                      \
-                               .iv_size = { 0 }                        \
-                       }, }                                            \
-@@ -174,9 +174,9 @@
-                                       .increment = 0                  \
-                               },                                      \
-                               .digest_size = {                        \
--                                      .min = 16,                      \
-+                                      .min = 12,                      \
-                                       .max = 16,                      \
--                                      .increment = 0                  \
-+                                      .increment = 1                  \
-                               },                                      \
-                               .aad_size = { 0 },                      \
-                               .iv_size = { 0 }                        \
--- 
-2.9.5
-
diff --git a/dpdk/dpdk-17.11_patches/0001-ena-pmd-no-tx-flags.patch b/dpdk/dpdk-17.11_patches/0001-ena-pmd-no-tx-flags.patch
deleted file mode 100644 (file)
index 97dbadd..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
-index 22db895..6f982f6 100644
---- a/drivers/net/ena/ena_ethdev.c
-+++ b/drivers/net/ena/ena_ethdev.c
-@@ -261,16 +261,6 @@ static inline void ena_rx_mbuf_prepare(struct rte_mbuf *mbuf,
- {
-       uint64_t ol_flags = 0;
--      if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP)
--              ol_flags |= PKT_TX_TCP_CKSUM;
--      else if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)
--              ol_flags |= PKT_TX_UDP_CKSUM;
--
--      if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4)
--              ol_flags |= PKT_TX_IPV4;
--      else if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV6)
--              ol_flags |= PKT_TX_IPV6;
--
-       if (unlikely(ena_rx_ctx->l4_csum_err))
-               ol_flags |= PKT_RX_L4_CKSUM_BAD;
-       if (unlikely(ena_rx_ctx->l3_csum_err))
diff --git a/dpdk/dpdk-18.05_patches/0001-i40evf-don-t-reset-device_info-data.patch b/dpdk/dpdk-18.05_patches/0001-i40evf-don-t-reset-device_info-data.patch
new file mode 100644 (file)
index 0000000..b737461
--- /dev/null
@@ -0,0 +1,28 @@
+From 65a8641604212d58defd71491c900d84d662a086 Mon Sep 17 00:00:00 2001
+From: Damjan Marion <damarion@cisco.com>
+Date: Wed, 6 Jun 2018 21:57:58 +0200
+Subject: [PATCH] i40evf: don't reset device_info data
+
+At this point valid data is already set by rte_eth_dev_info_get(). The
+memset zeroes the device field, so the consumer cannot retrieve PCI data.
+
+Signed-off-by: Damjan Marion <damarion@cisco.com>
+---
+ drivers/net/i40e/i40e_ethdev_vf.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c
+index 804e44530..86b38d202 100644
+--- a/drivers/net/i40e/i40e_ethdev_vf.c
++++ b/drivers/net/i40e/i40e_ethdev_vf.c
+@@ -2182,7 +2182,6 @@ i40evf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+ {
+       struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+-      memset(dev_info, 0, sizeof(*dev_info));
+       dev_info->max_rx_queues = vf->vsi_res->num_queue_pairs;
+       dev_info->max_tx_queues = vf->vsi_res->num_queue_pairs;
+       dev_info->min_rx_bufsize = I40E_BUF_SIZE_MIN;
+-- 
+2.17.1
+
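For context on why the removed memset matters: per the patch rationale
above, DPDK 18.05 fills in dev_info->device before invoking the PMD's
dev_infos_get callback, so a consumer recovers PCI details from the
generic device pointer. A minimal sketch (illustrative, not VPP code;
error handling omitted):

#include <rte_ethdev.h>
#include <rte_bus_pci.h>

/* Sketch: recover the PCI address of a port on DPDK 18.05. If the PMD
 * memsets dev_info in its callback, dev_info.device is NULL again and
 * the PCI address is lost. */
static int
port_pci_addr (uint16_t port_id, struct rte_pci_addr *addr)
{
  struct rte_eth_dev_info dev_info;

  rte_eth_dev_info_get (port_id, &dev_info);
  if (dev_info.device == NULL)
    return -1;   /* vdev, or a PMD that reset dev_info */
  *addr = RTE_DEV_TO_PCI (dev_info.device)->addr;
  return 0;
}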
index 31e8eee..6afd20e 100644 (file)
@@ -463,22 +463,26 @@ dpdk_pool_create (vlib_main_t * vm, u8 * pool_name, u32 elt_size,
   clib_error_t *error = 0;
   u32 size, obj_size;
   i32 ret;
+  uword i;
 
   obj_size = rte_mempool_calc_obj_size (elt_size, 0, 0);
+
+#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
   size = rte_mempool_xmem_size (num_elts, obj_size, 21, 0);
+#else
+  size = rte_mempool_calc_mem_size_helper (num_elts, obj_size, 21);
+#endif
 
-  error =
-    vlib_physmem_region_alloc (vm, (char *) pool_name, size, numa,
-                              VLIB_PHYSMEM_F_HUGETLB | VLIB_PHYSMEM_F_SHARED,
-                              pri);
+  error = vlib_physmem_region_alloc (vm, (char *) pool_name, size, numa,
+                                    VLIB_PHYSMEM_F_HUGETLB |
+                                    VLIB_PHYSMEM_F_SHARED, pri);
   if (error)
     return error;
 
   pr = vlib_physmem_get_region (vm, pri[0]);
 
-  mp =
-    rte_mempool_create_empty ((char *) pool_name, num_elts, elt_size,
-                             512, pool_priv_size, numa, 0);
+  mp = rte_mempool_create_empty ((char *) pool_name, num_elts, elt_size,
+                                512, pool_priv_size, numa, 0);
   if (!mp)
     return clib_error_return (0, "failed to create %s", pool_name);
 
@@ -490,13 +494,16 @@ dpdk_pool_create (vlib_main_t * vm, u8 * pool_name, u32 elt_size,
   priv.mbp_priv.mbuf_priv_size = VLIB_BUFFER_HDR_SIZE;
   rte_pktmbuf_pool_init (mp, &priv);
 
-  ret =
-    rte_mempool_populate_iova_tab (mp, pr->mem, pr->page_table, pr->n_pages,
-                                  pr->log2_page_size, NULL, NULL);
-  if (ret != (i32) mp->size)
+  for (i = 0; i < pr->n_pages; i++)
     {
-      rte_mempool_free (mp);
-      return clib_error_return (0, "failed to populate %s", pool_name);
+      size_t page_size = 1 << pr->log2_page_size;
+      ret = rte_mempool_populate_iova (mp, ((char *) pr->mem) + i * page_size,
+                                      pr->page_table[i], page_size, 0, 0);
+      if (ret < 0)
+       {
+         rte_mempool_free (mp);
+         return clib_error_return (0, "failed to populate %s", pool_name);
+       }
     }
 
   _mp[0] = mp;
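A note on the buffer.c change above: rte_mempool_populate_iova_tab() is
deprecated in DPDK 18.05, while rte_mempool_populate_iova() (available
since 17.11) takes one virtually contiguous chunk at a time and returns
the number of objects placed in it or a negative errno, hence the
per-page loop with a "ret < 0" check instead of comparing one grand
total against mp->size. A standalone sketch of the same idiom, assuming
a region of n_pages pages and a per-page IOVA table (names are
illustrative stand-ins for the vlib physmem region fields):

#include <rte_mempool.h>

static int
populate_per_page (struct rte_mempool *mp, void *mem, rte_iova_t *iova,
                   int n_pages, int log2_page_size)
{
  size_t page_size = (size_t) 1 << log2_page_size;
  int i, ret;

  for (i = 0; i < n_pages; i++)
    {
      /* ret is the number of objects that fit in this page */
      ret = rte_mempool_populate_iova (mp, (char *) mem + i * page_size,
                                       iova[i], page_size, NULL, NULL);
      if (ret < 0)
        return ret;
    }
  return 0;
}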
index 2530858..2a49771 100644 (file)
@@ -46,6 +46,7 @@ get_hqos (u32 hw_if_index, u32 subport_id, dpdk_device_t ** xd,
   dpdk_main_t *dm = &dpdk_main;
   vnet_hw_interface_t *hw;
   struct rte_eth_dev_info dev_info;
+  struct rte_pci_device *pci_dev;
   uword *p = 0;
   clib_error_t *error = NULL;
 
@@ -66,14 +67,21 @@ get_hqos (u32 hw_if_index, u32 subport_id, dpdk_device_t ** xd,
   *xd = vec_elt_at_index (dm->devices, hw->dev_instance);
 
   rte_eth_dev_info_get ((*xd)->port_id, &dev_info);
-  if (dev_info.pci_dev)
+
+#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
+  pci_dev = dev_info.pci_dev;
+#else
+  pci_dev = RTE_DEV_TO_PCI (dev_info.device);
+#endif
+
+  if (pci_dev)
     {                          /* bonded interface has no pci info */
       vlib_pci_addr_t pci_addr;
 
-      pci_addr.domain = dev_info.pci_dev->addr.domain;
-      pci_addr.bus = dev_info.pci_dev->addr.bus;
-      pci_addr.slot = dev_info.pci_dev->addr.devid;
-      pci_addr.function = dev_info.pci_dev->addr.function;
+      pci_addr.domain = pci_dev->addr.domain;
+      pci_addr.bus = pci_dev->addr.bus;
+      pci_addr.slot = pci_dev->addr.devid;
+      pci_addr.function = pci_dev->addr.function;
 
       p =
        hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
@@ -1218,6 +1226,7 @@ set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input,
 
   /* Device specific data */
   struct rte_eth_dev_info dev_info;
+  struct rte_pci_device *pci_dev;
   dpdk_device_config_t *devconf = 0;
   vnet_hw_interface_t *hw;
   dpdk_device_t *xd;
@@ -1284,14 +1293,21 @@ set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input,
   xd = vec_elt_at_index (dm->devices, hw->dev_instance);
 
   rte_eth_dev_info_get (xd->port_id, &dev_info);
-  if (dev_info.pci_dev)
+
+#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
+  pci_dev = dev_info.pci_dev;
+#else
+  pci_dev = RTE_DEV_TO_PCI (dev_info.device);
+#endif
+
+  if (pci_dev)
     {                          /* bonded interface has no pci info */
       vlib_pci_addr_t pci_addr;
 
-      pci_addr.domain = dev_info.pci_dev->addr.domain;
-      pci_addr.bus = dev_info.pci_dev->addr.bus;
-      pci_addr.slot = dev_info.pci_dev->addr.devid;
-      pci_addr.function = dev_info.pci_dev->addr.function;
+      pci_addr.domain = pci_dev->addr.domain;
+      pci_addr.bus = pci_dev->addr.bus;
+      pci_addr.slot = pci_dev->addr.devid;
+      pci_addr.function = pci_dev->addr.function;
 
       p =
        hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
@@ -1443,6 +1459,7 @@ show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input,
   u32 hw_if_index = (u32) ~ 0;
   u32 profile_id, subport_id, i;
   struct rte_eth_dev_info dev_info;
+  struct rte_pci_device *pci_dev;
   dpdk_device_config_t *devconf = 0;
   vlib_thread_registration_t *tr;
   uword *p = 0;
@@ -1475,14 +1492,21 @@ show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input,
   xd = vec_elt_at_index (dm->devices, hw->dev_instance);
 
   rte_eth_dev_info_get (xd->port_id, &dev_info);
-  if (dev_info.pci_dev)
+
+#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
+  pci_dev = dev_info.pci_dev;
+#else
+  pci_dev = RTE_DEV_TO_PCI (dev_info.device);
+#endif
+
+  if (pci_dev)
     {                          /* bonded interface has no pci info */
       vlib_pci_addr_t pci_addr;
 
-      pci_addr.domain = dev_info.pci_dev->addr.domain;
-      pci_addr.bus = dev_info.pci_dev->addr.bus;
-      pci_addr.slot = dev_info.pci_dev->addr.devid;
-      pci_addr.function = dev_info.pci_dev->addr.function;
+      pci_addr.domain = pci_dev->addr.domain;
+      pci_addr.bus = pci_dev->addr.bus;
+      pci_addr.slot = pci_dev->addr.devid;
+      pci_addr.function = pci_dev->addr.function;
 
       p =
        hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
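The #if-gated pci_dev extraction above recurs in cli.c, format.c and
init.c; a hedged sketch of a helper that could consolidate it
(dpdk_dev_info_to_pci is an illustrative name, not an existing VPP
symbol). Note the NULL guard: RTE_DEV_TO_PCI is container_of pointer
arithmetic, so applying it to a NULL device does not yield NULL.

#include <rte_version.h>
#include <rte_ethdev.h>
#include <rte_bus_pci.h>

static inline struct rte_pci_device *
dpdk_dev_info_to_pci (struct rte_eth_dev_info *info)
{
#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
  return info->pci_dev;   /* field dropped in DPDK 18.05 */
#else
  /* generic bus device; NULL for ports without one (e.g. bonding) */
  return info->device ? RTE_DEV_TO_PCI (info->device) : 0;
#endif
}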
index f10b00e..4b8e3be 100644 (file)
@@ -163,6 +163,7 @@ format_dpdk_device_name (u8 * s, va_list * args)
   char *device_name;
   u32 i = va_arg (*args, u32);
   struct rte_eth_dev_info dev_info;
+  struct rte_pci_device *pci_dev;
   u8 *ret;
 
   if (dm->conf->interface_name_format_decimal)
@@ -246,12 +247,15 @@ format_dpdk_device_name (u8 * s, va_list * args)
     }
 
   rte_eth_dev_info_get (i, &dev_info);
-
-  if (dev_info.pci_dev &&
-      dm->devices[i].port_type != VNET_DPDK_PORT_TYPE_FAILSAFE)
-    ret = format (s, devname_format, device_name, dev_info.pci_dev->addr.bus,
-                 dev_info.pci_dev->addr.devid,
-                 dev_info.pci_dev->addr.function);
+#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
+  pci_dev = dev_info.pci_dev;
+#else
+  pci_dev = RTE_DEV_TO_PCI (dev_info.device);
+#endif
+
+  if (pci_dev && dm->devices[i].port_type != VNET_DPDK_PORT_TYPE_FAILSAFE)
+    ret = format (s, devname_format, device_name, pci_dev->addr.bus,
+                 pci_dev->addr.devid, pci_dev->addr.function);
   else
     ret = format (s, "%s%d", device_name, dm->devices[i].port_id);
 
@@ -507,7 +511,11 @@ format_dpdk_device (u8 * s, va_list * args)
       retval = rte_eth_dev_rss_hash_conf_get (xd->port_id, &rss_conf);
       if (retval < 0)
        clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval);
+#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
       pci = di.pci_dev;
+#else
+      pci = RTE_DEV_TO_PCI (di.device);
+#endif
 
       if (pci)
        s =
index ceaa5bb..2232b89 100644 (file)
@@ -227,7 +227,12 @@ dpdk_lib_init (dpdk_main_t * dm)
   vec_validate_aligned (dm->devices_by_hqos_cpu, tm->n_vlib_mains - 1,
                        CLIB_CACHE_LINE_BYTES);
 
+#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
   nports = rte_eth_dev_count ();
+#else
+  nports = rte_eth_dev_count_avail ();
+#endif
+
   if (nports < 1)
     {
       dpdk_log_notice ("DPDK drivers found no ports...");
@@ -260,19 +265,36 @@ dpdk_lib_init (dpdk_main_t * dm)
       u8 addr[6];
       u8 vlan_strip = 0;
       struct rte_eth_dev_info dev_info;
+      struct rte_pci_device *pci_dev;
       struct rte_eth_link l;
       dpdk_device_config_t *devconf = 0;
       vlib_pci_addr_t pci_addr;
       uword *p = 0;
 
+      if (!rte_eth_dev_is_valid_port (i))
+       continue;
+
       rte_eth_link_get_nowait (i, &l);
       rte_eth_dev_info_get (i, &dev_info);
-      if (dev_info.pci_dev)    /* bonded interface has no pci info */
+
+#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
+      pci_dev = dev_info.pci_dev;
+#else
+      if (dev_info.device == 0)
+       {
+         clib_warning ("DPDK bug: missing device info. Skipping %s device",
+                       dev_info.driver_name);
+         continue;
+       }
+      pci_dev = RTE_DEV_TO_PCI (dev_info.device);
+#endif
+
+      if (pci_dev)     /* bonded interface has no pci info */
        {
-         pci_addr.domain = dev_info.pci_dev->addr.domain;
-         pci_addr.bus = dev_info.pci_dev->addr.bus;
-         pci_addr.slot = dev_info.pci_dev->addr.devid;
-         pci_addr.function = dev_info.pci_dev->addr.function;
+         pci_addr.domain = pci_dev->addr.domain;
+         pci_addr.bus = pci_dev->addr.bus;
+         pci_addr.slot = pci_dev->addr.devid;
+         pci_addr.function = pci_dev->addr.function;
          p =
            hash_get (dm->conf->device_config_index_by_pci_addr,
                      pci_addr.as_u32);
@@ -290,12 +312,18 @@ dpdk_lib_init (dpdk_main_t * dm)
       xd->cpu_socket = (i8) rte_eth_dev_socket_id (i);
 
       /* Handle interface naming for devices with multiple ports sharing same PCI ID */
-      if (dev_info.pci_dev)
+      if (pci_dev)
        {
          struct rte_eth_dev_info di = { 0 };
          rte_eth_dev_info_get (i + 1, &di);
-         if (di.pci_dev && pci_addr.as_u32 != last_pci_addr.as_u32 &&
-             memcmp (&dev_info.pci_dev->addr, &di.pci_dev->addr,
+#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
+         struct rte_pci_device *next_pci_dev = di.pci_dev;
+#else
+         struct rte_pci_device *next_pci_dev =
+           di.device ? RTE_DEV_TO_PCI (di.device) : 0;
+#endif
+         if (next_pci_dev && pci_addr.as_u32 != last_pci_addr.as_u32 &&
+             memcmp (&pci_dev->addr, &next_pci_dev->addr,
                      sizeof (struct rte_pci_addr)) == 0)
            {
              xd->interface_name_suffix = format (0, "0");
@@ -358,8 +386,8 @@ dpdk_lib_init (dpdk_main_t * dm)
       xd->flags |= DPDK_DEVICE_FLAG_PMD;
 
       /* workaround for drivers not setting driver_name */
-      if ((!dev_info.driver_name) && (dev_info.pci_dev))
-       dev_info.driver_name = dev_info.pci_dev->driver->driver.name;
+      if ((!dev_info.driver_name) && (pci_dev))
+       dev_info.driver_name = pci_dev->driver->driver.name;
 
       ASSERT (dev_info.driver_name);
 
@@ -1533,7 +1561,11 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
      *  2. Set up info and register slave link state change callback handling.
      *  3. Set up info for bond interface related CLI support.
      */
+#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
     int nports = rte_eth_dev_count ();
+#else
+    int nports = rte_eth_dev_count_avail ();
+#endif
     if (nports > 0)
       {
        /* *INDENT-OFF* */
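Two related 18.05 changes appear in init.c above: rte_eth_dev_count()
is deprecated in favour of rte_eth_dev_count_avail(), and port ids may
now be sparse, which is why each id is checked with
rte_eth_dev_is_valid_port() before use. A version-gated iteration
sketch (illustrative, not VPP code):

#include <rte_version.h>
#include <rte_ethdev.h>

static void
for_each_eth_port (void (*fn) (uint16_t port_id))
{
  uint16_t i;

#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
  for (i = 0; i < rte_eth_dev_count (); i++)
    fn (i);
#else
  /* RTE_ETH_FOREACH_DEV skips invalid port ids */
  RTE_ETH_FOREACH_DEV (i)
    fn (i);
#endif
}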
index 268f27b..731613b 100644 (file)
@@ -873,7 +873,12 @@ crypto_create_session_h_pool (vlib_main_t * vm, u8 numa)
 
   pool_name = format (0, "session_h_pool_numa%u%c", numa, 0);
 
+
+#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
   elt_size = rte_cryptodev_get_header_session_size ();
+#else
+  elt_size = rte_cryptodev_sym_get_header_session_size ();
+#endif
 
   error =
     dpdk_pool_create (vm, pool_name, elt_size, DPDK_CRYPTO_NB_SESS_OBJS,
@@ -912,7 +917,12 @@ crypto_create_session_drv_pool (vlib_main_t * vm, crypto_dev_t * dev)
     return NULL;
 
   pool_name = format (0, "session_drv%u_pool_numa%u%c", dev->drv_id, numa, 0);
+
+#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
   elt_size = rte_cryptodev_get_private_session_size (dev->id);
+#else
+  elt_size = rte_cryptodev_sym_get_private_session_size (dev->id);
+#endif
 
   error =
     dpdk_pool_create (vm, pool_name, elt_size, DPDK_CRYPTO_NB_SESS_OBJS,
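The 18.05 cryptodev API renames the session-size getters with a sym_
prefix. A thin wrapper in the style of the other shims in this change
(a minimal sketch; symmetric sessions only, helper name is
illustrative):

#include <rte_version.h>
#include <rte_cryptodev.h>

static unsigned int
session_header_elt_size (void)
{
#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
  return rte_cryptodev_get_header_session_size ();
#else
  return rte_cryptodev_sym_get_header_session_size ();
#endif
}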