New upstream version 17.11.3
author      Christian Ehrhardt <christian.ehrhardt@canonical.com>
            Mon, 18 Jun 2018 11:36:41 +0000 (13:36 +0200)
committer   Christian Ehrhardt <christian.ehrhardt@canonical.com>
            Tue, 19 Jun 2018 06:22:47 +0000 (08:22 +0200)
Change-Id: I2b901aaf362a2b94195665cc890d824b2c3a620e
Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com>
166 files changed:
app/proc_info/main.c
app/test-crypto-perf/cperf_test_common.c
app/test-crypto-perf/cperf_test_vector_parsing.c
app/test-crypto-perf/main.c
app/test-pmd/cmdline.c
app/test-pmd/cmdline_flow.c
app/test-pmd/config.c
app/test-pmd/parameters.c
app/test-pmd/testpmd.c
app/test-pmd/testpmd.h
config/defconfig_i686-native-linuxapp-icc
doc/guides/nics/features/mlx5.ini
doc/guides/nics/mlx5.rst
doc/guides/nics/nfp.rst
doc/guides/rel_notes/release_17_11.rst
drivers/bus/dpaa/base/fman/fman.c
drivers/bus/fslmc/fslmc_bus.c
drivers/bus/pci/pci_common.c
drivers/bus/pci/rte_bus_pci.h
drivers/bus/vdev/vdev.c
drivers/crypto/dpaa2_sec/dpaa2_sec_priv.h
drivers/crypto/scheduler/rte_cryptodev_scheduler.c
drivers/crypto/scheduler/rte_cryptodev_scheduler.h
drivers/crypto/scheduler/scheduler_multicore.c
drivers/crypto/scheduler/scheduler_pkt_size_distr.c
drivers/crypto/scheduler/scheduler_pmd.c
drivers/crypto/scheduler/scheduler_pmd_ops.c
drivers/crypto/scheduler/scheduler_pmd_private.h
drivers/crypto/zuc/rte_zuc_pmd.c
drivers/event/dpaa2/dpaa2_eventdev.c
drivers/event/dpaa2/dpaa2_eventdev.h
drivers/net/af_packet/rte_eth_af_packet.c
drivers/net/bnx2x/bnx2x.c
drivers/net/bnx2x/elink.c
drivers/net/bnxt/bnxt_ethdev.c
drivers/net/bnxt/bnxt_filter.c
drivers/net/bnxt/bnxt_hwrm.c
drivers/net/bnxt/bnxt_nvm_defs.h
drivers/net/bnxt/bnxt_rxq.c
drivers/net/bnxt/bnxt_rxr.c
drivers/net/bnxt/bnxt_rxr.h
drivers/net/bonding/rte_eth_bond_api.c
drivers/net/bonding/rte_eth_bond_args.c
drivers/net/bonding/rte_eth_bond_pmd.c
drivers/net/bonding/rte_eth_bond_private.h
drivers/net/bonding/rte_pmd_bond_version.map
drivers/net/dpaa/dpaa_ethdev.c
drivers/net/dpaa2/dpaa2_ethdev.c
drivers/net/enic/base/vnic_dev.c
drivers/net/enic/base/vnic_dev.h
drivers/net/enic/enic_main.c
drivers/net/failsafe/failsafe.c
drivers/net/failsafe/failsafe_ether.c
drivers/net/failsafe/failsafe_ops.c
drivers/net/failsafe/failsafe_private.h
drivers/net/i40e/base/i40e_register.h
drivers/net/i40e/i40e_ethdev.c
drivers/net/i40e/i40e_ethdev.h
drivers/net/i40e/i40e_flow.c
drivers/net/i40e/rte_pmd_i40e.c
drivers/net/ixgbe/ixgbe_ethdev.c
drivers/net/kni/rte_eth_kni.c
drivers/net/liquidio/lio_ethdev.c
drivers/net/mlx4/mlx4.c
drivers/net/mlx4/mlx4.h
drivers/net/mlx4/mlx4_ethdev.c
drivers/net/mlx4/mlx4_flow.c
drivers/net/mlx4/mlx4_flow.h
drivers/net/mlx4/mlx4_intr.c
drivers/net/mlx4/mlx4_rxq.c
drivers/net/mlx4/mlx4_rxtx.c
drivers/net/mlx4/mlx4_rxtx.h
drivers/net/mlx5/mlx5.c
drivers/net/mlx5/mlx5.h
drivers/net/mlx5/mlx5_defs.h
drivers/net/mlx5/mlx5_ethdev.c
drivers/net/mlx5/mlx5_flow.c
drivers/net/mlx5/mlx5_mac.c
drivers/net/mlx5/mlx5_mr.c
drivers/net/mlx5/mlx5_rss.c
drivers/net/mlx5/mlx5_rxmode.c
drivers/net/mlx5/mlx5_rxq.c
drivers/net/mlx5/mlx5_rxtx.c
drivers/net/mlx5/mlx5_rxtx.h
drivers/net/mlx5/mlx5_rxtx_vec.c
drivers/net/mlx5/mlx5_rxtx_vec_neon.h
drivers/net/mlx5/mlx5_rxtx_vec_sse.h
drivers/net/mlx5/mlx5_socket.c
drivers/net/mlx5/mlx5_stats.c
drivers/net/mlx5/mlx5_trigger.c
drivers/net/mlx5/mlx5_txq.c
drivers/net/mlx5/mlx5_utils.h
drivers/net/mlx5/mlx5_vlan.c
drivers/net/mrvl/mrvl_ethdev.c
drivers/net/nfp/nfp_net.c
drivers/net/nfp/nfp_nfpu.c
drivers/net/null/rte_eth_null.c
drivers/net/octeontx/octeontx_ethdev.c
drivers/net/pcap/rte_eth_pcap.c
drivers/net/qede/base/bcm_osal.c
drivers/net/qede/base/ecore.h
drivers/net/qede/base/ecore_dcbx.c
drivers/net/qede/base/ecore_dcbx_api.h
drivers/net/qede/base/ecore_dev.c
drivers/net/qede/base/ecore_hsi_common.h
drivers/net/qede/base/ecore_l2.c
drivers/net/qede/base/ecore_l2_api.h
drivers/net/qede/base/ecore_sp_commands.c
drivers/net/qede/base/ecore_sriov.c
drivers/net/qede/base/ecore_vf.c
drivers/net/qede/base/ecore_vfpf_if.h
drivers/net/qede/qede_ethdev.c
drivers/net/qede/qede_ethdev.h
drivers/net/qede/qede_fdir.c
drivers/net/qede/qede_rxtx.c
drivers/net/ring/rte_eth_ring.c
drivers/net/sfc/base/efx_port.c
drivers/net/sfc/efsys.h
drivers/net/sfc/sfc.c
drivers/net/sfc/sfc_ev.c
drivers/net/sfc/sfc_flow.c
drivers/net/sfc/sfc_rx.c
drivers/net/softnic/rte_eth_softnic.c
drivers/net/szedata2/rte_eth_szedata2.c
drivers/net/tap/rte_eth_tap.c
drivers/net/vhost/rte_eth_vhost.c
drivers/net/vmxnet3/vmxnet3_ethdev.c
drivers/net/vmxnet3/vmxnet3_rxtx.c
examples/exception_path/main.c
examples/l2fwd-crypto/main.c
examples/performance-thread/common/lthread.c
examples/performance-thread/common/lthread_api.h
examples/performance-thread/l3fwd-thread/main.c
examples/performance-thread/pthread_shim/main.c
examples/performance-thread/pthread_shim/pthread_shim.c
examples/quota_watermark/qw/main.c
lib/librte_cryptodev/rte_cryptodev.c
lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
lib/librte_eal/common/include/rte_version.h
lib/librte_eal/linuxapp/eal/eal_memory.c
lib/librte_eal/linuxapp/eal/eal_vfio.c
lib/librte_eal/linuxapp/kni/compat.h
lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
lib/librte_ether/rte_ethdev.c
lib/librte_ether/rte_ethdev.h
lib/librte_ether/rte_ethdev_pci.h
lib/librte_hash/rte_cuckoo_hash.c
lib/librte_ip_frag/rte_ipv4_reassembly.c
lib/librte_ip_frag/rte_ipv6_reassembly.c
lib/librte_mbuf/rte_mbuf.h
lib/librte_mempool/rte_mempool.c
lib/librte_net/rte_ip.h
lib/librte_pci/rte_pci_version.map
lib/librte_vhost/fd_man.c
lib/librte_vhost/fd_man.h
lib/librte_vhost/socket.c
lib/librte_vhost/vhost_user.c
lib/librte_vhost/virtio_net.c
mk/rte.sdkconfig.mk
pkg/dpdk.spec
test/test/test_cryptodev.c
test/test/test_distributor_perf.c
test/test/test_eal_flags.c
test/test/test_mempool.c
test/test/test_reorder.c
test/test/test_table_pipeline.c

diff --git a/app/proc_info/main.c b/app/proc_info/main.c
index 64fbbd0..875d91e 100644
@@ -188,7 +188,7 @@ proc_info_preparse_args(int argc, char **argv)
                                proc_info_usage(prgname);
                                return -1;
                        }
-                       strncpy(host_id, argv[i+1], sizeof(host_id));
+                       snprintf(host_id, sizeof(host_id), "%s", argv[i+1]);
                }
        }
 
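The hunk above swaps strncpy() for snprintf(). A minimal standalone sketch (not from the tree) of the behavior it guards against:

#include <stdio.h>
#include <string.h>

int main(void)
{
        char host_id[8];

        /* strncpy() does not NUL-terminate when the source fills the buffer */
        strncpy(host_id, "a-very-long-host-id", sizeof(host_id));
        /* snprintf() always terminates, truncating cleanly if needed */
        snprintf(host_id, sizeof(host_id), "%s", "a-very-long-host-id");
        printf("%s\n", host_id); /* prints "a-very-" */
        return 0;
}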
diff --git a/app/test-crypto-perf/cperf_test_common.c b/app/test-crypto-perf/cperf_test_common.c
index 328744e..9b08b3f 100644
@@ -119,7 +119,7 @@ mempool_obj_init(struct rte_mempool *mp,
        op->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
        op->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
        op->sess_type = RTE_CRYPTO_OP_WITH_SESSION;
-       op->phys_addr = rte_mem_virt2phy(obj);
+       op->phys_addr = rte_mem_virt2iova(obj);
        op->mempool = mp;
 
        /* Set source buffer */
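The change from rte_mem_virt2phy() to rte_mem_virt2iova() matters in IOVA-as-VA mode, where the address a device uses for DMA is the virtual address itself and a physical address may not even be obtainable (e.g. for unprivileged VFIO users, where rte_mem_virt2phy() can yield RTE_BAD_PHYS_ADDR). A hedged sketch of the intended use, assuming a running EAL; obj_dma_addr() is an illustrative helper, not a DPDK API:

#include <rte_memory.h>

/* Return the address a device should use to reach obj: the IOVA.
 * In IOVA-as-VA mode this is the VA; otherwise the physical address. */
static rte_iova_t
obj_dma_addr(const void *obj)
{
        return rte_mem_virt2iova(obj);
}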
diff --git a/app/test-crypto-perf/cperf_test_vector_parsing.c b/app/test-crypto-perf/cperf_test_vector_parsing.c
index d4736f9..56df4e7 100644
@@ -534,8 +534,7 @@ parse_file(struct cperf_test_vector *vector, struct cperf_options *opts)
                if (entry == NULL)
                        return -1;
 
-               memset(entry, 0, strlen(line) + 1);
-               strncpy(entry, line, strlen(line));
+               strcpy(entry, line);
 
                /* check if entry ends with , or = */
                if (entry[strlen(entry) - 1] == ','
@@ -552,8 +551,8 @@ parse_file(struct cperf_test_vector *vector, struct cperf_options *opts)
                                if (entry_extended == NULL)
                                        goto err;
                                entry = entry_extended;
-
-                               strncat(entry, line, strlen(line));
+                               /* entry has been allocated accordingly */
+                               strcpy(&entry[strlen(entry)], line);
 
                                if (entry[strlen(entry) - 1] != ',')
                                        break;
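Worth noting why plain strcpy() is safe here: entry was just realloc()'d to hold both strings, and strncat(dst, src, strlen(src)) bounds the copy by the source length rather than the destination's remaining space, so it never added protection anyway. A standalone sketch of the append pattern, assuming only libc:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
        const char *a = "aad = ", *b = "0xdeadbeef";
        char *entry = malloc(strlen(a) + strlen(b) + 1);

        if (entry == NULL)
                return 1;
        strcpy(entry, a);
        strcpy(&entry[strlen(entry)], b);  /* append; buffer is sized for it */
        printf("%s\n", entry);             /* aad = 0xdeadbeef */
        free(entry);
        return 0;
}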
diff --git a/app/test-crypto-perf/main.c b/app/test-crypto-perf/main.c
index 29373f5..13e0121 100644
@@ -106,13 +106,19 @@ cperf_initialize_cryptodev(struct cperf_options *opts, uint8_t *enabled_cdevs,
 
        nb_lcores = rte_lcore_count() - 1;
 
-       if (enabled_cdev_count > nb_lcores) {
-               printf("Number of capable crypto devices (%d) "
-                               "has to be less or equal to number of slave "
-                               "cores (%d)\n", enabled_cdev_count, nb_lcores);
+       if (nb_lcores < 1) {
+               RTE_LOG(ERR, USER1,
+                       "Number of enabled cores need to be higher than 1\n");
                return -EINVAL;
        }
 
+       /*
+        * Use less number of devices,
+        * if there are more available than cores.
+        */
+       if (enabled_cdev_count > nb_lcores)
+               enabled_cdev_count = nb_lcores;
+
        /* Create a mempool shared by all the devices */
        uint32_t max_sess_size = 0, sess_size;
 
diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index b3c3f24..77c11b8 100644
@@ -2127,7 +2127,7 @@ cmdline_parse_inst_t cmd_config_rxtx_queue = {
        .data = NULL,
        .help_str = "port <port_id> rxq|txq <queue_id> start|stop",
        .tokens = {
-               (void *)&cmd_config_speed_all_port,
+               (void *)&cmd_config_rxtx_queue_port,
                (void *)&cmd_config_rxtx_queue_portid,
                (void *)&cmd_config_rxtx_queue_rxtxq,
                (void *)&cmd_config_rxtx_queue_qid,
diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index df16d2a..35440ea 100644
@@ -2028,7 +2028,7 @@ parse_vc_action_rss_queue(struct context *ctx, const struct token *token,
        i = ctx->objdata >> 16;
        if (!strcmp_partial("end", str, len)) {
                ctx->objdata &= 0xffff;
-               return len;
+               goto end;
        }
        if (i >= ACTION_RSS_NUM)
                return -1;
@@ -2045,6 +2045,7 @@ parse_vc_action_rss_queue(struct context *ctx, const struct token *token,
        if (ctx->next_num == RTE_DIM(ctx->next))
                return -1;
        ctx->next[ctx->next_num++] = next;
+end:
        if (!ctx->object)
                return len;
        ((struct rte_flow_action_rss *)ctx->object)->num = i;
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index a0f3c24..61608d1 100644
@@ -149,15 +149,11 @@ nic_stats_display(portid_t port_id)
        struct rte_eth_stats stats;
        struct rte_port *port = &ports[port_id];
        uint8_t i;
-       portid_t pid;
 
        static const char *nic_stats_border = "########################";
 
        if (port_id_is_invalid(port_id, ENABLED_WARN)) {
-               printf("Valid port range is [0");
-               RTE_ETH_FOREACH_DEV(pid)
-                       printf(", %d", pid);
-               printf("]\n");
+               print_valid_ports();
                return;
        }
        rte_eth_stats_get(port_id, &stats);
@@ -231,13 +227,8 @@ nic_stats_display(portid_t port_id)
 void
 nic_stats_clear(portid_t port_id)
 {
-       portid_t pid;
-
        if (port_id_is_invalid(port_id, ENABLED_WARN)) {
-               printf("Valid port range is [0");
-               RTE_ETH_FOREACH_DEV(pid)
-                       printf(", %d", pid);
-               printf("]\n");
+               print_valid_ports();
                return;
        }
        rte_eth_stats_reset(port_id);
@@ -314,15 +305,11 @@ nic_stats_mapping_display(portid_t port_id)
 {
        struct rte_port *port = &ports[port_id];
        uint16_t i;
-       portid_t pid;
 
        static const char *nic_stats_mapping_border = "########################";
 
        if (port_id_is_invalid(port_id, ENABLED_WARN)) {
-               printf("Valid port range is [0");
-               RTE_ETH_FOREACH_DEV(pid)
-                       printf(", %d", pid);
-               printf("]\n");
+               print_valid_ports();
                return;
        }
 
@@ -434,14 +421,10 @@ port_infos_display(portid_t port_id)
        int vlan_offload;
        struct rte_mempool * mp;
        static const char *info_border = "*********************";
-       portid_t pid;
        uint16_t mtu;
 
        if (port_id_is_invalid(port_id, ENABLED_WARN)) {
-               printf("Valid port range is [0");
-               RTE_ETH_FOREACH_DEV(pid)
-                       printf(", %d", pid);
-               printf("]\n");
+               print_valid_ports();
                return;
        }
        port = &ports[port_id];
@@ -739,6 +722,17 @@ port_id_is_invalid(portid_t port_id, enum print_warning warning)
        return 1;
 }
 
+void print_valid_ports(void)
+{
+       portid_t pid;
+
+       printf("The valid ports array is [");
+       RTE_ETH_FOREACH_DEV(pid) {
+               printf(" %d", pid);
+       }
+       printf(" ]\n");
+}
+
 static int
 vlan_id_is_invalid(uint16_t vlan_id)
 {
diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
index 8fbb515..5d51808 100644
@@ -403,7 +403,6 @@ parse_portnuma_config(const char *q_arg)
        };
        unsigned long int_fld[_NUM_FLD];
        char *str_fld[_NUM_FLD];
-       portid_t pid;
 
        /* reset from value set at definition */
        while ((p = strchr(p0,'(')) != NULL) {
@@ -427,10 +426,7 @@ parse_portnuma_config(const char *q_arg)
                port_id = (portid_t)int_fld[FLD_PORT];
                if (port_id_is_invalid(port_id, ENABLED_WARN) ||
                        port_id == (portid_t)RTE_PORT_ALL) {
-                       printf("Valid port range is [0");
-                       RTE_ETH_FOREACH_DEV(pid)
-                               printf(", %d", pid);
-                       printf("]\n");
+                       print_valid_ports();
                        return -1;
                }
                socket_id = (uint8_t)int_fld[FLD_SOCKET];
@@ -461,7 +457,6 @@ parse_ringnuma_config(const char *q_arg)
        };
        unsigned long int_fld[_NUM_FLD];
        char *str_fld[_NUM_FLD];
-       portid_t pid;
        #define RX_RING_ONLY 0x1
        #define TX_RING_ONLY 0x2
        #define RXTX_RING    0x3
@@ -488,10 +483,7 @@ parse_ringnuma_config(const char *q_arg)
                port_id = (portid_t)int_fld[FLD_PORT];
                if (port_id_is_invalid(port_id, ENABLED_WARN) ||
                        port_id == (portid_t)RTE_PORT_ALL) {
-                       printf("Valid port range is [0");
-                       RTE_ETH_FOREACH_DEV(pid)
-                               printf(", %d", pid);
-                       printf("]\n");
+                       print_valid_ports();
                        return -1;
                }
                socket_id = (uint8_t)int_fld[FLD_SOCKET];
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index f66f4c6..a4b28e9 100644
@@ -880,18 +880,23 @@ init_fwd_streams(void)
 
        /* init new */
        nb_fwd_streams = nb_fwd_streams_new;
-       fwd_streams = rte_zmalloc("testpmd: fwd_streams",
-               sizeof(struct fwd_stream *) * nb_fwd_streams, RTE_CACHE_LINE_SIZE);
-       if (fwd_streams == NULL)
-               rte_exit(EXIT_FAILURE, "rte_zmalloc(%d (struct fwd_stream *)) "
-                                               "failed\n", nb_fwd_streams);
+       if (nb_fwd_streams) {
+               fwd_streams = rte_zmalloc("testpmd: fwd_streams",
+                       sizeof(struct fwd_stream *) * nb_fwd_streams,
+                       RTE_CACHE_LINE_SIZE);
+               if (fwd_streams == NULL)
+                       rte_exit(EXIT_FAILURE, "rte_zmalloc(%d"
+                                " (struct fwd_stream *)) failed\n",
+                                nb_fwd_streams);
 
-       for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
-               fwd_streams[sm_id] = rte_zmalloc("testpmd: struct fwd_stream",
-                               sizeof(struct fwd_stream), RTE_CACHE_LINE_SIZE);
-               if (fwd_streams[sm_id] == NULL)
-                       rte_exit(EXIT_FAILURE, "rte_zmalloc(struct fwd_stream)"
-                                                               " failed\n");
+               for (sm_id = 0; sm_id < nb_fwd_streams; sm_id++) {
+                       fwd_streams[sm_id] = rte_zmalloc("testpmd:"
+                               " struct fwd_stream", sizeof(struct fwd_stream),
+                               RTE_CACHE_LINE_SIZE);
+                       if (fwd_streams[sm_id] == NULL)
+                               rte_exit(EXIT_FAILURE, "rte_zmalloc"
+                                        "(struct fwd_stream) failed\n");
+               }
        }
 
        return 0;
@@ -925,6 +930,9 @@ pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs)
                        pktnb_stats[1] = pktnb_stats[0];
                        burst_stats[0] = nb_burst;
                        pktnb_stats[0] = nb_pkt;
+               } else if (nb_burst > burst_stats[1]) {
+                       burst_stats[1] = nb_burst;
+                       pktnb_stats[1] = nb_pkt;
                }
        }
        if (total_burst == 0)
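The added else-if fixes the runner-up slot: previously burst_stats[1] was only written when a new maximum displaced the old one, so the second-most-frequent burst size could go unreported. A minimal sketch of the corrected top-two bookkeeping, with illustrative values rather than testpmd's real counters:

#include <stdio.h>

int main(void)
{
        unsigned int burst_stats[2] = { 0, 0 };
        const unsigned int samples[] = { 5, 9, 7, 3 };

        for (unsigned int i = 0; i < 4; i++) {
                unsigned int nb_burst = samples[i];

                if (nb_burst > burst_stats[0]) {
                        burst_stats[1] = burst_stats[0]; /* old max becomes runner-up */
                        burst_stats[0] = nb_burst;
                } else if (nb_burst > burst_stats[1]) {
                        burst_stats[1] = nb_burst;       /* the previously missing case */
                }
        }
        printf("%u %u\n", burst_stats[0], burst_stats[1]); /* 9 7 */
        return 0;
}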
@@ -1210,6 +1218,31 @@ launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
        }
 }
 
+/*
+ * Update the forward ports list.
+ */
+void
+update_fwd_ports(portid_t new_pid)
+{
+       unsigned int i;
+       unsigned int new_nb_fwd_ports = 0;
+       int move = 0;
+
+       for (i = 0; i < nb_fwd_ports; ++i) {
+               if (port_id_is_invalid(fwd_ports_ids[i], DISABLED_WARN))
+                       move = 1;
+               else if (move)
+                       fwd_ports_ids[new_nb_fwd_ports++] = fwd_ports_ids[i];
+               else
+                       new_nb_fwd_ports++;
+       }
+       if (new_pid < RTE_MAX_ETHPORTS)
+               fwd_ports_ids[new_nb_fwd_ports++] = new_pid;
+
+       nb_fwd_ports = new_nb_fwd_ports;
+       nb_cfg_ports = new_nb_fwd_ports;
+}
+
 /*
  * Launch packet forwarding configuration.
  */
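update_fwd_ports() above compacts the forwarding-port array in place, dropping ids that became invalid after a detach; copying only starts once the first gap has been seen. A generic sketch of the same single-pass compaction, assuming nothing DPDK-specific:

#include <stdio.h>

static unsigned int
compact(unsigned int *ids, unsigned int n, int (*valid)(unsigned int))
{
        unsigned int i, kept = 0;
        int move = 0;

        for (i = 0; i < n; i++) {
                if (!valid(ids[i]))
                        move = 1;          /* open a gap, start shifting */
                else if (move)
                        ids[kept++] = ids[i];
                else
                        kept++;            /* prefix still dense, no copy needed */
        }
        return kept;
}

static int is_even(unsigned int v) { return (v & 1) == 0; }

int main(void)
{
        unsigned int ids[] = { 0, 3, 2, 5, 4 };
        unsigned int n = compact(ids, 5, is_even);

        for (unsigned int i = 0; i < n; i++)
                printf("%u ", ids[i]);  /* prints: 0 2 4 */
        printf("\n");
        return 0;
}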
@@ -1245,10 +1278,6 @@ start_packet_forwarding(int with_tx_first)
                return;
        }
 
-       if (init_fwd_streams() < 0) {
-               printf("Fail from init_fwd_streams()\n");
-               return;
-       }
 
        if(dcb_test) {
                for (i = 0; i < nb_fwd_ports; i++) {
@@ -1268,10 +1297,11 @@ start_packet_forwarding(int with_tx_first)
        }
        test_done = 0;
 
+       fwd_config_setup();
+
        if(!no_flush_rx)
                flush_fwd_rx_queues();
 
-       fwd_config_setup();
        pkt_fwd_config_display(&cur_fwd_config);
        rxtx_config_display();
 
@@ -1876,6 +1906,8 @@ attach_port(char *identifier)
 
        ports[pi].port_status = RTE_PORT_STOPPED;
 
+       update_fwd_ports(pi);
+
        printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
        printf("Done\n");
 }
@@ -1902,6 +1934,8 @@ detach_port(portid_t port_id)
 
        nb_ports = rte_eth_dev_count();
 
+       update_fwd_ports(RTE_MAX_ETHPORTS);
+
        printf("Port '%s' is detached. Now total ports is %d\n",
                        name, nb_ports);
        printf("Done\n");
@@ -1995,13 +2029,16 @@ check_all_ports_link_status(uint32_t port_mask)
 static void
 rmv_event_callback(void *arg)
 {
+       int org_no_link_check = no_link_check;
        struct rte_eth_dev *dev;
        portid_t port_id = (intptr_t)arg;
 
        RTE_ETH_VALID_PORTID_OR_RET(port_id);
        dev = &rte_eth_devices[port_id];
 
+       no_link_check = 1;
        stop_port(port_id);
+       no_link_check = org_no_link_check;
        close_port(port_id);
        printf("removing device %s\n", dev->device->name);
        if (rte_eal_dev_detach(dev->device))
@@ -2246,7 +2283,10 @@ uint8_t port_is_bonding_slave(portid_t slave_pid)
        struct rte_port *port;
 
        port = &ports[slave_pid];
-       return port->slave_flag;
+       if ((rte_eth_devices[slave_pid].data->dev_flags &
+           RTE_ETH_DEV_BONDED_SLAVE) || (port->slave_flag == 1))
+               return 1;
+       return 0;
 }
 
 const uint16_t vlan_tags[] = {
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 92e1607..b3b26d2 100644
@@ -599,6 +599,7 @@ void fwd_config_setup(void);
 void set_def_fwd_config(void);
 void reconfig(portid_t new_port_id, unsigned socket_id);
 int init_fwd_streams(void);
+void update_fwd_ports(portid_t new_pid);
 
 void port_mtu_set(portid_t port_id, uint16_t mtu);
 void port_reg_bit_display(portid_t port_id, uint32_t reg_off, uint8_t bit_pos);
@@ -726,6 +727,7 @@ enum print_warning {
        DISABLED_WARN
 };
 int port_id_is_invalid(portid_t port_id, enum print_warning warning);
+void print_valid_ports(void);
 int new_socket_id(unsigned int socket_id);
 
 queueid_t get_allowed_max_nb_rxq(portid_t *pid);
diff --git a/config/defconfig_i686-native-linuxapp-icc b/config/defconfig_i686-native-linuxapp-icc
index 269e88e..46ffb11 100644
@@ -46,11 +46,6 @@ CONFIG_RTE_TOOLCHAIN_ICC=y
 #
 CONFIG_RTE_LIBRTE_KNI=n
 
-#
-# Vectorized PMD is not supported on 32-bit
-#
-CONFIG_RTE_IXGBE_INC_VECTOR=n
-
 #
 # Solarflare PMD is not supported on 32-bit
 #
diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index c363639..f8ce087 100644
@@ -29,6 +29,7 @@ CRC offload          = Y
 VLAN offload         = Y
 L3 checksum offload  = Y
 L4 checksum offload  = Y
+Timestamp offload    = Y
 Packet type parsing  = Y
 Rx descriptor status = Y
 Tx descriptor status = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index f9558da..50fced3 100644
@@ -108,7 +108,11 @@ Limitations
 - Port statistics through software counters only. Flow statistics are
   supported by hardware counters.
 - Hardware checksum RX offloads for VXLAN inner header are not supported yet.
-- Forked secondary process not supported.
+- For secondary process:
+
+  - Forked secondary process not supported.
+  - All mempools must be initialized before rte_eth_dev_start().
+
 - Flow pattern without any specific vlan will match for vlan packets as well:
 
   When VLAN spec is not specified in the pattern, the matching rule will be created with VLAN as a wild card.
diff --git a/doc/guides/nics/nfp.rst b/doc/guides/nics/nfp.rst
index 99a3b76..67e574e 100644
@@ -34,14 +34,14 @@ NFP poll mode driver library
 Netronome's sixth generation of flow processors pack 216 programmable
 cores and over 100 hardware accelerators that uniquely combine packet,
 flow, security and content processing in a single device that scales
-up to 400 Gbps.
+up to 400-Gb/s.
 
 This document explains how to use DPDK with the Netronome Poll Mode
 Driver (PMD) supporting Netronome's Network Flow Processor 6xxx
 (NFP-6xxx) and Netronome's Flow Processor 4xxx (NFP-4xxx).
 
 NFP is a SRIOV capable device and the PMD driver supports the physical
-function (PF) and virtual functions (VFs).
+function (PF) and the virtual functions (VFs).
 
 Dependencies
 ------------
@@ -49,17 +49,18 @@ Dependencies
 Before using the Netronome's DPDK PMD some NFP configuration,
 which is not related to DPDK, is required. The system requires
 installation of **Netronome's BSP (Board Support Package)** along
-with some specific NFP firmware application. Netronome's NSP ABI
+with a specific NFP firmware application. Netronome's NSP ABI
 version should be 0.20 or higher.
 
 If you have a NFP device you should already have the code and
-documentation for doing all this configuration. Contact
+documentation for this configuration. Contact
 **support@netronome.com** to obtain the latest available firmware.
 
-The NFP Linux netdev kernel driver for VFs is part of vanilla kernel
-since kernel version 4.5, and support for the PF since kernel version
-4.11. Support for older kernels can be obtained on Github at
-**https://github.com/Netronome/nfp-drv-kmods** along with build
+The NFP Linux netdev kernel driver for VFs has been a part of the
+vanilla kernel since kernel version 4.5, and support for the PF
+since kernel version 4.11. Support for older kernels can be obtained
+on Github at
+**https://github.com/Netronome/nfp-drv-kmods** along with the build
 instructions.
 
 NFP PMD needs to be used along with UIO ``igb_uio`` or VFIO (``vfio-pci``)
@@ -70,15 +71,15 @@ Building the software
 
 Netronome's PMD code is provided in the **drivers/net/nfp** directory.
 Although NFP PMD has Netronome´s BSP dependencies, it is possible to
-compile it along with other DPDK PMDs even if no BSP was installed before.
+compile it along with other DPDK PMDs even if no BSP was installed previously.
 Of course, a DPDK app will require such a BSP installed for using the
 NFP PMD, along with a specific NFP firmware application.
 
-Default PMD configuration is at **common_linuxapp configuration** file:
+Default PMD configuration is at the **common_linuxapp configuration** file:
 
 - **CONFIG_RTE_LIBRTE_NFP_PMD=y**
 
-Once DPDK is built all the DPDK apps and examples include support for
+Once the DPDK is built all the DPDK apps and examples include support for
 the NFP PMD.
 
 
@@ -91,18 +92,18 @@ for details.
 Using the PF
 ------------
 
-NFP PMD has support for using the NFP PF as another DPDK port, but it does not
+NFP PMD supports using the NFP PF as another DPDK port, but it does not
 have any functionality for controlling VFs. In fact, it is not possible to use
 the PMD with the VFs if the PF is being used by DPDK, that is, with the NFP PF
-bound to ``igb_uio`` or ``vfio-pci`` kernel drivers. Future DPDK version will
+bound to ``igb_uio`` or ``vfio-pci`` kernel drivers. Future DPDK versions will
 have a PMD able to work with the PF and VFs at the same time and with the PF
 implementing VF management along with other PF-only functionalities/offloads.
 
-The PMD PF has extra work to do which will delay the DPDK app initialization
-like checking if a firmware is already available in the device, uploading the
-firmware if necessary, and configure the Link state properly when starting or
-stopping a PF port. Note that firmware upload is not always necessary which is
-the main delay for NFP PF PMD initialization.
+The PMD PF has extra work to do which will delay the DPDK app initialization.
+This additional effort could be checking if a firmware is already available in
+the device, uploading the firmware if necessary or configuring the Link state
+properly when starting or stopping a PF port. Note that firmware upload is not
+always necessary which is the main delay for NFP PF PMD initialization.
 
 Depending on the Netronome product installed in the system, firmware files
 should be available under ``/lib/firmware/netronome``. DPDK PMD supporting the
@@ -114,14 +115,14 @@ PF multiport support
 --------------------
 
 Some NFP cards support several physical ports with just one single PCI device.
-DPDK core is designed with the 1:1 relationship between PCI devices and DPDK
+The DPDK core is designed with a 1:1 relationship between PCI devices and DPDK
 ports, so NFP PMD PF support requires handling the multiport case specifically.
 During NFP PF initialization, the PMD will extract the information about the
 number of PF ports from the firmware and will create as many DPDK ports as
 needed.
 
 Because the unusual relationship between a single PCI device and several DPDK
-ports, there are some limitations when using more than one PF DPDK ports: there
+ports, there are some limitations when using more than one PF DPDK port: there
 is no support for RX interrupts and it is not possible either to use those PF
 ports with the device hotplug functionality.
 
@@ -136,7 +137,7 @@ System configuration
    get the drivers from the above Github repository and follow the instructions
    for building and installing it.
 
-   Virtual Functions need to be enabled before they can be used with the PMD.
+   VFs need to be enabled before they can be used with the PMD.
    Before enabling the VFs it is useful to obtain information about the
    current NFP PCI device detected by the system:
 
diff --git a/doc/guides/rel_notes/release_17_11.rst b/doc/guides/rel_notes/release_17_11.rst
index fad7a7e..39a14ff 100644
@@ -1090,3 +1090,218 @@ Fixes in 17.11 LTS Release
 * vhost: handle virtually non-contiguous buffers in Rx-mrg (fixes CVE-2018-1059)
 * vhost: handle virtually non-contiguous buffers in Tx (fixes CVE-2018-1059)
 * vhost: introduce safe API for GPA translation (fixes CVE-2018-1059)
+
+17.11.3
+~~~~~~~
+
+* app/crypto-perf: check minimum lcore number
+* app/crypto-perf: fix excess crypto device error
+* app/crypto-perf: fix IOVA translation
+* app/crypto-perf: fix parameters copy
+* app/crypto-perf: use strcpy for allocated string
+* app/procinfo: fix strncpy usage in args parsing
+* app/testpmd: fix burst stats reporting
+* app/testpmd: fix command token
+* app/testpmd: fix empty list of RSS queues for flow
+* app/testpmd: fix forward ports Rx flush
+* app/testpmd: fix forward ports update
+* app/testpmd: fix removed device link status asking
+* app/testpmd: fix slave port detection
+* app/testpmd: fix synchronic port hotplug
+* app/testpmd: fix valid ports prints
+* bus/dpaa: fix resource leak
+* bus/fslmc: fix find device start condition
+* bus/pci: fix find device implementation
+* bus/vdev: fix finding device by name
+* cryptodev: fix supported size check
+* crypto/dpaa2_sec: fix HMAC supported digest sizes
+* crypto/scheduler: fix 64-bit mask of workers cores
+* crypto/scheduler: fix memory leak
+* crypto/scheduler: fix multicore rings re-use
+* crypto/scheduler: fix possible duplicated ring names
+* crypto/scheduler: set null pointer after freeing
+* crypto/zuc: batch ops with same transform
+* crypto/zuc: do not set default op status
+* doc: add timestamp offload to mlx5 features
+* doc: fix NFP NIC guide grammar
+* drivers/net: fix link autoneg value for virtual PMDs
+* eal/ppc: remove braces in SMP memory barrier macro
+* ethdev: fix port accessing after release
+* ethdev: fix queue start
+* event/dpaa2: remove link from info structure
+* examples/exception_path: limit core count to 64
+* examples/l2fwd-crypto: fix the default aead assignments
+* examples/performance-thread: fix return type of threads
+* examples/quota_watermark: fix return type of threads
+* hash: fix missing spinlock unlock in add key
+* ip_frag: fix double free of chained mbufs
+* kni: fix build on CentOS 7.4
+* kni: fix build on RHEL 7.5
+* mbuf: fix Tx checksum offload API doc
+* mbuf: improve tunnel Tx offloads API doc
+* mem: do not use physical addresses in IOVA as VA mode
+* mempool: fix leak when no objects are populated
+* mempool: fix virtual address population
+* mk: fix make defconfig on FreeBSD
+* net: add IPv6 header fields macros
+* net/bnx2x: do not cast function pointers as a policy
+* net/bnx2x: fix for PCI FLR after ungraceful exit
+* net/bnx2x: fix KR2 device check
+* net/bnx2x: fix memzone name overrun
+* net/bnxt: avoid invalid vnic id in set L2 Rx mask
+* net/bnxt: fix endianness of flag
+* net/bnxt: fix license header
+* net/bnxt: fix LRO disable
+* net/bnxt: fix Rx checksum flags
+* net/bnxt: fix Rx checksum flags for tunnel frames
+* net/bnxt: fix Rx drop setting
+* net/bnxt: fix Rx mbuf and agg ring leak in dev stop
+* net/bnxt: fix usage of vnic id
+* net/bnxt: free memory allocated for VF filters
+* net/bnxt: set padding flags in Rx descriptor
+* net/bonding: clear started state if start fails
+* net/bonding: export mode 4 slave info routine
+* net/bonding: fix primary slave port id storage type
+* net/bonding: fix setting VLAN ID on slave ports
+* net/bonding: fix slave activation simultaneously
+* net/bonding: free mempool used in mode 6
+* net/dpaa2: fix xstats
+* net/dpaa: fix oob access
+* net/enic: allocate stats DMA buffer upfront during probe
+* net/enic: fix crash on MTU update with non-setup queues
+* net/failsafe: fix duplicate event registration
+* net/failsafe: fix probe cleanup
+* net/failsafe: fix removed sub-device cleanup
+* net/i40e: fix DDP profile DEL operation
+* net/i40e: fix failing to disable FDIR Tx queue
+* net/i40e: fix intr callback unregister by adding retry
+* net/i40e: fix link status update
+* net/i40e: fix link update no wait
+* net/i40e: fix shifts of signed values
+* net/ixgbe: enable vector PMD for icc 32 bits
+* net/ixgbe: fix busy wait during checking link status
+* net/ixgbe: fix DCB configuration
+* net/ixgbe: fix intr callback unregister by adding retry
+* net/ixgbe: fix too many interrupts
+* net/liquidio: fix link state fetching during start
+* net/mlx4: avoid constant recreations in function
+* net/mlx4: fix a typo in header file
+* net/mlx4: fix broadcast Rx
+* net/mlx4: fix removal detection of stopped port
+* net/mlx4: fix RSS resource leak in case of error
+* net/mlx4: fix Rx resource leak in case of error
+* net/mlx4: fix single port configuration
+* net/mlx4: fix UDP flow rule limitation enforcement
+* net/mlx4: store RSS hash result in mbufs
+* net/mlx5: add data-plane debug message macro
+* net/mlx5: add missing function documentation
+* net/mlx5: add packet type index for TCP ack
+* net/mlx5: change device reference for secondary process
+* net/mlx5: change non failing function return values
+* net/mlx5: change pkt burst select function prototype
+* net/mlx5: change tunnel flow priority
+* net/mlx5: enforce RSS key length limitation
+* net/mlx5: fix allocation when no memory on device NUMA node
+* net/mlx5: fix build with clang on ARM
+* net/mlx5: fix calculation of Tx TSO inline room size
+* net/mlx5: fix close after start failure
+* net/mlx5: fix count in xstats
+* net/mlx5: fix CRC strip capability query
+* net/mlx5: fix disabling Tx packet inlining
+* net/mlx5: fix double free on error handling
+* net/mlx5: fix ethtool link setting call order
+* net/mlx5: fix existing file removal
+* net/mlx5: fix flow creation with a single target queue
+* net/mlx5: fix flow director conversion
+* net/mlx5: fix flow director drop rule deletion crash
+* net/mlx5: fix flow director mask
+* net/mlx5: fix flow director rule deletion crash
+* net/mlx5: fix flow validation
+* net/mlx5: fix icc build
+* net/mlx5: fix invalid flow item check
+* net/mlx5: fix IPv6 header fields
+* net/mlx5: fix link status behavior
+* net/mlx5: fix link status initialization
+* net/mlx5: fix link status to use wait to complete
+* net/mlx5: fix probe return value polarity
+* net/mlx5: fix reception of multiple MAC addresses
+* net/mlx5: fix resource leak in case of error
+* net/mlx5: fix RSS flow action bounds check
+* net/mlx5: fix RSS key length query
+* net/mlx5: fix secondary process mempool registration
+* net/mlx5: fix socket connection return value
+* net/mlx5: fix sriov flag
+* net/mlx5: fix synchronization on polling Rx completions
+* net/mlx5: improve flow error explanation
+* net/mlx5: map UAR address around huge pages
+* net/mlx5: mark parameters with unused attribute
+* net/mlx5: name parameters in function prototypes
+* net/mlx5: normalize function prototypes
+* net/mlx5: prefix all functions with mlx5
+* net/mlx5: refuse empty VLAN flow specification
+* net/mlx5: remove 32-bit support
+* net/mlx5: remove assert un-accessible from secondary process
+* net/mlx5: remove control path locks
+* net/mlx5: remove excessive data prefetch
+* net/mlx5: remove get priv internal function
+* net/mlx5: remove kernel version check
+* net/mlx5: remove useless empty lines
+* net/mlx5: setup RSS regardless of queue count
+* net/mlx5: split L3/L4 in flow director
+* net/mlx5: standardize on negative errno values
+* net/mlx5: use dynamic logging
+* net/mlx5: use port id in PMD log
+* net/mlx5: warn for unsuccessful memory registration
+* net/mlx: control netdevices through ioctl only
+* net/mrvl: fix crash when port is closed without starting
+* net/mrvl: fix Rx descriptors number
+* net/nfp: fix assigning port id in mbuf
+* net/nfp: fix barrier location
+* net/nfp: fix link speed capabilities
+* net/nfp: fix mbufs releasing when stop or close
+* net/octeontx: fix null pointer dereference
+* net/octeontx: fix uninitialized speed variable
+* net/octeontx: fix uninitialized variable in port open
+* net/qede/base: fix to support OVLAN mode
+* net/qede: fix alloc from socket 0
+* net/qede: fix device stop to remove primary MAC
+* net/qede: fix L2-handles used for RSS hash update
+* net/qede: fix memory alloc for multiple port reconfig
+* net/qede: fix missing loop index in Tx SG mode
+* net/qede: fix multicast filtering
+* net/qede: fix to prevent overwriting packet type
+* net/qede: fix unicast filter routine return code
+* net/qede: fix VF port creation sequence
+* net/sfc: add missing defines for SAL annotation
+* net/sfc: add missing Rx fini on RSS setup fail path
+* net/sfc/base: fix comparison always true warning
+* net/sfc: fix mbuf data alignment calculation
+* net/sfc: fix type of opaque pointer in perf profile handler
+* net/sfc: ignore spec bits not covered by mask
+* net/sfc: process RSS settings on Rx configure step
+* net/szedata2: fix format string for PCI address
+* net/szedata2: fix total stats
+* net/tap: fix icc build
+* net/vhost: fix crash when creating vdev dynamically
+* net/vhost: fix invalid state
+* net/vhost: initialise device as inactive
+* net/vmxnet3: set the queue shared buffer at start
+* nfp: allow for non-root user
+* nfp: restore the unlink operation
+* nfp: unlink the appropriate lock file
+* pci: remove duplicated symbol from map file
+* test/distributor: fix return type of thread function
+* test: fix memory flags test for low NUMA nodes number
+* test/mempool: fix autotest retry
+* test/pipeline: fix return type of stub miss
+* test/pipeline: fix type of table entry parameter
+* test/reorder: fix freeing mbuf twice
+* vfio: do not needlessly check for IOVA mode
+* vhost: check cmsg not null
+* vhost: fix compilation issue when vhost debug enabled
+* vhost: fix dead lock on closing in server mode
+* vhost: fix device cleanup at stop
+* vhost: fix message payload union in setting ring address
+* vhost: fix offset while mmaping log base address
+* vhost: fix realloc failure
+* vhost: fix ring index returned to master on stop
diff --git a/drivers/bus/dpaa/base/fman/fman.c b/drivers/bus/dpaa/base/fman/fman.c
index 3816dba..a9c88dd 100644
@@ -475,6 +475,7 @@ fman_if_init(const struct device_node *dpa_node)
                if (!pool_node) {
                        FMAN_ERR(-ENXIO, "%s: bad fsl,bman-buffer-pools\n",
                                 dname);
+                       free(bpool);
                        goto err;
                }
                pname = pool_node->full_name;
@@ -482,6 +483,7 @@ fman_if_init(const struct device_node *dpa_node)
                prop = of_get_property(pool_node, "fsl,bpid", &proplen);
                if (!prop) {
                        FMAN_ERR(-EINVAL, "%s: no fsl,bpid\n", pname);
+                       free(bpool);
                        goto err;
                }
                assert(proplen == sizeof(*prop));
diff --git a/drivers/bus/fslmc/fslmc_bus.c b/drivers/bus/fslmc/fslmc_bus.c
index 480857e..001e56c 100644
@@ -310,8 +310,9 @@ rte_fslmc_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
        struct rte_dpaa2_device *dev;
 
        TAILQ_FOREACH(dev, &rte_fslmc_bus.device_list, next) {
-               if (start && &dev->device == start) {
-                       start = NULL;  /* starting point found */
+               if (start != NULL) {
+                       if (&dev->device == start)
+                               start = NULL;  /* starting point found */
                        continue;
                }
 
diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index 104fdf9..6789748 100644
@@ -488,17 +488,20 @@ static struct rte_device *
 pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
                const void *data)
 {
-       struct rte_pci_device *dev;
+       const struct rte_pci_device *pstart;
+       struct rte_pci_device *pdev;
 
-       FOREACH_DEVICE_ON_PCIBUS(dev) {
-               if (start && &dev->device == start) {
-                       start = NULL; /* starting point found */
-                       continue;
-               }
-               if (cmp(&dev->device, data) == 0)
-                       return &dev->device;
+       if (start != NULL) {
+               pstart = RTE_DEV_TO_PCI_CONST(start);
+               pdev = TAILQ_NEXT(pstart, next);
+       } else {
+               pdev = TAILQ_FIRST(&rte_pci_bus.device_list);
+       }
+       while (pdev != NULL) {
+               if (cmp(&pdev->device, data) == 0)
+                       return &pdev->device;
+               pdev = TAILQ_NEXT(pdev, next);
        }
-
        return NULL;
 }
 
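Both this rewrite and the fslmc one above fix the same iterator bug: when resuming from 'start', the old loop could fall through to cmp() on devices that come *before* the starting point. Beginning the walk at the element after 'start' removes that case. A generic sketch with sys/queue.h rather than the PCI bus types:

#include <stdio.h>
#include <sys/queue.h>

struct node {
        int id;
        TAILQ_ENTRY(node) link;
};
TAILQ_HEAD(nlist, node);

static struct node *
find_from(struct nlist *head, struct node *start, int (*match)(const struct node *))
{
        struct node *n = (start != NULL) ? TAILQ_NEXT(start, link)
                                         : TAILQ_FIRST(head);

        for (; n != NULL; n = TAILQ_NEXT(n, link))
                if (match(n))
                        return n;
        return NULL;
}

static int is_even(const struct node *n) { return (n->id & 1) == 0; }

int main(void)
{
        struct nlist head = TAILQ_HEAD_INITIALIZER(head);
        struct node a = { .id = 1 }, b = { .id = 2 }, c = { .id = 4 };
        struct node *m;

        TAILQ_INSERT_TAIL(&head, &a, link);
        TAILQ_INSERT_TAIL(&head, &b, link);
        TAILQ_INSERT_TAIL(&head, &c, link);

        m = find_from(&head, NULL, is_even); /* -> b (id 2) */
        m = find_from(&head, m, is_even);    /* -> c (id 4), never b again */
        printf("%d\n", m->id);               /* 4 */
        return 0;
}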
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index d4a2996..c6af61d 100644
@@ -103,6 +103,9 @@ struct rte_pci_device {
  */
 #define RTE_DEV_TO_PCI(ptr) container_of(ptr, struct rte_pci_device, device)
 
+#define RTE_DEV_TO_PCI_CONST(ptr) \
+       container_of(ptr, const struct rte_pci_device, device)
+
 #define RTE_ETH_DEV_TO_PCI(eth_dev)    RTE_DEV_TO_PCI((eth_dev)->device)
 
 /** Any PCI device identifier (vendor, device, ...) */
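RTE_DEV_TO_PCI_CONST is container_of() instantiated for a const pointer, so pci_find_device() can step back from the generic 'const struct rte_device *start' to its enclosing rte_pci_device without casting away const. A generic sketch of the underlying pointer arithmetic (my_container_of and the struct layouts are illustrative, not the DPDK definitions):

#include <stddef.h>
#include <stdio.h>

#define my_container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct device { int numa_node; };
struct pci_device { int vendor; struct device device; };

int main(void)
{
        struct pci_device pdev = { .vendor = 0x15b3 };
        struct device *d = &pdev.device;
        struct pci_device *back = my_container_of(d, struct pci_device, device);

        printf("%#x\n", back->vendor); /* 0x15b3: recovered the enclosing struct */
        return 0;
}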
diff --git a/drivers/bus/vdev/vdev.c b/drivers/bus/vdev/vdev.c
index ba0ed7a..a0ffb53 100644
@@ -129,7 +129,7 @@ find_vdev(const char *name)
        TAILQ_FOREACH(dev, &vdev_device_list, next) {
                const char *devname = rte_vdev_device_name(dev);
 
-               if (!strncmp(devname, name, strlen(name)))
+               if (!strcmp(devname, name))
                        return dev;
        }
 
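strncmp() bounded by strlen(name) is a prefix match, not an equality test, so looking up one vdev could return another whose name merely starts the same. A standalone demonstration with hypothetical device names:

#include <stdio.h>
#include <string.h>

int main(void)
{
        const char *devname = "net_null10"; /* registered device */
        const char *name = "net_null1";     /* device being looked up */

        /* 0 == "match": the old test wrongly accepts net_null10 */
        printf("strncmp: %d\n", strncmp(devname, name, strlen(name)));
        /* non-zero: strcmp correctly tells them apart */
        printf("strcmp:  %d\n", strcmp(devname, name));
        return 0;
}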
diff --git a/drivers/crypto/dpaa2_sec/dpaa2_sec_priv.h b/drivers/crypto/dpaa2_sec/dpaa2_sec_priv.h
index 8e58380..ae8c0c3 100644
@@ -211,9 +211,9 @@ static const struct rte_cryptodev_capabilities dpaa2_sec_capabilities[] = {
                                        .increment = 1
                                },
                                .digest_size = {
-                                       .min = 16,
+                                       .min = 1,
                                        .max = 16,
-                                       .increment = 0
+                                       .increment = 1
                                },
                                .iv_size = { 0 }
                        }, }
@@ -232,9 +232,9 @@ static const struct rte_cryptodev_capabilities dpaa2_sec_capabilities[] = {
                                        .increment = 1
                                },
                                .digest_size = {
-                                       .min = 20,
+                                       .min = 1,
                                        .max = 20,
-                                       .increment = 0
+                                       .increment = 1
                                },
                                .iv_size = { 0 }
                        }, }
@@ -253,9 +253,9 @@ static const struct rte_cryptodev_capabilities dpaa2_sec_capabilities[] = {
                                        .increment = 1
                                },
                                .digest_size = {
-                                       .min = 28,
+                                       .min = 1,
                                        .max = 28,
-                                       .increment = 0
+                                       .increment = 1
                                },
                                .iv_size = { 0 }
                        }, }
@@ -274,9 +274,9 @@ static const struct rte_cryptodev_capabilities dpaa2_sec_capabilities[] = {
                                        .increment = 1
                                },
                                .digest_size = {
-                                               .min = 32,
-                                               .max = 32,
-                                               .increment = 0
+                                       .min = 1,
+                                       .max = 32,
+                                       .increment = 1
                                },
                                .iv_size = { 0 }
                                }, }
@@ -295,9 +295,9 @@ static const struct rte_cryptodev_capabilities dpaa2_sec_capabilities[] = {
                                        .increment = 1
                                },
                                .digest_size = {
-                                       .min = 48,
+                                       .min = 1,
                                        .max = 48,
-                                       .increment = 0
+                                       .increment = 1
                                },
                                .iv_size = { 0 }
                        }, }
@@ -316,9 +316,9 @@ static const struct rte_cryptodev_capabilities dpaa2_sec_capabilities[] = {
                                        .increment = 1
                                },
                                .digest_size = {
-                                       .min = 64,
+                                       .min = 1,
                                        .max = 64,
-                                       .increment = 0
+                                       .increment = 1
                                },
                                .iv_size = { 0 }
                        }, }
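The digest_size changes all follow one pattern: {min == max, increment = 0} advertises a single legal size, while {min = 1, increment = 1} admits every truncated HMAC length up to the full digest. A sketch of how such a range is typically evaluated (check_size() is a hypothetical helper, not a DPDK API):

#include <stdint.h>
#include <stdio.h>

struct param_range { uint16_t min, max, increment; };

static int
check_size(const struct param_range *r, uint16_t size)
{
        if (r->increment == 0)          /* exactly one supported size */
                return size == r->min && size == r->max;
        return size >= r->min && size <= r->max &&
               (size - r->min) % r->increment == 0;
}

int main(void)
{
        struct param_range old_r = { 16, 16, 0 }, new_r = { 1, 16, 1 };

        /* a 12-byte truncated digest: rejected before, accepted after */
        printf("%d %d\n", check_size(&old_r, 12), check_size(&new_r, 12)); /* 0 1 */
        return 0;
}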
diff --git a/drivers/crypto/scheduler/rte_cryptodev_scheduler.c b/drivers/crypto/scheduler/rte_cryptodev_scheduler.c
index 822ce27..59ece95 100644
@@ -119,8 +119,10 @@ update_scheduler_capability(struct scheduler_ctx *sched_ctx)
        struct rte_cryptodev_capabilities tmp_caps[256] = { {0} };
        uint32_t nb_caps = 0, i;
 
-       if (sched_ctx->capabilities)
+       if (sched_ctx->capabilities) {
                rte_free(sched_ctx->capabilities);
+               sched_ctx->capabilities = NULL;
+       }
 
        for (i = 0; i < sched_ctx->nb_slaves; i++) {
                struct rte_cryptodev_info dev_info;
@@ -490,8 +492,10 @@ rte_cryptodev_scheduler_load_user_scheduler(uint8_t scheduler_id,
        sched_ctx->ops.option_set = scheduler->ops->option_set;
        sched_ctx->ops.option_get = scheduler->ops->option_get;
 
-       if (sched_ctx->private_ctx)
+       if (sched_ctx->private_ctx) {
                rte_free(sched_ctx->private_ctx);
+               sched_ctx->private_ctx = NULL;
+       }
 
        if (sched_ctx->ops.create_private_ctx) {
                int ret = (*sched_ctx->ops.create_private_ctx)(dev);
diff --git a/drivers/crypto/scheduler/rte_cryptodev_scheduler.h b/drivers/crypto/scheduler/rte_cryptodev_scheduler.h
index df22f2a..84917d1 100644
@@ -59,7 +59,7 @@ extern "C" {
 #endif
 
 /** Maximum number of multi-core worker cores */
-#define RTE_CRYPTODEV_SCHEDULER_MAX_NB_WORKER_CORES    (64)
+#define RTE_CRYPTODEV_SCHEDULER_MAX_NB_WORKER_CORES    (RTE_MAX_LCORE - 1)
 
 /** Round-robin scheduling mode string */
 #define SCHEDULER_MODE_NAME_ROUND_ROBIN                round-robin
diff --git a/drivers/crypto/scheduler/scheduler_multicore.c b/drivers/crypto/scheduler/scheduler_multicore.c
index 0cd5bce..14c33b9 100644
@@ -49,8 +49,8 @@ struct mc_scheduler_ctx {
        uint32_t num_workers;             /**< Number of workers polling */
        uint32_t stop_signal;
 
-       struct rte_ring *sched_enq_ring[RTE_CRYPTODEV_SCHEDULER_MAX_NB_WORKER_CORES];
-       struct rte_ring *sched_deq_ring[RTE_CRYPTODEV_SCHEDULER_MAX_NB_WORKER_CORES];
+       struct rte_ring *sched_enq_ring[RTE_MAX_LCORE];
+       struct rte_ring *sched_deq_ring[RTE_MAX_LCORE];
 };
 
 struct mc_scheduler_qp_ctx {
@@ -356,11 +356,13 @@ static int
 scheduler_create_private_ctx(struct rte_cryptodev *dev)
 {
        struct scheduler_ctx *sched_ctx = dev->data->dev_private;
-       struct mc_scheduler_ctx *mc_ctx;
+       struct mc_scheduler_ctx *mc_ctx = NULL;
        uint16_t i;
 
-       if (sched_ctx->private_ctx)
+       if (sched_ctx->private_ctx) {
                rte_free(sched_ctx->private_ctx);
+               sched_ctx->private_ctx = NULL;
+       }
 
        mc_ctx = rte_zmalloc_socket(NULL, sizeof(struct mc_scheduler_ctx), 0,
                        rte_socket_id());
@@ -373,25 +375,48 @@ scheduler_create_private_ctx(struct rte_cryptodev *dev)
        for (i = 0; i < sched_ctx->nb_wc; i++) {
                char r_name[16];
 
-               snprintf(r_name, sizeof(r_name), MC_SCHED_ENQ_RING_NAME_PREFIX "%u", i);
-               mc_ctx->sched_enq_ring[i] = rte_ring_create(r_name, PER_SLAVE_BUFF_SIZE,
-                                       rte_socket_id(), RING_F_SC_DEQ | RING_F_SP_ENQ);
+               snprintf(r_name, sizeof(r_name), MC_SCHED_ENQ_RING_NAME_PREFIX
+                               "%u_%u", dev->data->dev_id, i);
+               mc_ctx->sched_enq_ring[i] = rte_ring_lookup(r_name);
                if (!mc_ctx->sched_enq_ring[i]) {
-                       CS_LOG_ERR("Cannot create ring for worker %u", i);
-                       return -1;
+                       mc_ctx->sched_enq_ring[i] = rte_ring_create(r_name,
+                                               PER_SLAVE_BUFF_SIZE,
+                                               rte_socket_id(),
+                                               RING_F_SC_DEQ | RING_F_SP_ENQ);
+                       if (!mc_ctx->sched_enq_ring[i]) {
+                               CS_LOG_ERR("Cannot create ring for worker %u",
+                                          i);
+                               goto exit;
+                       }
                }
-               snprintf(r_name, sizeof(r_name), MC_SCHED_DEQ_RING_NAME_PREFIX "%u", i);
-               mc_ctx->sched_deq_ring[i] = rte_ring_create(r_name, PER_SLAVE_BUFF_SIZE,
-                                       rte_socket_id(), RING_F_SC_DEQ | RING_F_SP_ENQ);
+               snprintf(r_name, sizeof(r_name), MC_SCHED_DEQ_RING_NAME_PREFIX
+                               "%u_%u", dev->data->dev_id, i);
+               mc_ctx->sched_deq_ring[i] = rte_ring_lookup(r_name);
                if (!mc_ctx->sched_deq_ring[i]) {
-                       CS_LOG_ERR("Cannot create ring for worker %u", i);
-                       return -1;
+                       mc_ctx->sched_deq_ring[i] = rte_ring_create(r_name,
+                                               PER_SLAVE_BUFF_SIZE,
+                                               rte_socket_id(),
+                                               RING_F_SC_DEQ | RING_F_SP_ENQ);
+                       if (!mc_ctx->sched_deq_ring[i]) {
+                               CS_LOG_ERR("Cannot create ring for worker %u",
+                                          i);
+                               goto exit;
+                       }
                }
        }
 
        sched_ctx->private_ctx = (void *)mc_ctx;
 
        return 0;
+
+exit:
+       for (i = 0; i < sched_ctx->nb_wc; i++) {
+               rte_ring_free(mc_ctx->sched_enq_ring[i]);
+               rte_ring_free(mc_ctx->sched_deq_ring[i]);
+       }
+       rte_free(mc_ctx);
+
+       return -1;
 }
 
 struct rte_cryptodev_scheduler_ops scheduler_mc_ops = {
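Two things change above: ring names now embed the device id, avoiding collisions between scheduler instances, and rte_ring_lookup() is tried before rte_ring_create() so an existing ring is reused instead of failing creation with EEXIST. A hedged sketch of that lookup-or-create pattern, assuming a running EAL; the flags here are illustrative:

#include <rte_ring.h>

static struct rte_ring *
ring_lookup_or_create(const char *name, unsigned int size, int socket)
{
        struct rte_ring *r = rte_ring_lookup(name);

        if (r != NULL)
                return r;       /* reuse a ring left over from a prior instance */
        return rte_ring_create(name, size, socket,
                               RING_F_SP_ENQ | RING_F_SC_DEQ);
}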
diff --git a/drivers/crypto/scheduler/scheduler_pkt_size_distr.c b/drivers/crypto/scheduler/scheduler_pkt_size_distr.c
index 1dd1bc3..4874191 100644
@@ -362,8 +362,10 @@ scheduler_create_private_ctx(struct rte_cryptodev *dev)
        struct scheduler_ctx *sched_ctx = dev->data->dev_private;
        struct psd_scheduler_ctx *psd_ctx;
 
-       if (sched_ctx->private_ctx)
+       if (sched_ctx->private_ctx) {
                rte_free(sched_ctx->private_ctx);
+               sched_ctx->private_ctx = NULL;
+       }
 
        psd_ctx = rte_zmalloc_socket(NULL, sizeof(struct psd_scheduler_ctx), 0,
                        rte_socket_id());
diff --git a/drivers/crypto/scheduler/scheduler_pmd.c b/drivers/crypto/scheduler/scheduler_pmd.c
index acdf636..fcba119 100644
@@ -48,7 +48,8 @@ struct scheduler_init_params {
        uint32_t nb_slaves;
        enum rte_cryptodev_scheduler_mode mode;
        uint32_t enable_ordering;
-       uint64_t wcmask;
+       uint16_t wc_pool[RTE_MAX_LCORE];
+       uint16_t nb_wc;
        char slave_names[RTE_CRYPTODEV_SCHEDULER_MAX_NB_SLAVES]
                        [RTE_CRYPTODEV_SCHEDULER_NAME_MAX_LEN];
 };
@@ -114,10 +115,6 @@ cryptodev_scheduler_create(const char *name,
                return -EFAULT;
        }
 
-       if (init_params->wcmask != 0)
-               RTE_LOG(INFO, PMD, "  workers core mask = %"PRIx64"\n",
-                       init_params->wcmask);
-
        dev->driver_id = cryptodev_driver_id;
        dev->dev_ops = rte_crypto_scheduler_pmd_ops;
 
@@ -128,15 +125,12 @@ cryptodev_scheduler_create(const char *name,
        if (init_params->mode == CDEV_SCHED_MODE_MULTICORE) {
                uint16_t i;
 
-               sched_ctx->nb_wc = 0;
+               sched_ctx->nb_wc = init_params->nb_wc;
 
-               for (i = 0; i < RTE_CRYPTODEV_SCHEDULER_MAX_NB_WORKER_CORES; i++) {
-                       if (init_params->wcmask & (1ULL << i)) {
-                               sched_ctx->wc_pool[sched_ctx->nb_wc++] = i;
-                               RTE_LOG(INFO, PMD,
-                                       "  Worker core[%u]=%u added\n",
-                                       sched_ctx->nb_wc-1, i);
-                       }
+               for (i = 0; i < sched_ctx->nb_wc; i++) {
+                       sched_ctx->wc_pool[i] = init_params->wc_pool[i];
+                       RTE_LOG(INFO, PMD, "  Worker core[%u]=%u added\n",
+                               i, sched_ctx->wc_pool[i]);
                }
        }
 
@@ -260,9 +254,47 @@ static int
 parse_coremask_arg(const char *key __rte_unused,
                const char *value, void *extra_args)
 {
+       int i, j, val;
+       uint16_t idx = 0;
+       char c;
        struct scheduler_init_params *params = extra_args;
 
-       params->wcmask = strtoull(value, NULL, 16);
+       params->nb_wc = 0;
+
+       if (value == NULL)
+               return -1;
+       /* Remove all blank characters ahead and after .
+        * Remove 0x/0X if exists.
+        */
+       while (isblank(*value))
+               value++;
+       if (value[0] == '0' && ((value[1] == 'x') || (value[1] == 'X')))
+               value += 2;
+       i = strlen(value);
+       while ((i > 0) && isblank(value[i - 1]))
+               i--;
+
+       if (i == 0)
+               return -1;
+
+       for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) {
+               c = value[i];
+               if (isxdigit(c) == 0) {
+                       /* invalid characters */
+                       return -1;
+               }
+               if (isdigit(c))
+                       val = c - '0';
+               else if (isupper(c))
+                       val = c - 'A' + 10;
+               else
+                       val = c - 'a' + 10;
+
+               for (j = 0; j < 4 && idx < RTE_MAX_LCORE; j++, idx++) {
+                       if ((1 << j) & val)
+                               params->wc_pool[params->nb_wc++] = idx;
+               }
+       }
 
        return 0;
 }
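The rewritten parser walks the hex string digit by digit from the least significant end, so it is no longer capped by the 64-bit range of strtoull(); that is what allows the worker-core limit to grow to RTE_MAX_LCORE - 1. A standalone sketch of the digit expansion for a mask of "f0":

#include <ctype.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        const char *mask = "f0"; /* coremask after stripping "0x": cores 4-7 */
        unsigned int idx = 0;

        /* scan from the least significant hex digit upwards */
        for (int i = (int)strlen(mask) - 1; i >= 0; i--) {
                char c = mask[i];
                int val = isdigit((unsigned char)c) ? c - '0'
                        : tolower((unsigned char)c) - 'a' + 10;

                /* each hex digit contributes up to four worker cores */
                for (int j = 0; j < 4; j++, idx++)
                        if (val & (1 << j))
                                printf("worker core %u\n", idx); /* 4, 5, 6, 7 */
        }
        return 0;
}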
@@ -274,7 +306,7 @@ parse_corelist_arg(const char *key __rte_unused,
 {
        struct scheduler_init_params *params = extra_args;
 
-       params->wcmask = 0ULL;
+       params->nb_wc = 0;
 
        const char *token = value;
 
@@ -282,7 +314,11 @@ parse_corelist_arg(const char *key __rte_unused,
                char *rval;
                unsigned int core = strtoul(token, &rval, 10);
 
-               params->wcmask |= 1ULL << core;
+               if (core >= RTE_MAX_LCORE) {
+                       CS_LOG_ERR("Invalid worker core %u, should be smaller "
+                                  "than %u.\n", core, RTE_MAX_LCORE);
+               }
+               params->wc_pool[params->nb_wc++] = (uint16_t)core;
                token = (const char *)rval;
                if (token[0] == '\0')
                        break;
diff --git a/drivers/crypto/scheduler/scheduler_pmd_ops.c b/drivers/crypto/scheduler/scheduler_pmd_ops.c
index d9b5235..75433db 100644
@@ -74,6 +74,7 @@ scheduler_attach_init_slave(struct rte_cryptodev *dev)
                                sched_ctx->init_slave_names[i]);
 
                rte_free(sched_ctx->init_slave_names[i]);
+               sched_ctx->init_slave_names[i] = NULL;
 
                sched_ctx->nb_init_slaves -= 1;
        }
@@ -289,11 +290,15 @@ scheduler_pmd_close(struct rte_cryptodev *dev)
                }
        }
 
-       if (sched_ctx->private_ctx)
+       if (sched_ctx->private_ctx) {
                rte_free(sched_ctx->private_ctx);
+               sched_ctx->private_ctx = NULL;
+       }
 
-       if (sched_ctx->capabilities)
+       if (sched_ctx->capabilities) {
                rte_free(sched_ctx->capabilities);
+               sched_ctx->capabilities = NULL;
+       }
 
        return 0;
 }
diff --git a/drivers/crypto/scheduler/scheduler_pmd_private.h b/drivers/crypto/scheduler/scheduler_pmd_private.h
index e606716..bab4334 100644
@@ -89,7 +89,7 @@ struct scheduler_ctx {
 
        char name[RTE_CRYPTODEV_SCHEDULER_NAME_MAX_LEN];
        char description[RTE_CRYPTODEV_SCHEDULER_DESC_MAX_LEN];
-       uint16_t wc_pool[RTE_CRYPTODEV_SCHEDULER_MAX_NB_WORKER_CORES];
+       uint16_t wc_pool[RTE_MAX_LCORE];
        uint16_t nb_wc;
 
        char *init_slave_names[RTE_CRYPTODEV_SCHEDULER_MAX_NB_SLAVES];
diff --git a/drivers/crypto/zuc/rte_zuc_pmd.c b/drivers/crypto/zuc/rte_zuc_pmd.c
index 590224b..8b13be9 100644
@@ -40,7 +40,7 @@
 
 #include "rte_zuc_pmd_private.h"
 
-#define ZUC_MAX_BURST 8
+#define ZUC_MAX_BURST 4
 #define BYTE_LEN 8
 
 static uint8_t cryptodev_driver_id;
@@ -196,10 +196,10 @@ zuc_get_session(struct zuc_qp *qp, struct rte_crypto_op *op)
        return sess;
 }
 
-/** Encrypt/decrypt mbufs with same cipher key. */
+/** Encrypt/decrypt mbufs. */
 static uint8_t
 process_zuc_cipher_op(struct rte_crypto_op **ops,
-               struct zuc_session *session,
+               struct zuc_session **sessions,
                uint8_t num_ops)
 {
        unsigned i;
@@ -208,6 +208,7 @@ process_zuc_cipher_op(struct rte_crypto_op **ops,
        uint8_t *iv[ZUC_MAX_BURST];
        uint32_t num_bytes[ZUC_MAX_BURST];
        uint8_t *cipher_keys[ZUC_MAX_BURST];
+       struct zuc_session *sess;
 
        for (i = 0; i < num_ops; i++) {
                if (((ops[i]->sym->cipher.data.length % BYTE_LEN) != 0)
@@ -218,6 +219,8 @@ process_zuc_cipher_op(struct rte_crypto_op **ops,
                        break;
                }
 
+               sess = sessions[i];
+
 #ifdef RTE_LIBRTE_PMD_ZUC_DEBUG
                if (!rte_pktmbuf_is_contiguous(ops[i]->sym->m_src) ||
                                (ops[i]->sym->m_dst != NULL &&
@@ -239,10 +242,10 @@ process_zuc_cipher_op(struct rte_crypto_op **ops,
                        rte_pktmbuf_mtod(ops[i]->sym->m_src, uint8_t *) +
                                (ops[i]->sym->cipher.data.offset >> 3);
                iv[i] = rte_crypto_op_ctod_offset(ops[i], uint8_t *,
-                               session->cipher_iv_offset);
+                               sess->cipher_iv_offset);
                num_bytes[i] = ops[i]->sym->cipher.data.length >> 3;
 
-               cipher_keys[i] = session->pKey_cipher;
+               cipher_keys[i] = sess->pKey_cipher;
 
                processed_ops++;
        }
@@ -253,10 +256,10 @@ process_zuc_cipher_op(struct rte_crypto_op **ops,
        return processed_ops;
 }
 
-/** Generate/verify hash from mbufs with same hash key. */
+/** Generate/verify hash from mbufs. */
 static int
 process_zuc_hash_op(struct zuc_qp *qp, struct rte_crypto_op **ops,
-               struct zuc_session *session,
+               struct zuc_session **sessions,
                uint8_t num_ops)
 {
        unsigned i;
@@ -265,6 +268,7 @@ process_zuc_hash_op(struct zuc_qp *qp, struct rte_crypto_op **ops,
        uint32_t *dst;
        uint32_t length_in_bits;
        uint8_t *iv;
+       struct zuc_session *sess;
 
        for (i = 0; i < num_ops; i++) {
                /* Data must be byte aligned */
@@ -274,17 +278,19 @@ process_zuc_hash_op(struct zuc_qp *qp, struct rte_crypto_op **ops,
                        break;
                }
 
+               sess = sessions[i];
+
                length_in_bits = ops[i]->sym->auth.data.length;
 
                src = rte_pktmbuf_mtod(ops[i]->sym->m_src, uint8_t *) +
                                (ops[i]->sym->auth.data.offset >> 3);
                iv = rte_crypto_op_ctod_offset(ops[i], uint8_t *,
-                               session->auth_iv_offset);
+                               sess->auth_iv_offset);
 
-               if (session->auth_op == RTE_CRYPTO_AUTH_OP_VERIFY) {
+               if (sess->auth_op == RTE_CRYPTO_AUTH_OP_VERIFY) {
                        dst = (uint32_t *)qp->temp_digest;
 
-                       sso_zuc_eia3_1_buffer(session->pKey_hash,
+                       sso_zuc_eia3_1_buffer(sess->pKey_hash,
                                        iv, src,
                                        length_in_bits, dst);
                        /* Verify digest. */
@@ -294,7 +300,7 @@ process_zuc_hash_op(struct zuc_qp *qp, struct rte_crypto_op **ops,
                } else  {
                        dst = (uint32_t *)ops[i]->sym->auth.digest.data;
 
-                       sso_zuc_eia3_1_buffer(session->pKey_hash,
+                       sso_zuc_eia3_1_buffer(sess->pKey_hash,
                                        iv, src,
                                        length_in_bits, dst);
                }
@@ -304,33 +310,34 @@ process_zuc_hash_op(struct zuc_qp *qp, struct rte_crypto_op **ops,
        return processed_ops;
 }
 
-/** Process a batch of crypto ops which shares the same session. */
+/** Process a batch of crypto ops which share the same operation type. */
 static int
-process_ops(struct rte_crypto_op **ops, struct zuc_session *session,
+process_ops(struct rte_crypto_op **ops, enum zuc_operation op_type,
+               struct zuc_session **sessions,
                struct zuc_qp *qp, uint8_t num_ops,
                uint16_t *accumulated_enqueued_ops)
 {
        unsigned i;
        unsigned enqueued_ops, processed_ops;
 
-       switch (session->op) {
+       switch (op_type) {
        case ZUC_OP_ONLY_CIPHER:
                processed_ops = process_zuc_cipher_op(ops,
-                               session, num_ops);
+                               sessions, num_ops);
                break;
        case ZUC_OP_ONLY_AUTH:
-               processed_ops = process_zuc_hash_op(qp, ops, session,
+               processed_ops = process_zuc_hash_op(qp, ops, sessions,
                                num_ops);
                break;
        case ZUC_OP_CIPHER_AUTH:
-               processed_ops = process_zuc_cipher_op(ops, session,
+               processed_ops = process_zuc_cipher_op(ops, sessions,
                                num_ops);
-               process_zuc_hash_op(qp, ops, session, processed_ops);
+               process_zuc_hash_op(qp, ops, sessions, processed_ops);
                break;
        case ZUC_OP_AUTH_CIPHER:
-               processed_ops = process_zuc_hash_op(qp, ops, session,
+               processed_ops = process_zuc_hash_op(qp, ops, sessions,
                                num_ops);
-               process_zuc_cipher_op(ops, session, processed_ops);
+               process_zuc_cipher_op(ops, sessions, processed_ops);
                break;
        default:
                /* Operation not supported. */
@@ -346,10 +353,10 @@ process_ops(struct rte_crypto_op **ops, struct zuc_session *session,
                        ops[i]->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
                /* Free session if a session-less crypto op. */
                if (ops[i]->sess_type == RTE_CRYPTO_OP_SESSIONLESS) {
-                       memset(session, 0, sizeof(struct zuc_session));
+                       memset(sessions[i], 0, sizeof(struct zuc_session));
                        memset(ops[i]->sym->session, 0,
                                        rte_cryptodev_get_header_session_size());
-                       rte_mempool_put(qp->sess_mp, session);
+                       rte_mempool_put(qp->sess_mp, sessions[i]);
                        rte_mempool_put(qp->sess_mp, ops[i]->sym->session);
                        ops[i]->sym->session = NULL;
                }
@@ -370,7 +377,10 @@ zuc_pmd_enqueue_burst(void *queue_pair, struct rte_crypto_op **ops,
        struct rte_crypto_op *c_ops[ZUC_MAX_BURST];
        struct rte_crypto_op *curr_c_op;
 
-       struct zuc_session *prev_sess = NULL, *curr_sess = NULL;
+       struct zuc_session *curr_sess;
+       struct zuc_session *sessions[ZUC_MAX_BURST];
+       enum zuc_operation prev_zuc_op = ZUC_OP_NOT_SUPPORTED;
+       enum zuc_operation curr_zuc_op;
        struct zuc_qp *qp = queue_pair;
        unsigned i;
        uint8_t burst_size = 0;
@@ -380,9 +390,6 @@ zuc_pmd_enqueue_burst(void *queue_pair, struct rte_crypto_op **ops,
        for (i = 0; i < nb_ops; i++) {
                curr_c_op = ops[i];
 
-               /* Set status as enqueued (not processed yet) by default. */
-               curr_c_op->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
-
                curr_sess = zuc_get_session(qp, curr_c_op);
                if (unlikely(curr_sess == NULL ||
                                curr_sess->op == ZUC_OP_NOT_SUPPORTED)) {
@@ -391,50 +398,63 @@ zuc_pmd_enqueue_burst(void *queue_pair, struct rte_crypto_op **ops,
                        break;
                }
 
-               /* Batch ops that share the same session. */
-               if (prev_sess == NULL) {
-                       prev_sess = curr_sess;
-                       c_ops[burst_size++] = curr_c_op;
-               } else if (curr_sess == prev_sess) {
-                       c_ops[burst_size++] = curr_c_op;
+               curr_zuc_op = curr_sess->op;
+
+               /*
+                * Batch ops that share the same operation type
+                * (cipher only, auth only...).
+                */
+               if (burst_size == 0) {
+                       prev_zuc_op = curr_zuc_op;
+                       c_ops[0] = curr_c_op;
+                       sessions[0] = curr_sess;
+                       burst_size++;
+               } else if (curr_zuc_op == prev_zuc_op) {
+                       c_ops[burst_size] = curr_c_op;
+                       sessions[burst_size] = curr_sess;
+                       burst_size++;
                        /*
                         * When there are enough ops to process in a batch,
                         * process them, and start a new batch.
                         */
                        if (burst_size == ZUC_MAX_BURST) {
-                               processed_ops = process_ops(c_ops, prev_sess,
-                                               qp, burst_size, &enqueued_ops);
+                               processed_ops = process_ops(c_ops, curr_zuc_op,
+                                               sessions, qp, burst_size,
+                                               &enqueued_ops);
                                if (processed_ops < burst_size) {
                                        burst_size = 0;
                                        break;
                                }
 
                                burst_size = 0;
-                               prev_sess = NULL;
                        }
                } else {
                        /*
-                        * Different session, process the ops
-                        * of the previous session.
+                        * Different operation type, process the ops
+                        * of the previous type.
                         */
-                       processed_ops = process_ops(c_ops, prev_sess,
-                                       qp, burst_size, &enqueued_ops);
+                       processed_ops = process_ops(c_ops, prev_zuc_op,
+                                       sessions, qp, burst_size,
+                                       &enqueued_ops);
                        if (processed_ops < burst_size) {
                                burst_size = 0;
                                break;
                        }
 
                        burst_size = 0;
-                       prev_sess = curr_sess;
+                       prev_zuc_op = curr_zuc_op;
 
-                       c_ops[burst_size++] = curr_c_op;
+                       c_ops[0] = curr_c_op;
+                       sessions[0] = curr_sess;
+                       burst_size++;
                }
        }
 
        if (burst_size != 0) {
-               /* Process the crypto ops of the last session. */
-               processed_ops = process_ops(c_ops, prev_sess,
-                               qp, burst_size, &enqueued_ops);
+               /* Process the crypto ops of the last operation type. */
+               processed_ops = process_ops(c_ops, prev_zuc_op,
+                               sessions, qp, burst_size,
+                               &enqueued_ops);
        }
 
        qp->qp_stats.enqueue_err_count += nb_ops - enqueued_ops;
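The reworked enqueue path batches consecutive ops by operation type rather than by session, carrying a per-op session array so keys and IVs may differ within one batch; a batch is flushed when the type changes or the burst fills. A condensed, self-contained sketch of that accumulate-and-flush shape (process() stands in for process_ops(); every name is illustrative):

#include <stdint.h>

#define MAX_BURST 4 /* mirrors ZUC_MAX_BURST */

enum op_type { OP_CIPHER, OP_AUTH };

struct item {
        enum op_type type;
};

/* Stand-in for process_ops(): consume a batch of a single type. */
static unsigned int
process(struct item **batch, enum op_type type, unsigned int n)
{
        (void)batch;
        (void)type;
        return n;
}

static void
enqueue(struct item **items, unsigned int nb)
{
        struct item *batch[MAX_BURST];
        enum op_type prev = OP_CIPHER;
        unsigned int i, size = 0;

        for (i = 0; i < nb; i++) {
                if (size != 0 && items[i]->type != prev) {
                        process(batch, prev, size); /* type changed: flush */
                        size = 0;
                }
                prev = items[i]->type;
                batch[size++] = items[i];
                if (size == MAX_BURST) {
                        process(batch, prev, size); /* burst full: flush */
                        size = 0;
                }
        }
        if (size != 0)
                process(batch, prev, size); /* flush the tail */
}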
index eeeb231..56ea124 100644
@@ -489,7 +489,6 @@ dpaa2_eventdev_port_unlink(struct rte_eventdev *dev, void *port,
                dpio_remove_static_dequeue_channel(dpaa2_portal->dpio_dev->dpio,
                                        0, dpaa2_portal->dpio_dev->token,
                        evq_info->dpcon->dpcon_id);
-               evq_info->link = 0;
        }
 
        return (int)nb_unlinks;
@@ -510,8 +509,6 @@ dpaa2_eventdev_port_link(struct rte_eventdev *dev, void *port,
 
        for (i = 0; i < nb_links; i++) {
                evq_info = &priv->evq_info[queues[i]];
-               if (evq_info->link)
-                       continue;
 
                ret = dpio_add_static_dequeue_channel(
                        dpaa2_portal->dpio_dev->dpio,
@@ -526,7 +523,6 @@ dpaa2_eventdev_port_link(struct rte_eventdev *dev, void *port,
                qbman_swp_push_set(dpaa2_portal->dpio_dev->sw_portal,
                                   channel_index, 1);
                evq_info->dpcon->channel_index = channel_index;
-               evq_info->link = 1;
        }
 
        RTE_SET_USED(priorities);
@@ -540,7 +536,6 @@ err:
                dpio_remove_static_dequeue_channel(dpaa2_portal->dpio_dev->dpio,
                                        0, dpaa2_portal->dpio_dev->token,
                        evq_info->dpcon->dpcon_id);
-               evq_info->link = 0;
        }
        return ret;
 }
index ae8e07e..5b9c80e 100644
@@ -100,7 +100,6 @@ struct evq_info_t {
        struct dpaa2_dpci_dev *dpci;
        /* Configuration provided by the user */
        uint32_t event_queue_cfg;
-       uint8_t link;
 };
 
 struct dpaa2_eventdev {
index d515408..5a101ce 100644
@@ -124,7 +124,7 @@ static struct rte_eth_link pmd_link = {
        .link_speed = ETH_SPEED_NUM_10G,
        .link_duplex = ETH_LINK_FULL_DUPLEX,
        .link_status = ETH_LINK_DOWN,
-       .link_autoneg = ETH_LINK_AUTONEG
+       .link_autoneg = ETH_LINK_FIXED,
 };
 
 static uint16_t
index 9394f6c..98b08d1 100644
@@ -170,10 +170,10 @@ bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size, struct bnx2x_dma *dma,
 
        dma->sc = sc;
        if (IS_PF(sc))
-               sprintf(mz_name, "bnx2x%d_%s_%" PRIx64, SC_ABS_FUNC(sc), msg,
+               snprintf(mz_name, sizeof(mz_name), "bnx2x%d_%s_%" PRIx64, SC_ABS_FUNC(sc), msg,
                        rte_get_timer_cycles());
        else
-               sprintf(mz_name, "bnx2x%d_%s_%" PRIx64, sc->pcie_device, msg,
+               snprintf(mz_name, sizeof(mz_name), "bnx2x%d_%s_%" PRIx64, sc->pcie_device, msg,
                        rte_get_timer_cycles());
 
        /* Caller must take care that strlen(mz_name) < RTE_MEMZONE_NAMESIZE */
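sprintf() into the fixed mz_name buffer can overflow if the formatted name ever grows; snprintf() truncates at the buffer size instead. The bounded pattern in isolation (function and parameter names assumed):

#include <inttypes.h>
#include <stdio.h>

static void
make_mz_name(char *buf, size_t len, int func, const char *msg,
             uint64_t cycles)
{
        /* snprintf() writes at most len bytes including the
         * terminating NUL, so an oversized msg is truncated
         * rather than overrunning the buffer. */
        snprintf(buf, len, "bnx2x%d_%s_%" PRIx64, func, msg, cycles);
}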
@@ -8289,16 +8289,6 @@ static int bnx2x_get_device_info(struct bnx2x_softc *sc)
                        REG_WR(sc, PXP2_REG_PGL_ADDR_90_F1, 0);
                        REG_WR(sc, PXP2_REG_PGL_ADDR_94_F1, 0);
                }
-
-/*
- * Enable internal target-read (in case we are probed after PF
- * FLR). Must be done prior to any BAR read access. Only for
- * 57712 and up
- */
-               if (!CHIP_IS_E1x(sc)) {
-                       REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_TARGET_READ,
-                              1);
-               }
        }
 
        /* get the nvram size */
@@ -9675,7 +9665,17 @@ int bnx2x_attach(struct bnx2x_softc *sc)
        bnx2x_init_rte(sc);
 
        if (IS_PF(sc)) {
-/* get device info and set params */
+               /* Enable internal target-read (in case we are probed after PF
+                * FLR). Must be done prior to any BAR read access. Only for
+                * 57712 and up
+                */
+               if (!CHIP_IS_E1x(sc)) {
+                       REG_WR(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_TARGET_READ,
+                              1);
+                       DELAY(200000);
+               }
+
+               /* get device info and set params */
                if (bnx2x_get_device_info(sc) != 0) {
                        PMD_DRV_LOG(NOTICE, "getting device info");
                        return -ENXIO;
@@ -9684,7 +9684,7 @@ int bnx2x_attach(struct bnx2x_softc *sc)
 /* get phy settings from shmem and 'and' against admin settings */
                bnx2x_get_phy_info(sc);
        } else {
-/* Left mac of VF unfilled, PF should set it for VF */
+               /* Left mac of VF unfilled, PF should set it for VF */
                memset(sc->link_params.mac_addr, 0, ETHER_ADDR_LEN);
        }
 
index 9d0f313..74e1bea 100644
@@ -4143,9 +4143,9 @@ static void elink_sfp_e3_set_transmitter(struct elink_params *params,
                elink_set_cfg_pin(sc, cfg_pin + 3, tx_en ^ 1);
 }
 
-static void elink_warpcore_config_init(struct elink_phy *phy,
-                                      struct elink_params *params,
-                                      struct elink_vars *vars)
+static uint8_t elink_warpcore_config_init(struct elink_phy *phy,
+                                         struct elink_params *params,
+                                         struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint32_t serdes_net_if;
@@ -4222,7 +4222,7 @@ static void elink_warpcore_config_init(struct elink_phy *phy,
                case PORT_HW_CFG_NET_SERDES_IF_DXGXS:
                        if (vars->line_speed != ELINK_SPEED_20000) {
                                PMD_DRV_LOG(DEBUG, "Speed not supported yet");
-                               return;
+                               return 0;
                        }
                        PMD_DRV_LOG(DEBUG, "Setting 20G DXGXS");
                        elink_warpcore_set_20G_DXGXS(sc, phy, lane);
@@ -4242,13 +4242,15 @@ static void elink_warpcore_config_init(struct elink_phy *phy,
                        PMD_DRV_LOG(DEBUG,
                                    "Unsupported Serdes Net Interface 0x%x",
                                    serdes_net_if);
-                       return;
+                       return 0;
                }
        }
 
        /* Take lane out of reset after configuration is finished */
        elink_warpcore_reset_lane(sc, phy, 0);
        PMD_DRV_LOG(DEBUG, "Exit config init");
+
+       return 0;
 }
 
 static void elink_warpcore_link_reset(struct elink_phy *phy,
@@ -5226,9 +5228,9 @@ static elink_status_t elink_get_link_speed_duplex(struct elink_phy *phy,
        return ELINK_STATUS_OK;
 }
 
-static elink_status_t elink_link_settings_status(struct elink_phy *phy,
-                                                struct elink_params *params,
-                                                struct elink_vars *vars)
+static uint8_t elink_link_settings_status(struct elink_phy *phy,
+                                         struct elink_params *params,
+                                         struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
 
@@ -5299,9 +5301,9 @@ static elink_status_t elink_link_settings_status(struct elink_phy *phy,
        return rc;
 }
 
-static elink_status_t elink_warpcore_read_status(struct elink_phy *phy,
-                                                struct elink_params *params,
-                                                struct elink_vars *vars)
+static uint8_t elink_warpcore_read_status(struct elink_phy *phy,
+                                         struct elink_params *params,
+                                         struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t lane;
@@ -5520,9 +5522,9 @@ static void elink_set_preemphasis(struct elink_phy *phy,
        }
 }
 
-static void elink_xgxs_config_init(struct elink_phy *phy,
-                                  struct elink_params *params,
-                                  struct elink_vars *vars)
+static uint8_t elink_xgxs_config_init(struct elink_phy *phy,
+                                     struct elink_params *params,
+                                     struct elink_vars *vars)
 {
        uint8_t enable_cl73 = (ELINK_SINGLE_MEDIA_DIRECT(params) ||
                               (params->loopback_mode == ELINK_LOOPBACK_XGXS));
@@ -5567,6 +5569,8 @@ static void elink_xgxs_config_init(struct elink_phy *phy,
 
                elink_initialize_sgmii_process(phy, params, vars);
        }
+
+       return 0;
 }
 
 static elink_status_t elink_prepare_xgxs(struct elink_phy *phy,
@@ -5751,8 +5755,8 @@ static void elink_link_int_ack(struct elink_params *params,
        }
 }
 
-static elink_status_t elink_format_ver(uint32_t num, uint8_t * str,
-                                      uint16_t * len)
+static uint8_t elink_format_ver(uint32_t num, uint8_t * str,
+                               uint16_t * len)
 {
        uint8_t *str_ptr = str;
        uint32_t mask = 0xf0000000;
@@ -5790,8 +5794,8 @@ static elink_status_t elink_format_ver(uint32_t num, uint8_t * str,
        return ELINK_STATUS_OK;
 }
 
-static elink_status_t elink_null_format_ver(__rte_unused uint32_t spirom_ver,
-                                           uint8_t * str, uint16_t * len)
+static uint8_t elink_null_format_ver(__rte_unused uint32_t spirom_ver,
+                                    uint8_t * str, uint16_t * len)
 {
        str[0] = '\0';
        (*len)--;
@@ -6802,9 +6806,9 @@ static void elink_8073_specific_func(struct elink_phy *phy,
        }
 }
 
-static elink_status_t elink_8073_config_init(struct elink_phy *phy,
-                                            struct elink_params *params,
-                                            struct elink_vars *vars)
+static uint8_t elink_8073_config_init(struct elink_phy *phy,
+                                     struct elink_params *params,
+                                     struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t val = 0, tmp1;
@@ -7097,9 +7101,9 @@ static void elink_8073_link_reset(__rte_unused struct elink_phy *phy,
 /******************************************************************/
 /*                     BNX2X8705 PHY SECTION                     */
 /******************************************************************/
-static elink_status_t elink_8705_config_init(struct elink_phy *phy,
-                                            struct elink_params *params,
-                                            __rte_unused struct elink_vars
+static uint8_t elink_8705_config_init(struct elink_phy *phy,
+                                     struct elink_params *params,
+                                     __rte_unused struct elink_vars
                                             *vars)
 {
        struct bnx2x_softc *sc = params->sc;
@@ -8403,9 +8407,9 @@ static uint8_t elink_8706_config_init(struct elink_phy *phy,
        return ELINK_STATUS_OK;
 }
 
-static elink_status_t elink_8706_read_status(struct elink_phy *phy,
-                                            struct elink_params *params,
-                                            struct elink_vars *vars)
+static uint8_t elink_8706_read_status(struct elink_phy *phy,
+                                     struct elink_params *params,
+                                     struct elink_vars *vars)
 {
        return elink_8706_8726_read_status(phy, params, vars);
 }
@@ -8477,9 +8481,9 @@ static uint8_t elink_8726_read_status(struct elink_phy *phy,
        return link_up;
 }
 
-static elink_status_t elink_8726_config_init(struct elink_phy *phy,
-                                            struct elink_params *params,
-                                            struct elink_vars *vars)
+static uint8_t elink_8726_config_init(struct elink_phy *phy,
+                                     struct elink_params *params,
+                                     struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        PMD_DRV_LOG(DEBUG, "Initializing BNX2X8726");
@@ -8684,9 +8688,9 @@ static void elink_8727_config_speed(struct elink_phy *phy,
        }
 }
 
-static elink_status_t elink_8727_config_init(struct elink_phy *phy,
-                                            struct elink_params *params,
-                                            __rte_unused struct elink_vars
+static uint8_t elink_8727_config_init(struct elink_phy *phy,
+                                     struct elink_params *params,
+                                     __rte_unused struct elink_vars
                                             *vars)
 {
        uint32_t tx_en_mode;
@@ -9291,7 +9295,7 @@ static elink_status_t elink_848xx_cmn_config_init(struct elink_phy *phy,
        return ELINK_STATUS_OK;
 }
 
-static elink_status_t elink_8481_config_init(struct elink_phy *phy,
+static uint8_t elink_8481_config_init(struct elink_phy *phy,
                                             struct elink_params *params,
                                             struct elink_vars *vars)
 {
@@ -9442,8 +9446,8 @@ static uint8_t elink_84833_get_reset_gpios(struct bnx2x_softc *sc,
        return reset_gpios;
 }
 
-static elink_status_t elink_84833_hw_reset_phy(struct elink_phy *phy,
-                                              struct elink_params *params)
+static void elink_84833_hw_reset_phy(struct elink_phy *phy,
+                                       struct elink_params *params)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t reset_gpios;
@@ -9471,8 +9475,6 @@ static elink_status_t elink_84833_hw_reset_phy(struct elink_phy *phy,
                                 MISC_REGISTERS_GPIO_OUTPUT_LOW);
        DELAY(10);
        PMD_DRV_LOG(DEBUG, "84833 hw reset on pin values 0x%x", reset_gpios);
-
-       return ELINK_STATUS_OK;
 }
 
 static elink_status_t elink_8483x_disable_eee(struct elink_phy *phy,
@@ -9513,9 +9515,9 @@ static elink_status_t elink_8483x_enable_eee(struct elink_phy *phy,
 }
 
 #define PHY84833_CONSTANT_LATENCY 1193
-static elink_status_t elink_848x3_config_init(struct elink_phy *phy,
-                                             struct elink_params *params,
-                                             struct elink_vars *vars)
+static uint8_t elink_848x3_config_init(struct elink_phy *phy,
+                                      struct elink_params *params,
+                                      struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t port, initialize = 1;
@@ -9819,7 +9821,7 @@ static uint8_t elink_848xx_read_status(struct elink_phy *phy,
        return link_up;
 }
 
-static elink_status_t elink_848xx_format_ver(uint32_t raw_ver, uint8_t * str,
+static uint8_t elink_848xx_format_ver(uint32_t raw_ver, uint8_t * str,
                                             uint16_t * len)
 {
        elink_status_t status = ELINK_STATUS_OK;
@@ -10146,9 +10148,9 @@ static void elink_54618se_specific_func(struct elink_phy *phy,
        }
 }
 
-static elink_status_t elink_54618se_config_init(struct elink_phy *phy,
-                                               struct elink_params *params,
-                                               struct elink_vars *vars)
+static uint8_t elink_54618se_config_init(struct elink_phy *phy,
+                                        struct elink_params *params,
+                                        struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t port;
@@ -10542,9 +10544,9 @@ static void elink_7101_config_loopback(struct elink_phy *phy,
                         MDIO_XS_DEVAD, MDIO_XS_SFX7101_XGXS_TEST1, 0x100);
 }
 
-static elink_status_t elink_7101_config_init(struct elink_phy *phy,
-                                            struct elink_params *params,
-                                            struct elink_vars *vars)
+static uint8_t elink_7101_config_init(struct elink_phy *phy,
+                                     struct elink_params *params,
+                                     struct elink_vars *vars)
 {
        uint16_t fw_ver1, fw_ver2, val;
        struct bnx2x_softc *sc = params->sc;
@@ -10614,8 +10616,8 @@ static uint8_t elink_7101_read_status(struct elink_phy *phy,
        return link_up;
 }
 
-static elink_status_t elink_7101_format_ver(uint32_t spirom_ver, uint8_t * str,
-                                           uint16_t * len)
+static uint8_t elink_7101_format_ver(uint32_t spirom_ver, uint8_t * str,
+                                    uint16_t * len)
 {
        if (*len < 5)
                return ELINK_STATUS_ERROR;
@@ -10680,14 +10682,14 @@ static const struct elink_phy phy_null = {
        .speed_cap_mask = 0,
        .req_duplex = 0,
        .rsrv = 0,
-       .config_init = (config_init_t) NULL,
-       .read_status = (read_status_t) NULL,
-       .link_reset = (link_reset_t) NULL,
-       .config_loopback = (config_loopback_t) NULL,
-       .format_fw_ver = (format_fw_ver_t) NULL,
-       .hw_reset = (hw_reset_t) NULL,
-       .set_link_led = (set_link_led_t) NULL,
-       .phy_specific_func = (phy_specific_func_t) NULL
+       .config_init = NULL,
+       .read_status = NULL,
+       .link_reset = NULL,
+       .config_loopback = NULL,
+       .format_fw_ver = NULL,
+       .hw_reset = NULL,
+       .set_link_led = NULL,
+       .phy_specific_func = NULL
 };
 
 static const struct elink_phy phy_serdes = {
@@ -10714,14 +10716,14 @@ static const struct elink_phy phy_serdes = {
        .speed_cap_mask = 0,
        .req_duplex = 0,
        .rsrv = 0,
-       .config_init = (config_init_t) elink_xgxs_config_init,
-       .read_status = (read_status_t) elink_link_settings_status,
-       .link_reset = (link_reset_t) elink_int_link_reset,
-       .config_loopback = (config_loopback_t) NULL,
-       .format_fw_ver = (format_fw_ver_t) NULL,
-       .hw_reset = (hw_reset_t) NULL,
-       .set_link_led = (set_link_led_t) NULL,
-       .phy_specific_func = (phy_specific_func_t) NULL
+       .config_init = elink_xgxs_config_init,
+       .read_status = elink_link_settings_status,
+       .link_reset = elink_int_link_reset,
+       .config_loopback = NULL,
+       .format_fw_ver = NULL,
+       .hw_reset = NULL,
+       .set_link_led = NULL,
+       .phy_specific_func = NULL
 };
 
 static const struct elink_phy phy_xgxs = {
@@ -10749,14 +10751,14 @@ static const struct elink_phy phy_xgxs = {
        .speed_cap_mask = 0,
        .req_duplex = 0,
        .rsrv = 0,
-       .config_init = (config_init_t) elink_xgxs_config_init,
-       .read_status = (read_status_t) elink_link_settings_status,
-       .link_reset = (link_reset_t) elink_int_link_reset,
-       .config_loopback = (config_loopback_t) elink_set_xgxs_loopback,
-       .format_fw_ver = (format_fw_ver_t) NULL,
-       .hw_reset = (hw_reset_t) NULL,
-       .set_link_led = (set_link_led_t) NULL,
-       .phy_specific_func = (phy_specific_func_t) elink_xgxs_specific_func
+       .config_init = elink_xgxs_config_init,
+       .read_status = elink_link_settings_status,
+       .link_reset = elink_int_link_reset,
+       .config_loopback = elink_set_xgxs_loopback,
+       .format_fw_ver = NULL,
+       .hw_reset = NULL,
+       .set_link_led = NULL,
+       .phy_specific_func = elink_xgxs_specific_func
 };
 
 static const struct elink_phy phy_warpcore = {
@@ -10785,14 +10787,14 @@ static const struct elink_phy phy_warpcore = {
        .speed_cap_mask = 0,
        /* req_duplex = */ 0,
        /* rsrv = */ 0,
-       .config_init = (config_init_t) elink_warpcore_config_init,
-       .read_status = (read_status_t) elink_warpcore_read_status,
-       .link_reset = (link_reset_t) elink_warpcore_link_reset,
-       .config_loopback = (config_loopback_t) elink_set_warpcore_loopback,
-       .format_fw_ver = (format_fw_ver_t) NULL,
-       .hw_reset = (hw_reset_t) elink_warpcore_hw_reset,
-       .set_link_led = (set_link_led_t) NULL,
-       .phy_specific_func = (phy_specific_func_t) NULL
+       .config_init = elink_warpcore_config_init,
+       .read_status = elink_warpcore_read_status,
+       .link_reset = elink_warpcore_link_reset,
+       .config_loopback = elink_set_warpcore_loopback,
+       .format_fw_ver = NULL,
+       .hw_reset = elink_warpcore_hw_reset,
+       .set_link_led = NULL,
+       .phy_specific_func = NULL
 };
 
 static const struct elink_phy phy_7101 = {
@@ -10814,14 +10816,14 @@ static const struct elink_phy phy_7101 = {
        .speed_cap_mask = 0,
        .req_duplex = 0,
        .rsrv = 0,
-       .config_init = (config_init_t) elink_7101_config_init,
-       .read_status = (read_status_t) elink_7101_read_status,
-       .link_reset = (link_reset_t) elink_common_ext_link_reset,
-       .config_loopback = (config_loopback_t) elink_7101_config_loopback,
-       .format_fw_ver = (format_fw_ver_t) elink_7101_format_ver,
-       .hw_reset = (hw_reset_t) elink_7101_hw_reset,
-       .set_link_led = (set_link_led_t) elink_7101_set_link_led,
-       .phy_specific_func = (phy_specific_func_t) NULL
+       .config_init = elink_7101_config_init,
+       .read_status = elink_7101_read_status,
+       .link_reset = elink_common_ext_link_reset,
+       .config_loopback = elink_7101_config_loopback,
+       .format_fw_ver = elink_7101_format_ver,
+       .hw_reset = elink_7101_hw_reset,
+       .set_link_led = elink_7101_set_link_led,
+       .phy_specific_func = NULL
 };
 
 static const struct elink_phy phy_8073 = {
@@ -10845,14 +10847,14 @@ static const struct elink_phy phy_8073 = {
        .speed_cap_mask = 0,
        .req_duplex = 0,
        .rsrv = 0,
-       .config_init = (config_init_t) elink_8073_config_init,
-       .read_status = (read_status_t) elink_8073_read_status,
-       .link_reset = (link_reset_t) elink_8073_link_reset,
-       .config_loopback = (config_loopback_t) NULL,
-       .format_fw_ver = (format_fw_ver_t) elink_format_ver,
-       .hw_reset = (hw_reset_t) NULL,
-       .set_link_led = (set_link_led_t) NULL,
-       .phy_specific_func = (phy_specific_func_t) elink_8073_specific_func
+       .config_init = elink_8073_config_init,
+       .read_status = elink_8073_read_status,
+       .link_reset = elink_8073_link_reset,
+       .config_loopback = NULL,
+       .format_fw_ver = elink_format_ver,
+       .hw_reset = NULL,
+       .set_link_led = NULL,
+       .phy_specific_func = elink_8073_specific_func
 };
 
 static const struct elink_phy phy_8705 = {
@@ -10873,14 +10875,14 @@ static const struct elink_phy phy_8705 = {
        .speed_cap_mask = 0,
        .req_duplex = 0,
        .rsrv = 0,
-       .config_init = (config_init_t) elink_8705_config_init,
-       .read_status = (read_status_t) elink_8705_read_status,
-       .link_reset = (link_reset_t) elink_common_ext_link_reset,
-       .config_loopback = (config_loopback_t) NULL,
-       .format_fw_ver = (format_fw_ver_t) elink_null_format_ver,
-       .hw_reset = (hw_reset_t) NULL,
-       .set_link_led = (set_link_led_t) NULL,
-       .phy_specific_func = (phy_specific_func_t) NULL
+       .config_init = elink_8705_config_init,
+       .read_status = elink_8705_read_status,
+       .link_reset = elink_common_ext_link_reset,
+       .config_loopback = NULL,
+       .format_fw_ver = elink_null_format_ver,
+       .hw_reset = NULL,
+       .set_link_led = NULL,
+       .phy_specific_func = NULL
 };
 
 static const struct elink_phy phy_8706 = {
@@ -10902,14 +10904,14 @@ static const struct elink_phy phy_8706 = {
        .speed_cap_mask = 0,
        .req_duplex = 0,
        .rsrv = 0,
-       .config_init = (config_init_t) elink_8706_config_init,
-       .read_status = (read_status_t) elink_8706_read_status,
-       .link_reset = (link_reset_t) elink_common_ext_link_reset,
-       .config_loopback = (config_loopback_t) NULL,
-       .format_fw_ver = (format_fw_ver_t) elink_format_ver,
-       .hw_reset = (hw_reset_t) NULL,
-       .set_link_led = (set_link_led_t) NULL,
-       .phy_specific_func = (phy_specific_func_t) NULL
+       .config_init = elink_8706_config_init,
+       .read_status = elink_8706_read_status,
+       .link_reset = elink_common_ext_link_reset,
+       .config_loopback = NULL,
+       .format_fw_ver = elink_format_ver,
+       .hw_reset = NULL,
+       .set_link_led = NULL,
+       .phy_specific_func = NULL
 };
 
 static const struct elink_phy phy_8726 = {
@@ -10932,14 +10934,14 @@ static const struct elink_phy phy_8726 = {
        .speed_cap_mask = 0,
        .req_duplex = 0,
        .rsrv = 0,
-       .config_init = (config_init_t) elink_8726_config_init,
-       .read_status = (read_status_t) elink_8726_read_status,
-       .link_reset = (link_reset_t) elink_8726_link_reset,
-       .config_loopback = (config_loopback_t) elink_8726_config_loopback,
-       .format_fw_ver = (format_fw_ver_t) elink_format_ver,
-       .hw_reset = (hw_reset_t) NULL,
-       .set_link_led = (set_link_led_t) NULL,
-       .phy_specific_func = (phy_specific_func_t) NULL
+       .config_init = elink_8726_config_init,
+       .read_status = elink_8726_read_status,
+       .link_reset = elink_8726_link_reset,
+       .config_loopback = elink_8726_config_loopback,
+       .format_fw_ver = elink_format_ver,
+       .hw_reset = NULL,
+       .set_link_led = NULL,
+       .phy_specific_func = NULL
 };
 
 static const struct elink_phy phy_8727 = {
@@ -10961,14 +10963,14 @@ static const struct elink_phy phy_8727 = {
        .speed_cap_mask = 0,
        .req_duplex = 0,
        .rsrv = 0,
-       .config_init = (config_init_t) elink_8727_config_init,
-       .read_status = (read_status_t) elink_8727_read_status,
-       .link_reset = (link_reset_t) elink_8727_link_reset,
-       .config_loopback = (config_loopback_t) NULL,
-       .format_fw_ver = (format_fw_ver_t) elink_format_ver,
-       .hw_reset = (hw_reset_t) elink_8727_hw_reset,
-       .set_link_led = (set_link_led_t) elink_8727_set_link_led,
-       .phy_specific_func = (phy_specific_func_t) elink_8727_specific_func
+       .config_init = elink_8727_config_init,
+       .read_status = elink_8727_read_status,
+       .link_reset = elink_8727_link_reset,
+       .config_loopback = NULL,
+       .format_fw_ver = elink_format_ver,
+       .hw_reset = elink_8727_hw_reset,
+       .set_link_led = elink_8727_set_link_led,
+       .phy_specific_func = elink_8727_specific_func
 };
 
 static const struct elink_phy phy_8481 = {
@@ -10996,14 +10998,14 @@ static const struct elink_phy phy_8481 = {
        .speed_cap_mask = 0,
        .req_duplex = 0,
        .rsrv = 0,
-       .config_init = (config_init_t) elink_8481_config_init,
-       .read_status = (read_status_t) elink_848xx_read_status,
-       .link_reset = (link_reset_t) elink_8481_link_reset,
-       .config_loopback = (config_loopback_t) NULL,
-       .format_fw_ver = (format_fw_ver_t) elink_848xx_format_ver,
-       .hw_reset = (hw_reset_t) elink_8481_hw_reset,
-       .set_link_led = (set_link_led_t) elink_848xx_set_link_led,
-       .phy_specific_func = (phy_specific_func_t) NULL
+       .config_init = elink_8481_config_init,
+       .read_status = elink_848xx_read_status,
+       .link_reset = elink_8481_link_reset,
+       .config_loopback = NULL,
+       .format_fw_ver = elink_848xx_format_ver,
+       .hw_reset = elink_8481_hw_reset,
+       .set_link_led = elink_848xx_set_link_led,
+       .phy_specific_func = NULL
 };
 
 static const struct elink_phy phy_84823 = {
@@ -11031,14 +11033,14 @@ static const struct elink_phy phy_84823 = {
        .speed_cap_mask = 0,
        .req_duplex = 0,
        .rsrv = 0,
-       .config_init = (config_init_t) elink_848x3_config_init,
-       .read_status = (read_status_t) elink_848xx_read_status,
-       .link_reset = (link_reset_t) elink_848x3_link_reset,
-       .config_loopback = (config_loopback_t) NULL,
-       .format_fw_ver = (format_fw_ver_t) elink_848xx_format_ver,
-       .hw_reset = (hw_reset_t) NULL,
-       .set_link_led = (set_link_led_t) elink_848xx_set_link_led,
-       .phy_specific_func = (phy_specific_func_t) elink_848xx_specific_func
+       .config_init = elink_848x3_config_init,
+       .read_status = elink_848xx_read_status,
+       .link_reset = elink_848x3_link_reset,
+       .config_loopback = NULL,
+       .format_fw_ver = elink_848xx_format_ver,
+       .hw_reset = NULL,
+       .set_link_led = elink_848xx_set_link_led,
+       .phy_specific_func = elink_848xx_specific_func
 };
 
 static const struct elink_phy phy_84833 = {
@@ -11065,14 +11067,14 @@ static const struct elink_phy phy_84833 = {
        .speed_cap_mask = 0,
        .req_duplex = 0,
        .rsrv = 0,
-       .config_init = (config_init_t) elink_848x3_config_init,
-       .read_status = (read_status_t) elink_848xx_read_status,
-       .link_reset = (link_reset_t) elink_848x3_link_reset,
-       .config_loopback = (config_loopback_t) NULL,
-       .format_fw_ver = (format_fw_ver_t) elink_848xx_format_ver,
-       .hw_reset = (hw_reset_t) elink_84833_hw_reset_phy,
-       .set_link_led = (set_link_led_t) elink_848xx_set_link_led,
-       .phy_specific_func = (phy_specific_func_t) elink_848xx_specific_func
+       .config_init = elink_848x3_config_init,
+       .read_status = elink_848xx_read_status,
+       .link_reset = elink_848x3_link_reset,
+       .config_loopback = NULL,
+       .format_fw_ver = elink_848xx_format_ver,
+       .hw_reset = elink_84833_hw_reset_phy,
+       .set_link_led = elink_848xx_set_link_led,
+       .phy_specific_func = elink_848xx_specific_func
 };
 
 static const struct elink_phy phy_84834 = {
@@ -11098,14 +11100,14 @@ static const struct elink_phy phy_84834 = {
        .speed_cap_mask = 0,
        .req_duplex = 0,
        .rsrv = 0,
-       .config_init = (config_init_t) elink_848x3_config_init,
-       .read_status = (read_status_t) elink_848xx_read_status,
-       .link_reset = (link_reset_t) elink_848x3_link_reset,
-       .config_loopback = (config_loopback_t) NULL,
-       .format_fw_ver = (format_fw_ver_t) elink_848xx_format_ver,
-       .hw_reset = (hw_reset_t) elink_84833_hw_reset_phy,
-       .set_link_led = (set_link_led_t) elink_848xx_set_link_led,
-       .phy_specific_func = (phy_specific_func_t) elink_848xx_specific_func
+       .config_init = elink_848x3_config_init,
+       .read_status = elink_848xx_read_status,
+       .link_reset = elink_848x3_link_reset,
+       .config_loopback = NULL,
+       .format_fw_ver = elink_848xx_format_ver,
+       .hw_reset = elink_84833_hw_reset_phy,
+       .set_link_led = elink_848xx_set_link_led,
+       .phy_specific_func = elink_848xx_specific_func
 };
 
 static const struct elink_phy phy_54618se = {
@@ -11131,14 +11133,14 @@ static const struct elink_phy phy_54618se = {
        .speed_cap_mask = 0,
        /* req_duplex = */ 0,
        /* rsrv = */ 0,
-       .config_init = (config_init_t) elink_54618se_config_init,
-       .read_status = (read_status_t) elink_54618se_read_status,
-       .link_reset = (link_reset_t) elink_54618se_link_reset,
-       .config_loopback = (config_loopback_t) elink_54618se_config_loopback,
-       .format_fw_ver = (format_fw_ver_t) NULL,
-       .hw_reset = (hw_reset_t) NULL,
-       .set_link_led = (set_link_led_t) elink_5461x_set_link_led,
-       .phy_specific_func = (phy_specific_func_t) elink_54618se_specific_func
+       .config_init = elink_54618se_config_init,
+       .read_status = elink_54618se_read_status,
+       .link_reset = elink_54618se_link_reset,
+       .config_loopback = elink_54618se_config_loopback,
+       .format_fw_ver = NULL,
+       .hw_reset = NULL,
+       .set_link_led = elink_5461x_set_link_led,
+       .phy_specific_func = elink_54618se_specific_func
 };
 
 /*****************************************************************/
@@ -12919,7 +12921,7 @@ static void elink_check_kr2_wa(struct elink_params *params,
         */
        not_kr2_device = (((base_page & 0x8000) == 0) ||
                          (((base_page & 0x8000) &&
-                           ((next_page & 0xe0) == 0x2))));
+                           ((next_page & 0xe0) == 0x20))));
 
        /* In case KR2 is already disabled, check if we need to re-enable it */
        if (!(vars->link_attr_sync & LINK_ATTR_SYNC_KR2_ENABLE)) {
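Most of this file's churn removes casts such as (config_init_t) by giving every callback one prototype returning uint8_t; calling a function through a pointer of an incompatible type is undefined behavior in C, so matching the signatures, not casting them, is the real fix. (The last hunk separately corrects the KR2 next-page mask test from 0x2 to 0x20.) A reduced illustration with assumed names:

#include <stdint.h>

struct phy; /* opaque here */

typedef uint8_t (*config_init_t)(struct phy *phy);

/* With the return type harmonized to uint8_t, the function can
 * be stored in the ops table without a cast. */
static uint8_t
xgxs_config_init(struct phy *phy)
{
        (void)phy;
        return 0;
}

struct phy_ops {
        config_init_t config_init;
};

static const struct phy_ops phy_xgxs_ops = {
        .config_init = xgxs_config_init, /* no (config_init_t) cast */
};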
index 3eeca6f..52c511e 100644
@@ -400,10 +400,6 @@ static int bnxt_init_nic(struct bnxt *bp)
        bnxt_init_vnics(bp);
        bnxt_init_filters(bp);
 
-       rc = bnxt_init_chip(bp);
-       if (rc)
-               return rc;
-
        return 0;
 }
 
@@ -465,7 +461,8 @@ static void bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev,
                        .wthresh = 0,
                },
                .rx_free_thresh = 32,
-               .rx_drop_en = 0,
+               /* If no descriptors available, pkts are dropped by default */
+               .rx_drop_en = 1,
        };
 
        dev_info->default_txconf = (struct rte_eth_txconf) {
@@ -572,7 +569,7 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
        }
        bp->dev_stopped = 0;
 
-       rc = bnxt_init_nic(bp);
+       rc = bnxt_init_chip(bp);
        if (rc)
                goto error;
 
@@ -631,6 +628,8 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
        }
        bnxt_set_hwrm_link_config(bp, false);
        bnxt_hwrm_port_clr_stats(bp);
+       bnxt_free_tx_mbufs(bp);
+       bnxt_free_rx_mbufs(bp);
        bnxt_shutdown_nic(bp);
        bp->dev_stopped = 1;
 }
@@ -642,8 +641,6 @@ static void bnxt_dev_close_op(struct rte_eth_dev *eth_dev)
        if (bp->dev_stopped == 0)
                bnxt_dev_stop_op(eth_dev);
 
-       bnxt_free_tx_mbufs(bp);
-       bnxt_free_rx_mbufs(bp);
        bnxt_free_mem(bp);
        if (eth_dev->data->mac_addrs != NULL) {
                rte_free(eth_dev->data->mac_addrs);
@@ -3057,6 +3054,7 @@ skip_init:
                goto error_free_int;
 
        bnxt_enable_int(bp);
+       bnxt_init_nic(bp);
 
        return 0;
 
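Taken together, these hunks pair each teardown with its setup stage: one-time ring/VNIC/filter allocation stays at probe (bnxt_init_nic), chip programming moves into start (bnxt_init_chip), and mbuf release moves from close to stop, so a stop/start cycle neither leaks nor double-frees. A toy sketch of that symmetry, stubs only:

#include <stdio.h>

/* Stubs standing in for the driver hooks. */
static void init_nic(void)  { puts("probe: alloc rings/vnics/filters"); }
static void init_chip(void) { puts("start: program the chip"); }
static void free_bufs(void) { puts("stop:  free posted mbufs"); }
static void free_mem(void)  { puts("close: free ring memory"); }

int main(void)
{
        init_nic();  /* once, at probe */
        init_chip(); /* start */
        free_bufs(); /* stop undoes start */
        init_chip(); /* restarting is now safe */
        free_bufs();
        free_mem();  /* close undoes probe */
        return 0;
}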
index 32af606..8d3ddf1 100644
@@ -159,6 +159,14 @@ void bnxt_free_filter_mem(struct bnxt *bp)
 
        rte_free(bp->filter_info);
        bp->filter_info = NULL;
+
+       for (i = 0; i < bp->pf.max_vfs; i++) {
+               STAILQ_FOREACH(filter, &bp->pf.vf_info[i].filter, next) {
+                       rte_free(filter);
+                       STAILQ_REMOVE(&bp->pf.vf_info[i].filter, filter,
+                                     bnxt_filter_info, next);
+               }
+       }
 }
 
 int bnxt_alloc_filter_mem(struct bnxt *bp)
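One caveat in the new loop: it calls rte_free(filter) before STAILQ_REMOVE(), which then walks the list through the just-freed node, and it mutates the list inside STAILQ_FOREACH. A drain that unlinks the head first avoids both issues; a sketch with <sys/queue.h> and an assumed struct:

#include <stdlib.h>
#include <sys/queue.h>

struct filter_info {
        STAILQ_ENTRY(filter_info) next;
};

STAILQ_HEAD(filter_head, filter_info);

static void
drain_filters(struct filter_head *head)
{
        struct filter_info *f;

        /* Unlink before freeing, so the list is never traversed
         * through freed memory. */
        while ((f = STAILQ_FIRST(head)) != NULL) {
                STAILQ_REMOVE_HEAD(head, next);
                free(f);
        }
}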
index ce214d7..22f092f 100644
@@ -252,6 +252,9 @@ int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp,
        struct hwrm_cfa_l2_set_rx_mask_output *resp = bp->hwrm_cmd_resp_addr;
        uint32_t mask = 0;
 
+       if (vnic->fw_vnic_id == INVALID_HW_RING_ID)
+               return rc;
+
        HWRM_PREP(req, CFA_L2_SET_RX_MASK);
        req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id);
 
@@ -1100,7 +1103,8 @@ int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic)
        HWRM_PREP(req, VNIC_ALLOC);
 
        if (vnic->func_default)
-               req.flags = HWRM_VNIC_ALLOC_INPUT_FLAGS_DEFAULT;
+               req.flags =
+                       rte_cpu_to_le_32(HWRM_VNIC_ALLOC_INPUT_FLAGS_DEFAULT);
        rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
 
        HWRM_CHECK_RESULT();
@@ -1121,7 +1125,7 @@ static int bnxt_hwrm_vnic_plcmodes_qcfg(struct bnxt *bp,
 
        HWRM_PREP(req, VNIC_PLCMODES_QCFG);
 
-       req.vnic_id = rte_cpu_to_le_32(vnic->fw_vnic_id);
+       req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id);
 
        rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
 
@@ -1149,7 +1153,7 @@ static int bnxt_hwrm_vnic_plcmodes_cfg(struct bnxt *bp,
 
        HWRM_PREP(req, VNIC_PLCMODES_CFG);
 
-       req.vnic_id = rte_cpu_to_le_32(vnic->fw_vnic_id);
+       req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id);
        req.flags = rte_cpu_to_le_32(pmode->flags);
        req.jumbo_thresh = rte_cpu_to_le_16(pmode->jumbo_thresh);
        req.hds_offset = rte_cpu_to_le_16(pmode->hds_offset);
@@ -1393,7 +1397,7 @@ int bnxt_hwrm_vnic_plcmode_cfg(struct bnxt *bp,
        size -= RTE_PKTMBUF_HEADROOM;
 
        req.jumbo_thresh = rte_cpu_to_le_16(size);
-       req.vnic_id = rte_cpu_to_le_32(vnic->fw_vnic_id);
+       req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id);
 
        rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
 
@@ -1424,12 +1428,12 @@ int bnxt_hwrm_vnic_tpa_cfg(struct bnxt *bp,
                                HWRM_VNIC_TPA_CFG_INPUT_FLAGS_GRO |
                                HWRM_VNIC_TPA_CFG_INPUT_FLAGS_AGG_WITH_ECN |
                        HWRM_VNIC_TPA_CFG_INPUT_FLAGS_AGG_WITH_SAME_GRE_SEQ);
-               req.vnic_id = rte_cpu_to_le_32(vnic->fw_vnic_id);
                req.max_agg_segs = rte_cpu_to_le_16(5);
                req.max_aggs =
                        rte_cpu_to_le_16(HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_MAX);
                req.min_agg_len = rte_cpu_to_le_32(512);
        }
+       req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id);
 
        rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
 
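Several hunks above swap rte_cpu_to_le_32() for rte_cpu_to_le_16() because fw_vnic_id is a 16-bit field: on a big-endian host the 32-bit helper swaps the wrong width and the device reads a garbled id, while on little-endian both are no-ops, which is how the bug stayed hidden. A width-matched conversion in miniature:

#include <stdint.h>

/* Little-endian conversion matching the field width; a no-op on
 * little-endian hosts. */
static inline uint16_t
cpu_to_le16(uint16_t v)
{
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        return (uint16_t)((v << 8) | (v >> 8));
#else
        return v;
#endif
}

struct req {
        uint16_t vnic_id; /* 16-bit wire field */
};

static void
set_vnic_id(struct req *r, uint16_t id)
{
        r->vnic_id = cpu_to_le16(id); /* not a 32-bit conversion */
}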
index c5ccc9b..6ce94bf 100644
@@ -1,13 +1,37 @@
-/* Broadcom NetXtreme-C/E network driver.
+/*-
+ *   BSD LICENSE
  *
- * Copyright (c) 2014-2016 Broadcom Corporation
- * Copyright (c) 2016-2017 Broadcom Limited
+ *   Copyright(c) Broadcom Limited.
+ *   All rights reserved.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation.
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Broadcom Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+
 #ifndef _BNXT_NVM_DEFS_H_
 #define _BNXT_NVM_DEFS_H_
 
index b4e9f38..5088e9d 100644
@@ -237,7 +237,8 @@ static void bnxt_rx_queue_release_mbufs(struct bnxt_rx_queue *rxq)
        if (rxq) {
                sw_ring = rxq->rx_ring->rx_buf_ring;
                if (sw_ring) {
-                       for (i = 0; i < rxq->nb_rx_desc; i++) {
+                       for (i = 0;
+                            i < rxq->rx_ring->rx_ring_struct->ring_size; i++) {
                                if (sw_ring[i].mbuf) {
                                        rte_pktmbuf_free_seg(sw_ring[i].mbuf);
                                        sw_ring[i].mbuf = NULL;
@@ -247,7 +248,8 @@ static void bnxt_rx_queue_release_mbufs(struct bnxt_rx_queue *rxq)
                /* Free up mbufs in Agg ring */
                sw_ring = rxq->rx_ring->ag_buf_ring;
                if (sw_ring) {
-                       for (i = 0; i < rxq->nb_rx_desc; i++) {
+                       for (i = 0;
+                            i < rxq->rx_ring->ag_ring_struct->ring_size; i++) {
                                if (sw_ring[i].mbuf) {
                                        rte_pktmbuf_free_seg(sw_ring[i].mbuf);
                                        sw_ring[i].mbuf = NULL;
index 5128335..c5c5484 100644
@@ -469,11 +469,15 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
 
        if (likely(RX_CMP_IP_CS_OK(rxcmp1)))
                mbuf->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
+       else if (likely(RX_CMP_IP_CS_UNKNOWN(rxcmp1)))
+               mbuf->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
        else
                mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;
 
        if (likely(RX_CMP_L4_CS_OK(rxcmp1)))
                mbuf->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
+       else if (likely(RX_CMP_L4_CS_UNKNOWN(rxcmp1)))
+               mbuf->ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
        else
                mbuf->ol_flags |= PKT_RX_L4_CKSUM_BAD;
 
@@ -730,7 +734,7 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq)
        if (rxq->rx_buf_use_size <= size)
                size = rxq->rx_buf_use_size;
 
-       type = RX_PROD_PKT_BD_TYPE_RX_PROD_PKT;
+       type = RX_PROD_PKT_BD_TYPE_RX_PROD_PKT | RX_PROD_PKT_BD_FLAGS_EOP_PAD;
 
        rxr = rxq->rx_ring;
        ring = rxr->rx_ring_struct;
index a94373d..4daa7e2 100644
 #define BNXT_TPA_OUTER_L3_OFF(hdr_info)        \
        ((hdr_info) & 0x1ff)
 
-#define RX_CMP_L4_CS_BITS      rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_L4_CS_CALC)
+#define RX_CMP_L4_CS_BITS      \
+       rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_L4_CS_CALC | \
+                        RX_PKT_CMPL_FLAGS2_T_L4_CS_CALC)
 
-#define RX_CMP_L4_CS_ERR_BITS  rte_cpu_to_le_32(RX_PKT_CMPL_ERRORS_L4_CS_ERROR)
+#define RX_CMP_L4_CS_ERR_BITS  \
+       rte_cpu_to_le_32(RX_PKT_CMPL_ERRORS_L4_CS_ERROR | \
+                        RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR)
 
 #define RX_CMP_L4_CS_OK(rxcmp1)                                                \
            (((rxcmp1)->flags2 & RX_CMP_L4_CS_BITS) &&          \
             !((rxcmp1)->errors_v2 & RX_CMP_L4_CS_ERR_BITS))
 
-#define RX_CMP_IP_CS_ERR_BITS  rte_cpu_to_le_32(RX_PKT_CMPL_ERRORS_IP_CS_ERROR)
+#define RX_CMP_L4_CS_UNKNOWN(rxcmp1)                                   \
+           !((rxcmp1)->flags2 & RX_CMP_L4_CS_BITS)
 
-#define RX_CMP_IP_CS_BITS      rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_IP_CS_CALC)
+#define RX_CMP_IP_CS_ERR_BITS  \
+       rte_cpu_to_le_32(RX_PKT_CMPL_ERRORS_IP_CS_ERROR | \
+                        RX_PKT_CMPL_ERRORS_T_IP_CS_ERROR)
+
+#define RX_CMP_IP_CS_BITS      \
+       rte_cpu_to_le_32(RX_PKT_CMPL_FLAGS2_IP_CS_CALC | \
+                        RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC)
 
 #define RX_CMP_IP_CS_OK(rxcmp1)                                                \
                (((rxcmp1)->flags2 & RX_CMP_IP_CS_BITS) &&      \
                !((rxcmp1)->errors_v2 & RX_CMP_IP_CS_ERR_BITS))
 
+#define RX_CMP_IP_CS_UNKNOWN(rxcmp1)                                   \
+               !((rxcmp1)->flags2 & RX_CMP_IP_CS_BITS)
+
 enum pkt_hash_types {
        PKT_HASH_TYPE_NONE,     /* Undefined type */
        PKT_HASH_TYPE_L2,       /* Input: src_MAC, dest_MAC */
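The new *_CS_UNKNOWN macros turn checksum reporting into a three-way decision: GOOD when the NIC computed and validated the sum, UNKNOWN when it never computed one (no *_CS_CALC bit set), and BAD only on an actual error. The logic condensed, with the bit masks passed in:

#include <stdint.h>

enum cksum_state { CKSUM_GOOD, CKSUM_UNKNOWN, CKSUM_BAD };

static enum cksum_state
classify_cksum(uint32_t flags2, uint32_t errors,
               uint32_t cs_bits, uint32_t err_bits)
{
        if ((flags2 & cs_bits) && !(errors & err_bits))
                return CKSUM_GOOD;    /* computed, no error */
        if (!(flags2 & cs_bits))
                return CKSUM_UNKNOWN; /* never computed */
        return CKSUM_BAD;             /* computed, error flagged */
}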
index b834035..8fd90ae 100644
@@ -240,9 +240,12 @@ slave_vlan_filter_set(uint16_t bonded_port_id, uint16_t slave_port_id)
                for (i = 0, mask = 1;
                     i < RTE_BITMAP_SLAB_BIT_SIZE;
                     i ++, mask <<= 1) {
-                       if (unlikely(slab & mask))
+                       if (unlikely(slab & mask)) {
+                               uint16_t vlan_id = pos + i;
+
                                res = rte_eth_dev_vlan_filter(slave_port_id,
-                                                             (uint16_t)pos, 1);
+                                                             vlan_id, 1);
+                       }
                }
                found = rte_bitmap_scan(internals->vlan_filter_bmp,
                                        &pos, &slab);
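The fix above addresses an off-by-slab bug: rte_bitmap_scan() returns a 64-bit slab covering RTE_BITMAP_SLAB_BIT_SIZE consecutive VLAN ids starting at pos, so bit i maps to VLAN pos + i, whereas the old code applied every set bit to pos itself. The mapping in isolation (callback assumed):

#include <stdint.h>

#define SLAB_BIT_SIZE 64 /* stand-in for RTE_BITMAP_SLAB_BIT_SIZE */

static void
apply_slab(uint64_t slab, uint32_t pos,
           void (*set_vlan)(uint16_t vlan_id))
{
        uint64_t mask = 1;
        uint32_t i;

        for (i = 0; i < SLAB_BIT_SIZE; i++, mask <<= 1)
                if (slab & mask)
                        set_vlan((uint16_t)(pos + i)); /* bit i -> pos + i */
}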
index e816da3..8c94cc6 100644
@@ -273,7 +273,7 @@ bond_ethdev_parse_primary_slave_port_id_kvarg(const char *key __rte_unused,
        if (primary_slave_port_id < 0)
                return -1;
 
-       *(uint8_t *)extra_args = (uint8_t)primary_slave_port_id;
+       *(uint16_t *)extra_args = (uint16_t)primary_slave_port_id;
 
        return 0;
 }
index 1d3fbeb..e19a4a3 100644
@@ -1912,7 +1912,7 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev)
 
        if (internals->slave_count == 0) {
                RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
-               return -1;
+               goto out_err;
        }
 
        if (internals->user_defined_mac == 0) {
@@ -1923,18 +1923,18 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev)
                                new_mac_addr = &internals->slaves[i].persisted_mac_addr;
 
                if (new_mac_addr == NULL)
-                       return -1;
+                       goto out_err;
 
                if (mac_address_set(eth_dev, new_mac_addr) != 0) {
                        RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
                                        eth_dev->data->port_id);
-                       return -1;
+                       goto out_err;
                }
        }
 
        /* Update all slave devices MACs*/
        if (mac_address_slaves_update(eth_dev) != 0)
-               return -1;
+               goto out_err;
 
        /* If bonded device is configure in promiscuous mode then re-apply config */
        if (internals->promiscuous_en)
@@ -1959,7 +1959,7 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev)
                                "bonded port (%d) failed to reconfigure slave device (%d)",
                                eth_dev->data->port_id,
                                internals->slaves[i].port_id);
-                       return -1;
+                       goto out_err;
                }
                /* We will need to poll for link status if any slave doesn't
                 * support interrupts
@@ -1967,6 +1967,7 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev)
                if (internals->slaves[i].link_status_poll_enabled)
                        internals->link_status_polling_enabled = 1;
        }
+
        /* start polling if needed */
        if (internals->link_status_polling_enabled) {
                rte_eal_alarm_set(
@@ -1986,6 +1987,10 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev)
                bond_tlb_enable(internals);
 
        return 0;
+
+out_err:
+       eth_dev->data->dev_started = 0;
+       return -1;
 }
 
 static void
@@ -2519,14 +2524,21 @@ bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
        if (!valid_slave)
                return rc;
 
+       /* Serialize parallel LSC callback invocations, triggered either by
+        * a real link event from the slave PMDs or by the bonding PMD itself.
+        */
+       rte_spinlock_lock(&internals->lsc_lock);
+
        /* Search for port in active port list */
        active_pos = find_slave_by_id(internals->active_slaves,
                        internals->active_slave_count, port_id);
 
        rte_eth_link_get_nowait(port_id, &link);
        if (link.link_status) {
-               if (active_pos < internals->active_slave_count)
+               if (active_pos < internals->active_slave_count) {
+                       rte_spinlock_unlock(&internals->lsc_lock);
                        return rc;
+               }
 
                /* if no active slave ports then set this port to be primary port */
                if (internals->active_slave_count < 1) {
@@ -2545,8 +2557,10 @@ bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
                                internals->primary_port == port_id)
                        bond_ethdev_primary_set(internals, port_id);
        } else {
-               if (active_pos == internals->active_slave_count)
+               if (active_pos == internals->active_slave_count) {
+                       rte_spinlock_unlock(&internals->lsc_lock);
                        return rc;
+               }
 
                /* Remove from active slave list */
                deactivate_slave(bonded_eth_dev, port_id);
@@ -2599,6 +2613,9 @@ bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
                                                NULL, NULL);
                }
        }
+
+       rte_spinlock_unlock(&internals->lsc_lock);
+
        return 0;
 }
 
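
With the lock in place, every exit path of the callback must release it,
which is why the early returns above gained explicit unlocks. A
single-exit shape avoids that duplication; a sketch assuming the same
lsc_lock field (state and helpers are illustrative, and the spinlock is
assumed to have been initialized at allocation time):

/* Sketch: single-exit locking around the LSC critical section. */
#include <rte_spinlock.h>

struct bond_priv_sketch {
	rte_spinlock_t lsc_lock;
	int link_up;			/* illustrative state */
};

static int
lsc_callback_sketch(struct bond_priv_sketch *internals, int new_state)
{
	int rc = 0;

	rte_spinlock_lock(&internals->lsc_lock);
	if (internals->link_up == new_state)
		goto out;		/* no change: nothing to do */
	internals->link_up = new_state;
	/* ... (de)activate the slave under the lock ... */
out:
	rte_spinlock_unlock(&internals->lsc_lock);
	return rc;
}
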
@@ -2766,6 +2783,7 @@ bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
        eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
 
        rte_spinlock_init(&internals->lock);
+       rte_spinlock_init(&internals->lsc_lock);
 
        internals->port_id = eth_dev->data->port_id;
        internals->mode = BONDING_MODE_INVALID;
@@ -2967,6 +2985,10 @@ bond_remove(struct rte_vdev_device *dev)
        eth_dev->tx_pkt_burst = NULL;
 
        internals = eth_dev->data->dev_private;
+       /* Try to release the mempool used in mode 6. If the bond
+        * device is not in mode 6, freeing a NULL mempool is harmless.
+        */
+       rte_mempool_free(internals->mode6.mempool);
        rte_bitmap_free(internals->vlan_filter_bmp);
        rte_free(internals->vlan_filter_bmpmem);
        rte_free(eth_dev->data->dev_private);
index a5cfa6a..dae8aab 100644 (file)
@@ -118,6 +118,7 @@ struct bond_dev_private {
        uint8_t mode;                                           /**< Link Bonding Mode */
 
        rte_spinlock_t lock;
+       rte_spinlock_t lsc_lock;
 
        uint16_t primary_port;                  /**< Primary Slave Port */
        uint16_t current_primary_port;          /**< Primary Slave Port */
index ec3374b..03ddb44 100644 (file)
@@ -1,6 +1,7 @@
 DPDK_2.0 {
        global:
 
+       rte_eth_bond_8023ad_slave_info;
        rte_eth_bond_active_slaves_get;
        rte_eth_bond_create;
        rte_eth_bond_link_monitoring_set;
index e4375c3..02056bc 100644 (file)
@@ -324,10 +324,13 @@ dpaa_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 static int
 dpaa_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
                      struct rte_eth_xstat_name *xstats_names,
-                     __rte_unused unsigned int limit)
+                     unsigned int limit)
 {
        unsigned int i, stat_cnt = RTE_DIM(dpaa_xstats_strings);
 
+       if (limit < stat_cnt)
+               return stat_cnt;
+
        if (xstats_names != NULL)
                for (i = 0; i < stat_cnt; i++)
                        snprintf(xstats_names[i].name,
@@ -355,7 +358,7 @@ dpaa_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids,
                        return 0;
 
                fman_if_stats_get_all(dpaa_intf->fif, values_copy,
-                                     sizeof(struct dpaa_if_stats));
+                                     sizeof(struct dpaa_if_stats) / 8);
 
                for (i = 0; i < stat_cnt; i++)
                        values[i] =
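
The xstats_get_names change implements the ethdev contract: when the
caller's limit is smaller than the number of statistics, the driver
returns the required count without writing anything, enabling two-pass
buffer sizing. (The neighbouring hunk fixes a separate bug:
fman_if_stats_get_all() takes a count of 64-bit registers, hence the
division of the struct size by 8.) A sketch of the consuming side:

/* Sketch: two-pass buffer sizing against the xstats names API. */
#include <stdlib.h>
#include <rte_ethdev.h>

static int
dump_xstat_names(uint16_t port_id)
{
	int cnt = rte_eth_xstats_get_names(port_id, NULL, 0);
	struct rte_eth_xstat_name *names;

	if (cnt <= 0)
		return cnt;		/* error or no statistics */
	names = calloc(cnt, sizeof(*names));
	if (names == NULL)
		return -1;
	cnt = rte_eth_xstats_get_names(port_id, names, cnt);
	/* ... use names[i].name for i in [0, cnt) ... */
	free(names);
	return cnt;
}
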
index 202f84f..0711baf 100644 (file)
@@ -1144,12 +1144,12 @@ dpaa2_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
        union dpni_statistics value[3] = {};
        unsigned int i = 0, num = RTE_DIM(dpaa2_xstats_strings);
 
-       if (xstats == NULL)
-               return 0;
-
        if (n < num)
                return num;
 
+       if (xstats == NULL)
+               return 0;
+
        /* Get Counters from page_0*/
        retcode = dpni_get_statistics(dpni, CMD_PRI_LOW, priv->token,
                                      0, 0, &value[0]);
@@ -1182,10 +1182,13 @@ err:
 static int
 dpaa2_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
                       struct rte_eth_xstat_name *xstats_names,
-                      __rte_unused unsigned int limit)
+                      unsigned int limit)
 {
        unsigned int i, stat_cnt = RTE_DIM(dpaa2_xstats_strings);
 
+       if (limit < stat_cnt)
+               return stat_cnt;
+
        if (xstats_names != NULL)
                for (i = 0; i < stat_cnt; i++)
                        snprintf(xstats_names[i].name,
index 9b25d21..9e54ace 100644 (file)
@@ -627,17 +627,9 @@ int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats)
 {
        u64 a0, a1;
        int wait = 1000;
-       static u32 instance;
-       char name[NAME_MAX];
 
-       if (!vdev->stats) {
-               snprintf((char *)name, sizeof(name),
-                       "vnic_stats-%u", instance++);
-               vdev->stats = vdev->alloc_consistent(vdev->priv,
-                       sizeof(struct vnic_stats), &vdev->stats_pa, (u8 *)name);
-               if (!vdev->stats)
-                       return -ENOMEM;
-       }
+       if (!vdev->stats)
+               return -ENOMEM;
 
        *stats = vdev->stats;
        a0 = vdev->stats_pa;
@@ -962,6 +954,18 @@ u32 vnic_dev_get_intr_coal_timer_max(struct vnic_dev *vdev)
        return vdev->intr_coal_timer_info.max_usec;
 }
 
+int vnic_dev_alloc_stats_mem(struct vnic_dev *vdev)
+{
+       char name[NAME_MAX];
+       static u32 instance;
+
+       snprintf((char *)name, sizeof(name), "vnic_stats-%u", instance++);
+       vdev->stats = vdev->alloc_consistent(vdev->priv,
+                                            sizeof(struct vnic_stats),
+                                            &vdev->stats_pa, (u8 *)name);
+       return vdev->stats == NULL ? -ENOMEM : 0;
+}
+
 void vnic_dev_unregister(struct vnic_dev *vdev)
 {
        if (vdev) {
index c9ca25b..94964e4 100644 (file)
@@ -196,6 +196,7 @@ struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev,
        void *priv, struct rte_pci_device *pdev, struct vnic_dev_bar *bar,
        unsigned int num_bars);
 struct rte_pci_device *vnic_dev_get_pdev(struct vnic_dev *vdev);
+int vnic_dev_alloc_stats_mem(struct vnic_dev *vdev);
 int vnic_dev_cmd_init(struct vnic_dev *vdev, int fallback);
 int vnic_dev_get_size(void);
 int vnic_dev_int13(struct vnic_dev *vdev, u64 arg, u32 op);
index 1694aed..6356c10 100644 (file)
@@ -1252,6 +1252,8 @@ int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
        /* free and reallocate RQs with the new MTU */
        for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
                rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
+               if (!rq->in_use)
+                       continue;
 
                enic_free_rq(rq);
                rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
@@ -1383,6 +1385,15 @@ int enic_probe(struct enic *enic)
                enic_alloc_consistent,
                enic_free_consistent);
 
+       /*
+        * Allocate the consistent memory for stats upfront so both primary and
+        * secondary processes can dump stats.
+        */
+       err = vnic_dev_alloc_stats_mem(enic->vdev);
+       if (err) {
+               dev_err(enic, "Failed to allocate stats memory, aborting\n");
+               goto err_out_unregister;
+       }
        /* Issue device open to get device in known state */
        err = enic_dev_open(enic);
        if (err) {
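
The allocation moves out of vnic_dev_stats_dump() so that the memory
exists before any process, primary or secondary, asks for stats; the dump
path now only validates the pointer. A hedged sketch of the resulting
probe-time call order (names taken from the diff; this only compiles
inside the enic driver):

/* Sketch: allocate stats DMA memory up-front, then open the device. */
static int
probe_stats_sketch(struct enic *enic)
{
	int err = vnic_dev_alloc_stats_mem(enic->vdev);

	if (err)
		return err;		/* no consistent memory for stats */
	return enic_dev_open(enic);	/* continue probe as in the diff */
}
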
index 8336510..41b4cb0 100644 (file)
@@ -210,7 +210,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
                                                               mac);
                        if (ret) {
                                ERROR("Failed to set default MAC address");
-                               goto free_args;
+                               goto cancel_alarm;
                        }
                }
        } else {
@@ -240,6 +240,8 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
                mac->addr_bytes[4], mac->addr_bytes[5]);
        dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
        return 0;
+cancel_alarm:
+       failsafe_hotplug_alarm_cancel(dev);
 free_args:
        failsafe_args_free(dev);
 free_subs:
index 21392e5..5b5ac42 100644 (file)
@@ -287,6 +287,7 @@ fs_dev_remove(struct sub_device *sdev)
                sdev->state = DEV_ACTIVE;
                /* fallthrough */
        case DEV_ACTIVE:
+               failsafe_eth_dev_unregister_callbacks(sdev);
                rte_eth_dev_close(PORT_ID(sdev));
                sdev->state = DEV_PROBED;
                /* fallthrough */
@@ -346,6 +347,35 @@ fs_rxtx_clean(struct sub_device *sdev)
        return 1;
 }
 
+void
+failsafe_eth_dev_unregister_callbacks(struct sub_device *sdev)
+{
+       int ret;
+
+       if (sdev == NULL)
+               return;
+       if (sdev->rmv_callback) {
+               ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
+                                               RTE_ETH_EVENT_INTR_RMV,
+                                               failsafe_eth_rmv_event_callback,
+                                               sdev);
+               if (ret)
+                       WARN("Failed to unregister RMV callback for sub_device"
+                            " %d", SUB_ID(sdev));
+               sdev->rmv_callback = 0;
+       }
+       if (sdev->lsc_callback) {
+               ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
+                                               RTE_ETH_EVENT_INTR_LSC,
+                                               failsafe_eth_lsc_event_callback,
+                                               sdev);
+               if (ret)
+                       WARN("Failed to unregister LSC callback for sub_device"
+                            " %d", SUB_ID(sdev));
+               sdev->lsc_callback = 0;
+       }
+}
+
 void
 failsafe_dev_remove(struct rte_eth_dev *dev)
 {
index e16a590..9a5d873 100644 (file)
@@ -124,7 +124,7 @@ fs_dev_configure(struct rte_eth_dev *dev)
                        ERROR("Could not configure sub_device %d", i);
                        return ret;
                }
-               if (rmv_interrupt) {
+               if (rmv_interrupt && sdev->rmv_callback == 0) {
                        ret = rte_eth_dev_callback_register(PORT_ID(sdev),
                                        RTE_ETH_EVENT_INTR_RMV,
                                        failsafe_eth_rmv_event_callback,
@@ -132,9 +132,11 @@ fs_dev_configure(struct rte_eth_dev *dev)
                        if (ret)
                                WARN("Failed to register RMV callback for sub_device %d",
                                     SUB_ID(sdev));
+                       else
+                               sdev->rmv_callback = 1;
                }
                dev->data->dev_conf.intr_conf.rmv = 0;
-               if (lsc_interrupt) {
+               if (lsc_interrupt && sdev->lsc_callback == 0) {
                        ret = rte_eth_dev_callback_register(PORT_ID(sdev),
                                                RTE_ETH_EVENT_INTR_LSC,
                                                failsafe_eth_lsc_event_callback,
@@ -142,6 +144,8 @@ fs_dev_configure(struct rte_eth_dev *dev)
                        if (ret)
                                WARN("Failed to register LSC callback for sub_device %d",
                                     SUB_ID(sdev));
+                       else
+                               sdev->lsc_callback = 1;
                }
                dev->data->dev_conf.intr_conf.lsc = lsc_enabled;
                sdev->state = DEV_ACTIVE;
@@ -237,6 +241,7 @@ fs_dev_close(struct rte_eth_dev *dev)
        PRIV(dev)->state = DEV_ACTIVE - 1;
        FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
                DEBUG("Closing sub_device %d", i);
+               failsafe_eth_dev_unregister_callbacks(sdev);
                rte_eth_dev_close(PORT_ID(sdev));
                sdev->state = DEV_ACTIVE - 1;
        }
index d81cc3c..40eabb7 100644 (file)
@@ -117,6 +117,10 @@ struct sub_device {
        volatile unsigned int remove:1;
        /* flow isolation state */
        int flow_isolated:1;
+       /* RMV callback registration state */
+       unsigned int rmv_callback:1;
+       /* LSC callback registration state */
+       unsigned int lsc_callback:1;
 };
 
 struct fs_priv {
@@ -187,6 +191,7 @@ int failsafe_eal_uninit(struct rte_eth_dev *dev);
 /* ETH_DEV */
 
 int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
+void failsafe_eth_dev_unregister_callbacks(struct sub_device *sdev);
 void failsafe_dev_remove(struct rte_eth_dev *dev);
 void failsafe_stats_increment(struct rte_eth_stats *to,
                                struct rte_eth_stats *from);
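
The new rmv_callback/lsc_callback bits make callback registration
idempotent: fs_dev_configure() may run again on reconfiguration without
stacking duplicate callbacks, and the unregister helper clears the bits so
a later configure can register anew. The guard in isolation (names from
the diff; compiles only inside the failsafe PMD):

/* Sketch: register-once guard driven by a one-bit flag. */
static void
register_lsc_once(struct sub_device *sdev)
{
	int ret;

	if (sdev->lsc_callback)
		return;				/* already registered */
	ret = rte_eth_dev_callback_register(PORT_ID(sdev),
					RTE_ETH_EVENT_INTR_LSC,
					failsafe_eth_lsc_event_callback,
					sdev);
	if (ret == 0)
		sdev->lsc_callback = 1;		/* record success only */
}
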
index a482ab9..df66e76 100644 (file)
@@ -90,7 +90,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_PF_ARQLEN_ARQCRIT_SHIFT   30
 #define I40E_PF_ARQLEN_ARQCRIT_MASK    I40E_MASK(0x1, I40E_PF_ARQLEN_ARQCRIT_SHIFT)
 #define I40E_PF_ARQLEN_ARQENABLE_SHIFT 31
-#define I40E_PF_ARQLEN_ARQENABLE_MASK  I40E_MASK(0x1, I40E_PF_ARQLEN_ARQENABLE_SHIFT)
+#define I40E_PF_ARQLEN_ARQENABLE_MASK  I40E_MASK(0x1u, I40E_PF_ARQLEN_ARQENABLE_SHIFT)
 #define I40E_PF_ARQT            0x00080480 /* Reset: EMPR */
 #define I40E_PF_ARQT_ARQT_SHIFT 0
 #define I40E_PF_ARQT_ARQT_MASK  I40E_MASK(0x3FF, I40E_PF_ARQT_ARQT_SHIFT)
@@ -113,7 +113,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_PF_ATQLEN_ATQCRIT_SHIFT   30
 #define I40E_PF_ATQLEN_ATQCRIT_MASK    I40E_MASK(0x1, I40E_PF_ATQLEN_ATQCRIT_SHIFT)
 #define I40E_PF_ATQLEN_ATQENABLE_SHIFT 31
-#define I40E_PF_ATQLEN_ATQENABLE_MASK  I40E_MASK(0x1, I40E_PF_ATQLEN_ATQENABLE_SHIFT)
+#define I40E_PF_ATQLEN_ATQENABLE_MASK  I40E_MASK(0x1u, I40E_PF_ATQLEN_ATQENABLE_SHIFT)
 #define I40E_PF_ATQT            0x00080400 /* Reset: EMPR */
 #define I40E_PF_ATQT_ATQT_SHIFT 0
 #define I40E_PF_ATQT_ATQT_MASK  I40E_MASK(0x3FF, I40E_PF_ATQT_ATQT_SHIFT)
@@ -140,7 +140,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_VF_ARQLEN_ARQCRIT_SHIFT   30
 #define I40E_VF_ARQLEN_ARQCRIT_MASK    I40E_MASK(0x1, I40E_VF_ARQLEN_ARQCRIT_SHIFT)
 #define I40E_VF_ARQLEN_ARQENABLE_SHIFT 31
-#define I40E_VF_ARQLEN_ARQENABLE_MASK  I40E_MASK(0x1, I40E_VF_ARQLEN_ARQENABLE_SHIFT)
+#define I40E_VF_ARQLEN_ARQENABLE_MASK  I40E_MASK(0x1u, I40E_VF_ARQLEN_ARQENABLE_SHIFT)
 #define I40E_VF_ARQT(_VF)       (0x00082C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
 #define I40E_VF_ARQT_MAX_INDEX  127
 #define I40E_VF_ARQT_ARQT_SHIFT 0
@@ -168,7 +168,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_VF_ATQLEN_ATQCRIT_SHIFT   30
 #define I40E_VF_ATQLEN_ATQCRIT_MASK    I40E_MASK(0x1, I40E_VF_ATQLEN_ATQCRIT_SHIFT)
 #define I40E_VF_ATQLEN_ATQENABLE_SHIFT 31
-#define I40E_VF_ATQLEN_ATQENABLE_MASK  I40E_MASK(0x1, I40E_VF_ATQLEN_ATQENABLE_SHIFT)
+#define I40E_VF_ATQLEN_ATQENABLE_MASK  I40E_MASK(0x1u, I40E_VF_ATQLEN_ATQENABLE_SHIFT)
 #define I40E_VF_ATQT(_VF)       (0x00082800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
 #define I40E_VF_ATQT_MAX_INDEX  127
 #define I40E_VF_ATQT_ATQT_SHIFT 0
@@ -291,7 +291,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_PRTDCB_RETSTCC_UPINTC_MODE_SHIFT 30
 #define I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK  I40E_MASK(0x1, I40E_PRTDCB_RETSTCC_UPINTC_MODE_SHIFT)
 #define I40E_PRTDCB_RETSTCC_ETSTC_SHIFT       31
-#define I40E_PRTDCB_RETSTCC_ETSTC_MASK        I40E_MASK(0x1, I40E_PRTDCB_RETSTCC_ETSTC_SHIFT)
+#define I40E_PRTDCB_RETSTCC_ETSTC_MASK        I40E_MASK(0x1u, I40E_PRTDCB_RETSTCC_ETSTC_SHIFT)
 #define I40E_PRTDCB_RPPMC                    0x001223A0 /* Reset: CORER */
 #define I40E_PRTDCB_RPPMC_LANRPPM_SHIFT      0
 #define I40E_PRTDCB_RPPMC_LANRPPM_MASK       I40E_MASK(0xFF, I40E_PRTDCB_RPPMC_LANRPPM_SHIFT)
@@ -535,7 +535,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_GLGEN_MSCA_MDICMD_SHIFT      30
 #define I40E_GLGEN_MSCA_MDICMD_MASK       I40E_MASK(0x1, I40E_GLGEN_MSCA_MDICMD_SHIFT)
 #define I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT 31
-#define I40E_GLGEN_MSCA_MDIINPROGEN_MASK  I40E_MASK(0x1, I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT)
+#define I40E_GLGEN_MSCA_MDIINPROGEN_MASK  I40E_MASK(0x1u, I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT)
 #define I40E_GLGEN_MSRWD(_i)             (0x0008819C + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */
 #define I40E_GLGEN_MSRWD_MAX_INDEX       3
 #define I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT 0
@@ -1274,14 +1274,14 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_GLLAN_TXPRE_QDIS_SET_QDIS_SHIFT   30
 #define I40E_GLLAN_TXPRE_QDIS_SET_QDIS_MASK    I40E_MASK(0x1, I40E_GLLAN_TXPRE_QDIS_SET_QDIS_SHIFT)
 #define I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_SHIFT 31
-#define I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_MASK  I40E_MASK(0x1, I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_SHIFT)
+#define I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_MASK  I40E_MASK(0x1u, I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_SHIFT)
 #define I40E_PFLAN_QALLOC              0x001C0400 /* Reset: CORER */
 #define I40E_PFLAN_QALLOC_FIRSTQ_SHIFT 0
 #define I40E_PFLAN_QALLOC_FIRSTQ_MASK  I40E_MASK(0x7FF, I40E_PFLAN_QALLOC_FIRSTQ_SHIFT)
 #define I40E_PFLAN_QALLOC_LASTQ_SHIFT  16
 #define I40E_PFLAN_QALLOC_LASTQ_MASK   I40E_MASK(0x7FF, I40E_PFLAN_QALLOC_LASTQ_SHIFT)
 #define I40E_PFLAN_QALLOC_VALID_SHIFT  31
-#define I40E_PFLAN_QALLOC_VALID_MASK   I40E_MASK(0x1, I40E_PFLAN_QALLOC_VALID_SHIFT)
+#define I40E_PFLAN_QALLOC_VALID_MASK   I40E_MASK(0x1u, I40E_PFLAN_QALLOC_VALID_SHIFT)
 #define I40E_QRX_ENA(_Q)             (0x00120000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: PFR */
 #define I40E_QRX_ENA_MAX_INDEX       1535
 #define I40E_QRX_ENA_QENA_REQ_SHIFT  0
@@ -1692,7 +1692,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_GLNVM_SRCTL_START_SHIFT  30
 #define I40E_GLNVM_SRCTL_START_MASK   I40E_MASK(0x1, I40E_GLNVM_SRCTL_START_SHIFT)
 #define I40E_GLNVM_SRCTL_DONE_SHIFT   31
-#define I40E_GLNVM_SRCTL_DONE_MASK    I40E_MASK(0x1, I40E_GLNVM_SRCTL_DONE_SHIFT)
+#define I40E_GLNVM_SRCTL_DONE_MASK    I40E_MASK(0x1u, I40E_GLNVM_SRCTL_DONE_SHIFT)
 #define I40E_GLNVM_SRDATA              0x000B6114 /* Reset: POR */
 #define I40E_GLNVM_SRDATA_WRDATA_SHIFT 0
 #define I40E_GLNVM_SRDATA_WRDATA_MASK  I40E_MASK(0xFFFF, I40E_GLNVM_SRDATA_WRDATA_SHIFT)
@@ -3059,7 +3059,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_PF_VT_PFALLOC_LASTVF_SHIFT  8
 #define I40E_PF_VT_PFALLOC_LASTVF_MASK   I40E_MASK(0xFF, I40E_PF_VT_PFALLOC_LASTVF_SHIFT)
 #define I40E_PF_VT_PFALLOC_VALID_SHIFT   31
-#define I40E_PF_VT_PFALLOC_VALID_MASK    I40E_MASK(0x1, I40E_PF_VT_PFALLOC_VALID_SHIFT)
+#define I40E_PF_VT_PFALLOC_VALID_MASK    I40E_MASK(0x1u, I40E_PF_VT_PFALLOC_VALID_SHIFT)
 #define I40E_VP_MDET_RX(_VF)        (0x0012A000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
 #define I40E_VP_MDET_RX_MAX_INDEX   127
 #define I40E_VP_MDET_RX_VALID_SHIFT 0
@@ -3196,7 +3196,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_VF_ARQLEN1_ARQCRIT_SHIFT   30
 #define I40E_VF_ARQLEN1_ARQCRIT_MASK    I40E_MASK(0x1, I40E_VF_ARQLEN1_ARQCRIT_SHIFT)
 #define I40E_VF_ARQLEN1_ARQENABLE_SHIFT 31
-#define I40E_VF_ARQLEN1_ARQENABLE_MASK  I40E_MASK(0x1, I40E_VF_ARQLEN1_ARQENABLE_SHIFT)
+#define I40E_VF_ARQLEN1_ARQENABLE_MASK  I40E_MASK(0x1u, I40E_VF_ARQLEN1_ARQENABLE_SHIFT)
 #define I40E_VF_ARQT1            0x00007000 /* Reset: EMPR */
 #define I40E_VF_ARQT1_ARQT_SHIFT 0
 #define I40E_VF_ARQT1_ARQT_MASK  I40E_MASK(0x3FF, I40E_VF_ARQT1_ARQT_SHIFT)
@@ -3219,7 +3219,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_VF_ATQLEN1_ATQCRIT_SHIFT   30
 #define I40E_VF_ATQLEN1_ATQCRIT_MASK    I40E_MASK(0x1, I40E_VF_ATQLEN1_ATQCRIT_SHIFT)
 #define I40E_VF_ATQLEN1_ATQENABLE_SHIFT 31
-#define I40E_VF_ATQLEN1_ATQENABLE_MASK  I40E_MASK(0x1, I40E_VF_ATQLEN1_ATQENABLE_SHIFT)
+#define I40E_VF_ATQLEN1_ATQENABLE_MASK  I40E_MASK(0x1u, I40E_VF_ATQLEN1_ATQENABLE_SHIFT)
 #define I40E_VF_ATQT1            0x00008400 /* Reset: EMPR */
 #define I40E_VF_ATQT1_ATQT_SHIFT 0
 #define I40E_VF_ATQT1_ATQT_MASK  I40E_MASK(0x3FF, I40E_VF_ATQT1_ATQT_SHIFT)
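
All of the I40E_MASK changes above share one cause: the macro expands to a
plain left shift, and shifting the signed literal 0x1 left by 31 moves a
bit into the sign bit of int, which is undefined behavior in C; the 0x1u
literal makes the shift unsigned and well defined. A compact illustration
(the macro body is an assumption matching the base-code definition):

/* Sketch: signed vs. unsigned shift into bit 31. */
#include <stdint.h>

#define SHIFT_MASK(mask, shift) ((mask) << (shift))

/* SHIFT_MASK(0x1, 31): 0x1 is a signed int, so the result overflows
 * into the sign bit -- undefined behavior.
 * SHIFT_MASK(0x1u, 31): unsigned shift, well defined, == 0x80000000u. */
static const uint32_t enable_mask = SHIFT_MASK(0x1u, 31);
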
index 290ef24..85baff9 100644 (file)
@@ -1554,6 +1554,7 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev)
        struct rte_flow *p_flow;
        int ret;
        uint8_t aq_fail = 0;
+       int retries = 0;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -1595,9 +1596,20 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev)
        /* disable uio intr before callback unregister */
        rte_intr_disable(intr_handle);
 
-       /* register callback func to eal lib */
-       rte_intr_callback_unregister(intr_handle,
-                                    i40e_dev_interrupt_handler, dev);
+       /* unregister callback func from eal lib */
+       do {
+               ret = rte_intr_callback_unregister(intr_handle,
+                               i40e_dev_interrupt_handler, dev);
+               if (ret >= 0) {
+                       break;
+               } else if (ret != -EAGAIN) {
+                       PMD_INIT_LOG(ERR,
+                                "intr callback unregister failed: %d",
+                                ret);
+                       return ret;
+               }
+               i40e_msec_delay(500);
+       } while (retries++ < 5);
 
        i40e_rm_ethtype_filter_list(pf);
        i40e_rm_tunnel_filter_list(pf);
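
rte_intr_callback_unregister() returns -EAGAIN while the callback is still
executing in the interrupt thread, so device teardown must retry rather
than fail outright; the loop above bounds that retry. The same pattern as
a standalone helper (the delay value mirrors this driver; the ixgbe hunk
further below uses 100 ms):

/* Sketch: bounded retry around a busy interrupt callback. */
#include <errno.h>
#include <rte_cycles.h>
#include <rte_interrupts.h>

static int
unregister_with_retry(struct rte_intr_handle *ih,
		      rte_intr_callback_fn cb, void *arg)
{
	int retries = 0;
	int ret;

	do {
		ret = rte_intr_callback_unregister(ih, cb, arg);
		if (ret >= 0)
			return 0;	/* callback removed */
		if (ret != -EAGAIN)
			return ret;	/* hard failure */
		rte_delay_ms(500);	/* callback still running: wait */
	} while (retries++ < 5);
	return -EAGAIN;			/* gave up after the retries */
}
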
@@ -2297,6 +2309,8 @@ i40e_dev_close(struct rte_eth_dev *dev)
        i40e_pf_disable_irq0(hw);
        rte_intr_disable(intr_handle);
 
+       i40e_fdir_teardown(pf);
+
        /* shutdown and destroy the HMC */
        i40e_shutdown_lan_hmc(hw);
 
@@ -2308,7 +2322,6 @@ i40e_dev_close(struct rte_eth_dev *dev)
        pf->vmdq = NULL;
 
        /* release all the existing VSIs and VEBs */
-       i40e_fdir_teardown(pf);
        i40e_vsi_release(pf->main_vsi);
 
        /* shutdown the adminq */
@@ -2444,77 +2457,139 @@ i40e_dev_set_link_down(struct rte_eth_dev *dev)
        return i40e_phy_conf_link(hw, abilities, speed, false);
 }
 
-int
-i40e_dev_link_update(struct rte_eth_dev *dev,
-                    int wait_to_complete)
+static __rte_always_inline void
+update_link_no_wait(struct i40e_hw *hw, struct rte_eth_link *link)
+{
+/* Link status registers and values */
+#define I40E_PRTMAC_LINKSTA            0x001E2420
+#define I40E_REG_LINK_UP               0x40000080
+#define I40E_PRTMAC_MACC               0x001E24E0
+#define I40E_REG_MACC_25GB             0x00020000
+#define I40E_REG_SPEED_MASK            0x38000000
+#define I40E_REG_SPEED_100MB           0x00000000
+#define I40E_REG_SPEED_1GB             0x08000000
+#define I40E_REG_SPEED_10GB            0x10000000
+#define I40E_REG_SPEED_20GB            0x20000000
+#define I40E_REG_SPEED_25_40GB         0x18000000
+       uint32_t link_speed;
+       uint32_t reg_val;
+
+       reg_val = I40E_READ_REG(hw, I40E_PRTMAC_LINKSTA);
+       link_speed = reg_val & I40E_REG_SPEED_MASK;
+       reg_val &= I40E_REG_LINK_UP;
+       link->link_status = (reg_val == I40E_REG_LINK_UP) ? 1 : 0;
+
+       if (unlikely(link->link_status == 0))
+               return;
+
+       /* Parse the link status */
+       switch (link_speed) {
+       case I40E_REG_SPEED_100MB:
+               link->link_speed = ETH_SPEED_NUM_100M;
+               break;
+       case I40E_REG_SPEED_1GB:
+               link->link_speed = ETH_SPEED_NUM_1G;
+               break;
+       case I40E_REG_SPEED_10GB:
+               link->link_speed = ETH_SPEED_NUM_10G;
+               break;
+       case I40E_REG_SPEED_20GB:
+               link->link_speed = ETH_SPEED_NUM_20G;
+               break;
+       case I40E_REG_SPEED_25_40GB:
+               reg_val = I40E_READ_REG(hw, I40E_PRTMAC_MACC);
+
+               if (reg_val & I40E_REG_MACC_25GB)
+                       link->link_speed = ETH_SPEED_NUM_25G;
+               else
+                       link->link_speed = ETH_SPEED_NUM_40G;
+
+               break;
+       default:
+               PMD_DRV_LOG(ERR, "Unknown link speed info %u", link_speed);
+               break;
+       }
+}
+
+static __rte_always_inline void
+update_link_wait(struct i40e_hw *hw, struct rte_eth_link *link,
+       bool enable_lse)
 {
-#define CHECK_INTERVAL 100  /* 100ms */
-#define MAX_REPEAT_TIME 10  /* 1s (10 * 100ms) in total */
-       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+#define CHECK_INTERVAL             100  /* 100ms */
+#define MAX_REPEAT_TIME            10  /* 1s (10 * 100ms) in total */
+       uint32_t rep_cnt = MAX_REPEAT_TIME;
        struct i40e_link_status link_status;
-       struct rte_eth_link link, old;
        int status;
-       unsigned rep_cnt = MAX_REPEAT_TIME;
-       bool enable_lse = dev->data->dev_conf.intr_conf.lsc ? true : false;
 
-       memset(&link, 0, sizeof(link));
-       memset(&old, 0, sizeof(old));
        memset(&link_status, 0, sizeof(link_status));
-       rte_i40e_dev_atomic_read_link_status(dev, &old);
 
        do {
                /* Get link status information from hardware */
                status = i40e_aq_get_link_info(hw, enable_lse,
                                                &link_status, NULL);
-               if (status != I40E_SUCCESS) {
-                       link.link_speed = ETH_SPEED_NUM_100M;
-                       link.link_duplex = ETH_LINK_FULL_DUPLEX;
+               if (unlikely(status != I40E_SUCCESS)) {
+                       link->link_speed = ETH_SPEED_NUM_100M;
+                       link->link_duplex = ETH_LINK_FULL_DUPLEX;
                        PMD_DRV_LOG(ERR, "Failed to get link info");
-                       goto out;
+                       return;
                }
 
-               link.link_status = link_status.link_info & I40E_AQ_LINK_UP;
-               if (!wait_to_complete || link.link_status)
+               link->link_status = link_status.link_info & I40E_AQ_LINK_UP;
+               if (unlikely(link->link_status != 0))
                        break;
 
                rte_delay_ms(CHECK_INTERVAL);
        } while (--rep_cnt);
 
-       if (!link.link_status)
-               goto out;
-
-       /* i40e uses full duplex only */
-       link.link_duplex = ETH_LINK_FULL_DUPLEX;
-
        /* Parse the link status */
        switch (link_status.link_speed) {
        case I40E_LINK_SPEED_100MB:
-               link.link_speed = ETH_SPEED_NUM_100M;
+               link->link_speed = ETH_SPEED_NUM_100M;
                break;
        case I40E_LINK_SPEED_1GB:
-               link.link_speed = ETH_SPEED_NUM_1G;
+               link->link_speed = ETH_SPEED_NUM_1G;
                break;
        case I40E_LINK_SPEED_10GB:
-               link.link_speed = ETH_SPEED_NUM_10G;
+               link->link_speed = ETH_SPEED_NUM_10G;
                break;
        case I40E_LINK_SPEED_20GB:
-               link.link_speed = ETH_SPEED_NUM_20G;
+               link->link_speed = ETH_SPEED_NUM_20G;
                break;
        case I40E_LINK_SPEED_25GB:
-               link.link_speed = ETH_SPEED_NUM_25G;
+               link->link_speed = ETH_SPEED_NUM_25G;
                break;
        case I40E_LINK_SPEED_40GB:
-               link.link_speed = ETH_SPEED_NUM_40G;
+               link->link_speed = ETH_SPEED_NUM_40G;
                break;
        default:
-               link.link_speed = ETH_SPEED_NUM_100M;
+               link->link_speed = ETH_SPEED_NUM_100M;
                break;
        }
+}
 
+int
+i40e_dev_link_update(struct rte_eth_dev *dev,
+                    int wait_to_complete)
+{
+       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_eth_link link, old;
+       bool enable_lse = dev->data->dev_conf.intr_conf.lsc ? true : false;
+
+       memset(&link, 0, sizeof(link));
+       memset(&old, 0, sizeof(old));
+
+       rte_i40e_dev_atomic_read_link_status(dev, &old);
+
+       /* i40e uses full duplex only */
+       link.link_duplex = ETH_LINK_FULL_DUPLEX;
        link.link_autoneg = !(dev->data->dev_conf.link_speeds &
                        ETH_LINK_SPEED_FIXED);
 
-out:
+       if (!wait_to_complete)
+               update_link_no_wait(hw, &link);
+       else
+               update_link_wait(hw, &link, enable_lse);
+
        rte_i40e_dev_atomic_write_link_status(dev, &link);
        if (link.link_status == old.link_status)
                return -1;
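
The refactor splits link_update into a register-based fast path (no
admin-queue round trip when wait_to_complete is 0) and the original
polling path over the admin queue. For reference, decoding the raw
PRTMAC_LINKSTA value with the constants defined above (25G vs. 40G needs
the extra MACC register read and is left out here):

/* Sketch: decode link status and speed from a raw register value. */
#include <stdint.h>

#define I40E_REG_LINK_UP	0x40000080
#define I40E_REG_SPEED_MASK	0x38000000
#define I40E_REG_SPEED_100MB	0x00000000
#define I40E_REG_SPEED_1GB	0x08000000
#define I40E_REG_SPEED_10GB	0x10000000
#define I40E_REG_SPEED_20GB	0x20000000

static int
decode_linksta(uint32_t reg_val, uint32_t *speed_mbps)
{
	if ((reg_val & I40E_REG_LINK_UP) != I40E_REG_LINK_UP)
		return 0;				/* link down */
	switch (reg_val & I40E_REG_SPEED_MASK) {
	case I40E_REG_SPEED_100MB: *speed_mbps = 100;	break;
	case I40E_REG_SPEED_1GB:   *speed_mbps = 1000;	break;
	case I40E_REG_SPEED_10GB:  *speed_mbps = 10000;	break;
	case I40E_REG_SPEED_20GB:  *speed_mbps = 20000;	break;
	default:		   *speed_mbps = 0;	break;
	}
	return 1;					/* link up */
}
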
@@ -11329,7 +11404,8 @@ i40e_find_customized_pctype(struct i40e_pf *pf, uint8_t index)
 static int
 i40e_update_customized_pctype(struct rte_eth_dev *dev, uint8_t *pkg,
                              uint32_t pkg_size, uint32_t proto_num,
-                             struct rte_pmd_i40e_proto_info *proto)
+                             struct rte_pmd_i40e_proto_info *proto,
+                             enum rte_pmd_i40e_package_op op)
 {
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        uint32_t pctype_num;
@@ -11342,6 +11418,12 @@ i40e_update_customized_pctype(struct rte_eth_dev *dev, uint8_t *pkg,
        uint32_t i, j, n;
        int ret;
 
+       if (op != RTE_PMD_I40E_PKG_OP_WR_ADD &&
+           op != RTE_PMD_I40E_PKG_OP_WR_DEL) {
+               PMD_DRV_LOG(ERR, "Unsupported operation.");
+               return -1;
+       }
+
        ret = rte_pmd_i40e_get_ddp_info(pkg, pkg_size,
                                (uint8_t *)&pctype_num, sizeof(pctype_num),
                                RTE_PMD_I40E_PKG_INFO_PCTYPE_NUM);
@@ -11404,8 +11486,13 @@ i40e_update_customized_pctype(struct rte_eth_dev *dev, uint8_t *pkg,
                                i40e_find_customized_pctype(pf,
                                                      I40E_CUSTOMIZED_GTPU);
                if (new_pctype) {
-                       new_pctype->pctype = pctype_value;
-                       new_pctype->valid = true;
+                       if (op == RTE_PMD_I40E_PKG_OP_WR_ADD) {
+                               new_pctype->pctype = pctype_value;
+                               new_pctype->valid = true;
+                       } else {
+                               new_pctype->pctype = I40E_FILTER_PCTYPE_INVALID;
+                               new_pctype->valid = false;
+                       }
                }
        }
 
@@ -11415,8 +11502,9 @@ i40e_update_customized_pctype(struct rte_eth_dev *dev, uint8_t *pkg,
 
 static int
 i40e_update_customized_ptype(struct rte_eth_dev *dev, uint8_t *pkg,
-                              uint32_t pkg_size, uint32_t proto_num,
-                              struct rte_pmd_i40e_proto_info *proto)
+                            uint32_t pkg_size, uint32_t proto_num,
+                            struct rte_pmd_i40e_proto_info *proto,
+                            enum rte_pmd_i40e_package_op op)
 {
        struct rte_pmd_i40e_ptype_mapping *ptype_mapping;
        uint16_t port_id = dev->data->port_id;
@@ -11429,6 +11517,17 @@ i40e_update_customized_ptype(struct rte_eth_dev *dev, uint8_t *pkg,
        bool inner_ip;
        int ret;
 
+       if (op != RTE_PMD_I40E_PKG_OP_WR_ADD &&
+           op != RTE_PMD_I40E_PKG_OP_WR_DEL) {
+               PMD_DRV_LOG(ERR, "Unsupported operation.");
+               return -1;
+       }
+
+       if (op == RTE_PMD_I40E_PKG_OP_WR_DEL) {
+               rte_pmd_i40e_ptype_mapping_reset(port_id);
+               return 0;
+       }
+
        /* get information about new ptype num */
        ret = rte_pmd_i40e_get_ddp_info(pkg, pkg_size,
                                (uint8_t *)&ptype_num, sizeof(ptype_num),
@@ -11547,7 +11646,7 @@ i40e_update_customized_ptype(struct rte_eth_dev *dev, uint8_t *pkg,
 
 void
 i40e_update_customized_info(struct rte_eth_dev *dev, uint8_t *pkg,
-                             uint32_t pkg_size)
+                           uint32_t pkg_size, enum rte_pmd_i40e_package_op op)
 {
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        uint32_t proto_num;
@@ -11556,6 +11655,12 @@ i40e_update_customized_info(struct rte_eth_dev *dev, uint8_t *pkg,
        uint32_t i;
        int ret;
 
+       if (op != RTE_PMD_I40E_PKG_OP_WR_ADD &&
+           op != RTE_PMD_I40E_PKG_OP_WR_DEL) {
+               PMD_DRV_LOG(ERR, "Unsupported operation.");
+               return;
+       }
+
        /* get information about protocol number */
        ret = rte_pmd_i40e_get_ddp_info(pkg, pkg_size,
                                       (uint8_t *)&proto_num, sizeof(proto_num),
@@ -11589,20 +11694,23 @@ i40e_update_customized_info(struct rte_eth_dev *dev, uint8_t *pkg,
        /* Check if GTP is supported. */
        for (i = 0; i < proto_num; i++) {
                if (!strncmp(proto[i].name, "GTP", 3)) {
-                       pf->gtp_support = true;
+                       if (op == RTE_PMD_I40E_PKG_OP_WR_ADD)
+                               pf->gtp_support = true;
+                       else
+                               pf->gtp_support = false;
                        break;
                }
        }
 
        /* Update customized pctype info */
        ret = i40e_update_customized_pctype(dev, pkg, pkg_size,
-                                           proto_num, proto);
+                                           proto_num, proto, op);
        if (ret)
                PMD_DRV_LOG(INFO, "No pctype is updated.");
 
        /* Update customized ptype info */
        ret = i40e_update_customized_ptype(dev, pkg, pkg_size,
-                                          proto_num, proto);
+                                          proto_num, proto, op);
        if (ret)
                PMD_DRV_LOG(INFO, "No ptype is updated.");
 
index 229c974..2c107e2 100644 (file)
@@ -40,6 +40,7 @@
 #include <rte_hash.h>
 #include <rte_flow_driver.h>
 #include <rte_tm_driver.h>
+#include "rte_pmd_i40e.h"
 
 #define I40E_VLAN_TAG_SIZE        4
 
@@ -1221,7 +1222,8 @@ void i40e_tm_conf_uninit(struct rte_eth_dev *dev);
 struct i40e_customized_pctype*
 i40e_find_customized_pctype(struct i40e_pf *pf, uint8_t index);
 void i40e_update_customized_info(struct rte_eth_dev *dev, uint8_t *pkg,
-                                uint32_t pkg_size);
+                                uint32_t pkg_size,
+                                enum rte_pmd_i40e_package_op op);
 int i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb);
 int i40e_flush_queue_region_all_conf(struct rte_eth_dev *dev,
                struct i40e_hw *hw, struct i40e_pf *pf, uint16_t on);
index 37380e6..4ebf925 100644 (file)
@@ -2418,7 +2418,7 @@ i40e_flow_fdir_get_pctype_value(struct i40e_pf *pf,
                break;
        }
 
-       if (cus_pctype)
+       if (cus_pctype && cus_pctype->valid)
                return cus_pctype->pctype;
 
        return I40E_FILTER_PCTYPE_INVALID;
index f726a9c..ab1163c 100644 (file)
@@ -1632,8 +1632,6 @@ rte_pmd_i40e_process_ddp_package(uint16_t port, uint8_t *buff,
                return -EINVAL;
        }
 
-       i40e_update_customized_info(dev, buff, size);
-
        /* Find metadata segment */
        metadata_seg_hdr = i40e_find_segment_in_package(SEGMENT_TYPE_METADATA,
                                                        pkg_hdr);
@@ -1737,6 +1735,10 @@ rte_pmd_i40e_process_ddp_package(uint16_t port, uint8_t *buff,
                }
        }
 
+       if (op == RTE_PMD_I40E_PKG_OP_WR_ADD ||
+           op == RTE_PMD_I40E_PKG_OP_WR_DEL)
+               i40e_update_customized_info(dev, buff, size, op);
+
        rte_free(profile_info_sec);
        return status;
 }
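
With this change the customized pctype/ptype tables are updated only after
the package write succeeds, and symmetrically torn down on delete. For
orientation, the public entry point is driven like this (the buffer is
assumed to hold a DDP profile image read from disk):

/* Sketch: add and remove a DDP profile through the rte_pmd_i40e API. */
#include <stdint.h>
#include <rte_pmd_i40e.h>

static int
load_ddp(uint16_t port, uint8_t *pkg, uint32_t size)
{
	return rte_pmd_i40e_process_ddp_package(port, pkg, size,
						RTE_PMD_I40E_PKG_OP_WR_ADD);
}

static int
unload_ddp(uint16_t port, uint8_t *pkg, uint32_t size)
{
	return rte_pmd_i40e_process_ddp_package(port, pkg, size,
						RTE_PMD_I40E_PKG_OP_WR_DEL);
}
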
index f219866..d7eb458 100644 (file)
@@ -1366,6 +1366,8 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev)
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct ixgbe_hw *hw;
+       int retries = 0;
+       int ret;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -1386,8 +1388,20 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev)
 
        /* disable uio intr before callback unregister */
        rte_intr_disable(intr_handle);
-       rte_intr_callback_unregister(intr_handle,
-                                    ixgbe_dev_interrupt_handler, eth_dev);
+
+       do {
+               ret = rte_intr_callback_unregister(intr_handle,
+                               ixgbe_dev_interrupt_handler, eth_dev);
+               if (ret >= 0) {
+                       break;
+               } else if (ret != -EAGAIN) {
+                       PMD_INIT_LOG(ERR,
+                               "intr callback unregister failed: %d",
+                               ret);
+                       return ret;
+               }
+               rte_delay_ms(100);
+       } while (retries++ < (10 + IXGBE_LINK_UP_TIME));
 
        /* uninitialize PF if max_vfs not zero */
        ixgbe_pf_host_uninit(eth_dev);
@@ -2316,11 +2330,6 @@ ixgbe_check_mq_mode(struct rte_eth_dev *dev)
                if (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) {
                        const struct rte_eth_dcb_rx_conf *conf;
 
-                       if (nb_rx_q != IXGBE_DCB_NB_QUEUES) {
-                               PMD_INIT_LOG(ERR, "DCB selected, nb_rx_q != %d.",
-                                                IXGBE_DCB_NB_QUEUES);
-                               return -EINVAL;
-                       }
                        conf = &dev_conf->rx_adv_conf.dcb_rx_conf;
                        if (!(conf->nb_tcs == ETH_4_TCS ||
                               conf->nb_tcs == ETH_8_TCS)) {
@@ -2334,11 +2343,6 @@ ixgbe_check_mq_mode(struct rte_eth_dev *dev)
                if (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB) {
                        const struct rte_eth_dcb_tx_conf *conf;
 
-                       if (nb_tx_q != IXGBE_DCB_NB_QUEUES) {
-                               PMD_INIT_LOG(ERR, "DCB, nb_tx_q != %d.",
-                                                IXGBE_DCB_NB_QUEUES);
-                               return -EINVAL;
-                       }
                        conf = &dev_conf->tx_adv_conf.dcb_tx_conf;
                        if (!(conf->nb_tcs == ETH_4_TCS ||
                               conf->nb_tcs == ETH_8_TCS)) {
@@ -3886,7 +3890,7 @@ ixgbevf_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
        /* for SFP+ modules and DA cables on 82599 it can take up to 500usecs
         * before the link status is correct
         */
-       if (mac->type == ixgbe_mac_82599_vf) {
+       if (mac->type == ixgbe_mac_82599_vf && wait_to_complete) {
                int i;
 
                for (i = 0; i < 5; i++) {
@@ -5822,8 +5826,12 @@ ixgbe_configure_msix(struct rte_eth_dev *dev)
 
        /* won't configure msix register if no mapping is done
         * between intr vector and event fd
+        * but if MSI-X has been enabled already, auto clean, auto mask
+        * and throttling still need to be configured.
         */
-       if (!rte_intr_dp_is_en(intr_handle))
+       gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
+       if (!rte_intr_dp_is_en(intr_handle) &&
+           !(gpie & (IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_PBA_SUPPORT)))
                return;
 
        if (rte_intr_allow_others(intr_handle))
@@ -5847,27 +5855,30 @@ ixgbe_configure_msix(struct rte_eth_dev *dev)
        /* Populate the IVAR table and set the ITR values to the
         * corresponding register.
         */
-       for (queue_id = 0; queue_id < dev->data->nb_rx_queues;
-            queue_id++) {
-               /* by default, 1:1 mapping */
-               ixgbe_set_ivar_map(hw, 0, queue_id, vec);
-               intr_handle->intr_vec[queue_id] = vec;
-               if (vec < base + intr_handle->nb_efd - 1)
-                       vec++;
-       }
+       if (rte_intr_dp_is_en(intr_handle)) {
+               for (queue_id = 0; queue_id < dev->data->nb_rx_queues;
+                       queue_id++) {
+                       /* by default, 1:1 mapping */
+                       ixgbe_set_ivar_map(hw, 0, queue_id, vec);
+                       intr_handle->intr_vec[queue_id] = vec;
+                       if (vec < base + intr_handle->nb_efd - 1)
+                               vec++;
+               }
 
-       switch (hw->mac.type) {
-       case ixgbe_mac_82598EB:
-               ixgbe_set_ivar_map(hw, -1, IXGBE_IVAR_OTHER_CAUSES_INDEX,
-                                  IXGBE_MISC_VEC_ID);
-               break;
-       case ixgbe_mac_82599EB:
-       case ixgbe_mac_X540:
-       case ixgbe_mac_X550:
-               ixgbe_set_ivar_map(hw, -1, 1, IXGBE_MISC_VEC_ID);
-               break;
-       default:
-               break;
+               switch (hw->mac.type) {
+               case ixgbe_mac_82598EB:
+                       ixgbe_set_ivar_map(hw, -1,
+                                          IXGBE_IVAR_OTHER_CAUSES_INDEX,
+                                          IXGBE_MISC_VEC_ID);
+                       break;
+               case ixgbe_mac_82599EB:
+               case ixgbe_mac_X540:
+               case ixgbe_mac_X550:
+                       ixgbe_set_ivar_map(hw, -1, 1, IXGBE_MISC_VEC_ID);
+                       break;
+               default:
+                       break;
+               }
        }
        IXGBE_WRITE_REG(hw, IXGBE_EITR(IXGBE_MISC_VEC_ID),
                        IXGBE_MIN_INTER_INTERRUPT_INTERVAL_DEFAULT & 0xFFF);
index c1a2ea5..5c79501 100644 (file)
@@ -90,7 +90,7 @@ static const struct rte_eth_link pmd_link = {
                .link_speed = ETH_SPEED_NUM_10G,
                .link_duplex = ETH_LINK_FULL_DUPLEX,
                .link_status = ETH_LINK_DOWN,
-               .link_autoneg = ETH_LINK_AUTONEG,
+               .link_autoneg = ETH_LINK_FIXED,
 };
 static int is_kni_initialized;
 
index 84b8a32..0ca491b 100644 (file)
@@ -1479,6 +1479,11 @@ lio_dev_start(struct rte_eth_dev *eth_dev)
        /* Configure RSS if device configured with multiple RX queues. */
        lio_dev_mq_rx_configure(eth_dev);
 
+       /* Before updating the link info,
+        * linfo.link.link_status64 must be set to 0.
+        */
+       lio_dev->linfo.link.link_status64 = 0;
+
        /* start polling for lsc */
        ret = rte_eal_alarm_set(LIO_LSC_TIMEOUT,
                                lio_sync_link_state_check,
index 97dac64..4d7bd5f 100644 (file)
@@ -85,6 +85,8 @@ const char *pmd_mlx4_init_params[] = {
        NULL,
 };
 
+static void mlx4_dev_stop(struct rte_eth_dev *dev);
+
 /**
  * DPDK callback for Ethernet device configuration.
  *
@@ -108,7 +110,13 @@ mlx4_dev_configure(struct rte_eth_dev *dev)
                      " flow error type %d, cause %p, message: %s",
                      -ret, strerror(-ret), error.type, error.cause,
                      error.message ? error.message : "(unspecified)");
+               goto exit;
        }
+       ret = mlx4_intr_install(priv);
+       if (ret)
+               ERROR("%p: interrupt handler installation failed",
+                     (void *)dev);
+exit:
        return ret;
 }
 
@@ -141,7 +149,7 @@ mlx4_dev_start(struct rte_eth_dev *dev)
                      (void *)dev, strerror(-ret));
                goto err;
        }
-       ret = mlx4_intr_install(priv);
+       ret = mlx4_rxq_intr_enable(priv);
        if (ret) {
                ERROR("%p: interrupt handler installation failed",
                     (void *)dev);
@@ -161,8 +169,7 @@ mlx4_dev_start(struct rte_eth_dev *dev)
        dev->rx_pkt_burst = mlx4_rx_burst;
        return 0;
 err:
-       /* Rollback. */
-       priv->started = 0;
+       mlx4_dev_stop(dev);
        return ret;
 }
 
@@ -187,7 +194,7 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
        dev->rx_pkt_burst = mlx4_rx_burst_removed;
        rte_wmb();
        mlx4_flow_sync(priv, NULL);
-       mlx4_intr_uninstall(priv);
+       mlx4_rxq_intr_disable(priv);
        mlx4_rss_deinit(priv);
 }
 
@@ -212,6 +219,7 @@ mlx4_dev_close(struct rte_eth_dev *dev)
        dev->tx_pkt_burst = mlx4_tx_burst_removed;
        rte_wmb();
        mlx4_flow_clean(priv);
+       mlx4_rss_deinit(priv);
        for (i = 0; i != dev->data->nb_rx_queues; ++i)
                mlx4_rx_queue_release(dev->data->rx_queues[i]);
        for (i = 0; i != dev->data->nb_tx_queues; ++i)
@@ -336,7 +344,7 @@ mlx4_arg_parse(const char *key, const char *val, struct mlx4_conf *conf)
                return -rte_errno;
        }
        if (strcmp(MLX4_PMD_PORT_KVARG, key) == 0) {
-               uint32_t ports = rte_log2_u32(conf->ports.present);
+               uint32_t ports = rte_log2_u32(conf->ports.present + 1);
 
                if (tmp >= ports) {
                        ERROR("port index %lu outside range [0,%" PRIu32 ")",
index 3aeef87..41d652b 100644 (file)
@@ -126,6 +126,7 @@ struct priv {
        uint32_t vf:1; /**< This is a VF device. */
        uint32_t intr_alarm:1; /**< An interrupt alarm is scheduled. */
        uint32_t isolated:1; /**< Toggle isolated mode. */
+       uint32_t rss_init:1; /**< Common RSS context is initialized. */
        uint32_t hw_csum:1; /* Checksum offload is supported. */
        uint32_t hw_csum_l2tun:1; /* Checksum support for L2 tunnels. */
        struct rte_intr_handle intr_handle; /**< Port interrupt handle. */
@@ -170,6 +171,8 @@ const uint32_t *mlx4_dev_supported_ptypes_get(struct rte_eth_dev *dev);
 
 int mlx4_intr_uninstall(struct priv *priv);
 int mlx4_intr_install(struct priv *priv);
+int mlx4_rxq_intr_enable(struct priv *priv);
+void mlx4_rxq_intr_disable(struct priv *priv);
 int mlx4_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx);
 int mlx4_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx);
 
index 2f69e7d..89f552c 100644 (file)
@@ -158,167 +158,6 @@ try_dev_id:
        return 0;
 }
 
-/**
- * Read from sysfs entry.
- *
- * @param[in] priv
- *   Pointer to private structure.
- * @param[in] entry
- *   Entry name relative to sysfs path.
- * @param[out] buf
- *   Data output buffer.
- * @param size
- *   Buffer size.
- *
- * @return
- *   Number of bytes read on success, negative errno value otherwise and
- *   rte_errno is set.
- */
-static int
-mlx4_sysfs_read(const struct priv *priv, const char *entry,
-               char *buf, size_t size)
-{
-       char ifname[IF_NAMESIZE];
-       FILE *file;
-       int ret;
-
-       ret = mlx4_get_ifname(priv, &ifname);
-       if (ret)
-               return ret;
-
-       MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
-             ifname, entry);
-
-       file = fopen(path, "rb");
-       if (file == NULL) {
-               rte_errno = errno;
-               return -rte_errno;
-       }
-       ret = fread(buf, 1, size, file);
-       if ((size_t)ret < size && ferror(file)) {
-               rte_errno = EIO;
-               ret = -rte_errno;
-       } else {
-               ret = size;
-       }
-       fclose(file);
-       return ret;
-}
-
-/**
- * Write to sysfs entry.
- *
- * @param[in] priv
- *   Pointer to private structure.
- * @param[in] entry
- *   Entry name relative to sysfs path.
- * @param[in] buf
- *   Data buffer.
- * @param size
- *   Buffer size.
- *
- * @return
- *   Number of bytes written on success, negative errno value otherwise and
- *   rte_errno is set.
- */
-static int
-mlx4_sysfs_write(const struct priv *priv, const char *entry,
-                char *buf, size_t size)
-{
-       char ifname[IF_NAMESIZE];
-       FILE *file;
-       int ret;
-
-       ret = mlx4_get_ifname(priv, &ifname);
-       if (ret)
-               return ret;
-
-       MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
-             ifname, entry);
-
-       file = fopen(path, "wb");
-       if (file == NULL) {
-               rte_errno = errno;
-               return -rte_errno;
-       }
-       ret = fwrite(buf, 1, size, file);
-       if ((size_t)ret < size || ferror(file)) {
-               rte_errno = EIO;
-               ret = -rte_errno;
-       } else {
-               ret = size;
-       }
-       fclose(file);
-       return ret;
-}
-
-/**
- * Get unsigned long sysfs property.
- *
- * @param priv
- *   Pointer to private structure.
- * @param[in] name
- *   Entry name relative to sysfs path.
- * @param[out] value
- *   Value output buffer.
- *
- * @return
- *   0 on success, negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value)
-{
-       int ret;
-       unsigned long value_ret;
-       char value_str[32];
-
-       ret = mlx4_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
-       if (ret < 0) {
-               DEBUG("cannot read %s value from sysfs: %s",
-                     name, strerror(rte_errno));
-               return ret;
-       }
-       value_str[ret] = '\0';
-       errno = 0;
-       value_ret = strtoul(value_str, NULL, 0);
-       if (errno) {
-               rte_errno = errno;
-               DEBUG("invalid %s value `%s': %s", name, value_str,
-                     strerror(rte_errno));
-               return -rte_errno;
-       }
-       *value = value_ret;
-       return 0;
-}
-
-/**
- * Set unsigned long sysfs property.
- *
- * @param priv
- *   Pointer to private structure.
- * @param[in] name
- *   Entry name relative to sysfs path.
- * @param value
- *   Value to set.
- *
- * @return
- *   0 on success, negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx4_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value)
-{
-       int ret;
-       MKSTR(value_str, "%lu", value);
-
-       ret = mlx4_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
-       if (ret < 0) {
-               DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
-                     name, value_str, value, strerror(rte_errno));
-               return ret;
-       }
-       return 0;
-}
-
 /**
  * Perform ifreq ioctl() on associated Ethernet device.
  *
@@ -388,12 +227,12 @@ mlx4_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN])
 int
 mlx4_mtu_get(struct priv *priv, uint16_t *mtu)
 {
-       unsigned long ulong_mtu = 0;
-       int ret = mlx4_get_sysfs_ulong(priv, "mtu", &ulong_mtu);
+       struct ifreq request;
+       int ret = mlx4_ifreq(priv, SIOCGIFMTU, &request);
 
        if (ret)
                return ret;
-       *mtu = ulong_mtu;
+       *mtu = request.ifr_mtu;
        return 0;
 }
 
@@ -412,20 +251,13 @@ int
 mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 {
        struct priv *priv = dev->data->dev_private;
-       uint16_t new_mtu;
-       int ret = mlx4_set_sysfs_ulong(priv, "mtu", mtu);
+       struct ifreq request = { .ifr_mtu = mtu, };
+       int ret = mlx4_ifreq(priv, SIOCSIFMTU, &request);
 
        if (ret)
                return ret;
-       ret = mlx4_mtu_get(priv, &new_mtu);
-       if (ret)
-               return ret;
-       if (new_mtu == mtu) {
-               priv->mtu = mtu;
-               return 0;
-       }
-       rte_errno = EINVAL;
-       return -rte_errno;
+       priv->mtu = mtu;
+       return 0;
 }
 
 /**
@@ -444,14 +276,14 @@ mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 static int
 mlx4_set_flags(struct priv *priv, unsigned int keep, unsigned int flags)
 {
-       unsigned long tmp = 0;
-       int ret = mlx4_get_sysfs_ulong(priv, "flags", &tmp);
+       struct ifreq request;
+       int ret = mlx4_ifreq(priv, SIOCGIFFLAGS, &request);
 
        if (ret)
                return ret;
-       tmp &= keep;
-       tmp |= (flags & (~keep));
-       return mlx4_set_sysfs_ulong(priv, "flags", tmp);
+       request.ifr_flags &= keep;
+       request.ifr_flags |= flags & ~keep;
+       return mlx4_ifreq(priv, SIOCSIFFLAGS, &request);
 }
 
 /**
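
The sysfs read/write helpers are dropped in favor of the standard ifreq
ioctls, which go through a single well-defined kernel interface instead of
string-formatted sysfs files. Outside the PMD the same interface looks
like this (standalone sketch; the interface name is a placeholder):

/* Sketch: set an interface MTU with SIOCSIFMTU. */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <unistd.h>

static int
mtu_set(const char *ifname, uint16_t mtu)
{
	struct ifreq req;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int ret;

	if (fd < 0)
		return -1;
	memset(&req, 0, sizeof(req));
	strncpy(req.ifr_name, ifname, IFNAMSIZ - 1);
	req.ifr_mtu = mtu;			/* requested MTU */
	ret = ioctl(fd, SIOCSIFMTU, &req);	/* 0 on success */
	close(fd);
	return ret;
}
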
index e81e24d..fc0f061 100644 (file)
@@ -116,7 +116,7 @@ static uint64_t
 mlx4_conv_rss_hf(uint64_t rss_hf)
 {
        enum { IPV4, IPV6, TCP, UDP, };
-       const uint64_t in[] = {
+       static const uint64_t in[] = {
                [IPV4] = (ETH_RSS_IPV4 |
                          ETH_RSS_FRAG_IPV4 |
                          ETH_RSS_NONFRAG_IPV4_TCP |
@@ -139,7 +139,7 @@ mlx4_conv_rss_hf(uint64_t rss_hf)
                 */
                [UDP] = 0,
        };
-       const uint64_t out[RTE_DIM(in)] = {
+       static const uint64_t out[RTE_DIM(in)] = {
                [IPV4] = IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4,
                [IPV6] = IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6,
                [TCP] = IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP,
@@ -379,6 +379,9 @@ error:
  * Additional mlx4-specific constraints on supported fields:
  *
  * - No support for partial masks.
+ * - Due to a HW/FW limitation, flow rule priority is not taken into account
+ *   when matching UDP destination ports; doing so is therefore only
+ *   supported at the highest priority level (0).
  *
  * @param[in, out] flow
  *   Flow rule handle to update.
@@ -410,6 +413,11 @@ mlx4_flow_merge_udp(struct rte_flow *flow,
                msg = "mlx4 does not support matching partial UDP fields";
                goto error;
        }
+       if (mask && mask->hdr.dst_port && flow->priority) {
+               msg = "combining UDP destination port matching with a nonzero"
+                       " priority level is not supported";
+               goto error;
+       }
        if (!flow->ibv_attr)
                return 0;
        ++flow->ibv_attr->num_of_specs;
@@ -674,6 +682,7 @@ mlx4_flow_prepare(struct priv *priv,
                         NULL, "only ingress is supported");
 fill:
        proc = mlx4_flow_proc_item_list;
+       flow->priority = attr->priority;
        /* Go over pattern. */
        for (item = pattern; item->type; ++item) {
                const struct mlx4_flow_proc_item *next = NULL;
@@ -839,11 +848,14 @@ fill:
                        },
                };
 
-               if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec)))
+               if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec))) {
+                       if (temp.rss)
+                               mlx4_rss_put(temp.rss);
                        return rte_flow_error_set
                                (error, -rte_errno,
                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
                                 "flow rule handle allocation failure");
+               }
                /* Most fields will be updated by second pass. */
                *flow = (struct rte_flow){
                        .ibv_attr = temp.ibv_attr,
@@ -1217,9 +1229,12 @@ mlx4_flow_internal_next_vlan(struct priv *priv, uint16_t vlan)
  *
  * Various flow rules are created depending on the mode the device is in:
  *
- * 1. Promiscuous: port MAC + catch-all (VLAN filtering is ignored).
- * 2. All multicast: port MAC/VLAN + catch-all multicast.
- * 3. Otherwise: port MAC/VLAN + broadcast MAC/VLAN.
+ * 1. Promiscuous:
+ *       port MAC + broadcast + catch-all (VLAN filtering is ignored).
+ * 2. All multicast:
+ *       port MAC/VLAN + broadcast + catch-all multicast.
+ * 3. Otherwise:
+ *       port MAC/VLAN + broadcast MAC/VLAN.
  *
  * About MAC flow rules:
  *
@@ -1298,9 +1313,6 @@ mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error)
                !priv->dev->data->promiscuous ?
                &vlan_spec.tci :
                NULL;
-       int broadcast =
-               !priv->dev->data->promiscuous &&
-               !priv->dev->data->all_multicast;
        uint16_t vlan = 0;
        struct rte_flow *flow;
        unsigned int i;
@@ -1334,7 +1346,7 @@ next_vlan:
                        rule_vlan = NULL;
                }
        }
-       for (i = 0; i != RTE_DIM(priv->mac) + broadcast; ++i) {
+       for (i = 0; i != RTE_DIM(priv->mac) + 1; ++i) {
                const struct ether_addr *mac;
 
                /* Broadcasts are handled by an extra iteration. */
@@ -1398,7 +1410,7 @@ next_vlan:
                        goto next_vlan;
        }
        /* Take care of promiscuous and all multicast flow rules. */
-       if (!broadcast) {
+       if (priv->dev->data->promiscuous || priv->dev->data->all_multicast) {
                for (flow = LIST_FIRST(&priv->flows);
                     flow && flow->internal;
                     flow = LIST_NEXT(flow, next)) {
index 651fd37..5e1f9ea 100644 (file)
@@ -70,6 +70,7 @@ struct rte_flow {
        uint32_t promisc:1; /**< This rule matches everything. */
        uint32_t allmulti:1; /**< This rule matches all multicast traffic. */
        uint32_t drop:1; /**< This rule drops packets. */
+       uint32_t priority; /**< Flow rule priority. */
        struct mlx4_rss *rss; /**< Rx target. */
 };
 
index 50d1976..2364cb2 100644
@@ -291,7 +291,7 @@ mlx4_intr_uninstall(struct priv *priv)
        }
        rte_eal_alarm_cancel((void (*)(void *))mlx4_link_status_alarm, priv);
        priv->intr_alarm = 0;
-       mlx4_rx_intr_vec_disable(priv);
+       mlx4_rxq_intr_disable(priv);
        rte_errno = err;
        return 0;
 }
@@ -313,8 +313,6 @@ mlx4_intr_install(struct priv *priv)
        int rc;
 
        mlx4_intr_uninstall(priv);
-       if (intr_conf->rxq && mlx4_rx_intr_vec_enable(priv) < 0)
-               goto error;
        if (intr_conf->lsc | intr_conf->rmv) {
                priv->intr_handle.fd = priv->ctx->async_fd;
                rc = rte_intr_callback_register(&priv->intr_handle,
@@ -395,3 +393,40 @@ mlx4_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx)
        }
        return -ret;
 }
+
+/**
+ * Enable datapath interrupts.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_rxq_intr_enable(struct priv *priv)
+{
+       const struct rte_intr_conf *const intr_conf =
+               &priv->dev->data->dev_conf.intr_conf;
+
+       if (intr_conf->rxq && mlx4_rx_intr_vec_enable(priv) < 0)
+               goto error;
+       return 0;
+error:
+       return -rte_errno;
+}
+
+/**
+ * Disable datapath interrupts, keeping other interrupts intact.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+void
+mlx4_rxq_intr_disable(struct priv *priv)
+{
+       int err = rte_errno; /* Make sure rte_errno remains unchanged. */
+
+       mlx4_rx_intr_vec_disable(priv);
+       rte_errno = err;
+}
index 53313c5..06030c2 100644
@@ -363,6 +363,8 @@ mlx4_rss_init(struct priv *priv)
        unsigned int i;
        int ret;
 
+       if (priv->rss_init)
+               return 0;
        /* Prepare range for RSS contexts before creating the first WQ. */
        ret = mlx4dv_set_context_attr(priv->ctx,
                                      MLX4DV_SET_CTX_ATTR_LOG_WQS_RANGE_SZ,
@@ -444,6 +446,7 @@ wq_num_check:
                }
                wq_num_prev = wq_num;
        }
+       priv->rss_init = 1;
        return 0;
 error:
        ERROR("cannot initialize common RSS resources (queue %u): %s: %s",
@@ -472,6 +475,8 @@ mlx4_rss_deinit(struct priv *priv)
 {
        unsigned int i;
 
+       if (!priv->rss_init)
+               return;
        for (i = 0; i != priv->dev->data->nb_rx_queues; ++i) {
                struct rxq *rxq = priv->dev->data->rx_queues[i];
 
@@ -480,6 +485,7 @@ mlx4_rss_deinit(struct priv *priv)
                        mlx4_rxq_detach(rxq);
                }
        }
+       priv->rss_init = 0;
 }
 
 /**
@@ -622,6 +628,7 @@ error:
                claim_zero(ibv_destroy_wq(wq));
        if (cq)
                claim_zero(ibv_destroy_cq(cq));
+       --rxq->usecnt;
        rte_errno = ret;
        ERROR("error while attaching Rx queue %p: %s: %s",
              (void *)rxq, msg, strerror(ret));
index 92b6257..05c4892 100644
@@ -961,7 +961,8 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        /* Update packet information. */
                        pkt->packet_type =
                                rxq_cq_to_pkt_type(cqe, rxq->l2tun_offload);
-                       pkt->ol_flags = 0;
+                       pkt->ol_flags = PKT_RX_RSS_HASH;
+                       pkt->hash.rss = cqe->immed_rss_invalid;
                        pkt->pkt_len = len;
                        if (rxq->csum | rxq->csum_l2tun) {
                                uint32_t flags =
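
Since mlx4_rx_burst() now advertises PKT_RX_RSS_HASH and fills in the hash value, applications can consume it straight from the mbuf. A small illustrative accessor, not part of the patch:

    #include <rte_mbuf.h>

    /* hash.rss is only meaningful when PKT_RX_RSS_HASH is set in ol_flags. */
    static inline uint32_t
    pkt_rss_hash(const struct rte_mbuf *m)
    {
            return (m->ol_flags & PKT_RX_RSS_HASH) ? m->hash.rss : 0;
    }
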
index 463df2b..5a390e8 100644
@@ -107,7 +107,7 @@ struct txq_elt {
        struct rte_mbuf *buf; /**< Buffer. */
 };
 
-/** Rx queue counters. */
+/** Tx queue counters. */
 struct mlx4_txq_stats {
        unsigned int idx; /**< Mapping index. */
        uint64_t opackets; /**< Total of successfully sent packets. */
index 45e0e8d..10ce335 100644
@@ -39,6 +39,7 @@
 #include <stdlib.h>
 #include <errno.h>
 #include <net/if.h>
+#include <sys/mman.h>
 
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -56,6 +57,7 @@
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
 #include <rte_common.h>
+#include <rte_eal_memconfig.h>
 #include <rte_kvargs.h>
 
 #include "mlx5.h"
@@ -117,6 +119,10 @@ struct mlx5_args {
        int tx_vec_en;
        int rx_vec_en;
 };
+
+/** Driver-specific log message type. */
+int mlx5_logtype;
+
 /**
  * Retrieve integer value from environment variable.
  *
@@ -148,7 +154,7 @@ mlx5_getenv_int(const char *name)
  *   A pointer to the callback data.
  *
  * @return
- *   a pointer to the allocate space.
+ *   Allocated buffer, NULL otherwise and rte_errno is set.
  */
 static void *
 mlx5_alloc_verbs_buf(size_t size, void *data)
@@ -156,11 +162,22 @@ mlx5_alloc_verbs_buf(size_t size, void *data)
        struct priv *priv = data;
        void *ret;
        size_t alignment = sysconf(_SC_PAGESIZE);
+       unsigned int socket = SOCKET_ID_ANY;
+
+       if (priv->verbs_alloc_ctx.type == MLX5_VERBS_ALLOC_TYPE_TX_QUEUE) {
+               const struct mlx5_txq_ctrl *ctrl = priv->verbs_alloc_ctx.obj;
+
+               socket = ctrl->socket;
+       } else if (priv->verbs_alloc_ctx.type ==
+                  MLX5_VERBS_ALLOC_TYPE_RX_QUEUE) {
+               const struct mlx5_rxq_ctrl *ctrl = priv->verbs_alloc_ctx.obj;
 
+               socket = ctrl->socket;
+       }
        assert(data != NULL);
-       ret = rte_malloc_socket(__func__, size, alignment,
-                               priv->dev->device->numa_node);
-       DEBUG("Extern alloc size: %lu, align: %lu: %p", size, alignment, ret);
+       ret = rte_malloc_socket(__func__, size, alignment, socket);
+       if (!ret && size)
+               rte_errno = ENOMEM;
        return ret;
 }
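
The NUMA socket is taken from priv->verbs_alloc_ctx, which the queue creation paths are expected to publish around their Verbs calls. A hypothetical sketch of that handshake; the real assignments live in the Tx/Rx queue setup code of this patch set, and txq_ctrl and qp_attr here are placeholders:

    priv->verbs_alloc_ctx = (struct mlx5_verbs_alloc_ctx){
            .type = MLX5_VERBS_ALLOC_TYPE_TX_QUEUE,
            .obj = txq_ctrl, /* carries the target NUMA socket */
    };
    qp = ibv_create_qp_ex(priv->ctx, &qp_attr); /* allocations hit the hook */
    priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
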
 
@@ -176,7 +193,6 @@ static void
 mlx5_free_verbs_buf(void *ptr, void *data __rte_unused)
 {
        assert(data != NULL);
-       DEBUG("Extern free request: %p", ptr);
        rte_free(ptr);
 }
 
@@ -191,17 +207,16 @@ mlx5_free_verbs_buf(void *ptr, void *data __rte_unused)
 static void
 mlx5_dev_close(struct rte_eth_dev *dev)
 {
-       struct priv *priv = mlx5_get_priv(dev);
+       struct priv *priv = dev->data->dev_private;
        unsigned int i;
        int ret;
 
-       priv_lock(priv);
-       DEBUG("%p: closing device \"%s\"",
-             (void *)dev,
-             ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
+       DRV_LOG(DEBUG, "port %u closing device \"%s\"",
+               dev->data->port_id,
+               ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
        /* In case mlx5_dev_stop() has not been called. */
-       priv_dev_interrupt_handler_uninstall(priv, dev);
-       priv_dev_traffic_disable(priv, dev);
+       mlx5_dev_interrupt_handler_uninstall(dev);
+       mlx5_traffic_disable(dev);
        /* Prevent crashes when queues are still in use. */
        dev->rx_pkt_burst = removed_rx_burst;
        dev->tx_pkt_burst = removed_tx_burst;
@@ -209,7 +224,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
                /* XXX race condition if mlx5_rx_burst() is still running. */
                usleep(1000);
                for (i = 0; (i != priv->rxqs_n); ++i)
-                       mlx5_priv_rxq_release(priv, i);
+                       mlx5_rxq_release(dev, i);
                priv->rxqs_n = 0;
                priv->rxqs = NULL;
        }
@@ -217,7 +232,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
                /* XXX race condition if mlx5_tx_burst() is still running. */
                usleep(1000);
                for (i = 0; (i != priv->txqs_n); ++i)
-                       mlx5_priv_txq_release(priv, i);
+                       mlx5_txq_release(dev, i);
                priv->txqs_n = 0;
                priv->txqs = NULL;
        }
@@ -231,32 +246,40 @@ mlx5_dev_close(struct rte_eth_dev *dev)
                rte_free(priv->rss_conf.rss_key);
        if (priv->reta_idx != NULL)
                rte_free(priv->reta_idx);
-       priv_socket_uninit(priv);
-       ret = mlx5_priv_hrxq_ibv_verify(priv);
+       if (priv->primary_socket)
+               mlx5_socket_uninit(dev);
+       ret = mlx5_hrxq_ibv_verify(dev);
        if (ret)
-               WARN("%p: some Hash Rx queue still remain", (void *)priv);
-       ret = mlx5_priv_ind_table_ibv_verify(priv);
+               DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
+                       dev->data->port_id);
+       ret = mlx5_ind_table_ibv_verify(dev);
        if (ret)
-               WARN("%p: some Indirection table still remain", (void *)priv);
-       ret = mlx5_priv_rxq_ibv_verify(priv);
+               DRV_LOG(WARNING, "port %u some indirection table still remain",
+                       dev->data->port_id);
+       ret = mlx5_rxq_ibv_verify(dev);
        if (ret)
-               WARN("%p: some Verbs Rx queue still remain", (void *)priv);
-       ret = mlx5_priv_rxq_verify(priv);
+               DRV_LOG(WARNING, "port %u some Verbs Rx queue still remain",
+                       dev->data->port_id);
+       ret = mlx5_rxq_verify(dev);
        if (ret)
-               WARN("%p: some Rx Queues still remain", (void *)priv);
-       ret = mlx5_priv_txq_ibv_verify(priv);
+               DRV_LOG(WARNING, "port %u some Rx queues still remain",
+                       dev->data->port_id);
+       ret = mlx5_txq_ibv_verify(dev);
        if (ret)
-               WARN("%p: some Verbs Tx queue still remain", (void *)priv);
-       ret = mlx5_priv_txq_verify(priv);
+               DRV_LOG(WARNING, "port %u some Verbs Tx queue still remain",
+                       dev->data->port_id);
+       ret = mlx5_txq_verify(dev);
        if (ret)
-               WARN("%p: some Tx Queues still remain", (void *)priv);
-       ret = priv_flow_verify(priv);
+               DRV_LOG(WARNING, "port %u some Tx queues still remain",
+                       dev->data->port_id);
+       ret = mlx5_flow_verify(dev);
        if (ret)
-               WARN("%p: some flows still remain", (void *)priv);
-       ret = priv_mr_verify(priv);
+               DRV_LOG(WARNING, "port %u some flows still remain",
+                       dev->data->port_id);
+       ret = mlx5_mr_verify(dev);
        if (ret)
-               WARN("%p: some Memory Region still remain", (void *)priv);
-       priv_unlock(priv);
+               DRV_LOG(WARNING, "port %u some memory region still remain",
+                       dev->data->port_id);
        memset(priv, 0, sizeof(*priv));
 }
 
@@ -394,7 +417,7 @@ mlx5_dev_idx(struct rte_pci_addr *pci_addr)
  *   User data.
  *
  * @return
- *   0 on success, negative errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
 mlx5_args_check(const char *key, const char *val, void *opaque)
@@ -405,8 +428,9 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
        errno = 0;
        tmp = strtoul(val, NULL, 0);
        if (errno) {
-               WARN("%s: \"%s\" is not a valid integer", key, val);
-               return errno;
+               rte_errno = errno;
+               DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val);
+               return -rte_errno;
        }
        if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) {
                args->cqe_comp = !!tmp;
@@ -427,8 +451,9 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
        } else if (strcmp(MLX5_RX_VEC_EN, key) == 0) {
                args->rx_vec_en = !!tmp;
        } else {
-               WARN("%s: unknown parameter", key);
-               return -EINVAL;
+               DRV_LOG(WARNING, "%s: unknown parameter", key);
+               rte_errno = EINVAL;
+               return -rte_errno;
        }
        return 0;
 }
@@ -442,7 +467,7 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
  *   Device arguments structure.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
 mlx5_args(struct mlx5_args *args, struct rte_devargs *devargs)
@@ -474,9 +499,10 @@ mlx5_args(struct mlx5_args *args, struct rte_devargs *devargs)
                if (rte_kvargs_count(kvlist, params[i])) {
                        ret = rte_kvargs_process(kvlist, params[i],
                                                 mlx5_args_check, args);
-                       if (ret != 0) {
+                       if (ret) {
+                               rte_errno = EINVAL;
                                rte_kvargs_free(kvlist);
-                               return ret;
+                               return -rte_errno;
                        }
                }
        }
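
For reference, a sketch of how this checker can be exercised against a single devargs string in isolation; the key comes from the hunk above and the value "1" is an arbitrary example (rte_kvargs.h is already included by this file):

    struct mlx5_args args = { .cqe_comp = MLX5_ARG_UNSET };
    static const char *const keys[] = { MLX5_RXQ_CQE_COMP_EN, NULL };
    struct rte_kvargs *kvlist = rte_kvargs_parse("rxq_cqe_comp_en=1", keys);

    if (kvlist != NULL) {
            /* Ends up calling mlx5_args_check("rxq_cqe_comp_en", "1", &args). */
            rte_kvargs_process(kvlist, MLX5_RXQ_CQE_COMP_EN,
                               mlx5_args_check, &args);
            rte_kvargs_free(kvlist);
    }
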
@@ -486,6 +512,112 @@ mlx5_args(struct mlx5_args *args, struct rte_devargs *devargs)
 
 static struct rte_pci_driver mlx5_driver;
 
+/*
+ * Reserved UAR address space for TXQ UAR (hardware doorbell) mapping.
+ * This is a process-local resource shared by the primary and secondary
+ * processes to avoid duplicate reservations.
+ * The range has to be available in both processes; TXQ UARs are mapped
+ * into it with fixed mmap() calls and no further checks.
+ */
+static void *uar_base;
+
+/**
+ * Reserve UAR address space for primary process.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_uar_init_primary(struct rte_eth_dev *dev)
+{
+       struct priv *priv = dev->data->dev_private;
+       void *addr = (void *)0;
+       int i;
+       const struct rte_mem_config *mcfg;
+
+       if (uar_base) { /* UAR address space mapped. */
+               priv->uar_base = uar_base;
+               return 0;
+       }
+       /* find out lower bound of hugepage segments */
+       mcfg = rte_eal_get_configuration()->mem_config;
+       for (i = 0; i < RTE_MAX_MEMSEG && mcfg->memseg[i].addr; i++) {
+               if (addr)
+                       addr = RTE_MIN(addr, mcfg->memseg[i].addr);
+               else
+                       addr = mcfg->memseg[i].addr;
+       }
+       /* Keep some distance from hugepages to minimize potential conflicts. */
+       addr = RTE_PTR_SUB(addr, MLX5_UAR_OFFSET + MLX5_UAR_SIZE);
+       /* anonymous mmap, no real memory consumption. */
+       addr = mmap(addr, MLX5_UAR_SIZE,
+                   PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+       if (addr == MAP_FAILED) {
+               DRV_LOG(ERR,
+                       "port %u failed to reserve UAR address space, please"
+                       " adjust MLX5_UAR_SIZE or try --base-virtaddr",
+                       dev->data->port_id);
+               rte_errno = ENOMEM;
+               return -rte_errno;
+       }
+       /* Accept either the same address or a new one returned by mmap()
+        * when the target range is already occupied.
+        */
+       DRV_LOG(INFO, "port %u reserved UAR address space: %p",
+               dev->data->port_id, addr);
+       priv->uar_base = addr; /* for primary and secondary UAR re-mmap. */
+       uar_base = addr; /* process local, don't reserve again. */
+       return 0;
+}
+
+/**
+ * Reserve UAR address space for secondary process, align with
+ * primary process.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_uar_init_secondary(struct rte_eth_dev *dev)
+{
+       struct priv *priv = dev->data->dev_private;
+       void *addr;
+
+       assert(priv->uar_base);
+       if (uar_base) { /* already reserved. */
+               assert(uar_base == priv->uar_base);
+               return 0;
+       }
+       /* anonymous mmap, no real memory consumption. */
+       addr = mmap(priv->uar_base, MLX5_UAR_SIZE,
+                   PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+       if (addr == MAP_FAILED) {
+               DRV_LOG(ERR, "port %u UAR mmap failed: %p size: %llu",
+                       dev->data->port_id, priv->uar_base, MLX5_UAR_SIZE);
+               rte_errno = ENXIO;
+               return -rte_errno;
+       }
+       if (priv->uar_base != addr) {
+               DRV_LOG(ERR,
+                       "port %u UAR address %p size %llu occupied, please"
+                       " adjust MLX5_UAR_OFFSET or try EAL parameter"
+                       " --base-virtaddr",
+                       dev->data->port_id, priv->uar_base, MLX5_UAR_SIZE);
+               rte_errno = ENXIO;
+               return -rte_errno;
+       }
+       uar_base = addr; /* process local, don't reserve again */
+       DRV_LOG(INFO, "port %u reserved UAR address space: %p",
+               dev->data->port_id, addr);
+       return 0;
+}
+
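+
Both helpers rely on the same reserve-and-verify idiom; stripped of the driver specifics it amounts to the following, illustrative only:

    #include <sys/mman.h>

    static void *
    reserve_va_range(void *hint, size_t size)
    {
            /* PROT_NONE + MAP_ANONYMOUS consumes address space, not memory. */
            void *addr = mmap(hint, size, PROT_NONE,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (addr == MAP_FAILED)
                    return NULL;
            if (hint != NULL && addr != hint) {
                    /* Range occupied: the processes cannot share it. */
                    munmap(addr, size);
                    return NULL;
            }
            return addr;
    }
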
 /**
  * Assign parameters from args into priv, only non default
  * values are considered.
@@ -530,17 +662,17 @@ mlx5_args_assign(struct priv *priv, struct mlx5_args *args)
  *   PCI device information.
  *
  * @return
- *   0 on success, negative errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
+mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+              struct rte_pci_device *pci_dev)
 {
-       struct ibv_device **list;
+       struct ibv_device **list = NULL;
        struct ibv_device *ibv_dev;
        int err = 0;
        struct ibv_context *attr_ctx = NULL;
        struct ibv_device_attr_ex device_attr;
-       unsigned int sriov;
        unsigned int mps;
        unsigned int cqe_comp;
        unsigned int tunnel_en = 0;
@@ -551,24 +683,25 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
        struct ibv_counter_set_description cs_desc;
 #endif
 
-       (void)pci_drv;
        assert(pci_drv == &mlx5_driver);
        /* Get mlx5_dev[] index. */
        idx = mlx5_dev_idx(&pci_dev->addr);
        if (idx == -1) {
-               ERROR("this driver cannot support any more adapters");
-               return -ENOMEM;
+               DRV_LOG(ERR, "this driver cannot support any more adapters");
+               err = ENOMEM;
+               goto error;
        }
-       DEBUG("using driver device index %d", idx);
-
+       DRV_LOG(DEBUG, "using driver device index %d", idx);
        /* Save PCI address. */
        mlx5_dev[idx].pci_addr = pci_dev->addr;
        list = ibv_get_device_list(&i);
        if (list == NULL) {
                assert(errno);
+               err = errno;
                if (errno == ENOSYS)
-                       ERROR("cannot list devices, is ib_uverbs loaded?");
-               return -errno;
+                       DRV_LOG(ERR,
+                               "cannot list devices, is ib_uverbs loaded?");
+               goto error;
        }
        assert(i >= 0);
        /*
@@ -579,7 +712,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                struct rte_pci_addr pci_addr;
 
                --i;
-               DEBUG("checking device \"%s\"", list[i]->name);
+               DRV_LOG(DEBUG, "checking device \"%s\"", list[i]->name);
                if (mlx5_ibv_device_to_pci_addr(list[i], &pci_addr))
                        continue;
                if ((pci_dev->addr.domain != pci_addr.domain) ||
@@ -587,14 +720,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                    (pci_dev->addr.devid != pci_addr.devid) ||
                    (pci_dev->addr.function != pci_addr.function))
                        continue;
-               sriov = ((pci_dev->id.device_id ==
-                      PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) ||
-                     (pci_dev->id.device_id ==
-                      PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) ||
-                     (pci_dev->id.device_id ==
-                      PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) ||
-                     (pci_dev->id.device_id ==
-                      PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF));
                switch (pci_dev->id.device_id) {
                case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
                        tunnel_en = 1;
@@ -609,30 +734,29 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                default:
                        break;
                }
-               INFO("PCI information matches, using device \"%s\""
-                    " (SR-IOV: %s)",
-                    list[i]->name,
-                    sriov ? "true" : "false");
+               DRV_LOG(INFO, "PCI information matches, using device \"%s\"",
+                       list[i]->name);
                attr_ctx = ibv_open_device(list[i]);
-               err = errno;
+               rte_errno = errno;
+               err = rte_errno;
                break;
        }
        if (attr_ctx == NULL) {
-               ibv_free_device_list(list);
                switch (err) {
                case 0:
-                       ERROR("cannot access device, is mlx5_ib loaded?");
-                       return -ENODEV;
+                       DRV_LOG(ERR,
+                               "cannot access device, is mlx5_ib loaded?");
+                       err = ENODEV;
+                       break;
                case EINVAL:
-                       ERROR("cannot use device, are drivers up to date?");
-                       return -EINVAL;
+                       DRV_LOG(ERR,
+                               "cannot use device, are drivers up to date?");
+                       break;
                }
-               assert(err > 0);
-               return -err;
+               goto error;
        }
        ibv_dev = list[i];
-
-       DEBUG("device opened");
+       DRV_LOG(DEBUG, "device opened");
        /*
         * Multi-packet send is supported by ConnectX-4 Lx PF as well
         * as all ConnectX-5 devices.
@@ -640,14 +764,14 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
        mlx5dv_query_device(attr_ctx, &attrs_out);
        if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) {
                if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW) {
-                       DEBUG("Enhanced MPW is supported");
+                       DRV_LOG(DEBUG, "enhanced MPW is supported");
                        mps = MLX5_MPW_ENHANCED;
                } else {
-                       DEBUG("MPW is supported");
+                       DRV_LOG(DEBUG, "MPW is supported");
                        mps = MLX5_MPW;
                }
        } else {
-               DEBUG("MPW isn't supported");
+               DRV_LOG(DEBUG, "MPW isn't supported");
                mps = MLX5_MPW_DISABLED;
        }
        if (RTE_CACHE_LINE_SIZE == 128 &&
@@ -655,10 +779,13 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                cqe_comp = 0;
        else
                cqe_comp = 1;
-       if (ibv_query_device_ex(attr_ctx, NULL, &device_attr))
+       err = ibv_query_device_ex(attr_ctx, NULL, &device_attr);
+       if (err) {
+               DEBUG("ibv_query_device_ex() failed");
                goto error;
-       INFO("%u port(s) detected", device_attr.orig_attr.phys_port_cnt);
-
+       }
+       DRV_LOG(INFO, "%u port(s) detected",
+               device_attr.orig_attr.phys_port_cnt);
        for (i = 0; i < device_attr.orig_attr.phys_port_cnt; i++) {
                char name[RTE_ETH_NAME_MAX_LEN];
                uint32_t port = i + 1; /* ports are indexed from one */
@@ -667,11 +794,9 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                struct ibv_port_attr port_attr;
                struct ibv_pd *pd = NULL;
                struct priv *priv = NULL;
-               struct rte_eth_dev *eth_dev;
+               struct rte_eth_dev *eth_dev = NULL;
                struct ibv_device_attr_ex device_attr_ex;
                struct ether_addr mac;
-               uint16_t num_vfs = 0;
-               struct ibv_device_attr_ex device_attr;
                struct mlx5_args args = {
                        .cqe_comp = MLX5_ARG_UNSET,
                        .txq_inline = MLX5_ARG_UNSET,
@@ -687,84 +812,85 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                snprintf(name, sizeof(name), PCI_PRI_FMT,
                         pci_dev->addr.domain, pci_dev->addr.bus,
                         pci_dev->addr.devid, pci_dev->addr.function);
-
                mlx5_dev[idx].ports |= test;
-
                if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                        eth_dev = rte_eth_dev_attach_secondary(name);
                        if (eth_dev == NULL) {
-                               ERROR("can not attach rte ethdev");
-                               err = ENOMEM;
+                               DRV_LOG(ERR, "can not attach rte ethdev");
+                               rte_errno = ENOMEM;
+                               err = rte_errno;
                                goto error;
                        }
                        eth_dev->device = &pci_dev->device;
                        eth_dev->dev_ops = &mlx5_dev_sec_ops;
-                       priv = eth_dev->data->dev_private;
+                       err = mlx5_uar_init_secondary(eth_dev);
+                       if (err) {
+                               err = rte_errno;
+                               goto error;
+                       }
                        /* Receive command fd from primary process */
-                       err = priv_socket_connect(priv);
+                       err = mlx5_socket_connect(eth_dev);
                        if (err < 0) {
-                               err = -err;
+                               err = rte_errno;
                                goto error;
                        }
                        /* Remap UAR for Tx queues. */
-                       err = priv_tx_uar_remap(priv, err);
-                       if (err < 0) {
-                               err = -err;
+                       err = mlx5_tx_uar_remap(eth_dev, err);
+                       if (err) {
+                               err = rte_errno;
                                goto error;
                        }
-                       priv_dev_select_rx_function(priv, eth_dev);
-                       priv_dev_select_tx_function(priv, eth_dev);
+                       /*
+                        * Ethdev pointer is still required as input since
+                        * the primary device is not accessible from the
+                        * secondary process.
+                        */
+                       eth_dev->rx_pkt_burst =
+                               mlx5_select_rx_function(eth_dev);
+                       eth_dev->tx_pkt_burst =
+                               mlx5_select_tx_function(eth_dev);
                        continue;
                }
-
-               DEBUG("using port %u (%08" PRIx32 ")", port, test);
-
+               DRV_LOG(DEBUG, "using port %u (%08" PRIx32 ")", port, test);
                ctx = ibv_open_device(ibv_dev);
                if (ctx == NULL) {
                        err = ENODEV;
                        goto port_error;
                }
-
-               ibv_query_device_ex(ctx, NULL, &device_attr);
                /* Check port status. */
                err = ibv_query_port(ctx, port, &port_attr);
                if (err) {
-                       ERROR("port query failed: %s", strerror(err));
+                       DRV_LOG(ERR, "port query failed: %s", strerror(err));
                        goto port_error;
                }
-
                if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
-                       ERROR("port %d is not configured in Ethernet mode",
-                             port);
+                       DRV_LOG(ERR,
+                               "port %d is not configured in Ethernet mode",
+                               port);
                        err = EINVAL;
                        goto port_error;
                }
-
                if (port_attr.state != IBV_PORT_ACTIVE)
-                       DEBUG("port %d is not active: \"%s\" (%d)",
-                             port, ibv_port_state_str(port_attr.state),
-                             port_attr.state);
-
+                       DRV_LOG(DEBUG, "port %d is not active: \"%s\" (%d)",
+                               port, ibv_port_state_str(port_attr.state),
+                               port_attr.state);
                /* Allocate protection domain. */
                pd = ibv_alloc_pd(ctx);
                if (pd == NULL) {
-                       ERROR("PD allocation failure");
+                       DRV_LOG(ERR, "PD allocation failure");
                        err = ENOMEM;
                        goto port_error;
                }
-
                mlx5_dev[idx].ports |= test;
-
                /* from rte_ethdev.c */
                priv = rte_zmalloc("ethdev private structure",
                                   sizeof(*priv),
                                   RTE_CACHE_LINE_SIZE);
                if (priv == NULL) {
-                       ERROR("priv allocation failure");
+                       DRV_LOG(ERR, "priv allocation failure");
                        err = ENOMEM;
                        goto port_error;
                }
-
                priv->ctx = ctx;
                strncpy(priv->ibdev_path, priv->ctx->device->ibdev_path,
                        sizeof(priv->ibdev_path));
@@ -780,35 +906,37 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                priv->rx_vec_en = 1;
                err = mlx5_args(&args, pci_dev->device.devargs);
                if (err) {
-                       ERROR("failed to process device arguments: %s",
-                             strerror(err));
+                       DRV_LOG(ERR, "failed to process device arguments: %s",
+                               strerror(err));
+                       err = rte_errno;
                        goto port_error;
                }
                mlx5_args_assign(priv, &args);
-               if (ibv_query_device_ex(ctx, NULL, &device_attr_ex)) {
-                       ERROR("ibv_query_device_ex() failed");
+               err = ibv_query_device_ex(ctx, NULL, &device_attr_ex);
+               if (err) {
+                       DRV_LOG(ERR, "ibv_query_device_ex() failed");
                        goto port_error;
                }
-
                priv->hw_csum =
                        !!(device_attr_ex.device_cap_flags_ex &
                           IBV_DEVICE_RAW_IP_CSUM);
-               DEBUG("checksum offloading is %ssupported",
-                     (priv->hw_csum ? "" : "not "));
+               DRV_LOG(DEBUG, "checksum offloading is %ssupported",
+                       (priv->hw_csum ? "" : "not "));
 
 #ifdef HAVE_IBV_DEVICE_VXLAN_SUPPORT
                priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags &
                                         IBV_DEVICE_VXLAN_SUPPORT);
 #endif
-               DEBUG("Rx L2 tunnel checksum offloads are %ssupported",
-                     (priv->hw_csum_l2tun ? "" : "not "));
+               DRV_LOG(DEBUG, "Rx L2 tunnel checksum offloads are %ssupported",
+                       (priv->hw_csum_l2tun ? "" : "not "));
 
 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
                priv->counter_set_supported = !!(device_attr.max_counter_sets);
                ibv_describe_counter_set(ctx, 0, &cs_desc);
-               DEBUG("counter type = %d, num of cs = %ld, attributes = %d",
-                     cs_desc.counter_type, cs_desc.num_of_cs,
-                     cs_desc.attributes);
+               DRV_LOG(DEBUG,
+                       "counter type = %d, num of cs = %ld, attributes = %d",
+                       cs_desc.counter_type, cs_desc.num_of_cs,
+                       cs_desc.attributes);
 #endif
                priv->ind_table_max_size =
                        device_attr_ex.rss_caps.max_rwq_indirection_table_size;
@@ -817,27 +945,24 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                if (priv->ind_table_max_size >
                                (unsigned int)ETH_RSS_RETA_SIZE_512)
                        priv->ind_table_max_size = ETH_RSS_RETA_SIZE_512;
-               DEBUG("maximum RX indirection table size is %u",
-                     priv->ind_table_max_size);
+               DRV_LOG(DEBUG, "maximum Rx indirection table size is %u",
+                       priv->ind_table_max_size);
                priv->hw_vlan_strip = !!(device_attr_ex.raw_packet_caps &
                                         IBV_RAW_PACKET_CAP_CVLAN_STRIPPING);
-               DEBUG("VLAN stripping is %ssupported",
-                     (priv->hw_vlan_strip ? "" : "not "));
+               DRV_LOG(DEBUG, "VLAN stripping is %ssupported",
+                       (priv->hw_vlan_strip ? "" : "not "));
 
-               priv->hw_fcs_strip =
-                               !!(device_attr_ex.orig_attr.device_cap_flags &
-                               IBV_WQ_FLAGS_SCATTER_FCS);
-               DEBUG("FCS stripping configuration is %ssupported",
-                     (priv->hw_fcs_strip ? "" : "not "));
+               priv->hw_fcs_strip = !!(device_attr_ex.raw_packet_caps &
+                                       IBV_RAW_PACKET_CAP_SCATTER_FCS);
+               DRV_LOG(DEBUG, "FCS stripping configuration is %ssupported",
+                       (priv->hw_fcs_strip ? "" : "not "));
 
 #ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
                priv->hw_padding = !!device_attr_ex.rx_pad_end_addr_align;
 #endif
-               DEBUG("hardware RX end alignment padding is %ssupported",
-                     (priv->hw_padding ? "" : "not "));
-
-               priv_get_num_vfs(priv, &num_vfs);
-               priv->sriov = (num_vfs || sriov);
+               DRV_LOG(DEBUG,
+                       "hardware Rx end alignment padding is %ssupported",
+                       (priv->hw_padding ? "" : "not "));
                priv->tso = ((priv->tso) &&
                            (device_attr_ex.tso_caps.max_tso > 0) &&
                            (device_attr_ex.tso_caps.supported_qpts &
@@ -846,18 +971,21 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                        priv->max_tso_payload_sz =
                                device_attr_ex.tso_caps.max_tso;
                if (priv->mps && !mps) {
-                       ERROR("multi-packet send not supported on this device"
-                             " (" MLX5_TXQ_MPW_EN ")");
+                       DRV_LOG(ERR,
+                               "multi-packet send not supported on this device"
+                               " (" MLX5_TXQ_MPW_EN ")");
                        err = ENOTSUP;
                        goto port_error;
                } else if (priv->mps && priv->tso) {
-                       WARN("multi-packet send not supported in conjunction "
-                             "with TSO. MPS disabled");
+                       DRV_LOG(WARNING,
+                               "multi-packet send not supported in conjunction"
+                               " with TSO. MPS disabled");
                        priv->mps = 0;
                }
-               INFO("%sMPS is %s",
-                    priv->mps == MLX5_MPW_ENHANCED ? "Enhanced " : "",
-                    priv->mps != MLX5_MPW_DISABLED ? "enabled" : "disabled");
+               DRV_LOG(INFO, "%s MPS is %s",
+                       priv->mps == MLX5_MPW_ENHANCED ? "enhanced " : "",
+                       priv->mps != MLX5_MPW_DISABLED ? "enabled" :
+                                                        "disabled");
                /* Set default values for Enhanced MPW, a.k.a MPWv2. */
                if (priv->mps == MLX5_MPW_ENHANCED) {
                        if (args.txqs_inline == MLX5_ARG_UNSET)
@@ -870,59 +998,71 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                                                   MLX5_WQE_SIZE;
                }
                if (priv->cqe_comp && !cqe_comp) {
-                       WARN("Rx CQE compression isn't supported");
+                       DRV_LOG(WARNING, "Rx CQE compression isn't supported");
                        priv->cqe_comp = 0;
                }
+               eth_dev = rte_eth_dev_allocate(name);
+               if (eth_dev == NULL) {
+                       DRV_LOG(ERR, "can not allocate rte ethdev");
+                       err = ENOMEM;
+                       goto port_error;
+               }
+               eth_dev->data->dev_private = priv;
+               priv->dev_data = eth_dev->data;
+               eth_dev->data->mac_addrs = priv->mac;
+               eth_dev->device = &pci_dev->device;
+               rte_eth_copy_pci_info(eth_dev, pci_dev);
+               eth_dev->device->driver = &mlx5_driver.driver;
+               err = mlx5_uar_init_primary(eth_dev);
+               if (err) {
+                       err = rte_errno;
+                       goto port_error;
+               }
                /* Configure the first MAC address by default. */
-               if (priv_get_mac(priv, &mac.addr_bytes)) {
-                       ERROR("cannot get MAC address, is mlx5_en loaded?"
-                             " (errno: %s)", strerror(errno));
+               if (mlx5_get_mac(eth_dev, &mac.addr_bytes)) {
+                       DRV_LOG(ERR,
+                               "port %u cannot get MAC address, is mlx5_en"
+                               " loaded? (errno: %s)",
+                               eth_dev->data->port_id, strerror(errno));
                        err = ENODEV;
                        goto port_error;
                }
-               INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
-                    priv->port,
-                    mac.addr_bytes[0], mac.addr_bytes[1],
-                    mac.addr_bytes[2], mac.addr_bytes[3],
-                    mac.addr_bytes[4], mac.addr_bytes[5]);
+               DRV_LOG(INFO,
+                       "port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
+                       eth_dev->data->port_id,
+                       mac.addr_bytes[0], mac.addr_bytes[1],
+                       mac.addr_bytes[2], mac.addr_bytes[3],
+                       mac.addr_bytes[4], mac.addr_bytes[5]);
 #ifndef NDEBUG
                {
                        char ifname[IF_NAMESIZE];
 
-                       if (priv_get_ifname(priv, &ifname) == 0)
-                               DEBUG("port %u ifname is \"%s\"",
-                                     priv->port, ifname);
+                       if (mlx5_get_ifname(eth_dev, &ifname) == 0)
+                               DRV_LOG(DEBUG, "port %u ifname is \"%s\"",
+                                       eth_dev->data->port_id, ifname);
                        else
-                               DEBUG("port %u ifname is unknown", priv->port);
+                               DRV_LOG(DEBUG, "port %u ifname is unknown",
+                                       eth_dev->data->port_id);
                }
 #endif
                /* Get actual MTU if possible. */
-               priv_get_mtu(priv, &priv->mtu);
-               DEBUG("port %u MTU is %u", priv->port, priv->mtu);
-
-               eth_dev = rte_eth_dev_allocate(name);
-               if (eth_dev == NULL) {
-                       ERROR("can not allocate rte ethdev");
-                       err = ENOMEM;
+               err = mlx5_get_mtu(eth_dev, &priv->mtu);
+               if (err) {
+                       err = rte_errno;
                        goto port_error;
                }
-               eth_dev->data->dev_private = priv;
-               eth_dev->data->mac_addrs = priv->mac;
-               eth_dev->device = &pci_dev->device;
-               rte_eth_copy_pci_info(eth_dev, pci_dev);
-               eth_dev->device->driver = &mlx5_driver.driver;
+               DRV_LOG(DEBUG, "port %u MTU is %u", eth_dev->data->port_id,
+                       priv->mtu);
                /*
                 * Initialize burst functions to prevent crashes before link-up.
                 */
                eth_dev->rx_pkt_burst = removed_rx_burst;
                eth_dev->tx_pkt_burst = removed_tx_burst;
-               priv->dev = eth_dev;
                eth_dev->dev_ops = &mlx5_dev_ops;
                /* Register MAC address. */
                claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
                TAILQ_INIT(&priv->flows);
                TAILQ_INIT(&priv->ctrl_flows);
-
                /* Hint libmlx5 to use PMD allocator for data plane resources */
                struct mlx5dv_ctx_allocators alctr = {
                        .alloc = &mlx5_alloc_verbs_buf,
@@ -931,12 +1071,17 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                };
                mlx5dv_set_context_attr(ctx, MLX5DV_CTX_ATTR_BUF_ALLOCATORS,
                                        (void *)((uintptr_t)&alctr));
-
                /* Bring Ethernet device up. */
-               DEBUG("forcing Ethernet interface up");
-               priv_set_flags(priv, ~IFF_UP, IFF_UP);
+               DRV_LOG(DEBUG, "port %u forcing Ethernet interface up",
+                       eth_dev->data->port_id);
+               mlx5_set_link_up(eth_dev);
+               /*
+                * Even though the interrupt handler is not installed yet,
+                * interrupts will still trigger on the async_fd from
+                * the Verbs context returned by ibv_open_device().
+                */
+               mlx5_link_update(eth_dev, 0);
                continue;
-
 port_error:
                if (priv)
                        rte_free(priv);
@@ -944,29 +1089,31 @@ port_error:
                        claim_zero(ibv_dealloc_pd(pd));
                if (ctx)
                        claim_zero(ibv_close_device(ctx));
+               if (eth_dev && rte_eal_process_type() == RTE_PROC_PRIMARY)
+                       rte_eth_dev_release_port(eth_dev);
                break;
        }
-
        /*
         * XXX if something went wrong in the loop above, there is a resource
         * leak (ctx, pd, priv, dpdk ethdev) but we can do nothing about it as
         * long as the dpdk does not provide a way to deallocate a ethdev and a
         * way to enumerate the registered ethdevs to free the previous ones.
         */
-
        /* no port found, complain */
        if (!mlx5_dev[idx].ports) {
-               err = ENODEV;
-               goto error;
+               rte_errno = ENODEV;
+               err = rte_errno;
        }
-
 error:
        if (attr_ctx)
                claim_zero(ibv_close_device(attr_ctx));
        if (list)
                ibv_free_device_list(list);
-       assert(err >= 0);
-       return -err;
+       if (err) {
+               rte_errno = err;
+               return -rte_errno;
+       }
+       return 0;
 }
 
 static const struct rte_pci_id mlx5_pci_id_map[] = {
@@ -1042,3 +1189,11 @@ rte_mlx5_pmd_init(void)
 RTE_PMD_EXPORT_NAME(net_mlx5, __COUNTER__);
 RTE_PMD_REGISTER_PCI_TABLE(net_mlx5, mlx5_pci_id_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_mlx5, "* ib_uverbs & mlx5_core & mlx5_ib");
+
+/** Initialize driver log type. */
+RTE_INIT(mlx5_init_log)
+{
+       mlx5_logtype = rte_log_register("pmd.net.mlx5");
+       if (mlx5_logtype >= 0)
+               rte_log_set_level(mlx5_logtype, RTE_LOG_NOTICE);
+}
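
Once the log type is registered, the DRV_LOG() calls introduced throughout this patch boil down to rte_log() on mlx5_logtype. A simplified sketch of such a wrapper, assuming the real macro is defined in mlx5_utils.h:

    #define DRV_LOG(level, fmt, ...) \
            rte_log(RTE_LOG_ ## level, mlx5_logtype, \
                    "net_mlx5: " fmt "\n", ##__VA_ARGS__)

Verbosity can then be raised at run time, for instance with rte_log_set_level(mlx5_logtype, RTE_LOG_DEBUG).
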
index d49595b..5e6027b 100644
@@ -90,8 +90,26 @@ struct mlx5_xstats_ctrl {
 /* Flow list . */
 TAILQ_HEAD(mlx5_flows, rte_flow);
 
+/**
+ * Type of object being allocated.
+ */
+enum mlx5_verbs_alloc_type {
+       MLX5_VERBS_ALLOC_TYPE_NONE,
+       MLX5_VERBS_ALLOC_TYPE_TX_QUEUE,
+       MLX5_VERBS_ALLOC_TYPE_RX_QUEUE,
+};
+
+/**
+ * The Verbs allocator needs a context to know, in its callback, which kind
+ * of resource it is allocating.
+ */
+struct mlx5_verbs_alloc_ctx {
+       enum mlx5_verbs_alloc_type type; /* Kind of object being allocated. */
+       const void *obj; /* Pointer to the DPDK object. */
+};
+
 struct priv {
-       struct rte_eth_dev *dev; /* Ethernet device of master process. */
+       struct rte_eth_dev_data *dev_data;  /* Pointer to device data. */
        struct ibv_context *ctx; /* Verbs context. */
        struct ibv_device_attr_ex device_attr; /* Device properties. */
        struct ibv_pd *pd; /* Protection Domain. */
@@ -107,11 +125,9 @@ struct priv {
        unsigned int hw_vlan_strip:1; /* VLAN stripping is supported. */
        unsigned int hw_fcs_strip:1; /* FCS stripping is supported. */
        unsigned int hw_padding:1; /* End alignment padding is supported. */
-       unsigned int sriov:1; /* This is a VF or PF with VF devices. */
        unsigned int mps:2; /* Multi-packet send mode (0: disabled). */
        unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
        unsigned int cqe_comp:1; /* Whether CQE compression is enabled. */
-       unsigned int pending_alarm:1; /* An alarm is pending. */
        unsigned int tso:1; /* Whether TSO is supported. */
        unsigned int tunnel_en:1;
        unsigned int isolated:1; /* Whether isolated mode is enabled. */
@@ -146,51 +162,16 @@ struct priv {
        LIST_HEAD(ind_tables, mlx5_ind_table_ibv) ind_tbls;
        uint32_t link_speed_capa; /* Link speed capabilities. */
        struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
-       rte_spinlock_t lock; /* Lock for control functions. */
+       rte_spinlock_t mr_lock; /* MR Lock. */
        int primary_socket; /* Unix socket for primary process. */
+       void *uar_base; /* Reserved address space for UAR mapping */
        struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */
+       struct mlx5_verbs_alloc_ctx verbs_alloc_ctx;
+       /* Context for Verbs allocator. */
 };
 
-/**
- * Lock private structure to protect it from concurrent access in the
- * control path.
- *
- * @param priv
- *   Pointer to private structure.
- */
-static inline void
-priv_lock(struct priv *priv)
-{
-       rte_spinlock_lock(&priv->lock);
-}
-
-/**
- * Try to lock private structure to protect it from concurrent access in the
- * control path.
- *
- * @param priv
- *   Pointer to private structure.
- *
- * @return
- *   1 if the lock is successfully taken; 0 otherwise.
- */
-static inline int
-priv_trylock(struct priv *priv)
-{
-       return rte_spinlock_trylock(&priv->lock);
-}
-
-/**
- * Unlock private structure.
- *
- * @param priv
- *   Pointer to private structure.
- */
-static inline void
-priv_unlock(struct priv *priv)
-{
-       rte_spinlock_unlock(&priv->lock);
-}
+#define PORT_ID(priv) ((priv)->dev_data->port_id)
+#define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)])
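
These two macros replace the removed priv->dev back-pointer; typical usage is simply (illustrative):

    struct priv *priv = dev->data->dev_private;
    uint16_t port_id = PORT_ID(priv);         /* same as dev->data->port_id */
    struct rte_eth_dev *edev = ETH_DEV(priv); /* back to the ethdev */
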
 
 /* mlx5.c */
 
@@ -198,130 +179,138 @@ int mlx5_getenv_int(const char *);
 
 /* mlx5_ethdev.c */
 
-struct priv *mlx5_get_priv(struct rte_eth_dev *dev);
-int mlx5_is_secondary(void);
-int priv_get_ifname(const struct priv *, char (*)[IF_NAMESIZE]);
-int priv_ifreq(const struct priv *, int req, struct ifreq *);
-int priv_is_ib_cntr(const char *);
-int priv_get_cntr_sysfs(struct priv *, const char *, uint64_t *);
-int priv_get_num_vfs(struct priv *, uint16_t *);
-int priv_get_mtu(struct priv *, uint16_t *);
-int priv_set_flags(struct priv *, unsigned int, unsigned int);
-int mlx5_dev_configure(struct rte_eth_dev *);
-void mlx5_dev_infos_get(struct rte_eth_dev *, struct rte_eth_dev_info *);
+int mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]);
+int mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr);
+int mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu);
+int mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep,
+                  unsigned int flags);
+int mlx5_dev_configure(struct rte_eth_dev *dev);
+void mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info);
 const uint32_t *mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev);
-int priv_link_update(struct priv *, int);
-int priv_force_link_status_change(struct priv *, int);
-int mlx5_link_update(struct rte_eth_dev *, int);
-int mlx5_dev_set_mtu(struct rte_eth_dev *, uint16_t);
-int mlx5_dev_get_flow_ctrl(struct rte_eth_dev *, struct rte_eth_fc_conf *);
-int mlx5_dev_set_flow_ctrl(struct rte_eth_dev *, struct rte_eth_fc_conf *);
-int mlx5_ibv_device_to_pci_addr(const struct ibv_device *,
-                               struct rte_pci_addr *);
-void mlx5_dev_link_status_handler(void *);
-void mlx5_dev_interrupt_handler(void *);
-void priv_dev_interrupt_handler_uninstall(struct priv *, struct rte_eth_dev *);
-void priv_dev_interrupt_handler_install(struct priv *, struct rte_eth_dev *);
+int mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete);
+int mlx5_force_link_status_change(struct rte_eth_dev *dev, int status);
+int mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu);
+int mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev,
+                          struct rte_eth_fc_conf *fc_conf);
+int mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev,
+                          struct rte_eth_fc_conf *fc_conf);
+int mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
+                               struct rte_pci_addr *pci_addr);
+void mlx5_dev_link_status_handler(void *arg);
+void mlx5_dev_interrupt_handler(void *arg);
+void mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev);
+void mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev);
 int mlx5_set_link_down(struct rte_eth_dev *dev);
 int mlx5_set_link_up(struct rte_eth_dev *dev);
-void priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev);
-void priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev);
+eth_tx_burst_t mlx5_select_tx_function(struct rte_eth_dev *dev);
+eth_rx_burst_t mlx5_select_rx_function(struct rte_eth_dev *dev);
 
 /* mlx5_mac.c */
 
-int priv_get_mac(struct priv *, uint8_t (*)[ETHER_ADDR_LEN]);
-void mlx5_mac_addr_remove(struct rte_eth_dev *, uint32_t);
-int mlx5_mac_addr_add(struct rte_eth_dev *, struct ether_addr *, uint32_t,
-                     uint32_t);
-void mlx5_mac_addr_set(struct rte_eth_dev *, struct ether_addr *);
+int mlx5_get_mac(struct rte_eth_dev *dev, uint8_t (*mac)[ETHER_ADDR_LEN]);
+void mlx5_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
+int mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac,
+                     uint32_t index, uint32_t vmdq);
+void mlx5_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr);
 
 /* mlx5_rss.c */
 
-int mlx5_rss_hash_update(struct rte_eth_dev *, struct rte_eth_rss_conf *);
-int mlx5_rss_hash_conf_get(struct rte_eth_dev *, struct rte_eth_rss_conf *);
-int priv_rss_reta_index_resize(struct priv *, unsigned int);
-int mlx5_dev_rss_reta_query(struct rte_eth_dev *,
-                           struct rte_eth_rss_reta_entry64 *, uint16_t);
-int mlx5_dev_rss_reta_update(struct rte_eth_dev *,
-                            struct rte_eth_rss_reta_entry64 *, uint16_t);
+int mlx5_rss_hash_update(struct rte_eth_dev *dev,
+                        struct rte_eth_rss_conf *rss_conf);
+int mlx5_rss_hash_conf_get(struct rte_eth_dev *dev,
+                          struct rte_eth_rss_conf *rss_conf);
+int mlx5_rss_reta_index_resize(struct rte_eth_dev *dev, unsigned int reta_size);
+int mlx5_dev_rss_reta_query(struct rte_eth_dev *dev,
+                           struct rte_eth_rss_reta_entry64 *reta_conf,
+                           uint16_t reta_size);
+int mlx5_dev_rss_reta_update(struct rte_eth_dev *dev,
+                            struct rte_eth_rss_reta_entry64 *reta_conf,
+                            uint16_t reta_size);
 
 /* mlx5_rxmode.c */
 
-void mlx5_promiscuous_enable(struct rte_eth_dev *);
-void mlx5_promiscuous_disable(struct rte_eth_dev *);
-void mlx5_allmulticast_enable(struct rte_eth_dev *);
-void mlx5_allmulticast_disable(struct rte_eth_dev *);
+void mlx5_promiscuous_enable(struct rte_eth_dev *dev);
+void mlx5_promiscuous_disable(struct rte_eth_dev *dev);
+void mlx5_allmulticast_enable(struct rte_eth_dev *dev);
+void mlx5_allmulticast_disable(struct rte_eth_dev *dev);
 
 /* mlx5_stats.c */
 
-void priv_xstats_init(struct priv *);
-int mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *);
-void mlx5_stats_reset(struct rte_eth_dev *);
-int mlx5_xstats_get(struct rte_eth_dev *,
-                   struct rte_eth_xstat *, unsigned int);
-void mlx5_xstats_reset(struct rte_eth_dev *);
-int mlx5_xstats_get_names(struct rte_eth_dev *,
-                         struct rte_eth_xstat_name *, unsigned int);
+void mlx5_xstats_init(struct rte_eth_dev *dev);
+int mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
+void mlx5_stats_reset(struct rte_eth_dev *dev);
+int mlx5_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *stats,
+                   unsigned int n);
+void mlx5_xstats_reset(struct rte_eth_dev *dev);
+int mlx5_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
+                         struct rte_eth_xstat_name *xstats_names,
+                         unsigned int n);
 
 /* mlx5_vlan.c */
 
-int mlx5_vlan_filter_set(struct rte_eth_dev *, uint16_t, int);
-int mlx5_vlan_offload_set(struct rte_eth_dev *, int);
-void mlx5_vlan_strip_queue_set(struct rte_eth_dev *, uint16_t, int);
+int mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on);
+void mlx5_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue, int on);
+int mlx5_vlan_offload_set(struct rte_eth_dev *dev, int mask);
 
 /* mlx5_trigger.c */
 
-int mlx5_dev_start(struct rte_eth_dev *);
-void mlx5_dev_stop(struct rte_eth_dev *);
-int priv_dev_traffic_enable(struct priv *, struct rte_eth_dev *);
-int priv_dev_traffic_disable(struct priv *, struct rte_eth_dev *);
-int priv_dev_traffic_restart(struct priv *, struct rte_eth_dev *);
-int mlx5_traffic_restart(struct rte_eth_dev *);
+int mlx5_dev_start(struct rte_eth_dev *dev);
+void mlx5_dev_stop(struct rte_eth_dev *dev);
+int mlx5_traffic_enable(struct rte_eth_dev *dev);
+void mlx5_traffic_disable(struct rte_eth_dev *dev);
+int mlx5_traffic_restart(struct rte_eth_dev *dev);
 
 /* mlx5_flow.c */
 
-int mlx5_dev_filter_ctrl(struct rte_eth_dev *, enum rte_filter_type,
-                        enum rte_filter_op, void *);
-int mlx5_flow_validate(struct rte_eth_dev *, const struct rte_flow_attr *,
-                      const struct rte_flow_item [],
-                      const struct rte_flow_action [],
-                      struct rte_flow_error *);
-struct rte_flow *mlx5_flow_create(struct rte_eth_dev *,
-                                 const struct rte_flow_attr *,
-                                 const struct rte_flow_item [],
-                                 const struct rte_flow_action [],
-                                 struct rte_flow_error *);
-int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *,
-                     struct rte_flow_error *);
-void priv_flow_flush(struct priv *, struct mlx5_flows *);
-int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
-int mlx5_flow_query(struct rte_eth_dev *, struct rte_flow *,
-                   enum rte_flow_action_type, void *,
-                   struct rte_flow_error *);
-int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
-int priv_flow_start(struct priv *, struct mlx5_flows *);
-void priv_flow_stop(struct priv *, struct mlx5_flows *);
-int priv_flow_verify(struct priv *);
-int mlx5_ctrl_flow_vlan(struct rte_eth_dev *, struct rte_flow_item_eth *,
-                       struct rte_flow_item_eth *, struct rte_flow_item_vlan *,
-                       struct rte_flow_item_vlan *);
-int mlx5_ctrl_flow(struct rte_eth_dev *, struct rte_flow_item_eth *,
-                  struct rte_flow_item_eth *);
-int priv_flow_create_drop_queue(struct priv *);
-void priv_flow_delete_drop_queue(struct priv *);
+int mlx5_flow_validate(struct rte_eth_dev *dev,
+                      const struct rte_flow_attr *attr,
+                      const struct rte_flow_item items[],
+                      const struct rte_flow_action actions[],
+                      struct rte_flow_error *error);
+struct rte_flow *mlx5_flow_create(struct rte_eth_dev *dev,
+                                 const struct rte_flow_attr *attr,
+                                 const struct rte_flow_item items[],
+                                 const struct rte_flow_action actions[],
+                                 struct rte_flow_error *error);
+int mlx5_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow,
+                     struct rte_flow_error *error);
+void mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list);
+int mlx5_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error);
+int mlx5_flow_query(struct rte_eth_dev *dev, struct rte_flow *flow,
+                   enum rte_flow_action_type action, void *data,
+                   struct rte_flow_error *error);
+int mlx5_flow_isolate(struct rte_eth_dev *dev, int enable,
+                     struct rte_flow_error *error);
+int mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
+                        enum rte_filter_type filter_type,
+                        enum rte_filter_op filter_op,
+                        void *arg);
+int mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list);
+void mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list);
+int mlx5_flow_verify(struct rte_eth_dev *dev);
+int mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
+                       struct rte_flow_item_eth *eth_spec,
+                       struct rte_flow_item_eth *eth_mask,
+                       struct rte_flow_item_vlan *vlan_spec,
+                       struct rte_flow_item_vlan *vlan_mask);
+int mlx5_ctrl_flow(struct rte_eth_dev *dev,
+                  struct rte_flow_item_eth *eth_spec,
+                  struct rte_flow_item_eth *eth_mask);
+int mlx5_flow_create_drop_queue(struct rte_eth_dev *dev);
+void mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev);
 
 /* mlx5_socket.c */
 
-int priv_socket_init(struct priv *priv);
-int priv_socket_uninit(struct priv *priv);
-void priv_socket_handle(struct priv *priv);
-int priv_socket_connect(struct priv *priv);
+int mlx5_socket_init(struct rte_eth_dev *dev);
+void mlx5_socket_uninit(struct rte_eth_dev *dev);
+void mlx5_socket_handle(struct rte_eth_dev *dev);
+int mlx5_socket_connect(struct rte_eth_dev *dev);
 
 /* mlx5_mr.c */
 
-struct mlx5_mr *priv_mr_new(struct priv *, struct rte_mempool *);
-struct mlx5_mr *priv_mr_get(struct priv *, struct rte_mempool *);
-int priv_mr_release(struct priv *, struct mlx5_mr *);
-int priv_mr_verify(struct priv *);
+struct mlx5_mr *mlx5_mr_new(struct rte_eth_dev *dev, struct rte_mempool *mp);
+struct mlx5_mr *mlx5_mr_get(struct rte_eth_dev *dev, struct rte_mempool *mp);
+int mlx5_mr_release(struct mlx5_mr *mr);
+int mlx5_mr_verify(struct rte_eth_dev *dev);
 
 #endif /* RTE_PMD_MLX5_H_ */
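
The prototypes above complete the switch from priv_*() helpers taking a
struct priv to mlx5_*() entry points taking a struct rte_eth_dev. A minimal
caller sketch under that convention (hypothetical function, for illustration
only):

static int
example_restart_traffic(struct rte_eth_dev *dev)
{
	int ret;

	/* Formerly priv_dev_traffic_enable(priv, dev). */
	ret = mlx5_traffic_enable(dev);
	if (ret)
		return ret; /* -rte_errno, rte_errno already set. */
	return 0;
}
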
index 24caf7e..d706357 100644
 /* Supported RSS */
 #define MLX5_RSS_HF_MASK (~(ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP))
 
-/* Maximum number of attempts to query link status before giving up. */
-#define MLX5_MAX_LINK_QUERY_ATTEMPTS 5
+/* Timeout in seconds to get a valid link status. */
+#define MLX5_LINK_STATUS_TIMEOUT 10
+
+/* Reserved address space for UAR mapping. */
+#define MLX5_UAR_SIZE (1ULL << 32)
+
+/* Offset of the reserved UAR address space relative to the hugepage memory.
+ * The offset is used to minimize the chance that an address adjacent to the
+ * hugepage is taken by other code in either the primary or secondary process;
+ * failing to map the Tx UAR would make Tx packets invisible to the HW.
+ */
+#define MLX5_UAR_OFFSET (1ULL << 32)
 
 #endif /* RTE_PMD_MLX5_DEFS_H_ */
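
MLX5_UAR_SIZE and MLX5_UAR_OFFSET describe a 4 GiB virtual address range
reserved in the primary process so that the Tx UAR can later be mapped at the
same address in secondary processes. A minimal reservation sketch, assuming a
hypothetical helper (the PMD performs the real reservation during probing):

#include <sys/mman.h>

/* Hypothetical helper, for illustration only. */
static void *
example_reserve_uar_range(void)
{
	/* Reserve address space only; no physical memory is committed. */
	void *addr = mmap(NULL, MLX5_UAR_SIZE, PROT_NONE,
			  MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);

	return addr == MAP_FAILED ? NULL : addr;
}
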
index ffe1cdd..5edc751 100644
@@ -35,6 +35,7 @@
 
 #include <stddef.h>
 #include <assert.h>
+#include <inttypes.h>
 #include <unistd.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <net/if.h>
 #include <sys/ioctl.h>
 #include <sys/socket.h>
-#include <sys/utsname.h>
 #include <netinet/in.h>
 #include <linux/ethtool.h>
 #include <linux/sockios.h>
-#include <linux/version.h>
 #include <fcntl.h>
 #include <stdalign.h>
 #include <sys/un.h>
+#include <time.h>
 
 #include <rte_atomic.h>
 #include <rte_ethdev.h>
@@ -60,7 +60,6 @@
 #include <rte_mbuf.h>
 #include <rte_common.h>
 #include <rte_interrupts.h>
-#include <rte_alarm.h>
 #include <rte_malloc.h>
 
 #include "mlx5.h"
@@ -118,35 +117,21 @@ struct ethtool_link_settings {
 #define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
 #endif
 
-/**
- * Return private structure associated with an Ethernet device.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- *
- * @return
- *   Pointer to private structure.
- */
-struct priv *
-mlx5_get_priv(struct rte_eth_dev *dev)
-{
-       return dev->data->dev_private;
-}
-
 /**
  * Get interface name from private structure.
  *
- * @param[in] priv
- *   Pointer to private structure.
+ * @param[in] dev
+ *   Pointer to Ethernet device.
  * @param[out] ifname
  *   Interface name output buffer.
  *
  * @return
- *   0 on success, -1 on failure and errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
+mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE])
 {
+       struct priv *priv = dev->data->dev_private;
        DIR *dir;
        struct dirent *dent;
        unsigned int dev_type = 0;
@@ -157,8 +142,10 @@ priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
                MKSTR(path, "%s/device/net", priv->ibdev_path);
 
                dir = opendir(path);
-               if (dir == NULL)
-                       return -1;
+               if (dir == NULL) {
+                       rte_errno = errno;
+                       return -rte_errno;
+               }
        }
        while ((dent = readdir(dir)) != NULL) {
                char *name = dent->d_name;
@@ -208,355 +195,131 @@ try_dev_id:
                        snprintf(match, sizeof(match), "%s", name);
        }
        closedir(dir);
-       if (match[0] == '\0')
-               return -1;
-       strncpy(*ifname, match, sizeof(*ifname));
-       return 0;
-}
-
-/**
- * Check if the counter is located on ib counters file.
- *
- * @param[in] cntr
- *   Counter name.
- *
- * @return
- *   1 if counter is located on ib counters file , 0 otherwise.
- */
-int
-priv_is_ib_cntr(const char *cntr)
-{
-       if (!strcmp(cntr, "out_of_buffer"))
-               return 1;
-       return 0;
-}
-
-/**
- * Read from sysfs entry.
- *
- * @param[in] priv
- *   Pointer to private structure.
- * @param[in] entry
- *   Entry name relative to sysfs path.
- * @param[out] buf
- *   Data output buffer.
- * @param size
- *   Buffer size.
- *
- * @return
- *   0 on success, -1 on failure and errno is set.
- */
-static int
-priv_sysfs_read(const struct priv *priv, const char *entry,
-               char *buf, size_t size)
-{
-       char ifname[IF_NAMESIZE];
-       FILE *file;
-       int ret;
-       int err;
-
-       if (priv_get_ifname(priv, &ifname))
-               return -1;
-
-       if (priv_is_ib_cntr(entry)) {
-               MKSTR(path, "%s/ports/1/hw_counters/%s",
-                     priv->ibdev_path, entry);
-               file = fopen(path, "rb");
-       } else {
-               MKSTR(path, "%s/device/net/%s/%s",
-                     priv->ibdev_path, ifname, entry);
-               file = fopen(path, "rb");
-       }
-       if (file == NULL)
-               return -1;
-       ret = fread(buf, 1, size, file);
-       err = errno;
-       if (((size_t)ret < size) && (ferror(file)))
-               ret = -1;
-       else
-               ret = size;
-       fclose(file);
-       errno = err;
-       return ret;
-}
-
-/**
- * Write to sysfs entry.
- *
- * @param[in] priv
- *   Pointer to private structure.
- * @param[in] entry
- *   Entry name relative to sysfs path.
- * @param[in] buf
- *   Data buffer.
- * @param size
- *   Buffer size.
- *
- * @return
- *   0 on success, -1 on failure and errno is set.
- */
-static int
-priv_sysfs_write(const struct priv *priv, const char *entry,
-                char *buf, size_t size)
-{
-       char ifname[IF_NAMESIZE];
-       FILE *file;
-       int ret;
-       int err;
-
-       if (priv_get_ifname(priv, &ifname))
-               return -1;
-
-       MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname, entry);
-
-       file = fopen(path, "wb");
-       if (file == NULL)
-               return -1;
-       ret = fwrite(buf, 1, size, file);
-       err = errno;
-       if (((size_t)ret < size) || (ferror(file)))
-               ret = -1;
-       else
-               ret = size;
-       fclose(file);
-       errno = err;
-       return ret;
-}
-
-/**
- * Get unsigned long sysfs property.
- *
- * @param priv
- *   Pointer to private structure.
- * @param[in] name
- *   Entry name relative to sysfs path.
- * @param[out] value
- *   Value output buffer.
- *
- * @return
- *   0 on success, -1 on failure and errno is set.
- */
-static int
-priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value)
-{
-       int ret;
-       unsigned long value_ret;
-       char value_str[32];
-
-       ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
-       if (ret == -1) {
-               DEBUG("cannot read %s value from sysfs: %s",
-                     name, strerror(errno));
-               return -1;
-       }
-       value_str[ret] = '\0';
-       errno = 0;
-       value_ret = strtoul(value_str, NULL, 0);
-       if (errno) {
-               DEBUG("invalid %s value `%s': %s", name, value_str,
-                     strerror(errno));
-               return -1;
-       }
-       *value = value_ret;
-       return 0;
-}
-
-/**
- * Set unsigned long sysfs property.
- *
- * @param priv
- *   Pointer to private structure.
- * @param[in] name
- *   Entry name relative to sysfs path.
- * @param value
- *   Value to set.
- *
- * @return
- *   0 on success, -1 on failure and errno is set.
- */
-static int
-priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value)
-{
-       int ret;
-       MKSTR(value_str, "%lu", value);
-
-       ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
-       if (ret == -1) {
-               DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
-                     name, value_str, value, strerror(errno));
-               return -1;
+       if (match[0] == '\0') {
+               rte_errno = ENOENT;
+               return -rte_errno;
        }
+       strncpy(*ifname, match, sizeof(*ifname));
        return 0;
 }
 
 /**
  * Perform ifreq ioctl() on associated Ethernet device.
  *
- * @param[in] priv
- *   Pointer to private structure.
+ * @param[in] dev
+ *   Pointer to Ethernet device.
  * @param req
  *   Request number to pass to ioctl().
  * @param[out] ifr
  *   Interface request structure output buffer.
  *
  * @return
- *   0 on success, -1 on failure and errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-priv_ifreq(const struct priv *priv, int req, struct ifreq *ifr)
+mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr)
 {
        int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
-       int ret = -1;
+       int ret = 0;
 
-       if (sock == -1)
-               return ret;
-       if (priv_get_ifname(priv, &ifr->ifr_name) == 0)
-               ret = ioctl(sock, req, ifr);
+       if (sock == -1) {
+               rte_errno = errno;
+               return -rte_errno;
+       }
+       ret = mlx5_get_ifname(dev, &ifr->ifr_name);
+       if (ret)
+               goto error;
+       ret = ioctl(sock, req, ifr);
+       if (ret == -1) {
+               rte_errno = errno;
+               goto error;
+       }
        close(sock);
-       return ret;
-}
-
-/**
- * Return the number of active VFs for the current device.
- *
- * @param[in] priv
- *   Pointer to private structure.
- * @param[out] num_vfs
- *   Number of active VFs.
- *
- * @return
- *   0 on success, -1 on failure and errno is set.
- */
-int
-priv_get_num_vfs(struct priv *priv, uint16_t *num_vfs)
-{
-       /* The sysfs entry name depends on the operating system. */
-       const char **name = (const char *[]){
-               "device/sriov_numvfs",
-               "device/mlx5_num_vfs",
-               NULL,
-       };
-       int ret;
-
-       do {
-               unsigned long ulong_num_vfs;
-
-               ret = priv_get_sysfs_ulong(priv, *name, &ulong_num_vfs);
-               if (!ret)
-                       *num_vfs = ulong_num_vfs;
-       } while (*(++name) && ret);
-       return ret;
+       return 0;
+error:
+       close(sock);
+       return -rte_errno;
 }
 
 /**
  * Get device MTU.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param[out] mtu
  *   MTU value output buffer.
  *
  * @return
- *   0 on success, -1 on failure and errno is set.
- */
-int
-priv_get_mtu(struct priv *priv, uint16_t *mtu)
-{
-       unsigned long ulong_mtu;
-
-       if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1)
-               return -1;
-       *mtu = ulong_mtu;
-       return 0;
-}
-
-/**
- * Read device counter from sysfs.
- *
- * @param priv
- *   Pointer to private structure.
- * @param name
- *   Counter name.
- * @param[out] cntr
- *   Counter output buffer.
- *
- * @return
- *   0 on success, -1 on failure and errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr)
+mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu)
 {
-       unsigned long ulong_ctr;
+       struct ifreq request;
+       int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request);
 
-       if (priv_get_sysfs_ulong(priv, name, &ulong_ctr) == -1)
-               return -1;
-       *cntr = ulong_ctr;
+       if (ret)
+               return ret;
+       *mtu = request.ifr_mtu;
        return 0;
 }
 
 /**
  * Set device MTU.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param mtu
  *   MTU value to set.
  *
  * @return
- *   0 on success, -1 on failure and errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_set_mtu(struct priv *priv, uint16_t mtu)
+mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
 {
-       uint16_t new_mtu;
+       struct ifreq request = { .ifr_mtu = mtu, };
 
-       if (priv_set_sysfs_ulong(priv, "mtu", mtu) ||
-           priv_get_mtu(priv, &new_mtu))
-               return -1;
-       if (new_mtu == mtu)
-               return 0;
-       errno = EINVAL;
-       return -1;
+       return mlx5_ifreq(dev, SIOCSIFMTU, &request);
 }
 
 /**
  * Set device flags.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param keep
  *   Bitmask for flags that must remain untouched.
  * @param flags
  *   Bitmask for flags to modify.
  *
  * @return
- *   0 on success, -1 on failure and errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags)
+mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags)
 {
-       unsigned long tmp;
+       struct ifreq request;
+       int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request);
 
-       if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1)
-               return -1;
-       tmp &= keep;
-       tmp |= (flags & (~keep));
-       return priv_set_sysfs_ulong(priv, "flags", tmp);
+       if (ret)
+               return ret;
+       request.ifr_flags &= keep;
+       request.ifr_flags |= flags & ~keep;
+       return mlx5_ifreq(dev, SIOCSIFFLAGS, &request);
 }
 
 /**
- * Ethernet device configuration.
- *
- * Prepare the driver for a given number of TX and RX queues.
+ * DPDK callback for Ethernet device configuration.
  *
  * @param dev
  *   Pointer to Ethernet device structure.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-dev_configure(struct rte_eth_dev *dev)
+int
+mlx5_dev_configure(struct rte_eth_dev *dev)
 {
        struct priv *priv = dev->data->dev_private;
        unsigned int rxqs_n = dev->data->nb_rx_queues;
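
The rewritten helpers above (mlx5_ifreq(), mlx5_get_mtu(), mlx5_set_mtu(),
mlx5_set_flags()) all follow the new error convention: return 0 on success,
or -rte_errno with rte_errno set. A minimal caller sketch (hypothetical, for
illustration only):

struct ifreq ifr;
int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);

if (ret) {
	/* ret == -rte_errno; rte_errno holds the positive error code. */
	DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
		dev->data->port_id, strerror(rte_errno));
	return ret;
}
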
@@ -566,19 +329,24 @@ dev_configure(struct rte_eth_dev *dev)
        unsigned int reta_idx_n;
        const uint8_t use_app_rss_key =
                !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
+       int ret = 0;
 
        if (use_app_rss_key &&
            (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len !=
             rss_hash_default_key_len)) {
-               /* MLX5 RSS only support 40bytes key. */
-               return EINVAL;
+               DRV_LOG(ERR, "port %u RSS key len must be %zu Bytes long",
+                       dev->data->port_id, rss_hash_default_key_len);
+               rte_errno = EINVAL;
+               return -rte_errno;
        }
        priv->rss_conf.rss_key =
                rte_realloc(priv->rss_conf.rss_key,
                            rss_hash_default_key_len, 0);
        if (!priv->rss_conf.rss_key) {
-               ERROR("cannot allocate RSS hash key memory (%u)", rxqs_n);
-               return ENOMEM;
+               DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)",
+                       dev->data->port_id, rxqs_n);
+               rte_errno = ENOMEM;
+               return -rte_errno;
        }
        memcpy(priv->rss_conf.rss_key,
               use_app_rss_key ?
@@ -590,18 +358,20 @@ dev_configure(struct rte_eth_dev *dev)
        priv->rxqs = (void *)dev->data->rx_queues;
        priv->txqs = (void *)dev->data->tx_queues;
        if (txqs_n != priv->txqs_n) {
-               INFO("%p: TX queues number update: %u -> %u",
-                    (void *)dev, priv->txqs_n, txqs_n);
+               DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u",
+                       dev->data->port_id, priv->txqs_n, txqs_n);
                priv->txqs_n = txqs_n;
        }
        if (rxqs_n > priv->ind_table_max_size) {
-               ERROR("cannot handle this many RX queues (%u)", rxqs_n);
-               return EINVAL;
+               DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)",
+                       dev->data->port_id, rxqs_n);
+               rte_errno = EINVAL;
+               return -rte_errno;
        }
        if (rxqs_n == priv->rxqs_n)
                return 0;
-       INFO("%p: RX queues number update: %u -> %u",
-            (void *)dev, priv->rxqs_n, rxqs_n);
+       DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u",
+               dev->data->port_id, priv->rxqs_n, rxqs_n);
        priv->rxqs_n = rxqs_n;
        /* If the requested number of RX queues is not a power of two, use the
         * maximum indirection table size for better balancing.
@@ -609,8 +379,9 @@ dev_configure(struct rte_eth_dev *dev)
        reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ?
                                     priv->ind_table_max_size :
                                     rxqs_n));
-       if (priv_rss_reta_index_resize(priv, reta_idx_n))
-               return ENOMEM;
+       ret = mlx5_rss_reta_index_resize(dev, reta_idx_n);
+       if (ret)
+               return ret;
        /* When the number of RX queues is not a power of two, the remaining
         * table entries are padded with reused WQs and hashes are not spread
         * uniformly. */
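
A short worked example of the sizing rule above, with hypothetical values
(log2above() rounds up to the next power-of-two exponent):

/* Hypothetical values, for illustration only. */
unsigned int rxqs_n = 6;               /* 6 & 5 != 0: not a power of two */
unsigned int ind_table_max_size = 512;
unsigned int reta_idx_n;

/* The full indirection table is used for better balancing: 512 entries. */
reta_idx_n = 1 << log2above(ind_table_max_size);
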
@@ -622,28 +393,6 @@ dev_configure(struct rte_eth_dev *dev)
        return 0;
 }
 
-/**
- * DPDK callback for Ethernet device configuration.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- *
- * @return
- *   0 on success, negative errno value on failure.
- */
-int
-mlx5_dev_configure(struct rte_eth_dev *dev)
-{
-       struct priv *priv = dev->data->dev_private;
-       int ret;
-
-       priv_lock(priv);
-       ret = dev_configure(dev);
-       assert(ret >= 0);
-       priv_unlock(priv);
-       return -ret;
-}
-
 /**
  * DPDK callback to get information about the device.
  *
@@ -655,13 +404,11 @@ mlx5_dev_configure(struct rte_eth_dev *dev)
 void
 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
 {
-       struct priv *priv = mlx5_get_priv(dev);
+       struct priv *priv = dev->data->dev_private;
        unsigned int max;
        char ifname[IF_NAMESIZE];
 
        info->pci_dev = RTE_ETH_DEV_TO_PCI(dev);
-
-       priv_lock(priv);
        /* FIXME: we should ask the device for these values. */
        info->min_rx_bufsize = 32;
        info->max_rx_pktlen = 65536;
@@ -699,16 +446,24 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
                info->tx_offload_capa |= (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
                                          DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
                                          DEV_TX_OFFLOAD_GRE_TNL_TSO);
-       if (priv_get_ifname(priv, &ifname) == 0)
+       if (mlx5_get_ifname(dev, &ifname) == 0)
                info->if_index = if_nametoindex(ifname);
        info->reta_size = priv->reta_idx_n ?
                priv->reta_idx_n : priv->ind_table_max_size;
-       info->hash_key_size = priv->rss_conf.rss_key_len;
+       info->hash_key_size = rss_hash_default_key_len;
        info->speed_capa = priv->link_speed_capa;
        info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK;
-       priv_unlock(priv);
 }
 
+/**
+ * Get supported packet types.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   A pointer to the array of supported packet types.
+ */
 const uint32_t *
 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 {
@@ -741,35 +496,41 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
  *
  * @param dev
  *   Pointer to Ethernet device structure.
- * @param wait_to_complete
- *   Wait for request completion (ignored).
+ * @param[out] link
+ *   Storage for current link status.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
+mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
+                              struct rte_eth_link *link)
 {
-       struct priv *priv = mlx5_get_priv(dev);
+       struct priv *priv = dev->data->dev_private;
        struct ethtool_cmd edata = {
                .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */
        };
        struct ifreq ifr;
        struct rte_eth_link dev_link;
        int link_speed = 0;
+       int ret;
 
-       /* priv_lock() is not taken to allow concurrent calls. */
-
-       (void)wait_to_complete;
-       if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
-               WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
-               return -1;
+       ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
+       if (ret) {
+               DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
+                       dev->data->port_id, strerror(rte_errno));
+               return ret;
        }
        memset(&dev_link, 0, sizeof(dev_link));
        dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
                                (ifr.ifr_flags & IFF_RUNNING));
        ifr.ifr_data = (void *)&edata;
-       if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
-               WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s",
-                    strerror(errno));
-               return -1;
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
+       if (ret) {
+               DRV_LOG(WARNING,
+                       "port %u ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s",
+                       dev->data->port_id, strerror(rte_errno));
+               return ret;
        }
        link_speed = ethtool_cmd_speed(&edata);
        if (link_speed == -1)
@@ -793,13 +554,13 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
                                ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
        dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
                        ETH_LINK_SPEED_FIXED);
-       if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) {
-               /* Link status changed. */
-               dev->data->dev_link = dev_link;
-               return 0;
+       if ((dev_link.link_speed && !dev_link.link_status) ||
+           (!dev_link.link_speed && dev_link.link_status)) {
+               rte_errno = EAGAIN;
+               return -rte_errno;
        }
-       /* Link status is still the same. */
-       return -1;
+       *link = dev_link;
+       return 0;
 }
 
 /**
@@ -807,31 +568,41 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
  *
  * @param dev
  *   Pointer to Ethernet device structure.
- * @param wait_to_complete
- *   Wait for request completion (ignored).
+ * @param[out] link
+ *   Storage for current link status.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
+mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
+                            struct rte_eth_link *link)
 {
-       struct priv *priv = mlx5_get_priv(dev);
+       struct priv *priv = dev->data->dev_private;
        struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
        struct ifreq ifr;
        struct rte_eth_link dev_link;
        uint64_t sc;
+       int ret;
 
-       (void)wait_to_complete;
-       if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
-               WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
-               return -1;
+       ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
+       if (ret) {
+               DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
+                       dev->data->port_id, strerror(rte_errno));
+               return ret;
        }
        memset(&dev_link, 0, sizeof(dev_link));
        dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
                                (ifr.ifr_flags & IFF_RUNNING));
        ifr.ifr_data = (void *)&gcmd;
-       if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
-               DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s",
-                     strerror(errno));
-               return -1;
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
+       if (ret) {
+               DRV_LOG(DEBUG,
+                       "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
+                       " failed: %s",
+                       dev->data->port_id, strerror(rte_errno));
+               return ret;
        }
        gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;
 
@@ -842,10 +613,13 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
 
        *ecmd = gcmd;
        ifr.ifr_data = (void *)ecmd;
-       if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
-               DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s",
-                     strerror(errno));
-               return -1;
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
+       if (ret) {
+               DRV_LOG(DEBUG,
+                       "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
+                       " failed: %s",
+                       dev->data->port_id, strerror(rte_errno));
+               return ret;
        }
        dev_link.link_speed = ecmd->speed;
        sc = ecmd->link_mode_masks[0] |
@@ -889,121 +663,13 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
                                ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
        dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
                                  ETH_LINK_SPEED_FIXED);
-       if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) {
-               /* Link status changed. */
-               dev->data->dev_link = dev_link;
-               return 0;
+       if ((dev_link.link_speed && !dev_link.link_status) ||
+           (!dev_link.link_speed && dev_link.link_status)) {
+               rte_errno = EAGAIN;
+               return -rte_errno;
        }
-       /* Link status is still the same. */
-       return -1;
-}
-
-/**
- * Enable receiving and transmitting traffic.
- *
- * @param priv
- *   Pointer to private structure.
- */
-static void
-priv_link_start(struct priv *priv)
-{
-       struct rte_eth_dev *dev = priv->dev;
-       int err;
-
-       priv_dev_select_tx_function(priv, dev);
-       priv_dev_select_rx_function(priv, dev);
-       err = priv_dev_traffic_enable(priv, dev);
-       if (err)
-               ERROR("%p: error occurred while configuring control flows: %s",
-                     (void *)priv, strerror(err));
-       err = priv_flow_start(priv, &priv->flows);
-       if (err)
-               ERROR("%p: error occurred while configuring flows: %s",
-                     (void *)priv, strerror(err));
-}
-
-/**
- * Disable receiving and transmitting traffic.
- *
- * @param priv
- *   Pointer to private structure.
- */
-static void
-priv_link_stop(struct priv *priv)
-{
-       struct rte_eth_dev *dev = priv->dev;
-
-       priv_flow_stop(priv, &priv->flows);
-       priv_dev_traffic_disable(priv, dev);
-       dev->rx_pkt_burst = removed_rx_burst;
-       dev->tx_pkt_burst = removed_tx_burst;
-}
-
-/**
- * Retrieve physical link information and update rx/tx_pkt_burst callbacks
- * accordingly.
- *
- * @param priv
- *   Pointer to private structure.
- * @param wait_to_complete
- *   Wait for request completion (ignored).
- */
-int
-priv_link_update(struct priv *priv, int wait_to_complete)
-{
-       struct rte_eth_dev *dev = priv->dev;
-       struct utsname utsname;
-       int ver[3];
-       int ret;
-       struct rte_eth_link dev_link = dev->data->dev_link;
-
-       if (uname(&utsname) == -1 ||
-           sscanf(utsname.release, "%d.%d.%d",
-                  &ver[0], &ver[1], &ver[2]) != 3 ||
-           KERNEL_VERSION(ver[0], ver[1], ver[2]) < KERNEL_VERSION(4, 9, 0))
-               ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete);
-       else
-               ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete);
-       /* If lsc interrupt is disabled, should always be ready for traffic. */
-       if (!dev->data->dev_conf.intr_conf.lsc) {
-               priv_link_start(priv);
-               return ret;
-       }
-       /* Re-select burst callbacks only if link status has been changed. */
-       if (!ret && dev_link.link_status != dev->data->dev_link.link_status) {
-               if (dev->data->dev_link.link_status == ETH_LINK_UP)
-                       priv_link_start(priv);
-               else
-                       priv_link_stop(priv);
-       }
-       return ret;
-}
-
-/**
- * Querying the link status till it changes to the desired state.
- * Number of query attempts is bounded by MLX5_MAX_LINK_QUERY_ATTEMPTS.
- *
- * @param priv
- *   Pointer to private structure.
- * @param status
- *   Link desired status.
- *
- * @return
- *   0 on success, negative errno value on failure.
- */
-int
-priv_force_link_status_change(struct priv *priv, int status)
-{
-       int try = 0;
-
-       while (try < MLX5_MAX_LINK_QUERY_ATTEMPTS) {
-               priv_link_update(priv, 0);
-               if (priv->dev->data->dev_link.link_status == status)
-                       return 0;
-               try++;
-               sleep(1);
-       }
-       return -EAGAIN;
+       *link = dev_link;
+       return 0;
 }
 
 /**
@@ -1012,17 +678,42 @@ priv_force_link_status_change(struct priv *priv, int status)
  * @param dev
  *   Pointer to Ethernet device structure.
  * @param wait_to_complete
- *   Wait for request completion (ignored).
+ *   Wait for request completion.
+ *
+ * @return
+ *   0 if link status was not updated, positive if it was, a negative errno
+ *   value otherwise and rte_errno is set.
  */
 int
 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
 {
-       struct priv *priv = dev->data->dev_private;
        int ret;
+       struct rte_eth_link dev_link;
+       time_t start_time = time(NULL);
 
-       priv_lock(priv);
-       ret = priv_link_update(priv, wait_to_complete);
-       priv_unlock(priv);
+       do {
+               ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
+               if (ret)
+                       ret = mlx5_link_update_unlocked_gset(dev, &dev_link);
+               if (ret == 0)
+                       break;
+               /* Handle wait to complete situation. */
+               if (wait_to_complete && ret == -EAGAIN) {
+                       if (abs((int)difftime(time(NULL), start_time)) <
+                           MLX5_LINK_STATUS_TIMEOUT) {
+                               usleep(0);
+                               continue;
+                       } else {
+                               rte_errno = EBUSY;
+                               return -rte_errno;
+                       }
+               } else if (ret < 0) {
+                       return ret;
+               }
+       } while (wait_to_complete);
+       ret = !!memcmp(&dev->data->dev_link, &dev_link,
+                      sizeof(struct rte_eth_link));
+       dev->data->dev_link = dev_link;
        return ret;
 }
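
A hedged sketch of how a caller interprets the tri-state return value above
(hypothetical call site; the ethdev layer performs an equivalent check):

int ret = mlx5_link_update(dev, 1 /* wait_to_complete */);

if (ret < 0) {
	/* rte_errno is set; EBUSY once MLX5_LINK_STATUS_TIMEOUT expires. */
	DRV_LOG(WARNING, "port %u link update failed: %s",
		dev->data->port_id, strerror(rte_errno));
} else if (ret > 0) {
	/* Link status changed since the last query. */
}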
 
@@ -1035,39 +726,33 @@ mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
  *   New MTU.
  *
  * @return
- *   0 on success, negative errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
 {
        struct priv *priv = dev->data->dev_private;
-       uint16_t kern_mtu;
-       int ret = 0;
+       uint16_t kern_mtu = 0;
+       int ret;
 
-       priv_lock(priv);
-       ret = priv_get_mtu(priv, &kern_mtu);
+       ret = mlx5_get_mtu(dev, &kern_mtu);
        if (ret)
-               goto out;
+               return ret;
        /* Set kernel interface MTU first. */
-       ret = priv_set_mtu(priv, mtu);
+       ret = mlx5_set_mtu(dev, mtu);
        if (ret)
-               goto out;
-       ret = priv_get_mtu(priv, &kern_mtu);
+               return ret;
+       ret = mlx5_get_mtu(dev, &kern_mtu);
        if (ret)
-               goto out;
+               return ret;
        if (kern_mtu == mtu) {
                priv->mtu = mtu;
-               DEBUG("adapter port %u MTU set to %u", priv->port, mtu);
+               DRV_LOG(DEBUG, "port %u adapter MTU set to %u",
+                       dev->data->port_id, mtu);
+               return 0;
        }
-       priv_unlock(priv);
-       return 0;
-out:
-       ret = errno;
-       WARN("cannot set port %u MTU to %u: %s", priv->port, mtu,
-            strerror(ret));
-       priv_unlock(priv);
-       assert(ret >= 0);
-       return -ret;
+       rte_errno = EAGAIN;
+       return -rte_errno;
 }
 
 /**
@@ -1079,12 +764,11 @@ out:
  *   Flow control output buffer.
  *
  * @return
- *   0 on success, negative errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
 {
-       struct priv *priv = dev->data->dev_private;
        struct ifreq ifr;
        struct ethtool_pauseparam ethpause = {
                .cmd = ETHTOOL_GPAUSEPARAM
@@ -1092,15 +776,14 @@ mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
        int ret;
 
        ifr.ifr_data = (void *)&ethpause;
-       priv_lock(priv);
-       if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
-               ret = errno;
-               WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)"
-                    " failed: %s",
-                    strerror(ret));
-               goto out;
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
+       if (ret) {
+               DRV_LOG(WARNING,
+                       "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:"
+                       " %s",
+                       dev->data->port_id, strerror(rte_errno));
+               return ret;
        }
-
        fc_conf->autoneg = ethpause.autoneg;
        if (ethpause.rx_pause && ethpause.tx_pause)
                fc_conf->mode = RTE_FC_FULL;
@@ -1110,12 +793,7 @@ mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
                fc_conf->mode = RTE_FC_TX_PAUSE;
        else
                fc_conf->mode = RTE_FC_NONE;
-       ret = 0;
-
-out:
-       priv_unlock(priv);
-       assert(ret >= 0);
-       return -ret;
+       return 0;
 }
 
 /**
@@ -1127,12 +805,11 @@ out:
  *   Flow control parameters.
  *
  * @return
- *   0 on success, negative errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
 {
-       struct priv *priv = dev->data->dev_private;
        struct ifreq ifr;
        struct ethtool_pauseparam ethpause = {
                .cmd = ETHTOOL_SPAUSEPARAM
@@ -1152,21 +829,15 @@ mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
                ethpause.tx_pause = 1;
        else
                ethpause.tx_pause = 0;
-
-       priv_lock(priv);
-       if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
-               ret = errno;
-               WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
-                    " failed: %s",
-                    strerror(ret));
-               goto out;
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
+       if (ret) {
+               DRV_LOG(WARNING,
+                       "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
+                       " failed: %s",
+                       dev->data->port_id, strerror(rte_errno));
+               return ret;
        }
-       ret = 0;
-
-out:
-       priv_unlock(priv);
-       assert(ret >= 0);
-       return -ret;
+       return 0;
 }
 
 /**
@@ -1178,7 +849,7 @@ out:
  *   PCI bus address output buffer.
  *
  * @return
- *   0 on success, -1 on failure and errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
@@ -1189,8 +860,10 @@ mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
        MKSTR(path, "%s/device/uevent", device->ibdev_path);
 
        file = fopen(path, "rb");
-       if (file == NULL)
-               return -1;
+       if (file == NULL) {
+               rte_errno = errno;
+               return -rte_errno;
+       }
        while (fgets(line, sizeof(line), file) == line) {
                size_t len = strlen(line);
                int ret;
@@ -1219,47 +892,11 @@ mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
        return 0;
 }
 
-/**
- * Update the link status.
- *
- * @param priv
- *   Pointer to private structure.
- *
- * @return
- *   Zero if the callback process can be called immediately.
- */
-static int
-priv_link_status_update(struct priv *priv)
-{
-       struct rte_eth_link *link = &priv->dev->data->dev_link;
-
-       priv_link_update(priv, 0);
-       if (((link->link_speed == 0) && link->link_status) ||
-               ((link->link_speed != 0) && !link->link_status)) {
-               /*
-                * Inconsistent status. Event likely occurred before the
-                * kernel netdevice exposes the new status.
-                */
-               if (!priv->pending_alarm) {
-                       priv->pending_alarm = 1;
-                       rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
-                                         mlx5_dev_link_status_handler,
-                                         priv->dev);
-               }
-               return 1;
-       } else if (unlikely(priv->pending_alarm)) {
-               /* Link interrupt occurred while alarm is already scheduled. */
-               priv->pending_alarm = 0;
-               rte_eal_alarm_cancel(mlx5_dev_link_status_handler, priv->dev);
-       }
-       return 0;
-}
-
 /**
  * Device status handler.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param events
  *   Pointer to event flags holder.
  *
@@ -1267,60 +904,36 @@ priv_link_status_update(struct priv *priv)
  *   Events bitmap of callback process which can be called immediately.
  */
 static uint32_t
-priv_dev_status_handler(struct priv *priv)
+mlx5_dev_status_handler(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        struct ibv_async_event event;
        uint32_t ret = 0;
 
+       if (mlx5_link_update(dev, 0) == -EAGAIN) {
+               usleep(0);
+               return 0;
+       }
        /* Read all message and acknowledge them. */
        for (;;) {
                if (ibv_get_async_event(priv->ctx, &event))
                        break;
                if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
                        event.event_type == IBV_EVENT_PORT_ERR) &&
-                       (priv->dev->data->dev_conf.intr_conf.lsc == 1))
+                       (dev->data->dev_conf.intr_conf.lsc == 1))
                        ret |= (1 << RTE_ETH_EVENT_INTR_LSC);
                else if (event.event_type == IBV_EVENT_DEVICE_FATAL &&
-                       priv->dev->data->dev_conf.intr_conf.rmv == 1)
+                       dev->data->dev_conf.intr_conf.rmv == 1)
                        ret |= (1 << RTE_ETH_EVENT_INTR_RMV);
                else
-                       DEBUG("event type %d on port %d not handled",
-                             event.event_type, event.element.port_num);
+                       DRV_LOG(DEBUG,
+                               "port %u event type %d on not handled",
+                               dev->data->port_id, event.event_type);
                ibv_ack_async_event(&event);
        }
-       if (ret & (1 << RTE_ETH_EVENT_INTR_LSC))
-               if (priv_link_status_update(priv))
-                       ret &= ~(1 << RTE_ETH_EVENT_INTR_LSC);
        return ret;
 }
 
-/**
- * Handle delayed link status event.
- *
- * @param arg
- *   Registered argument.
- */
-void
-mlx5_dev_link_status_handler(void *arg)
-{
-       struct rte_eth_dev *dev = arg;
-       struct priv *priv = dev->data->dev_private;
-       int ret;
-
-       while (!priv_trylock(priv)) {
-               /* Alarm is being canceled. */
-               if (priv->pending_alarm == 0)
-                       return;
-               rte_pause();
-       }
-       priv->pending_alarm = 0;
-       ret = priv_link_status_update(priv);
-       priv_unlock(priv);
-       if (!ret)
-               _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL,
-                                             NULL);
-}
-
 /**
  * Handle interrupts from the NIC.
  *
@@ -1333,12 +946,9 @@ void
 mlx5_dev_interrupt_handler(void *cb_arg)
 {
        struct rte_eth_dev *dev = cb_arg;
-       struct priv *priv = dev->data->dev_private;
        uint32_t events;
 
-       priv_lock(priv);
-       events = priv_dev_status_handler(priv);
-       priv_unlock(priv);
+       events = mlx5_dev_status_handler(dev);
        if (events & (1 << RTE_ETH_EVENT_INTR_LSC))
                _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL,
                                              NULL);
@@ -1357,24 +967,21 @@ static void
 mlx5_dev_handler_socket(void *cb_arg)
 {
        struct rte_eth_dev *dev = cb_arg;
-       struct priv *priv = dev->data->dev_private;
 
-       priv_lock(priv);
-       priv_socket_handle(priv);
-       priv_unlock(priv);
+       mlx5_socket_handle(dev);
 }
 
 /**
  * Uninstall interrupt handler.
  *
- * @param priv
- *   Pointer to private structure.
  * @param dev
- *   Pointer to the rte_eth_dev structure.
+ *   Pointer to Ethernet device.
  */
 void
-priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev)
+mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
+
        if (dev->data->dev_conf.intr_conf.lsc ||
            dev->data->dev_conf.intr_conf.rmv)
                rte_intr_callback_unregister(&priv->intr_handle,
@@ -1382,10 +989,6 @@ priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev)
        if (priv->primary_socket)
                rte_intr_callback_unregister(&priv->intr_handle_socket,
                                             mlx5_dev_handler_socket, dev);
-       if (priv->pending_alarm) {
-               priv->pending_alarm = 0;
-               rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev);
-       }
        priv->intr_handle.fd = 0;
        priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
        priv->intr_handle_socket.fd = 0;
@@ -1395,21 +998,24 @@ priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev)
 /**
  * Install interrupt handler.
  *
- * @param priv
- *   Pointer to private structure.
  * @param dev
- *   Pointer to the rte_eth_dev structure.
+ *   Pointer to Ethernet device.
  */
 void
-priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev)
+mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev)
 {
-       int rc, flags;
+       struct priv *priv = dev->data->dev_private;
+       int ret;
+       int flags;
 
        assert(priv->ctx->async_fd > 0);
        flags = fcntl(priv->ctx->async_fd, F_GETFL);
-       rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
-       if (rc < 0) {
-               INFO("failed to change file descriptor async event queue");
+       ret = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
+       if (ret) {
+               DRV_LOG(INFO,
+                       "port %u failed to change file descriptor async event"
+                       " queue",
+                       dev->data->port_id);
                dev->data->dev_conf.intr_conf.lsc = 0;
                dev->data->dev_conf.intr_conf.rmv = 0;
        }
@@ -1420,9 +1026,11 @@ priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev)
                rte_intr_callback_register(&priv->intr_handle,
                                           mlx5_dev_interrupt_handler, dev);
        }
-
-       rc = priv_socket_init(priv);
-       if (!rc && priv->primary_socket) {
+       ret = mlx5_socket_init(dev);
+       if (ret)
+               DRV_LOG(ERR, "port %u cannot initialise socket: %s",
+                       dev->data->port_id, strerror(rte_errno));
+       else if (priv->primary_socket) {
                priv->intr_handle_socket.fd = priv->primary_socket;
                priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT;
                rte_intr_callback_register(&priv->intr_handle_socket,
@@ -1430,23 +1038,6 @@ priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev)
        }
 }
 
-/**
- * Change the link state (UP / DOWN).
- *
- * @param priv
- *   Pointer to private data structure.
- * @param up
- *   Nonzero for link up, otherwise link down.
- *
- * @return
- *   0 on success, errno value on failure.
- */
-static int
-priv_dev_set_link(struct priv *priv, int up)
-{
-       return priv_set_flags(priv, ~IFF_UP, up ? IFF_UP : ~IFF_UP);
-}
-
 /**
  * DPDK callback to bring the link DOWN.
  *
@@ -1454,18 +1045,12 @@ priv_dev_set_link(struct priv *priv, int up)
  *   Pointer to Ethernet device structure.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_set_link_down(struct rte_eth_dev *dev)
 {
-       struct priv *priv = dev->data->dev_private;
-       int err;
-
-       priv_lock(priv);
-       err = priv_dev_set_link(priv, 0);
-       priv_unlock(priv);
-       return err;
+       return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP);
 }
 
 /**
@@ -1475,72 +1060,77 @@ mlx5_set_link_down(struct rte_eth_dev *dev)
  *   Pointer to Ethernet device structure.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_set_link_up(struct rte_eth_dev *dev)
 {
-       struct priv *priv = dev->data->dev_private;
-       int err;
-
-       priv_lock(priv);
-       err = priv_dev_set_link(priv, 1);
-       priv_unlock(priv);
-       return err;
+       return mlx5_set_flags(dev, ~IFF_UP, IFF_UP);
 }
 
 /**
  * Configure the TX function to use.
  *
- * @param priv
- *   Pointer to private data structure.
  * @param dev
  *   Pointer to rte_eth_dev structure.
+ *
+ * @return
+ *   Pointer to selected Tx burst function.
  */
-void
-priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev)
+eth_tx_burst_t
+mlx5_select_tx_function(struct rte_eth_dev *dev)
 {
-       assert(priv != NULL);
-       assert(dev != NULL);
-       dev->tx_pkt_burst = mlx5_tx_burst;
+       struct priv *priv = dev->data->dev_private;
+       eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst;
+
        /* Select appropriate TX function. */
        if (priv->mps == MLX5_MPW_ENHANCED) {
-               if (priv_check_vec_tx_support(priv) > 0) {
-                       if (priv_check_raw_vec_tx_support(priv) > 0)
-                               dev->tx_pkt_burst = mlx5_tx_burst_raw_vec;
+               if (mlx5_check_vec_tx_support(dev) > 0) {
+                       if (mlx5_check_raw_vec_tx_support(dev) > 0)
+                               tx_pkt_burst = mlx5_tx_burst_raw_vec;
                        else
-                               dev->tx_pkt_burst = mlx5_tx_burst_vec;
-                       DEBUG("selected Enhanced MPW TX vectorized function");
+                               tx_pkt_burst = mlx5_tx_burst_vec;
+                       DRV_LOG(DEBUG,
+                               "port %u selected enhanced MPW Tx vectorized"
+                               " function",
+                               dev->data->port_id);
                } else {
-                       dev->tx_pkt_burst = mlx5_tx_burst_empw;
-                       DEBUG("selected Enhanced MPW TX function");
+                       tx_pkt_burst = mlx5_tx_burst_empw;
+                       DRV_LOG(DEBUG,
+                               "port %u selected enhanced MPW Tx function",
+                               dev->data->port_id);
                }
        } else if (priv->mps && priv->txq_inline) {
-               dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
-               DEBUG("selected MPW inline TX function");
+               tx_pkt_burst = mlx5_tx_burst_mpw_inline;
+               DRV_LOG(DEBUG, "port %u selected MPW inline Tx function",
+                       dev->data->port_id);
        } else if (priv->mps) {
-               dev->tx_pkt_burst = mlx5_tx_burst_mpw;
-               DEBUG("selected MPW TX function");
+               tx_pkt_burst = mlx5_tx_burst_mpw;
+               DRV_LOG(DEBUG, "port %u selected MPW Tx function",
+                       dev->data->port_id);
        }
+       return tx_pkt_burst;
 }
 
 /**
  * Configure the RX function to use.
  *
- * @param priv
- *   Pointer to private data structure.
  * @param dev
  *   Pointer to rte_eth_dev structure.
+ *
+ * @return
+ *   Pointer to selected Rx burst function.
  */
-void
-priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev)
+eth_rx_burst_t
+mlx5_select_rx_function(struct rte_eth_dev *dev)
 {
-       assert(priv != NULL);
+       eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst;
+
        assert(dev != NULL);
-       if (priv_check_vec_rx_support(priv) > 0) {
-               dev->rx_pkt_burst = mlx5_rx_burst_vec;
-               DEBUG("selected RX vectorized function");
-       } else {
-               dev->rx_pkt_burst = mlx5_rx_burst;
+       if (mlx5_check_vec_rx_support(dev) > 0) {
+               rx_pkt_burst = mlx5_rx_burst_vec;
+               DRV_LOG(DEBUG, "port %u selected Rx vectorized function",
+                       dev->data->port_id);
        }
+       return rx_pkt_burst;
 }
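
Since the selectors now return the burst functions instead of assigning them,
the caller installs them itself; a minimal sketch, assuming a start-time hook
similar to mlx5_dev_start():

/* Hypothetical call site, for illustration only. */
dev->tx_pkt_burst = mlx5_select_tx_function(dev);
dev->rx_pkt_burst = mlx5_select_rx_function(dev);
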
index 092644f..57b654c 100644
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
+#include <rte_common.h>
 #include <rte_ethdev.h>
 #include <rte_flow.h>
 #include <rte_flow_driver.h>
 #include <rte_malloc.h>
+#include <rte_ip.h>
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
@@ -83,40 +85,46 @@ ibv_destroy_counter_set(struct ibv_counter_set *cs)
 extern const struct eth_dev_ops mlx5_dev_ops;
 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
 
+/** Structure given to the conversion functions. */
+struct mlx5_flow_data {
+       struct mlx5_flow_parse *parser; /**< Parser context. */
+       struct rte_flow_error *error; /**< Error context. */
+};
+
 static int
 mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
-                    void *data);
+                    struct mlx5_flow_data *data);
 
 static int
 mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
-                     void *data);
+                     struct mlx5_flow_data *data);
 
 static int
 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
-                     void *data);
+                     struct mlx5_flow_data *data);
 
 static int
 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
-                     void *data);
+                     struct mlx5_flow_data *data);
 
 static int
 mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
-                    void *data);
+                    struct mlx5_flow_data *data);
 
 static int
 mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
-                    void *data);
+                    struct mlx5_flow_data *data);
 
 static int
 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
-                      void *data);
+                      struct mlx5_flow_data *data);
 
 struct mlx5_flow_parse;
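
The convert callbacks now take a struct mlx5_flow_data bundling the parser
with an rte_flow_error, so failures can be reported in place. A hypothetical
callback sketch, assuming rte_flow_error_set() from rte_flow_driver.h fills
the error and returns a negative errno:

/* Hypothetical convert callback, for illustration only. */
static int
mlx5_flow_create_example(const struct rte_flow_item *item,
			 const void *default_mask,
			 struct mlx5_flow_data *data)
{
	(void)default_mask;
	if (!item->spec)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item, "item specification required");
	/* ... convert the item into data->parser here ... */
	return 0;
}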
 
@@ -128,7 +136,7 @@ static int
 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
 
 static int
-mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);
+mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
 
 /* Hash RX queue types. */
 enum hash_rxq_type {
@@ -157,7 +165,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
-               .flow_priority = 0,
+               .flow_priority = 1,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_UDPV4] = {
@@ -166,7 +174,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
-               .flow_priority = 0,
+               .flow_priority = 1,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_IPV4] = {
@@ -174,7 +182,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
                                IBV_RX_HASH_DST_IPV4),
                .dpdk_rss_hf = (ETH_RSS_IPV4 |
                                ETH_RSS_FRAG_IPV4),
-               .flow_priority = 1,
+               .flow_priority = 2,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_TCPV6] = {
@@ -183,7 +191,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
-               .flow_priority = 0,
+               .flow_priority = 1,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_UDPV6] = {
@@ -192,7 +200,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
-               .flow_priority = 0,
+               .flow_priority = 1,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_IPV6] = {
@@ -200,13 +208,13 @@ const struct hash_rxq_init hash_rxq_init[] = {
                                IBV_RX_HASH_DST_IPV6),
                .dpdk_rss_hf = (ETH_RSS_IPV6 |
                                ETH_RSS_FRAG_IPV6),
-               .flow_priority = 1,
+               .flow_priority = 2,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_ETH] = {
                .hash_fields = 0,
                .dpdk_rss_hf = 0,
-               .flow_priority = 2,
+               .flow_priority = 3,
        },
 };
 
@@ -286,11 +294,12 @@ struct mlx5_flow_items {
         *   Internal structure to store the conversion.
         *
         * @return
-        *   0 on success, negative value otherwise.
+        *   0 on success, a negative errno value otherwise and rte_errno is
+        *   set.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
-                      void *data);
+                      struct mlx5_flow_data *data);
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible following items.  */
@@ -474,10 +483,18 @@ struct mlx5_fdir {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3;
+       union {
+               struct rte_flow_item_ipv4 ipv4;
+               struct rte_flow_item_ipv6 ipv6;
+       } l3_mask;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4;
+       union {
+               struct rte_flow_item_udp udp;
+               struct rte_flow_item_tcp tcp;
+       } l4_mask;
        struct rte_flow_action_queue queue;
 };
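
The new l3_mask/l4_mask unions mirror l3/l4 so that the flow director conversion can carry a user-supplied mask next to each spec instead of hard-coding defaults. A minimal sketch of filling one such pair (hypothetical values, not part of the patch):

        struct mlx5_fdir attributes = {
                /* Match only the IPv4 destination address... */
                .l3.ipv4.hdr.dst_addr = rte_cpu_to_be_32(IPv4(192, 168, 0, 1)),
                /* ...by masking it fully and leaving all other fields zero. */
                .l3_mask.ipv4.hdr.dst_addr = rte_cpu_to_be_32(UINT32_MAX),
        };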
 
@@ -488,7 +505,7 @@ struct ibv_spec_header {
 };
 
 /**
- * Check support for a given item.
+ * Check whether the item is fully supported by the NIC matching capability.
  *
  * @param item[in]
  *   Item specification.
@@ -499,87 +516,71 @@ struct ibv_spec_header {
  *   Bit-Mask size in bytes.
  *
  * @return
- *   0 on success.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
 mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
 {
-       int ret = 0;
-
-       if (!item->spec && (item->mask || item->last))
-               return -1;
-       if (item->spec && !item->mask) {
-               unsigned int i;
-               const uint8_t *spec = item->spec;
-
-               for (i = 0; i < size; ++i)
-                       if ((spec[i] | mask[i]) != mask[i])
-                               return -1;
-       }
-       if (item->last && !item->mask) {
-               unsigned int i;
-               const uint8_t *spec = item->last;
-
-               for (i = 0; i < size; ++i)
-                       if ((spec[i] | mask[i]) != mask[i])
-                               return -1;
-       }
-       if (item->mask) {
-               unsigned int i;
-               const uint8_t *spec = item->spec;
-
-               for (i = 0; i < size; ++i)
-                       if ((spec[i] | mask[i]) != mask[i])
-                               return -1;
-       }
-       if (item->spec && item->last) {
-               uint8_t spec[size];
-               uint8_t last[size];
-               const uint8_t *apply = mask;
-               unsigned int i;
+       unsigned int i;
+       const uint8_t *spec = item->spec;
+       const uint8_t *last = item->last;
+       const uint8_t *m = item->mask ? item->mask : mask;
 
-               if (item->mask)
-                       apply = item->mask;
-               for (i = 0; i < size; ++i) {
-                       spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
-                       last[i] = ((const uint8_t *)item->last)[i] & apply[i];
-               }
-               ret = memcmp(spec, last, size);
+       if (!spec && (item->mask || last))
+               goto error;
+       if (!spec)
+               return 0;
+       /*
+        * Single-pass check to make sure that:
+        * - item->mask is supported, no bits are set outside mask.
+        * - Both masked item->spec and item->last are equal (no range
+        *   supported).
+        */
+       for (i = 0; i < size; i++) {
+               if (!m[i])
+                       continue;
+               if ((m[i] | mask[i]) != mask[i])
+                       goto error;
+               if (last && ((spec[i] & m[i]) != (last[i] & m[i])))
+                       goto error;
        }
-       return ret;
+       return 0;
+error:
+       rte_errno = ENOTSUP;
+       return -rte_errno;
 }
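
With the rewrite, every validation helper follows one convention: return 0 on success, otherwise set rte_errno and return its negation. A hedged caller sketch (variable names illustrative):

        int ret = mlx5_flow_item_validate(item, default_mask, mask_size);
        if (ret)
                return ret; /* rte_errno was already set by the callee. */

The single pass also rejects ranges: for a mask byte 0xff, spec byte 0x10 and last byte 0x1f still differ after masking, so the item fails with ENOTSUP.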
 
 /**
 * Copy the RSS configuration from the user's; if rss_conf is null, use the
 * driver's default one.
  *
- * @param priv
- *   Pointer to private structure.
  * @param parser
  *   Internal parser structure.
  * @param rss_conf
  *   User RSS configuration to save.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_flow_convert_rss_conf(struct priv *priv,
-                          struct mlx5_flow_parse *parser,
+mlx5_flow_convert_rss_conf(struct mlx5_flow_parse *parser,
                           const struct rte_eth_rss_conf *rss_conf)
 {
        /*
         * This function is also called at the beginning of
-        * priv_flow_convert_actions() to initialize the parser with the
+        * mlx5_flow_convert_actions() to initialize the parser with the
         * device default RSS configuration.
         */
-       (void)priv;
        if (rss_conf) {
-               if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
-                       return EINVAL;
-               if (rss_conf->rss_key_len != 40)
-                       return EINVAL;
+               if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) {
+                       rte_errno = EINVAL;
+                       return -rte_errno;
+               }
+               if (rss_conf->rss_key_len != 40) {
+                       rte_errno = EINVAL;
+                       return -rte_errno;
+               }
                if (rss_conf->rss_key_len && rss_conf->rss_key) {
                        parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
                        memcpy(parser->rss_key, rss_conf->rss_key,
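
For context, an RSS configuration that passes both checks could look like the following sketch (assuming ETH_RSS_NONFRAG_IPV4_TCP is a supported hash type, i.e. outside MLX5_RSS_HF_MASK):

        static uint8_t rss_key[40] = { /* 40-byte Toeplitz key */ };
        struct rte_eth_rss_conf conf = {
                .rss_key = rss_key,
                .rss_key_len = 40, /* any other length is rejected with EINVAL */
                .rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
        };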
@@ -594,26 +595,18 @@ priv_flow_convert_rss_conf(struct priv *priv,
 /**
  * Extract attribute to the parser.
  *
- * @param priv
- *   Pointer to private structure.
  * @param[in] attr
  *   Flow rule attributes.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
- * @param[in, out] parser
- *   Internal parser structure.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_flow_convert_attributes(struct priv *priv,
-                            const struct rte_flow_attr *attr,
-                            struct rte_flow_error *error,
-                            struct mlx5_flow_parse *parser)
+mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
+                            struct rte_flow_error *error)
 {
-       (void)priv;
-       (void)parser;
        if (attr->group) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
@@ -648,8 +641,8 @@ priv_flow_convert_attributes(struct priv *priv,
 /**
  * Extract actions request to the parser.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param[in] actions
  *   Associated actions (list terminated by the END action).
  * @param[out] error
@@ -661,18 +654,23 @@ priv_flow_convert_attributes(struct priv *priv,
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_flow_convert_actions(struct priv *priv,
+mlx5_flow_convert_actions(struct rte_eth_dev *dev,
                          const struct rte_flow_action actions[],
                          struct rte_flow_error *error,
                          struct mlx5_flow_parse *parser)
 {
+       struct priv *priv = dev->data->dev_private;
+       int ret;
+
        /*
         * Add default RSS configuration necessary for Verbs to create QP even
         * if no RSS is necessary.
         */
-       priv_flow_convert_rss_conf(priv, parser,
-                                  (const struct rte_eth_rss_conf *)
-                                  &priv->rss_conf);
+       ret = mlx5_flow_convert_rss_conf(parser,
+                                        (const struct rte_eth_rss_conf *)
+                                        &priv->rss_conf);
+       if (ret)
+               return ret;
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
                if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
@@ -737,6 +735,14 @@ priv_flow_convert_actions(struct priv *priv,
                                        return -rte_errno;
                                }
                        }
+                       if (rss->num > RTE_DIM(parser->queues)) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ACTION,
+                                                  actions,
+                                                  "too many queues for RSS"
+                                                  " context");
+                               return -rte_errno;
+                       }
                        for (n = 0; n < rss->num; ++n) {
                                if (rss->queue[n] >= priv->rxqs_n) {
                                        rte_flow_error_set(error, EINVAL,
@@ -750,8 +756,7 @@ priv_flow_convert_actions(struct priv *priv,
                        for (n = 0; n < rss->num; ++n)
                                parser->queues[n] = rss->queue[n];
                        parser->queues_n = rss->num;
-                       if (priv_flow_convert_rss_conf(priv, parser,
-                                                      rss->rss_conf)) {
+                       if (mlx5_flow_convert_rss_conf(parser, rss->rss_conf)) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
@@ -805,8 +810,6 @@ exit_action_not_supported:
 /**
  * Validate items.
  *
- * @param priv
- *   Pointer to private structure.
  * @param[in] items
  *   Pattern specification (list terminated by the END pattern item).
  * @param[out] error
@@ -818,22 +821,20 @@ exit_action_not_supported:
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_flow_convert_items_validate(struct priv *priv,
-                                const struct rte_flow_item items[],
+mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
                                 struct rte_flow_error *error,
                                 struct mlx5_flow_parse *parser)
 {
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;
+       int ret = 0;
 
-       (void)priv;
        /* Initialise the offsets to start after verbs attribute. */
        for (i = 0; i != hash_rxq_init_n; ++i)
                parser->queue[i].offset = sizeof(struct ibv_flow_attr);
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                const struct mlx5_flow_items *token = NULL;
                unsigned int n;
-               int err;
 
                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
@@ -846,13 +847,15 @@ priv_flow_convert_items_validate(struct priv *priv,
                                break;
                        }
                }
-               if (!token)
+               if (!token) {
+                       ret = -ENOTSUP;
                        goto exit_item_not_supported;
+               }
                cur_item = token;
-               err = mlx5_flow_item_validate(items,
+               ret = mlx5_flow_item_validate(items,
                                              (const uint8_t *)cur_item->mask,
                                              cur_item->mask_sz);
-               if (err)
+               if (ret)
                        goto exit_item_not_supported;
                if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
                        if (parser->inner) {
@@ -889,57 +892,76 @@ priv_flow_convert_items_validate(struct priv *priv,
        }
        return 0;
 exit_item_not_supported:
-       rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
-                          items, "item not supported");
-       return -rte_errno;
+       return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
+                                 items, "item not supported");
 }
 
 /**
  * Allocate memory space to store verbs flow attributes.
  *
- * @param priv
- *   Pointer to private structure.
- * @param[in] priority
- *   Flow priority.
  * @param[in] size
 *   Number of bytes to allocate.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  *
  * @return
- *   A verbs flow attribute on success, NULL otherwise.
+ *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
  */
-static struct ibv_flow_attr*
-priv_flow_convert_allocate(struct priv *priv,
-                          unsigned int priority,
-                          unsigned int size,
-                          struct rte_flow_error *error)
+static struct ibv_flow_attr *
+mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
 {
        struct ibv_flow_attr *ibv_attr;
 
-       (void)priv;
        ibv_attr = rte_calloc(__func__, 1, size, 0);
        if (!ibv_attr) {
                rte_flow_error_set(error, ENOMEM,
                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                   NULL,
-                                  "cannot allocate verbs spec attributes.");
+                                  "cannot allocate verbs spec attributes");
                return NULL;
        }
-       ibv_attr->priority = priority;
        return ibv_attr;
 }
 
+/**
+ * Give inner packet matching a higher priority than non-inner (outer)
+ * matching.
+ *
+ * @param[in, out] parser
+ *   Internal parser structure.
+ * @param attr
+ *   User flow attribute.
+ */
+static void
+mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
+                         const struct rte_flow_attr *attr)
+{
+       unsigned int i;
+
+       if (parser->drop) {
+               parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
+                       attr->priority +
+                       hash_rxq_init[HASH_RXQ_ETH].flow_priority;
+               return;
+       }
+       for (i = 0; i != hash_rxq_init_n; ++i) {
+               if (parser->queue[i].ibv_attr) {
+                       parser->queue[i].ibv_attr->priority =
+                               attr->priority +
+                               hash_rxq_init[i].flow_priority -
+                               (parser->inner ? 1 : 0);
+               }
+       }
+}
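
A worked example of the resulting priorities, assuming attr->priority == 0 and the flow_priority values from the updated hash_rxq_init table (illustration only; lower numbers are matched first by verbs):

        /*
         * outer TCPv4 spec:    0 + 1     = 1
         * inner TCPv4 spec:    0 + 1 - 1 = 0  (tunneled match wins)
         * catch-all Ethernet:  0 + 3     = 3  (least specific, tried last)
         */

This is why every flow_priority in hash_rxq_init was shifted up by one: priority 0 of each level is now reserved for the inner variant of the same match.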
+
 /**
  * Finalise verbs flow attributes.
  *
- * @param priv
- *   Pointer to private structure.
  * @param[in, out] parser
  *   Internal parser structure.
  */
 static void
-priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
+mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
 {
        const unsigned int ipv4 =
                hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
@@ -950,7 +972,16 @@ priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
        const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
        unsigned int i;
 
-       (void)priv;
+       /* Remove any other flow not matching the pattern. */
+       if (parser->queues_n == 1 && !parser->rss_conf.rss_hf) {
+               for (i = 0; i != hash_rxq_init_n; ++i) {
+                       if (i == HASH_RXQ_ETH)
+                               continue;
+                       rte_free(parser->queue[i].ibv_attr);
+                       parser->queue[i].ibv_attr = NULL;
+               }
+               return;
+       }
        if (parser->layer == HASH_RXQ_ETH) {
                goto fill;
        } else {
@@ -1049,8 +1080,8 @@ fill:
 /**
  * Validate and convert a flow supported by the NIC.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param[in] attr
  *   Flow rule attributes.
  * @param[in] pattern
@@ -1066,7 +1097,7 @@ fill:
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_flow_convert(struct priv *priv,
+mlx5_flow_convert(struct rte_eth_dev *dev,
                  const struct rte_flow_attr *attr,
                  const struct rte_flow_item items[],
                  const struct rte_flow_action actions[],
@@ -1083,35 +1114,31 @@ priv_flow_convert(struct priv *priv,
                .layer = HASH_RXQ_ETH,
                .mark_id = MLX5_FLOW_MARK_DEFAULT,
        };
-       ret = priv_flow_convert_attributes(priv, attr, error, parser);
+       ret = mlx5_flow_convert_attributes(attr, error);
        if (ret)
                return ret;
-       ret = priv_flow_convert_actions(priv, actions, error, parser);
+       ret = mlx5_flow_convert_actions(dev, actions, error, parser);
        if (ret)
                return ret;
-       ret = priv_flow_convert_items_validate(priv, items, error, parser);
+       ret = mlx5_flow_convert_items_validate(items, error, parser);
        if (ret)
                return ret;
-       priv_flow_convert_finalise(priv, parser);
+       mlx5_flow_convert_finalise(parser);
        /*
         * Second step.
         * Allocate the memory space to store verbs specifications.
         */
        if (parser->drop) {
+               unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
+
                parser->queue[HASH_RXQ_ETH].ibv_attr =
-                       priv_flow_convert_allocate
-                       (priv, attr->priority,
-                        parser->queue[HASH_RXQ_ETH].offset,
-                        error);
+                       mlx5_flow_convert_allocate(offset, error);
                if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
-                       return ENOMEM;
+                       goto exit_enomem;
                parser->queue[HASH_RXQ_ETH].offset =
                        sizeof(struct ibv_flow_attr);
        } else {
                for (i = 0; i != hash_rxq_init_n; ++i) {
-                       unsigned int priority =
-                               attr->priority +
-                               hash_rxq_init[i].flow_priority;
                        unsigned int offset;
 
                        if (!(parser->rss_conf.rss_hf &
@@ -1120,8 +1147,7 @@ priv_flow_convert(struct priv *priv,
                                continue;
                        offset = parser->queue[i].offset;
                        parser->queue[i].ibv_attr =
-                               priv_flow_convert_allocate(priv, priority,
-                                                          offset, error);
+                               mlx5_flow_convert_allocate(offset, error);
                        if (!parser->queue[i].ibv_attr)
                                goto exit_enomem;
                        parser->queue[i].offset = sizeof(struct ibv_flow_attr);
@@ -1130,6 +1156,11 @@ priv_flow_convert(struct priv *priv,
        /* Third step. Conversion parse, fill the specifications. */
        parser->inner = 0;
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+               struct mlx5_flow_data data = {
+                       .parser = parser,
+                       .error = error,
+               };
+
                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                cur_item = &mlx5_flow_items[items->type];
@@ -1137,18 +1168,14 @@ priv_flow_convert(struct priv *priv,
                                        (cur_item->default_mask ?
                                         cur_item->default_mask :
                                         cur_item->mask),
-                                       parser);
-               if (ret) {
-                       rte_flow_error_set(error, ret,
-                                          RTE_FLOW_ERROR_TYPE_ITEM,
-                                          items, "item not supported");
+                                        &data);
+               if (ret)
                        goto exit_free;
-               }
        }
        if (parser->mark)
                mlx5_flow_create_flag_mark(parser, parser->mark_id);
        if (parser->count && parser->create) {
-               mlx5_flow_create_count(priv, parser);
+               mlx5_flow_create_count(dev, parser);
                if (!parser->cs)
                        goto exit_count_error;
        }
@@ -1156,13 +1183,9 @@ priv_flow_convert(struct priv *priv,
         * Last step. Complete missing specification to reach the RSS
         * configuration.
         */
-       if (!parser->drop) {
-               priv_flow_convert_finalise(priv, parser);
-       } else {
-               parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
-                       attr->priority +
-                       hash_rxq_init[parser->layer].flow_priority;
-       }
+       if (!parser->drop)
+               mlx5_flow_convert_finalise(parser);
+       mlx5_flow_update_priority(parser, attr);
 exit_free:
        /* Only verification is expected, all resources should be released. */
        if (!parser->create) {
@@ -1181,13 +1204,13 @@ exit_enomem:
                        parser->queue[i].ibv_attr = NULL;
                }
        }
-       rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-                          NULL, "cannot allocate verbs spec attributes.");
-       return ret;
+       rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                          NULL, "cannot allocate verbs spec attributes");
+       return -rte_errno;
 exit_count_error:
        rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-                          NULL, "cannot create counter.");
-       return rte_errno;
+                          NULL, "cannot create counter");
+       return -rte_errno;
 }
 
 /**
@@ -1233,15 +1256,18 @@ mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
  *   Default bit-masks to use when item->mask is not provided.
  * @param data[in, out]
  *   User structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
 mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
-                    void *data)
+                    struct mlx5_flow_data *data)
 {
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask = item->mask;
-       struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+       struct mlx5_flow_parse *parser = data->parser;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
        struct ibv_flow_spec_eth eth = {
                .type = parser->inner | IBV_FLOW_SPEC_ETH,
@@ -1282,15 +1308,18 @@ mlx5_flow_create_eth(const struct rte_flow_item *item,
  *   Default bit-masks to use when item->mask is not provided.
  * @param data[in, out]
  *   User structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
 mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
-                     void *data)
+                     struct mlx5_flow_data *data)
 {
        const struct rte_flow_item_vlan *spec = item->spec;
        const struct rte_flow_item_vlan *mask = item->mask;
-       struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+       struct mlx5_flow_parse *parser = data->parser;
        struct ibv_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
 
@@ -1308,9 +1337,18 @@ mlx5_flow_create_vlan(const struct rte_flow_item *item,
                        eth->val.vlan_tag = spec->tci;
                        eth->mask.vlan_tag = mask->tci;
                        eth->val.vlan_tag &= eth->mask.vlan_tag;
+                       /*
+                        * From verbs perspective an empty VLAN is equivalent
+                        * to a packet without VLAN layer.
+                        */
+                       if (!eth->mask.vlan_tag)
+                               goto error;
                }
+               return 0;
        }
-       return 0;
+error:
+       return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
+                                 item, "VLAN cannot be empty");
 }
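
To illustrate the new check (hypothetical masks, not from the patch): from the verbs point of view an all-zero TCI mask matches nothing VLAN-specific, so such an item is now rejected instead of silently degenerating into a non-VLAN rule:

        /* Rejected with EINVAL when used as item->mask: */
        struct rte_flow_item_vlan zero_mask = { .tci = 0 };
        /* Accepted: masks at least the 12 VID bits. */
        struct rte_flow_item_vlan vid_mask = { .tci = rte_cpu_to_be_16(0x0fff) };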
 
 /**
@@ -1322,15 +1360,18 @@ mlx5_flow_create_vlan(const struct rte_flow_item *item,
  *   Default bit-masks to use when item->mask is not provided.
  * @param data[in, out]
  *   User structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
-                     void *data)
+                     struct mlx5_flow_data *data)
 {
        const struct rte_flow_item_ipv4 *spec = item->spec;
        const struct rte_flow_item_ipv4 *mask = item->mask;
-       struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+       struct mlx5_flow_parse *parser = data->parser;
        unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
        struct ibv_flow_spec_ipv4_ext ipv4 = {
                .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
@@ -1374,15 +1415,18 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item,
  *   Default bit-masks to use when item->mask is not provided.
  * @param data[in, out]
  *   User structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
-                     void *data)
+                     struct mlx5_flow_data *data)
 {
        const struct rte_flow_item_ipv6 *spec = item->spec;
        const struct rte_flow_item_ipv6 *mask = item->mask;
-       struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+       struct mlx5_flow_parse *parser = data->parser;
        unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
        struct ibv_flow_spec_ipv6 ipv6 = {
                .type = parser->inner | IBV_FLOW_SPEC_IPV6,
@@ -1394,6 +1438,8 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                parser->layer = HASH_RXQ_IPV6;
        if (spec) {
                unsigned int i;
+               uint32_t vtc_flow_val;
+               uint32_t vtc_flow_mask;
 
                if (!mask)
                        mask = default_mask;
@@ -1405,7 +1451,20 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                       RTE_DIM(ipv6.mask.src_ip));
                memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
                       RTE_DIM(ipv6.mask.dst_ip));
-               ipv6.mask.flow_label = mask->hdr.vtc_flow;
+               vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
+               vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
+               ipv6.val.flow_label =
+                       rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
+                                        IPV6_HDR_FL_SHIFT);
+               ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
+                                        IPV6_HDR_TC_SHIFT;
+               ipv6.val.next_hdr = spec->hdr.proto;
+               ipv6.val.hop_limit = spec->hdr.hop_limits;
+               ipv6.mask.flow_label =
+                       rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
+                                        IPV6_HDR_FL_SHIFT);
+               ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
+                                         IPV6_HDR_TC_SHIFT;
                ipv6.mask.next_hdr = mask->hdr.proto;
                ipv6.mask.hop_limit = mask->hdr.hop_limits;
                /* Remove unwanted bits from values. */
@@ -1414,6 +1473,7 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                        ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
                }
                ipv6.val.flow_label &= ipv6.mask.flow_label;
+               ipv6.val.traffic_class &= ipv6.mask.traffic_class;
                ipv6.val.next_hdr &= ipv6.mask.next_hdr;
                ipv6.val.hop_limit &= ipv6.mask.hop_limit;
        }
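
A worked example of the vtc_flow split, assuming the usual rte_ip.h layout (IPV6_HDR_TC_MASK = 0x0ff00000 with IPV6_HDR_TC_SHIFT = 20, IPV6_HDR_FL_MASK = 0x000fffff): for a host-order vtc_flow of 0x60a12345, the version nibble is 6, traffic_class = (0x60a12345 & 0x0ff00000) >> 20 = 0x0a and flow_label = 0x60a12345 & 0x000fffff = 0x12345. The previous code copied the whole 32-bit vtc_flow into flow_label, corrupting both fields.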
@@ -1430,15 +1490,18 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
  *   Default bit-masks to use when item->mask is not provided.
  * @param data[in, out]
  *   User structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
 mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
-                    void *data)
+                    struct mlx5_flow_data *data)
 {
        const struct rte_flow_item_udp *spec = item->spec;
        const struct rte_flow_item_udp *mask = item->mask;
-       struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+       struct mlx5_flow_parse *parser = data->parser;
        unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
        struct ibv_flow_spec_tcp_udp udp = {
                .type = parser->inner | IBV_FLOW_SPEC_UDP,
@@ -1476,15 +1539,18 @@ mlx5_flow_create_udp(const struct rte_flow_item *item,
  *   Default bit-masks to use when item->mask is not provided.
  * @param data[in, out]
  *   User structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
 mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
-                    void *data)
+                    struct mlx5_flow_data *data)
 {
        const struct rte_flow_item_tcp *spec = item->spec;
        const struct rte_flow_item_tcp *mask = item->mask;
-       struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+       struct mlx5_flow_parse *parser = data->parser;
        unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
        struct ibv_flow_spec_tcp_udp tcp = {
                .type = parser->inner | IBV_FLOW_SPEC_TCP,
@@ -1522,15 +1588,18 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item,
  *   Default bit-masks to use when item->mask is not provided.
  * @param data[in, out]
  *   User structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
-                      void *data)
+                      struct mlx5_flow_data *data)
 {
        const struct rte_flow_item_vxlan *spec = item->spec;
        const struct rte_flow_item_vxlan *mask = item->mask;
-       struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+       struct mlx5_flow_parse *parser = data->parser;
        unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
        struct ibv_flow_spec_tunnel vxlan = {
                .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
@@ -1562,7 +1631,10 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
         * To avoid such a situation, VNI 0 is currently refused.
         */
        if (!vxlan.val.tunnel_id)
-               return EINVAL;
+               return rte_flow_error_set(data->error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM,
+                                         item,
+                                         "VxLAN vni cannot be 0");
        mlx5_flow_create_copy(parser, &vxlan, size);
        return 0;
 }
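
For clarity, the refused case is a spec whose three VNI bytes are all zero (illustrative item, not from the patch):

        struct rte_flow_item_vxlan vni0 = { .vni = { 0x00, 0x00, 0x00 } };
        /* As item->spec this now fails with EINVAL instead of creating an
         * ambiguous rule. */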
@@ -1574,6 +1646,9 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
  *   Internal parser structure.
  * @param mark_id
  *   Mark identifier.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
@@ -1593,19 +1668,20 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
 /**
  * Convert count action to Verbs specification.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param parser
  *   Pointer to MLX5 flow parser structure.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_flow_create_count(struct priv *priv __rte_unused,
+mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
                       struct mlx5_flow_parse *parser __rte_unused)
 {
 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+       struct priv *priv = dev->data->dev_private;
        unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
        struct ibv_counter_set_init_attr init_attr = {0};
        struct ibv_flow_spec_counter_action counter = {
@@ -1616,8 +1692,10 @@ mlx5_flow_create_count(struct priv *priv __rte_unused,
 
        init_attr.counter_set_id = 0;
        parser->cs = ibv_create_counter_set(priv->ctx, &init_attr);
-       if (!parser->cs)
-               return EINVAL;
+       if (!parser->cs) {
+               rte_errno = EINVAL;
+               return -rte_errno;
+       }
        counter.counter_set_handle = parser->cs->handle;
        mlx5_flow_create_copy(parser, &counter, size);
 #endif
@@ -1627,8 +1705,8 @@ mlx5_flow_create_count(struct priv *priv __rte_unused,
 /**
  * Complete flow rule creation with a drop queue.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param parser
  *   Internal parser structure.
  * @param flow
@@ -1637,17 +1715,17 @@ mlx5_flow_create_count(struct priv *priv __rte_unused,
  *   Perform verbose error reporting if not NULL.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_flow_create_action_queue_drop(struct priv *priv,
+mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
                                   struct mlx5_flow_parse *parser,
                                   struct rte_flow *flow,
                                   struct rte_flow_error *error)
 {
+       struct priv *priv = dev->data->dev_private;
        struct ibv_flow_spec_action_drop *drop;
        unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
-       int err = 0;
 
        assert(priv->pd);
        assert(priv->ctx);
@@ -1664,7 +1742,7 @@ priv_flow_create_action_queue_drop(struct priv *priv,
                parser->queue[HASH_RXQ_ETH].ibv_attr;
        if (parser->count)
                flow->cs = parser->cs;
-       if (!priv->dev->data->dev_started)
+       if (!dev->data->dev_started)
                return 0;
        parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
        flow->frxq[HASH_RXQ_ETH].ibv_flow =
@@ -1673,7 +1751,6 @@ priv_flow_create_action_queue_drop(struct priv *priv,
        if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "flow rule creation failure");
-               err = ENOMEM;
                goto error;
        }
        return 0;
@@ -1692,14 +1769,14 @@ error:
                flow->cs = NULL;
                parser->cs = NULL;
        }
-       return err;
+       return -rte_errno;
 }
 
 /**
  * Create hash Rx queues when RSS is enabled.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param parser
  *   Internal parser structure.
  * @param flow
@@ -1708,10 +1785,10 @@ error:
  *   Perform verbose error reporting if not NULL.
  *
  * @return
- *   0 on success, a errno value otherwise and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_flow_create_action_queue_rss(struct priv *priv,
+mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
                                  struct mlx5_flow_parse *parser,
                                  struct rte_flow *flow,
                                  struct rte_flow_error *error)
@@ -1726,29 +1803,29 @@ priv_flow_create_action_queue_rss(struct priv *priv,
                flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
                parser->queue[i].ibv_attr = NULL;
                hash_fields = hash_rxq_init[i].hash_fields;
-               if (!priv->dev->data->dev_started)
+               if (!dev->data->dev_started)
                        continue;
                flow->frxq[i].hrxq =
-                       mlx5_priv_hrxq_get(priv,
-                                          parser->rss_conf.rss_key,
-                                          parser->rss_conf.rss_key_len,
-                                          hash_fields,
-                                          parser->queues,
-                                          parser->queues_n);
+                       mlx5_hrxq_get(dev,
+                                     parser->rss_conf.rss_key,
+                                     parser->rss_conf.rss_key_len,
+                                     hash_fields,
+                                     parser->queues,
+                                     parser->queues_n);
                if (flow->frxq[i].hrxq)
                        continue;
                flow->frxq[i].hrxq =
-                       mlx5_priv_hrxq_new(priv,
-                                          parser->rss_conf.rss_key,
-                                          parser->rss_conf.rss_key_len,
-                                          hash_fields,
-                                          parser->queues,
-                                          parser->queues_n);
+                       mlx5_hrxq_new(dev,
+                                     parser->rss_conf.rss_key,
+                                     parser->rss_conf.rss_key_len,
+                                     hash_fields,
+                                     parser->queues,
+                                     parser->queues_n);
                if (!flow->frxq[i].hrxq) {
-                       rte_flow_error_set(error, ENOMEM,
-                                          RTE_FLOW_ERROR_TYPE_HANDLE,
-                                          NULL, "cannot create hash rxq");
-                       return ENOMEM;
+                       return rte_flow_error_set(error, ENOMEM,
+                                                 RTE_FLOW_ERROR_TYPE_HANDLE,
+                                                 NULL,
+                                                 "cannot create hash rxq");
                }
        }
        return 0;
@@ -1757,8 +1834,8 @@ priv_flow_create_action_queue_rss(struct priv *priv,
 /**
  * Complete flow rule creation.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param parser
  *   Internal parser structure.
  * @param flow
@@ -1767,26 +1844,28 @@ priv_flow_create_action_queue_rss(struct priv *priv,
  *   Perform verbose error reporting if not NULL.
  *
  * @return
- *   0 on success, a errno value otherwise and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_flow_create_action_queue(struct priv *priv,
+mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
                              struct mlx5_flow_parse *parser,
                              struct rte_flow *flow,
                              struct rte_flow_error *error)
 {
-       int err = 0;
+       struct priv *priv __rte_unused = dev->data->dev_private;
+       int ret;
        unsigned int i;
+       unsigned int flows_n = 0;
 
        assert(priv->pd);
        assert(priv->ctx);
        assert(!parser->drop);
-       err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
-       if (err)
+       ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
+       if (ret)
                goto error;
        if (parser->count)
                flow->cs = parser->cs;
-       if (!priv->dev->data->dev_started)
+       if (!dev->data->dev_started)
                return 0;
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (!flow->frxq[i].hrxq)
@@ -1798,13 +1877,19 @@ priv_flow_create_action_queue(struct priv *priv,
                        rte_flow_error_set(error, ENOMEM,
                                           RTE_FLOW_ERROR_TYPE_HANDLE,
                                           NULL, "flow rule creation failure");
-                       err = ENOMEM;
                        goto error;
                }
-               DEBUG("%p type %d QP %p ibv_flow %p",
-                     (void *)flow, i,
-                     (void *)flow->frxq[i].hrxq,
-                     (void *)flow->frxq[i].ibv_flow);
+               ++flows_n;
+               DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
+                       dev->data->port_id,
+                       (void *)flow, i,
+                       (void *)flow->frxq[i].hrxq,
+                       (void *)flow->frxq[i].ibv_flow);
+       }
+       if (!flows_n) {
+               rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
+                                  NULL, "internal error in flow creation");
+               goto error;
        }
        for (i = 0; i != parser->queues_n; ++i) {
                struct mlx5_rxq_data *q =
@@ -1814,6 +1899,7 @@ priv_flow_create_action_queue(struct priv *priv,
        }
        return 0;
 error:
+       ret = rte_errno; /* Save rte_errno before cleanup. */
        assert(flow);
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (flow->frxq[i].ibv_flow) {
@@ -1822,7 +1908,7 @@ error:
                        claim_zero(ibv_destroy_flow(ibv_flow));
                }
                if (flow->frxq[i].hrxq)
-                       mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
+                       mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
                if (flow->frxq[i].ibv_attr)
                        rte_free(flow->frxq[i].ibv_attr);
        }
@@ -1831,14 +1917,15 @@ error:
                flow->cs = NULL;
                parser->cs = NULL;
        }
-       return err;
+       rte_errno = ret; /* Restore rte_errno. */
+       return -rte_errno;
 }
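
The save/restore of rte_errno above deserves a note: the cleanup path calls ibv_destroy_flow() and mlx5_hrxq_release(), which may themselves overwrite rte_errno, so the primary failure cause is stashed first and restored last. A generic sketch of the pattern (cleanup_resources() is a hypothetical placeholder):

        error:
                ret = rte_errno;     /* Save the primary failure cause. */
                cleanup_resources(); /* May clobber rte_errno. */
                rte_errno = ret;     /* Restore it for the caller. */
                return -rte_errno;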
 
 /**
  * Convert a flow.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param list
  *   Pointer to a TAILQ flow list.
  * @param[in] attr
@@ -1851,23 +1938,23 @@ error:
  *   Perform verbose error reporting if not NULL.
  *
  * @return
- *   A flow on success, NULL otherwise.
+ *   A flow on success, NULL otherwise and rte_errno is set.
  */
 static struct rte_flow *
-priv_flow_create(struct priv *priv,
-                struct mlx5_flows *list,
-                const struct rte_flow_attr *attr,
-                const struct rte_flow_item items[],
-                const struct rte_flow_action actions[],
-                struct rte_flow_error *error)
+mlx5_flow_list_create(struct rte_eth_dev *dev,
+                     struct mlx5_flows *list,
+                     const struct rte_flow_attr *attr,
+                     const struct rte_flow_item items[],
+                     const struct rte_flow_action actions[],
+                     struct rte_flow_error *error)
 {
        struct mlx5_flow_parse parser = { .create = 1, };
        struct rte_flow *flow = NULL;
        unsigned int i;
-       int err;
+       int ret;
 
-       err = priv_flow_convert(priv, attr, items, actions, error, &parser);
-       if (err)
+       ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
+       if (ret)
                goto exit;
        flow = rte_calloc(__func__, 1,
                          sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
@@ -1890,14 +1977,15 @@ priv_flow_create(struct priv *priv,
        memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
        /* finalise the flow. */
        if (parser.drop)
-               err = priv_flow_create_action_queue_drop(priv, &parser, flow,
+               ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
                                                         error);
        else
-               err = priv_flow_create_action_queue(priv, &parser, flow, error);
-       if (err)
+               ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
+       if (ret)
                goto exit;
        TAILQ_INSERT_TAIL(list, flow, next);
-       DEBUG("Flow created %p", (void *)flow);
+       DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
+               (void *)flow);
        return flow;
 exit:
        for (i = 0; i != hash_rxq_init_n; ++i) {
@@ -1921,14 +2009,9 @@ mlx5_flow_validate(struct rte_eth_dev *dev,
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error)
 {
-       struct priv *priv = dev->data->dev_private;
-       int ret;
        struct mlx5_flow_parse parser = { .create = 0, };
 
-       priv_lock(priv);
-       ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
-       priv_unlock(priv);
-       return ret;
+       return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
 }
 
 /**
@@ -1945,30 +2028,26 @@ mlx5_flow_create(struct rte_eth_dev *dev,
                 struct rte_flow_error *error)
 {
        struct priv *priv = dev->data->dev_private;
-       struct rte_flow *flow;
 
-       priv_lock(priv);
-       flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
-                               error);
-       priv_unlock(priv);
-       return flow;
+       return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
+                                    error);
 }
 
 /**
- * Destroy a flow.
+ * Destroy a flow in a list.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param list
  *   Pointer to a TAILQ flow list.
  * @param[in] flow
  *   Flow to destroy.
  */
 static void
-priv_flow_destroy(struct priv *priv,
-                 struct mlx5_flows *list,
-                 struct rte_flow *flow)
+mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
+                      struct rte_flow *flow)
 {
+       struct priv *priv = dev->data->dev_private;
        unsigned int i;
 
        if (flow->drop || !flow->mark)
@@ -2015,7 +2094,7 @@ free:
                        if (frxq->ibv_flow)
                                claim_zero(ibv_destroy_flow(frxq->ibv_flow));
                        if (frxq->hrxq)
-                               mlx5_priv_hrxq_release(priv, frxq->hrxq);
+                               mlx5_hrxq_release(dev, frxq->hrxq);
                        if (frxq->ibv_attr)
                                rte_free(frxq->ibv_attr);
                }
@@ -2025,53 +2104,60 @@ free:
                flow->cs = NULL;
        }
        TAILQ_REMOVE(list, flow, next);
-       DEBUG("Flow destroyed %p", (void *)flow);
+       DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
+               (void *)flow);
        rte_free(flow);
 }
 
 /**
  * Destroy all flows.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param list
  *   Pointer to a TAILQ flow list.
  */
 void
-priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
+mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
 {
        while (!TAILQ_EMPTY(list)) {
                struct rte_flow *flow;
 
                flow = TAILQ_FIRST(list);
-               priv_flow_destroy(priv, list, flow);
+               mlx5_flow_list_destroy(dev, list, flow);
        }
 }
 
 /**
  * Create drop queue.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  *
  * @return
- *   0 on success.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-priv_flow_create_drop_queue(struct priv *priv)
+mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_hrxq_drop *fdq = NULL;
 
        assert(priv->pd);
        assert(priv->ctx);
        fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
        if (!fdq) {
-               WARN("cannot allocate memory for drop queue");
-               goto error;
+               DRV_LOG(WARNING,
+                       "port %u cannot allocate memory for drop queue",
+                       dev->data->port_id);
+               rte_errno = ENOMEM;
+               return -rte_errno;
        }
        fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
        if (!fdq->cq) {
-               WARN("cannot allocate CQ for drop queue");
+               DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
+                       dev->data->port_id);
+               rte_errno = errno;
                goto error;
        }
        fdq->wq = ibv_create_wq(priv->ctx,
@@ -2083,7 +2169,9 @@ priv_flow_create_drop_queue(struct priv *priv)
                        .cq = fdq->cq,
                        });
        if (!fdq->wq) {
-               WARN("cannot allocate WQ for drop queue");
+               DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
+                       dev->data->port_id);
+               rte_errno = errno;
                goto error;
        }
        fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
@@ -2093,7 +2181,11 @@ priv_flow_create_drop_queue(struct priv *priv)
                        .comp_mask = 0,
                        });
        if (!fdq->ind_table) {
-               WARN("cannot allocate indirection table for drop queue");
+               DRV_LOG(WARNING,
+                       "port %u cannot allocate indirection table for drop"
+                       " queue",
+                       dev->data->port_id);
+               rte_errno = errno;
                goto error;
        }
        fdq->qp = ibv_create_qp_ex(priv->ctx,
@@ -2114,7 +2206,9 @@ priv_flow_create_drop_queue(struct priv *priv)
                        .pd = priv->pd
                });
        if (!fdq->qp) {
-               WARN("cannot allocate QP for drop queue");
+               DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
+                       dev->data->port_id);
+               rte_errno = errno;
                goto error;
        }
        priv->flow_drop_queue = fdq;
@@ -2131,18 +2225,19 @@ error:
        if (fdq)
                rte_free(fdq);
        priv->flow_drop_queue = NULL;
-       return -1;
+       return -rte_errno;
 }
 
 /**
  * Delete drop queue.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  */
 void
-priv_flow_delete_drop_queue(struct priv *priv)
+mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
 
        if (!fdq)
@@ -2162,14 +2257,15 @@ priv_flow_delete_drop_queue(struct priv *priv)
 /**
  * Remove all flows.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param list
  *   Pointer to a TAILQ flow list.
  */
 void
-priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
+mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
 {
+       struct priv *priv = dev->data->dev_private;
        struct rte_flow *flow;
 
        TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
@@ -2182,7 +2278,8 @@ priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
                        claim_zero(ibv_destroy_flow
                                   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
                        flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
-                       DEBUG("Flow %p removed", (void *)flow);
+                       DRV_LOG(DEBUG, "port %u flow %p removed",
+                               dev->data->port_id, (void *)flow);
                        /* Next flow. */
                        continue;
                }
@@ -2211,27 +2308,29 @@ priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
                                continue;
                        claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
                        flow->frxq[i].ibv_flow = NULL;
-                       mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
+                       mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
                        flow->frxq[i].hrxq = NULL;
                }
-               DEBUG("Flow %p removed", (void *)flow);
+               DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
+                       (void *)flow);
        }
 }
 
 /**
  * Add all flows.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param list
  *   Pointer to a TAILQ flow list.
  *
  * @return
- *   0 on success, a errno value otherwise and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-priv_flow_start(struct priv *priv, struct mlx5_flows *list)
+mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
 {
+       struct priv *priv = dev->data->dev_private;
        struct rte_flow *flow;
 
        TAILQ_FOREACH(flow, list, next) {
@@ -2243,12 +2342,14 @@ priv_flow_start(struct priv *priv, struct mlx5_flows *list)
                                (priv->flow_drop_queue->qp,
                                 flow->frxq[HASH_RXQ_ETH].ibv_attr);
                        if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
-                               DEBUG("Flow %p cannot be applied",
-                                     (void *)flow);
+                               DRV_LOG(DEBUG,
+                                       "port %u flow %p cannot be applied",
+                                       dev->data->port_id, (void *)flow);
                                rte_errno = EINVAL;
-                               return rte_errno;
+                               return -rte_errno;
                        }
-                       DEBUG("Flow %p applied", (void *)flow);
+                       DRV_LOG(DEBUG, "port %u flow %p applied",
+                               dev->data->port_id, (void *)flow);
                        /* Next flow. */
                        continue;
                }
@@ -2256,36 +2357,39 @@ priv_flow_start(struct priv *priv, struct mlx5_flows *list)
                        if (!flow->frxq[i].ibv_attr)
                                continue;
                        flow->frxq[i].hrxq =
-                               mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
-                                                  flow->rss_conf.rss_key_len,
-                                                  hash_rxq_init[i].hash_fields,
-                                                  (*flow->queues),
-                                                  flow->queues_n);
+                               mlx5_hrxq_get(dev, flow->rss_conf.rss_key,
+                                             flow->rss_conf.rss_key_len,
+                                             hash_rxq_init[i].hash_fields,
+                                             (*flow->queues),
+                                             flow->queues_n);
                        if (flow->frxq[i].hrxq)
                                goto flow_create;
                        flow->frxq[i].hrxq =
-                               mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
-                                                  flow->rss_conf.rss_key_len,
-                                                  hash_rxq_init[i].hash_fields,
-                                                  (*flow->queues),
-                                                  flow->queues_n);
+                               mlx5_hrxq_new(dev, flow->rss_conf.rss_key,
+                                             flow->rss_conf.rss_key_len,
+                                             hash_rxq_init[i].hash_fields,
+                                             (*flow->queues),
+                                             flow->queues_n);
                        if (!flow->frxq[i].hrxq) {
-                               DEBUG("Flow %p cannot be applied",
-                                     (void *)flow);
+                               DRV_LOG(DEBUG,
+                                       "port %u flow %p cannot be applied",
+                                       dev->data->port_id, (void *)flow);
                                rte_errno = EINVAL;
-                               return rte_errno;
+                               return -rte_errno;
                        }
 flow_create:
                        flow->frxq[i].ibv_flow =
                                ibv_create_flow(flow->frxq[i].hrxq->qp,
                                                flow->frxq[i].ibv_attr);
                        if (!flow->frxq[i].ibv_flow) {
-                               DEBUG("Flow %p cannot be applied",
-                                     (void *)flow);
+                               DRV_LOG(DEBUG,
+                                       "port %u flow %p cannot be applied",
+                                       dev->data->port_id, (void *)flow);
                                rte_errno = EINVAL;
-                               return rte_errno;
+                               return -rte_errno;
                        }
-                       DEBUG("Flow %p applied", (void *)flow);
+                       DRV_LOG(DEBUG, "port %u flow %p applied",
+                               dev->data->port_id, (void *)flow);
                }
                if (!flow->mark)
                        continue;
@@ -2298,20 +2402,21 @@ flow_create:
 /**
  * Verify the flow list is empty.
  *
- * @param priv
- *  Pointer to private structure.
+ * @param dev
+ *  Pointer to Ethernet device.
  *
  * @return the number of flows not released.
  */
 int
-priv_flow_verify(struct priv *priv)
+mlx5_flow_verify(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        struct rte_flow *flow;
        int ret = 0;
 
        TAILQ_FOREACH(flow, &priv->flows, next) {
-               DEBUG("%p: flow %p still referenced", (void *)priv,
-                     (void *)flow);
+               DRV_LOG(DEBUG, "port %u flow %p still referenced",
+                       dev->data->port_id, (void *)flow);
                ++ret;
        }
        return ret;
@@ -2332,7 +2437,7 @@ priv_flow_verify(struct priv *priv)
  *   A VLAN flow mask to apply.
  *
  * @return
- *   0 on success.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
@@ -2384,17 +2489,19 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
                } local;
        } action_rss;
 
-       if (!priv->reta_idx_n)
-               return EINVAL;
+       if (!priv->reta_idx_n) {
+               rte_errno = EINVAL;
+               return -rte_errno;
+       }
        for (i = 0; i != priv->reta_idx_n; ++i)
                action_rss.local.queue[i] = (*priv->reta_idx)[i];
        action_rss.local.rss_conf = &priv->rss_conf;
        action_rss.local.num = priv->reta_idx_n;
        actions[0].conf = (const void *)&action_rss.rss;
-       flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
-                               &error);
+       flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
+                                    actions, &error);
        if (!flow)
-               return rte_errno;
+               return -rte_errno;
        return 0;
 }
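
A pattern worth noting across these hunks: functions stop returning positive errno values and instead set rte_errno and return its negation, matching the wider DPDK convention. A minimal sketch of the contract (helper names are illustrative, not part of the driver):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <rte_errno.h>

    /* Illustrative helper following the new return convention. */
    static int
    example_op(int ok)
    {
            if (!ok) {
                    rte_errno = EINVAL;
                    return -rte_errno; /* Negative value, rte_errno set. */
            }
            return 0;
    }

    /* Callers test for a negative return and read rte_errno. */
    static void
    example_caller(void)
    {
            if (example_op(0) < 0)
                    printf("failed: %s\n", strerror(rte_errno));
    }
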
 
@@ -2409,7 +2516,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
  *   An Ethernet flow mask to apply.
  *
  * @return
- *   0 on success.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_ctrl_flow(struct rte_eth_dev *dev,
@@ -2428,14 +2535,11 @@ mlx5_ctrl_flow(struct rte_eth_dev *dev,
 int
 mlx5_flow_destroy(struct rte_eth_dev *dev,
                  struct rte_flow *flow,
-                 struct rte_flow_error *error)
+                 struct rte_flow_error *error __rte_unused)
 {
        struct priv *priv = dev->data->dev_private;
 
-       (void)error;
-       priv_lock(priv);
-       priv_flow_destroy(priv, &priv->flows, flow);
-       priv_unlock(priv);
+       mlx5_flow_list_destroy(dev, &priv->flows, flow);
        return 0;
 }
 
@@ -2447,14 +2551,11 @@ mlx5_flow_destroy(struct rte_eth_dev *dev,
  */
 int
 mlx5_flow_flush(struct rte_eth_dev *dev,
-               struct rte_flow_error *error)
+               struct rte_flow_error *error __rte_unused)
 {
        struct priv *priv = dev->data->dev_private;
 
-       (void)error;
-       priv_lock(priv);
-       priv_flow_flush(priv, &priv->flows);
-       priv_unlock(priv);
+       mlx5_flow_list_flush(dev, &priv->flows);
        return 0;
 }
 
@@ -2468,10 +2569,10 @@ mlx5_flow_flush(struct rte_eth_dev *dev,
  *   returned data from the counter.
  *
  * @return
- *   0 on success, a errno value otherwise and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_flow_query_count(struct ibv_counter_set *cs,
+mlx5_flow_query_count(struct ibv_counter_set *cs,
                      struct mlx5_flow_counter_stats *counter_stats,
                      struct rte_flow_query_count *query_count,
                      struct rte_flow_error *error)
@@ -2485,15 +2586,13 @@ priv_flow_query_count(struct ibv_counter_set *cs,
                .out = counters,
                .outlen = 2 * sizeof(uint64_t),
        };
-       int res = ibv_query_counter_set(&query_cs_attr, &query_out);
+       int err = ibv_query_counter_set(&query_cs_attr, &query_out);
 
-       if (res) {
-               rte_flow_error_set(error, -res,
-                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-                                  NULL,
-                                  "cannot read counter");
-               return -res;
-       }
+       if (err)
+               return rte_flow_error_set(error, err,
+                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                         NULL,
+                                         "cannot read counter");
        query_count->hits_set = 1;
        query_count->bytes_set = 1;
        query_count->hits = counters[0] - counter_stats->hits;
@@ -2512,29 +2611,28 @@ priv_flow_query_count(struct ibv_counter_set *cs,
  * @see rte_flow_ops
  */
 int
-mlx5_flow_query(struct rte_eth_dev *dev,
+mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
                struct rte_flow *flow,
                enum rte_flow_action_type action __rte_unused,
                void *data,
                struct rte_flow_error *error)
 {
-       struct priv *priv = dev->data->dev_private;
-       int res = EINVAL;
-
-       priv_lock(priv);
        if (flow->cs) {
-               res = priv_flow_query_count(flow->cs,
-                                       &flow->counter_stats,
-                                       (struct rte_flow_query_count *)data,
-                                       error);
+               int ret;
+
+               ret = mlx5_flow_query_count(flow->cs,
+                                           &flow->counter_stats,
+                                           (struct rte_flow_query_count *)data,
+                                           error);
+               if (ret)
+                       return ret;
        } else {
-               rte_flow_error_set(error, res,
-                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-                                  NULL,
-                                  "no counter found for flow");
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                         NULL,
+                                         "no counter found for flow");
        }
-       priv_unlock(priv);
-       return -res;
+       return 0;
 }
 #endif
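
For context, applications reach mlx5_flow_query() through the generic rte_flow API. A caller-side sketch against the 17.11 interface; port_id and flow are assumed to come from an earlier rte_flow_create() with a COUNT action:

    #include <inttypes.h>
    #include <stdio.h>
    #include <rte_flow.h>

    static int
    query_flow_counters(uint16_t port_id, struct rte_flow *flow)
    {
            struct rte_flow_query_count count = { .reset = 0 };
            struct rte_flow_error error;

            if (rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
                               &count, &error))
                    return -1; /* error.message describes the failure. */
            if (count.hits_set && count.bytes_set)
                    printf("hits: %" PRIu64 " bytes: %" PRIu64 "\n",
                           count.hits, count.bytes);
            return 0;
    }
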
 
@@ -2551,48 +2649,50 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
 {
        struct priv *priv = dev->data->dev_private;
 
-       priv_lock(priv);
        if (dev->data->dev_started) {
                rte_flow_error_set(error, EBUSY,
                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                   NULL,
                                   "port must be stopped first");
-               priv_unlock(priv);
                return -rte_errno;
        }
        priv->isolated = !!enable;
        if (enable)
-               priv->dev->dev_ops = &mlx5_dev_ops_isolate;
+               dev->dev_ops = &mlx5_dev_ops_isolate;
        else
-               priv->dev->dev_ops = &mlx5_dev_ops;
-       priv_unlock(priv);
+               dev->dev_ops = &mlx5_dev_ops;
        return 0;
 }
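
mlx5_flow_isolate() refuses to run on a started port, so callers must sequence it around stop/start. A usage sketch:

    #include <rte_ethdev.h>
    #include <rte_flow.h>

    /* Switch a port to isolated mode so that only traffic matching
     * rte_flow rules reaches the queues. */
    static int
    enable_isolated_mode(uint16_t port_id)
    {
            struct rte_flow_error error;

            rte_eth_dev_stop(port_id); /* The PMD rejects the call otherwise. */
            if (rte_flow_isolate(port_id, 1, &error))
                    return -1;
            return rte_eth_dev_start(port_id);
    }
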
 
 /**
  * Convert a flow director filter to a generic flow.
  *
- * @param priv
- *   Private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param fdir_filter
  *   Flow director filter to add.
  * @param attributes
  *   Generic flow parameters structure.
  *
  * @return
- *  0 on success, errno value on error.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_fdir_filter_convert(struct priv *priv,
+mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
                         const struct rte_eth_fdir_filter *fdir_filter,
                         struct mlx5_fdir *attributes)
 {
+       struct priv *priv = dev->data->dev_private;
        const struct rte_eth_fdir_input *input = &fdir_filter->input;
+       const struct rte_eth_fdir_masks *mask =
+               &dev->data->dev_conf.fdir_conf.mask;
 
        /* Validate queue number. */
        if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
-               ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
-               return EINVAL;
+               DRV_LOG(ERR, "port %u invalid queue number %d",
+                       dev->data->port_id, fdir_filter->action.rx_queue);
+               rte_errno = EINVAL;
+               return -rte_errno;
        }
        attributes->attr.ingress = 1;
        attributes->items[0] = (struct rte_flow_item) {
@@ -2613,134 +2713,140 @@ priv_fdir_filter_convert(struct priv *priv,
                };
                break;
        default:
-               ERROR("invalid behavior %d", fdir_filter->action.behavior);
-               return ENOTSUP;
+               DRV_LOG(ERR, "port %u invalid behavior %d",
+                       dev->data->port_id,
+                       fdir_filter->action.behavior);
+               rte_errno = ENOTSUP;
+               return -rte_errno;
        }
        attributes->queue.index = fdir_filter->action.rx_queue;
+       /* Handle L3. */
        switch (fdir_filter->input.flow_type) {
        case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
+       case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
+       case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
                attributes->l3.ipv4.hdr = (struct ipv4_hdr){
-                       .src_addr = input->flow.udp4_flow.ip.src_ip,
-                       .dst_addr = input->flow.udp4_flow.ip.dst_ip,
-                       .time_to_live = input->flow.udp4_flow.ip.ttl,
-                       .type_of_service = input->flow.udp4_flow.ip.tos,
-                       .next_proto_id = input->flow.udp4_flow.ip.proto,
+                       .src_addr = input->flow.ip4_flow.src_ip,
+                       .dst_addr = input->flow.ip4_flow.dst_ip,
+                       .time_to_live = input->flow.ip4_flow.ttl,
+                       .type_of_service = input->flow.ip4_flow.tos,
+                       .next_proto_id = input->flow.ip4_flow.proto,
                };
-               attributes->l4.udp.hdr = (struct udp_hdr){
-                       .src_port = input->flow.udp4_flow.src_port,
-                       .dst_port = input->flow.udp4_flow.dst_port,
+               attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
+                       .src_addr = mask->ipv4_mask.src_ip,
+                       .dst_addr = mask->ipv4_mask.dst_ip,
+                       .time_to_live = mask->ipv4_mask.ttl,
+                       .type_of_service = mask->ipv4_mask.tos,
+                       .next_proto_id = mask->ipv4_mask.proto,
                };
                attributes->items[1] = (struct rte_flow_item){
                        .type = RTE_FLOW_ITEM_TYPE_IPV4,
                        .spec = &attributes->l3,
+                       .mask = &attributes->l3_mask,
+               };
+               break;
+       case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
+       case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
+       case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
+               attributes->l3.ipv6.hdr = (struct ipv6_hdr){
+                       .hop_limits = input->flow.ipv6_flow.hop_limits,
+                       .proto = input->flow.ipv6_flow.proto,
+               };
+
+               memcpy(attributes->l3.ipv6.hdr.src_addr,
+                      input->flow.ipv6_flow.src_ip,
+                      RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+               memcpy(attributes->l3.ipv6.hdr.dst_addr,
+                      input->flow.ipv6_flow.dst_ip,
+                      RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
+               memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
+                      mask->ipv6_mask.src_ip,
+                      RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
+               memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
+                      mask->ipv6_mask.dst_ip,
+                      RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
+               attributes->items[1] = (struct rte_flow_item){
+                       .type = RTE_FLOW_ITEM_TYPE_IPV6,
+                       .spec = &attributes->l3,
+                       .mask = &attributes->l3_mask,
+               };
+               break;
+       default:
+               DRV_LOG(ERR, "port %u invalid flow type%d",
+                       dev->data->port_id, fdir_filter->input.flow_type);
+               rte_errno = ENOTSUP;
+               return -rte_errno;
+       }
+       /* Handle L4. */
+       switch (fdir_filter->input.flow_type) {
+       case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
+               attributes->l4.udp.hdr = (struct udp_hdr){
+                       .src_port = input->flow.udp4_flow.src_port,
+                       .dst_port = input->flow.udp4_flow.dst_port,
+               };
+               attributes->l4_mask.udp.hdr = (struct udp_hdr){
+                       .src_port = mask->src_port_mask,
+                       .dst_port = mask->dst_port_mask,
                };
                attributes->items[2] = (struct rte_flow_item){
                        .type = RTE_FLOW_ITEM_TYPE_UDP,
                        .spec = &attributes->l4,
+                       .mask = &attributes->l4_mask,
                };
                break;
        case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
-               attributes->l3.ipv4.hdr = (struct ipv4_hdr){
-                       .src_addr = input->flow.tcp4_flow.ip.src_ip,
-                       .dst_addr = input->flow.tcp4_flow.ip.dst_ip,
-                       .time_to_live = input->flow.tcp4_flow.ip.ttl,
-                       .type_of_service = input->flow.tcp4_flow.ip.tos,
-                       .next_proto_id = input->flow.tcp4_flow.ip.proto,
-               };
                attributes->l4.tcp.hdr = (struct tcp_hdr){
                        .src_port = input->flow.tcp4_flow.src_port,
                        .dst_port = input->flow.tcp4_flow.dst_port,
                };
-               attributes->items[1] = (struct rte_flow_item){
-                       .type = RTE_FLOW_ITEM_TYPE_IPV4,
-                       .spec = &attributes->l3,
+               attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
+                       .src_port = mask->src_port_mask,
+                       .dst_port = mask->dst_port_mask,
                };
                attributes->items[2] = (struct rte_flow_item){
                        .type = RTE_FLOW_ITEM_TYPE_TCP,
                        .spec = &attributes->l4,
-               };
-               break;
-       case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
-               attributes->l3.ipv4.hdr = (struct ipv4_hdr){
-                       .src_addr = input->flow.ip4_flow.src_ip,
-                       .dst_addr = input->flow.ip4_flow.dst_ip,
-                       .time_to_live = input->flow.ip4_flow.ttl,
-                       .type_of_service = input->flow.ip4_flow.tos,
-                       .next_proto_id = input->flow.ip4_flow.proto,
-               };
-               attributes->items[1] = (struct rte_flow_item){
-                       .type = RTE_FLOW_ITEM_TYPE_IPV4,
-                       .spec = &attributes->l3,
+                       .mask = &attributes->l4_mask,
                };
                break;
        case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
-               attributes->l3.ipv6.hdr = (struct ipv6_hdr){
-                       .hop_limits = input->flow.udp6_flow.ip.hop_limits,
-                       .proto = input->flow.udp6_flow.ip.proto,
-               };
-               memcpy(attributes->l3.ipv6.hdr.src_addr,
-                      input->flow.udp6_flow.ip.src_ip,
-                      RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
-               memcpy(attributes->l3.ipv6.hdr.dst_addr,
-                      input->flow.udp6_flow.ip.dst_ip,
-                      RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
                attributes->l4.udp.hdr = (struct udp_hdr){
                        .src_port = input->flow.udp6_flow.src_port,
                        .dst_port = input->flow.udp6_flow.dst_port,
                };
-               attributes->items[1] = (struct rte_flow_item){
-                       .type = RTE_FLOW_ITEM_TYPE_IPV6,
-                       .spec = &attributes->l3,
+               attributes->l4_mask.udp.hdr = (struct udp_hdr){
+                       .src_port = mask->src_port_mask,
+                       .dst_port = mask->dst_port_mask,
                };
                attributes->items[2] = (struct rte_flow_item){
                        .type = RTE_FLOW_ITEM_TYPE_UDP,
                        .spec = &attributes->l4,
+                       .mask = &attributes->l4_mask,
                };
                break;
        case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
-               attributes->l3.ipv6.hdr = (struct ipv6_hdr){
-                       .hop_limits = input->flow.tcp6_flow.ip.hop_limits,
-                       .proto = input->flow.tcp6_flow.ip.proto,
-               };
-               memcpy(attributes->l3.ipv6.hdr.src_addr,
-                      input->flow.tcp6_flow.ip.src_ip,
-                      RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
-               memcpy(attributes->l3.ipv6.hdr.dst_addr,
-                      input->flow.tcp6_flow.ip.dst_ip,
-                      RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
                attributes->l4.tcp.hdr = (struct tcp_hdr){
                        .src_port = input->flow.tcp6_flow.src_port,
                        .dst_port = input->flow.tcp6_flow.dst_port,
                };
-               attributes->items[1] = (struct rte_flow_item){
-                       .type = RTE_FLOW_ITEM_TYPE_IPV6,
-                       .spec = &attributes->l3,
+               attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
+                       .src_port = mask->src_port_mask,
+                       .dst_port = mask->dst_port_mask,
                };
                attributes->items[2] = (struct rte_flow_item){
                        .type = RTE_FLOW_ITEM_TYPE_TCP,
                        .spec = &attributes->l4,
+                       .mask = &attributes->l4_mask,
                };
                break;
+       case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
        case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
-               attributes->l3.ipv6.hdr = (struct ipv6_hdr){
-                       .hop_limits = input->flow.ipv6_flow.hop_limits,
-                       .proto = input->flow.ipv6_flow.proto,
-               };
-               memcpy(attributes->l3.ipv6.hdr.src_addr,
-                      input->flow.ipv6_flow.src_ip,
-                      RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
-               memcpy(attributes->l3.ipv6.hdr.dst_addr,
-                      input->flow.ipv6_flow.dst_ip,
-                      RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
-               attributes->items[1] = (struct rte_flow_item){
-                       .type = RTE_FLOW_ITEM_TYPE_IPV6,
-                       .spec = &attributes->l3,
-               };
                break;
        default:
-               ERROR("invalid flow type%d",
-                     fdir_filter->input.flow_type);
-               return ENOTSUP;
+               DRV_LOG(ERR, "port %u invalid flow type%d",
+                       dev->data->port_id, fdir_filter->input.flow_type);
+               rte_errno = ENOTSUP;
+               return -rte_errno;
        }
        return 0;
 }
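
The net effect of the conversion is an items[] array that reads like a hand-written rte_flow pattern. A compact restatement of what the function builds for an RTE_ETH_FLOW_NONFRAG_IPV4_UDP filter, assuming the driver's struct mlx5_fdir (items, l3/l4 and their masks) as used above:

    #include <rte_flow.h>

    /* What attributes->items[] ends up holding for an IPv4/UDP filter;
     * specs come from the filter input, masks from fdir_conf.mask. */
    static void
    fill_ipv4_udp_pattern(struct mlx5_fdir *attributes)
    {
            attributes->items[0].type = RTE_FLOW_ITEM_TYPE_ETH;
            attributes->items[1] = (struct rte_flow_item){
                    .type = RTE_FLOW_ITEM_TYPE_IPV4,
                    .spec = &attributes->l3,
                    .mask = &attributes->l3_mask,
            };
            attributes->items[2] = (struct rte_flow_item){
                    .type = RTE_FLOW_ITEM_TYPE_UDP,
                    .spec = &attributes->l4,
                    .mask = &attributes->l4_mask,
            };
            attributes->items[3].type = RTE_FLOW_ITEM_TYPE_END;
    }
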
@@ -2748,18 +2854,19 @@ priv_fdir_filter_convert(struct priv *priv,
 /**
  * Add new flow director filter and store it in list.
  *
- * @param priv
- *   Private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param fdir_filter
  *   Flow director filter to add.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_fdir_filter_add(struct priv *priv,
+mlx5_fdir_filter_add(struct rte_eth_dev *dev,
                     const struct rte_eth_fdir_filter *fdir_filter)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_fdir attributes = {
                .attr.group = 0,
                .l2_mask = {
@@ -2775,41 +2882,40 @@ priv_fdir_filter_add(struct priv *priv,
        struct rte_flow *flow;
        int ret;
 
-       ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
+       ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
        if (ret)
-               return -ret;
-       ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
+               return ret;
+       ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
                                attributes.actions, &error, &parser);
        if (ret)
-               return -ret;
-       flow = priv_flow_create(priv,
-                               &priv->flows,
-                               &attributes.attr,
-                               attributes.items,
-                               attributes.actions,
-                               &error);
+               return ret;
+       flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
+                                    attributes.items, attributes.actions,
+                                    &error);
        if (flow) {
-               DEBUG("FDIR created %p", (void *)flow);
+               DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
+                       (void *)flow);
                return 0;
        }
-       return ENOTSUP;
+       return -rte_errno;
 }
 
 /**
  * Delete specific filter.
  *
- * @param priv
- *   Private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param fdir_filter
  *   Filter to be deleted.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_fdir_filter_delete(struct priv *priv,
+mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
                        const struct rte_eth_fdir_filter *fdir_filter)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_fdir attributes = {
                .attr.group = 0,
        };
@@ -2822,10 +2928,10 @@ priv_fdir_filter_delete(struct priv *priv,
        unsigned int i;
        int ret;
 
-       ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
+       ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
        if (ret)
-               return -ret;
-       ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
+               return ret;
+       ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
                                attributes.actions, &error, &parser);
        if (ret)
                goto exit;
@@ -2853,11 +2959,14 @@ priv_fdir_filter_delete(struct priv *priv,
                struct ibv_spec_header *flow_h;
                void *flow_spec;
                unsigned int specs_n;
+               unsigned int queue_id = parser.drop ? HASH_RXQ_ETH :
+                                                     parser.layer;
 
-               attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
-               flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
+               attr = parser.queue[queue_id].ibv_attr;
+               flow_attr = flow->frxq[queue_id].ibv_attr;
                /* Compare first the attributes. */
-               if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
+               if (!flow_attr ||
+                   memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
                        continue;
                if (attr->num_of_specs == 0)
                        continue;
@@ -2882,67 +2991,70 @@ wrong_flow:
                /* The flow does not match. */
                continue;
        }
+       ret = rte_errno; /* Save rte_errno before cleanup. */
        if (flow)
-               priv_flow_destroy(priv, &priv->flows, flow);
+               mlx5_flow_list_destroy(dev, &priv->flows, flow);
 exit:
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (parser.queue[i].ibv_attr)
                        rte_free(parser.queue[i].ibv_attr);
        }
-       return -ret;
+       rte_errno = ret; /* Restore rte_errno. */
+       return -rte_errno;
 }
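
The matching loop above walks both attribute blocks specification by specification; every verbs spec begins with a struct ibv_spec_header carrying its own size, which is what makes the walk possible. A standalone sketch of that comparison, assuming both attributes carry the same number of specs:

    #include <stdint.h>
    #include <string.h>
    #include <infiniband/verbs.h>

    /* Return 0 when every specification of "a" matches "b". */
    static int
    compare_flow_specs(struct ibv_flow_attr *a, struct ibv_flow_attr *b)
    {
            void *spec_a = (uint8_t *)a + sizeof(*a);
            void *spec_b = (uint8_t *)b + sizeof(*b);
            unsigned int i;

            for (i = 0; i != a->num_of_specs; ++i) {
                    struct ibv_spec_header *hdr = spec_a;

                    if (memcmp(spec_a, spec_b, hdr->size))
                            return 1;
                    spec_a = (uint8_t *)spec_a + hdr->size;
                    spec_b = (uint8_t *)spec_b + hdr->size;
            }
            return 0;
    }
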
 
 /**
  * Update queue for specific filter.
  *
- * @param priv
- *   Private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param fdir_filter
  *   Filter to be updated.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_fdir_filter_update(struct priv *priv,
+mlx5_fdir_filter_update(struct rte_eth_dev *dev,
                        const struct rte_eth_fdir_filter *fdir_filter)
 {
        int ret;
 
-       ret = priv_fdir_filter_delete(priv, fdir_filter);
+       ret = mlx5_fdir_filter_delete(dev, fdir_filter);
        if (ret)
                return ret;
-       ret = priv_fdir_filter_add(priv, fdir_filter);
-       return ret;
+       return mlx5_fdir_filter_add(dev, fdir_filter);
 }
 
 /**
  * Flush all filters.
  *
- * @param priv
- *   Private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  */
 static void
-priv_fdir_filter_flush(struct priv *priv)
+mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
 {
-       priv_flow_flush(priv, &priv->flows);
+       struct priv *priv = dev->data->dev_private;
+
+       mlx5_flow_list_flush(dev, &priv->flows);
 }
 
 /**
  * Get flow director information.
  *
- * @param priv
- *   Private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param[out] fdir_info
  *   Resulting flow director information.
  */
 static void
-priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
+mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
 {
        struct rte_eth_fdir_masks *mask =
-               &priv->dev->data->dev_conf.fdir_conf.mask;
+               &dev->data->dev_conf.fdir_conf.mask;
 
-       fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
+       fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
        fdir_info->guarant_spc = 0;
        rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
        fdir_info->max_flexpayload = 0;
@@ -2956,54 +3068,52 @@ priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
 /**
  * Deal with flow director operations.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param filter_op
  *   Operation to perform.
  * @param arg
  *   Pointer to operation-specific structure.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
+mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
+                   void *arg)
 {
        enum rte_fdir_mode fdir_mode =
-               priv->dev->data->dev_conf.fdir_conf.mode;
-       int ret = 0;
+               dev->data->dev_conf.fdir_conf.mode;
 
        if (filter_op == RTE_ETH_FILTER_NOP)
                return 0;
        if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
            fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
-               ERROR("%p: flow director mode %d not supported",
-                     (void *)priv, fdir_mode);
-               return EINVAL;
+               DRV_LOG(ERR, "port %u flow director mode %d not supported",
+                       dev->data->port_id, fdir_mode);
+               rte_errno = EINVAL;
+               return -rte_errno;
        }
        switch (filter_op) {
        case RTE_ETH_FILTER_ADD:
-               ret = priv_fdir_filter_add(priv, arg);
-               break;
+               return mlx5_fdir_filter_add(dev, arg);
        case RTE_ETH_FILTER_UPDATE:
-               ret = priv_fdir_filter_update(priv, arg);
-               break;
+               return mlx5_fdir_filter_update(dev, arg);
        case RTE_ETH_FILTER_DELETE:
-               ret = priv_fdir_filter_delete(priv, arg);
-               break;
+               return mlx5_fdir_filter_delete(dev, arg);
        case RTE_ETH_FILTER_FLUSH:
-               priv_fdir_filter_flush(priv);
+               mlx5_fdir_filter_flush(dev);
                break;
        case RTE_ETH_FILTER_INFO:
-               priv_fdir_info_get(priv, arg);
+               mlx5_fdir_info_get(dev, arg);
                break;
        default:
-               DEBUG("%p: unknown operation %u", (void *)priv,
-                     filter_op);
-               ret = EINVAL;
-               break;
+               DRV_LOG(DEBUG, "port %u unknown operation %u",
+                       dev->data->port_id, filter_op);
+               rte_errno = EINVAL;
+               return -rte_errno;
        }
-       return ret;
+       return 0;
 }
 
 /**
@@ -3019,7 +3129,7 @@ priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
  *   Pointer to operation-specific structure.
  *
  * @return
- *   0 on success, negative errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
@@ -3027,24 +3137,21 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
                     enum rte_filter_op filter_op,
                     void *arg)
 {
-       int ret = EINVAL;
-       struct priv *priv = dev->data->dev_private;
-
        switch (filter_type) {
        case RTE_ETH_FILTER_GENERIC:
-               if (filter_op != RTE_ETH_FILTER_GET)
-                       return -EINVAL;
+               if (filter_op != RTE_ETH_FILTER_GET) {
+                       rte_errno = EINVAL;
+                       return -rte_errno;
+               }
                *(const void **)arg = &mlx5_flow_ops;
                return 0;
        case RTE_ETH_FILTER_FDIR:
-               priv_lock(priv);
-               ret = priv_fdir_ctrl_func(priv, filter_op, arg);
-               priv_unlock(priv);
-               break;
+               return mlx5_fdir_ctrl_func(dev, filter_op, arg);
        default:
-               ERROR("%p: filter type (%d) not supported",
-                     (void *)dev, filter_type);
-               break;
+               DRV_LOG(ERR, "port %u filter type (%d) not supported",
+                       dev->data->port_id, filter_type);
+               rte_errno = ENOTSUP;
+               return -rte_errno;
        }
-       return -ret;
+       return 0;
 }
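
Both branches are reached through the legacy filtering API. A caller-side sketch of the two supported operations; the fdir_filter contents are assumed to be prepared elsewhere:

    #include <rte_ethdev.h>
    #include <rte_flow.h>

    static int
    use_filter_ctrl(uint16_t port_id, struct rte_eth_fdir_filter *fdir_filter)
    {
            const struct rte_flow_ops *ops;
            int ret;

            /* RTE_ETH_FILTER_GENERIC only answers RTE_ETH_FILTER_GET. */
            ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
                                          RTE_ETH_FILTER_GET, &ops);
            if (ret)
                    return ret;
            /* Flow director requests are translated into rte_flow rules. */
            return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
                                           RTE_ETH_FILTER_ADD, fdir_filter);
    }
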
index 9fb5ba5..9de3514 100644
 /**
  * Get MAC address by querying netdevice.
  *
- * @param[in] priv
- *   struct priv for the requested device.
+ * @param[in] dev
+ *   Pointer to Ethernet device.
  * @param[out] mac
  *   MAC address output buffer.
  *
  * @return
- *   0 on success, -1 on failure and errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-priv_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN])
+mlx5_get_mac(struct rte_eth_dev *dev, uint8_t (*mac)[ETHER_ADDR_LEN])
 {
        struct ifreq request;
+       int ret;
 
-       if (priv_ifreq(priv, SIOCGIFHWADDR, &request))
-               return -1;
+       ret = mlx5_ifreq(dev, SIOCGIFHWADDR, &request);
+       if (ret)
+               return ret;
        memcpy(mac, request.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
        return 0;
 }
@@ -95,8 +97,13 @@ mlx5_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
 {
        assert(index < MLX5_MAX_MAC_ADDRESSES);
        memset(&dev->data->mac_addrs[index], 0, sizeof(struct ether_addr));
-       if (!dev->data->promiscuous && !dev->data->all_multicast)
-               mlx5_traffic_restart(dev);
+       if (!dev->data->promiscuous) {
+               int ret = mlx5_traffic_restart(dev);
+
+               if (ret)
+                       DRV_LOG(ERR, "port %u cannot remove mac address: %s",
+                               dev->data->port_id, strerror(rte_errno));
+       }
 }
 
 /**
@@ -112,16 +119,14 @@ mlx5_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
  *   VMDq pool index to associate address with (ignored).
  *
  * @return
- *   0 on success.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac,
-                 uint32_t index, uint32_t vmdq)
+                 uint32_t index, uint32_t vmdq __rte_unused)
 {
        unsigned int i;
-       int ret = 0;
 
-       (void)vmdq;
        assert(index < MLX5_MAX_MAC_ADDRESSES);
        /* First, make sure this address isn't already configured. */
        for (i = 0; (i != MLX5_MAX_MAC_ADDRESSES); ++i) {
@@ -131,12 +136,13 @@ mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac,
                if (memcmp(&dev->data->mac_addrs[i], mac, sizeof(*mac)))
                        continue;
                /* Address already configured elsewhere, return with error. */
-               return EADDRINUSE;
+               rte_errno = EADDRINUSE;
+               return -rte_errno;
        }
        dev->data->mac_addrs[index] = *mac;
-       if (!dev->data->promiscuous && !dev->data->all_multicast)
-               mlx5_traffic_restart(dev);
-       return ret;
+       if (!dev->data->promiscuous)
+               return mlx5_traffic_restart(dev);
+       return 0;
 }
 
 /**
@@ -150,6 +156,13 @@ mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac,
 void
 mlx5_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
 {
-       DEBUG("%p: setting primary MAC address", (void *)dev);
-       mlx5_mac_addr_add(dev, mac_addr, 0, 0);
+       int ret;
+
+       DRV_LOG(DEBUG, "port %u setting primary MAC address",
+               dev->data->port_id);
+
+       ret = mlx5_mac_addr_add(dev, mac_addr, 0, 0);
+       if (ret)
+               DRV_LOG(ERR, "port %u cannot set mac address: %s",
+                       dev->data->port_id, strerror(rte_errno));
 }
index 2776dc7..a50c520 100644
@@ -55,15 +55,12 @@ struct mlx5_check_mempool_data {
 
 /* Called by mlx5_check_mempool() when iterating the memory chunks. */
 static void
-mlx5_check_mempool_cb(struct rte_mempool *mp,
+mlx5_check_mempool_cb(struct rte_mempool *mp __rte_unused,
                      void *opaque, struct rte_mempool_memhdr *memhdr,
-                     unsigned int mem_idx)
+                     unsigned int mem_idx __rte_unused)
 {
        struct mlx5_check_mempool_data *data = opaque;
 
-       (void)mp;
-       (void)mem_idx;
-
        /* It already failed, skip the next chunks. */
        if (data->ret != 0)
                return;
@@ -98,8 +95,9 @@ mlx5_check_mempool_cb(struct rte_mempool *mp,
  * @return
  *   0 on success (mempool is virtually contiguous), -1 on error.
  */
-static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
-       uintptr_t *end)
+static int
+mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
+                  uintptr_t *end)
 {
        struct mlx5_check_mempool_data data;
 
@@ -107,7 +105,6 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
        rte_mempool_mem_iter(mp, mlx5_check_mempool_cb, &data);
        *start = (uintptr_t)data.start;
        *end = (uintptr_t)data.end;
-
        return data.ret;
 }
 
@@ -115,10 +112,6 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
  * Register a Memory Region (MR) <-> Memory Pool (MP) association in
  * txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
  *
- * This function should only be called by txq_mp2mr().
- *
- * @param priv
- *   Pointer to private structure.
  * @param txq
  *   Pointer to TX queue structure.
  * @param[in] mp
@@ -127,71 +120,63 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
  *   Index of the next available entry.
  *
  * @return
- *   mr on success, NULL on failure.
+ *   mr on success, NULL on failure and rte_errno is set.
  */
-struct mlx5_mr*
-priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *txq,
-                  struct rte_mempool *mp, unsigned int idx)
+struct mlx5_mr *
+mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
+                  unsigned int idx)
 {
        struct mlx5_txq_ctrl *txq_ctrl =
                container_of(txq, struct mlx5_txq_ctrl, txq);
+       struct rte_eth_dev *dev;
        struct mlx5_mr *mr;
 
+       rte_spinlock_lock(&txq_ctrl->priv->mr_lock);
        /* Add a new entry, register MR first. */
-       DEBUG("%p: discovered new memory pool \"%s\" (%p)",
-             (void *)txq_ctrl, mp->name, (void *)mp);
-       mr = priv_mr_get(priv, mp);
-       if (mr == NULL)
-               mr = priv_mr_new(priv, mp);
+       DRV_LOG(DEBUG, "port %u discovered new memory pool \"%s\" (%p)",
+               PORT_ID(txq_ctrl->priv), mp->name, (void *)mp);
+       dev = ETH_DEV(txq_ctrl->priv);
+       mr = mlx5_mr_get(dev, mp);
+       if (mr == NULL) {
+               if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+                       DRV_LOG(DEBUG,
+                               "port %u using unregistered mempool 0x%p(%s)"
+                               " in secondary process, please create mempool"
+                               " before rte_eth_dev_start()",
+                               PORT_ID(txq_ctrl->priv), (void *)mp, mp->name);
+                       rte_spinlock_unlock(&txq_ctrl->priv->mr_lock);
+                       rte_errno = ENOTSUP;
+                       return NULL;
+               }
+               mr = mlx5_mr_new(dev, mp);
+       }
        if (unlikely(mr == NULL)) {
-               DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
-                     (void *)txq_ctrl);
+               DRV_LOG(DEBUG,
+                       "port %u unable to configure memory region,"
+                       " ibv_reg_mr() failed.",
+                       PORT_ID(txq_ctrl->priv));
+               rte_spinlock_unlock(&txq_ctrl->priv->mr_lock);
                return NULL;
        }
        if (unlikely(idx == RTE_DIM(txq->mp2mr))) {
                /* Table is full, remove oldest entry. */
-               DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
-                     (void *)txq_ctrl);
+               DRV_LOG(DEBUG,
+                       "port %u memory region <-> memory pool table full, "
+                       " dropping oldest entry",
+                       PORT_ID(txq_ctrl->priv));
                --idx;
-               priv_mr_release(priv, txq->mp2mr[0]);
+               mlx5_mr_release(txq->mp2mr[0]);
                memmove(&txq->mp2mr[0], &txq->mp2mr[1],
                        (sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
        }
        /* Store the new entry. */
        txq_ctrl->txq.mp2mr[idx] = mr;
-       DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
-             (void *)txq_ctrl, mp->name, (void *)mp,
-             txq_ctrl->txq.mp2mr[idx]->lkey);
-       return mr;
-}
-
-/**
- * Register a Memory Region (MR) <-> Memory Pool (MP) association in
- * txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
- *
- * This function should only be called by txq_mp2mr().
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param[in] mp
- *   Memory Pool for which a Memory Region lkey must be returned.
- * @param idx
- *   Index of the next available entry.
- *
- * @return
- *   mr on success, NULL on failure.
- */
-struct mlx5_mr*
-mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
-                  unsigned int idx)
-{
-       struct mlx5_txq_ctrl *txq_ctrl =
-               container_of(txq, struct mlx5_txq_ctrl, txq);
-       struct mlx5_mr *mr;
-
-       priv_lock(txq_ctrl->priv);
-       mr = priv_txq_mp2mr_reg(txq_ctrl->priv, txq, mp, idx);
-       priv_unlock(txq_ctrl->priv);
+       DRV_LOG(DEBUG,
+               "port %u new memory region lkey for MP \"%s\" (%p): 0x%08"
+               PRIx32,
+               PORT_ID(txq_ctrl->priv), mp->name, (void *)mp,
+               txq_ctrl->txq.mp2mr[idx]->lkey);
+       rte_spinlock_unlock(&txq_ctrl->priv->mr_lock);
        return mr;
 }
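
This function is only the registration slow path; the datapath first scans the small per-queue mp2mr[] table by address (the real helper is inlined in the Tx path). A simplified sketch of that lookup, under the driver structures visible above:

    #include <rte_common.h>

    /* Return the lkey covering "addr", registering "mp" on a miss. */
    static uint32_t
    lookup_lkey(struct mlx5_txq_data *txq, struct rte_mempool *mp,
                uintptr_t addr)
    {
            struct mlx5_mr *mr;
            unsigned int i;

            for (i = 0; i != RTE_DIM(txq->mp2mr); ++i) {
                    if (txq->mp2mr[i] == NULL)
                            break; /* Unused entries are at the end. */
                    if (txq->mp2mr[i]->start <= addr &&
                        txq->mp2mr[i]->end > addr)
                            return txq->mp2mr[i]->lkey; /* Table hit. */
            }
            /* Miss: take the registration slow path above. */
            mr = mlx5_txq_mp2mr_reg(txq, mp, i);
            return mr ? mr->lkey : (uint32_t)-1;
    }
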
 
@@ -250,28 +235,33 @@ mlx5_mp2mr_iter(struct rte_mempool *mp, void *arg)
        if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
                        data.ret == -1)
                return;
-       mr = priv_mr_get(priv, mp);
+       mr = mlx5_mr_get(ETH_DEV(priv), mp);
        if (mr) {
-               priv_mr_release(priv, mr);
+               mlx5_mr_release(mr);
                return;
        }
-       priv_mr_new(priv, mp);
+       mr = mlx5_mr_new(ETH_DEV(priv), mp);
+       if (!mr)
+               DRV_LOG(ERR, "port %u cannot create memory region: %s",
+                       PORT_ID(priv), strerror(rte_errno));
 }
 
 /**
  * Register a new memory region from the mempool and store it in the memory
  * region list.
  *
- * @param  priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param mp
  *   Pointer to the memory pool to register.
+ *
  * @return
- *   The memory region on success.
+ *   The memory region on success, NULL on failure and rte_errno is set.
  */
-struct mlx5_mr*
-priv_mr_new(struct priv *priv, struct rte_mempool *mp)
+struct mlx5_mr *
+mlx5_mr_new(struct rte_eth_dev *dev, struct rte_mempool *mp)
 {
+       struct priv *priv = dev->data->dev_private;
        const struct rte_memseg *ms = rte_eal_get_physmem_layout();
        uintptr_t start;
        uintptr_t end;
@@ -280,17 +270,22 @@ priv_mr_new(struct priv *priv, struct rte_mempool *mp)
 
        mr = rte_zmalloc_socket(__func__, sizeof(*mr), 0, mp->socket_id);
        if (!mr) {
-               DEBUG("unable to configure MR, ibv_reg_mr() failed.");
+               DRV_LOG(DEBUG,
+                       "port %u unable to configure memory region,"
+                       " ibv_reg_mr() failed.",
+                       dev->data->port_id);
+               rte_errno = ENOMEM;
                return NULL;
        }
        if (mlx5_check_mempool(mp, &start, &end) != 0) {
-               ERROR("mempool %p: not virtually contiguous",
-                     (void *)mp);
+               DRV_LOG(ERR, "port %u mempool %p: not virtually contiguous",
+                       dev->data->port_id, (void *)mp);
+               rte_free(mr);
+               rte_errno = ENOMEM;
                return NULL;
        }
-       DEBUG("mempool %p area start=%p end=%p size=%zu",
-             (void *)mp, (void *)start, (void *)end,
-             (size_t)(end - start));
+       DRV_LOG(DEBUG, "port %u mempool %p area start=%p end=%p size=%zu",
+               dev->data->port_id, (void *)mp, (void *)start, (void *)end,
+               (size_t)(end - start));
        /* Save original addresses for exact MR lookup. */
        mr->start = start;
        mr->end = end;
@@ -305,16 +300,22 @@ priv_mr_new(struct priv *priv, struct rte_mempool *mp)
                if ((end > addr) && (end < addr + len))
                        end = RTE_ALIGN_CEIL(end, align);
        }
-       DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
-             (void *)mp, (void *)start, (void *)end,
-             (size_t)(end - start));
+       DRV_LOG(DEBUG,
+               "port %u mempool %p using start=%p end=%p size=%zu for memory"
+               " region",
+               dev->data->port_id, (void *)mp, (void *)start, (void *)end,
+               (size_t)(end - start));
        mr->mr = ibv_reg_mr(priv->pd, (void *)start, end - start,
                            IBV_ACCESS_LOCAL_WRITE);
+       if (!mr->mr) {
+               rte_free(mr);
+               rte_errno = ENOMEM;
+               return NULL;
+       }
        mr->mp = mp;
        mr->lkey = rte_cpu_to_be_32(mr->mr->lkey);
        rte_atomic32_inc(&mr->refcnt);
-       DEBUG("%p: new Memory Region %p refcnt: %d", (void *)priv,
-             (void *)mr, rte_atomic32_read(&mr->refcnt));
+       DRV_LOG(DEBUG, "port %u new memory Region %p refcnt: %d",
+               dev->data->port_id, (void *)mr, rte_atomic32_read(&mr->refcnt));
        LIST_INSERT_HEAD(&priv->mr, mr, next);
        return mr;
 }
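
The RTE_ALIGN_CEIL above (paired with a floor adjustment on the start in context not shown here) widens the registered range to whole memory-segment-aligned pages. A worked example of the arithmetic, assuming a 4 KiB alignment:

    #include <rte_common.h>

    /* With align = 4096:
     *   RTE_ALIGN_FLOOR(0x101230, 4096) == 0x101000
     *   RTE_ALIGN_CEIL(0x104567, 4096)  == 0x105000
     * so the MR spans complete pages around the mempool area. */
    uintptr_t mr_start = RTE_ALIGN_FLOOR((uintptr_t)0x101230, 4096);
    uintptr_t mr_end = RTE_ALIGN_CEIL((uintptr_t)0x104567, 4096);
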
@@ -322,16 +323,18 @@ priv_mr_new(struct priv *priv, struct rte_mempool *mp)
 /**
  * Search the memory region object in the memory region list.
  *
- * @param  priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param mp
  *   Pointer to the memory pool to register.
+ *
  * @return
  *   The memory region on success, NULL if not found.
  */
-struct mlx5_mr*
-priv_mr_get(struct priv *priv, struct rte_mempool *mp)
+struct mlx5_mr *
+mlx5_mr_get(struct rte_eth_dev *dev, struct rte_mempool *mp)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_mr *mr;
 
        assert(mp);
@@ -340,8 +343,9 @@ priv_mr_get(struct priv *priv, struct rte_mempool *mp)
        LIST_FOREACH(mr, &priv->mr, next) {
                if (mr->mp == mp) {
                        rte_atomic32_inc(&mr->refcnt);
-                       DEBUG("Memory Region %p refcnt: %d",
-                             (void *)mr, rte_atomic32_read(&mr->refcnt));
+                       DRV_LOG(DEBUG, "port %u memory region %p refcnt: %d",
+                               dev->data->port_id, (void *)mr,
+                               rte_atomic32_read(&mr->refcnt));
                        return mr;
                }
        }
@@ -355,41 +359,42 @@ priv_mr_get(struct priv *priv, struct rte_mempool *mp)
  *   Pointer to memory region to release.
  *
  * @return
- *   0 on success, errno on failure.
+ *   1 while a reference on it exists, 0 when freed.
  */
 int
-priv_mr_release(struct priv *priv, struct mlx5_mr *mr)
+mlx5_mr_release(struct mlx5_mr *mr)
 {
-       (void)priv;
        assert(mr);
-       DEBUG("Memory Region %p refcnt: %d",
-             (void *)mr, rte_atomic32_read(&mr->refcnt));
+       DRV_LOG(DEBUG, "memory region %p refcnt: %d", (void *)mr,
+               rte_atomic32_read(&mr->refcnt));
        if (rte_atomic32_dec_and_test(&mr->refcnt)) {
                claim_zero(ibv_dereg_mr(mr->mr));
                LIST_REMOVE(mr, next);
                rte_free(mr);
                return 0;
        }
-       return EBUSY;
+       return 1;
 }
 
 /**
  * Verify the memory region list is empty.
  *
- * @param priv
- *  Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  *
- * @return the number of object not released.
+ * @return
+ *   The number of objects not released.
  */
 int
-priv_mr_verify(struct priv *priv)
+mlx5_mr_verify(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        int ret = 0;
        struct mlx5_mr *mr;
 
        LIST_FOREACH(mr, &priv->mr, next) {
-               DEBUG("%p: mr %p still referenced", (void *)priv,
-                     (void *)mr);
+               DRV_LOG(DEBUG, "port %u memory region %p still referenced",
+                       dev->data->port_id, (void *)mr);
                ++ret;
        }
        return ret;
index f47bda6..029e0ec 100644
  *   RSS configuration data.
  *
  * @return
- *   0 on success, negative errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_rss_hash_update(struct rte_eth_dev *dev,
                     struct rte_eth_rss_conf *rss_conf)
 {
        struct priv *priv = dev->data->dev_private;
-       int ret = 0;
+       unsigned int i;
+       unsigned int idx;
 
-       priv_lock(priv);
        if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) {
-               ret = -EINVAL;
-               goto out;
+               rte_errno = EINVAL;
+               return -rte_errno;
        }
        if (rss_conf->rss_key && rss_conf->rss_key_len) {
+               if (rss_conf->rss_key_len != rss_hash_default_key_len) {
+                       DRV_LOG(ERR,
+                               "port %u RSS key len must be %zu Bytes long",
+                               dev->data->port_id, rss_hash_default_key_len);
+                       rte_errno = EINVAL;
+                       return -rte_errno;
+               }
                priv->rss_conf.rss_key = rte_realloc(priv->rss_conf.rss_key,
                                                     rss_conf->rss_key_len, 0);
                if (!priv->rss_conf.rss_key) {
-                       ret = -ENOMEM;
-                       goto out;
+                       rte_errno = ENOMEM;
+                       return -rte_errno;
                }
                memcpy(priv->rss_conf.rss_key, rss_conf->rss_key,
                       rss_conf->rss_key_len);
                priv->rss_conf.rss_key_len = rss_conf->rss_key_len;
        }
        priv->rss_conf.rss_hf = rss_conf->rss_hf;
-out:
-       priv_unlock(priv);
-       return ret;
+       /* Enable the RSS hash in all Rx queues. */
+       for (i = 0, idx = 0; idx != priv->rxqs_n; ++i) {
+               if (!(*priv->rxqs)[i])
+                       continue;
+               (*priv->rxqs)[i]->rss_hash = !!rss_conf->rss_hf &&
+                       !!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS);
+               ++idx;
+       }
+       return 0;
 }
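
With the new length check, callers must supply a key exactly rss_hash_default_key_len bytes long (40 bytes for this driver). A caller-side sketch:

    #include <rte_ethdev.h>

    static int
    update_rss_key(uint16_t port_id)
    {
            /* Key must match the driver's default length, 40 bytes here. */
            static uint8_t key[40] = { 0 };
            struct rte_eth_rss_conf conf = {
                    .rss_key = key,
                    .rss_key_len = sizeof(key),
                    .rss_hf = ETH_RSS_IP | ETH_RSS_UDP,
            };

            return rte_eth_dev_rss_hash_update(port_id, &conf);
    }
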
 
 /**
@@ -103,7 +116,7 @@ out:
  *   RSS configuration data.
  *
  * @return
- *   0 on success, negative errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_rss_hash_conf_get(struct rte_eth_dev *dev,
@@ -111,9 +124,10 @@ mlx5_rss_hash_conf_get(struct rte_eth_dev *dev,
 {
        struct priv *priv = dev->data->dev_private;
 
-       if (!rss_conf)
-               return -EINVAL;
-       priv_lock(priv);
+       if (!rss_conf) {
+               rte_errno = EINVAL;
+               return -rte_errno;
+       }
        if (rss_conf->rss_key &&
            (rss_conf->rss_key_len >= priv->rss_conf.rss_key_len)) {
                memcpy(rss_conf->rss_key, priv->rss_conf.rss_key,
@@ -121,24 +135,24 @@ mlx5_rss_hash_conf_get(struct rte_eth_dev *dev,
        }
        rss_conf->rss_key_len = priv->rss_conf.rss_key_len;
        rss_conf->rss_hf = priv->rss_conf.rss_hf;
-       priv_unlock(priv);
        return 0;
 }
 
 /**
  * Allocate/reallocate RETA index table.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param reta_size
  *   The size of the array to allocate.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-priv_rss_reta_index_resize(struct priv *priv, unsigned int reta_size)
+mlx5_rss_reta_index_resize(struct rte_eth_dev *dev, unsigned int reta_size)
 {
+       struct priv *priv = dev->data->dev_private;
        void *mem;
        unsigned int old_size = priv->reta_idx_n;
 
@@ -147,11 +161,12 @@ priv_rss_reta_index_resize(struct priv *priv, unsigned int reta_size)
 
        mem = rte_realloc(priv->reta_idx,
                          reta_size * sizeof((*priv->reta_idx)[0]), 0);
-       if (!mem)
-               return ENOMEM;
+       if (!mem) {
+               rte_errno = ENOMEM;
+               return -rte_errno;
+       }
        priv->reta_idx = mem;
        priv->reta_idx_n = reta_size;
-
        if (old_size < reta_size)
                memset(&(*priv->reta_idx)[old_size], 0,
                       (reta_size - old_size) *
@@ -160,28 +175,31 @@ priv_rss_reta_index_resize(struct priv *priv, unsigned int reta_size)
 }
 
 /**
- * Query RETA table.
+ * DPDK callback to get the RETA indirection table.
  *
- * @param priv
- *   Pointer to private structure.
- * @param[in, out] reta_conf
- *   Pointer to the first RETA configuration structure.
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param reta_conf
+ *   Pointer to RETA configuration structure array.
  * @param reta_size
- *   Number of entries.
+ *   Size of the RETA table.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-priv_dev_rss_reta_query(struct priv *priv,
+int
+mlx5_dev_rss_reta_query(struct rte_eth_dev *dev,
                        struct rte_eth_rss_reta_entry64 *reta_conf,
-                       unsigned int reta_size)
+                       uint16_t reta_size)
 {
+       struct priv *priv = dev->data->dev_private;
        unsigned int idx;
        unsigned int i;
 
-       if (!reta_size || reta_size > priv->reta_idx_n)
-               return EINVAL;
+       if (!reta_size || reta_size > priv->reta_idx_n) {
+               rte_errno = EINVAL;
+               return -rte_errno;
+       }
        /* Fill each entry of the table even if its bit is not set. */
        for (idx = 0, i = 0; (i != reta_size); ++i) {
                idx = i / RTE_RETA_GROUP_SIZE;
@@ -192,34 +210,36 @@ priv_dev_rss_reta_query(struct priv *priv,
 }
 
 /**
- * Update RETA table.
+ * DPDK callback to update the RETA indirection table.
  *
- * @param priv
- *   Pointer to private structure.
- * @param[in] reta_conf
- *   Pointer to the first RETA configuration structure.
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param reta_conf
+ *   Pointer to RETA configuration structure array.
  * @param reta_size
- *   Number of entries.
+ *   Size of the RETA table.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-priv_dev_rss_reta_update(struct priv *priv,
+int
+mlx5_dev_rss_reta_update(struct rte_eth_dev *dev,
                         struct rte_eth_rss_reta_entry64 *reta_conf,
-                        unsigned int reta_size)
+                        uint16_t reta_size)
 {
+       int ret;
+       struct priv *priv = dev->data->dev_private;
        unsigned int idx;
        unsigned int i;
        unsigned int pos;
-       int ret;
 
-       if (!reta_size)
-               return EINVAL;
-       ret = priv_rss_reta_index_resize(priv, reta_size);
+       if (!reta_size) {
+               rte_errno = EINVAL;
+               return -rte_errno;
+       }
+       ret = mlx5_rss_reta_index_resize(dev, reta_size);
        if (ret)
                return ret;
-
        for (idx = 0, i = 0; (i != reta_size); ++i) {
                idx = i / RTE_RETA_GROUP_SIZE;
                pos = i % RTE_RETA_GROUP_SIZE;
@@ -228,63 +248,9 @@ priv_dev_rss_reta_update(struct priv *priv,
                assert(reta_conf[idx].reta[pos] < priv->rxqs_n);
                (*priv->reta_idx)[i] = reta_conf[idx].reta[pos];
        }
-       return 0;
-}
-
-/**
- * DPDK callback to get the RETA indirection table.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param reta_conf
- *   Pointer to RETA configuration structure array.
- * @param reta_size
- *   Size of the RETA table.
- *
- * @return
- *   0 on success, negative errno value on failure.
- */
-int
-mlx5_dev_rss_reta_query(struct rte_eth_dev *dev,
-                       struct rte_eth_rss_reta_entry64 *reta_conf,
-                       uint16_t reta_size)
-{
-       int ret;
-       struct priv *priv = dev->data->dev_private;
-
-       priv_lock(priv);
-       ret = priv_dev_rss_reta_query(priv, reta_conf, reta_size);
-       priv_unlock(priv);
-       return -ret;
-}
-
-/**
- * DPDK callback to update the RETA indirection table.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param reta_conf
- *   Pointer to RETA configuration structure array.
- * @param reta_size
- *   Size of the RETA table.
- *
- * @return
- *   0 on success, negative errno value on failure.
- */
-int
-mlx5_dev_rss_reta_update(struct rte_eth_dev *dev,
-                        struct rte_eth_rss_reta_entry64 *reta_conf,
-                        uint16_t reta_size)
-{
-       int ret;
-       struct priv *priv = dev->data->dev_private;
-
-       priv_lock(priv);
-       ret = priv_dev_rss_reta_update(priv, reta_conf, reta_size);
-       priv_unlock(priv);
        if (dev->data->dev_started) {
                mlx5_dev_stop(dev);
-               mlx5_dev_start(dev);
+               return mlx5_dev_start(dev);
        }
-       return -ret;
+       return 0;
 }
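
The RSS hunks above also illustrate the error-handling convention this
rework converges on: a failing function stores the cause in rte_errno and
returns its negated value, so callers can propagate the code without
re-deriving it. A minimal sketch of the pattern, with an invented helper
name and parameters:

    #include <errno.h>
    #include <rte_errno.h>

    /* Invented helper following the convention above: set rte_errno on
     * failure and return its negated value. */
    static int
    example_reta_check(unsigned int reta_size, unsigned int reta_idx_n)
    {
            if (!reta_size || reta_size > reta_idx_n) {
                    rte_errno = EINVAL;
                    return -rte_errno;
            }
            return 0;
    }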
index 6fb245b..23eae7c 100644 (file)
 void
 mlx5_promiscuous_enable(struct rte_eth_dev *dev)
 {
+       int ret;
+
        dev->data->promiscuous = 1;
-       mlx5_traffic_restart(dev);
+       ret = mlx5_traffic_restart(dev);
+       if (ret)
+               DRV_LOG(ERR, "port %u cannot enable promiscuous mode: %s",
+                       dev->data->port_id, strerror(rte_errno));
 }
 
 /**
@@ -73,8 +78,13 @@ mlx5_promiscuous_enable(struct rte_eth_dev *dev)
 void
 mlx5_promiscuous_disable(struct rte_eth_dev *dev)
 {
+       int ret;
+
        dev->data->promiscuous = 0;
-       mlx5_traffic_restart(dev);
+       ret = mlx5_traffic_restart(dev);
+       if (ret)
+               DRV_LOG(ERR, "port %u cannot disable promiscuous mode: %s",
+                       dev->data->port_id, strerror(rte_errno));
 }
 
 /**
@@ -86,8 +96,13 @@ mlx5_promiscuous_disable(struct rte_eth_dev *dev)
 void
 mlx5_allmulticast_enable(struct rte_eth_dev *dev)
 {
+       int ret;
+
        dev->data->all_multicast = 1;
-       mlx5_traffic_restart(dev);
+       ret = mlx5_traffic_restart(dev);
+       if (ret)
+                       DRV_LOG(ERR, "port %u cannot enable allmulticast mode: %s",
+                       dev->data->port_id, strerror(rte_errno));
 }
 
 /**
@@ -99,6 +114,11 @@ mlx5_allmulticast_enable(struct rte_eth_dev *dev)
 void
 mlx5_allmulticast_disable(struct rte_eth_dev *dev)
 {
+       int ret;
+
        dev->data->all_multicast = 0;
-       mlx5_traffic_restart(dev);
+       ret = mlx5_traffic_restart(dev);
+       if (ret)
+                       DRV_LOG(ERR, "port %u cannot disable allmulticast mode: %s",
+                       dev->data->port_id, strerror(rte_errno));
 }
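
Since the promiscuous/allmulticast callbacks return void in this release,
the driver can only log a failed mlx5_traffic_restart(). From the
application side the effective state has to be read back; a usage sketch,
assuming a configured port:

    #include <stdio.h>
    #include <rte_ethdev.h>

    /* The enable call cannot report failure, so check the state after. */
    static void
    enable_promisc(uint16_t port_id)
    {
            rte_eth_promiscuous_enable(port_id);
            if (rte_eth_promiscuous_get(port_id) != 1)
                    printf("port %u: promiscuous mode not enabled\n",
                           port_id);
    }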
index 20f3ec6..dcc5a87 100644 (file)
@@ -88,7 +88,7 @@ const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
  *   Pointer to RX queue structure.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
@@ -96,7 +96,7 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
        const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
        unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
        unsigned int i;
-       int ret = 0;
+       int err;
 
        /* Iterate on segments. */
        for (i = 0; (i != elts_n); ++i) {
@@ -104,8 +104,9 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
 
                buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
                if (buf == NULL) {
-                       ERROR("%p: empty mbuf pool", (void *)rxq_ctrl);
-                       ret = ENOMEM;
+                       DRV_LOG(ERR, "port %u empty mbuf pool",
+                               PORT_ID(rxq_ctrl->priv));
+                       rte_errno = ENOMEM;
                        goto error;
                }
                /* Headroom is reserved by rte_pktmbuf_alloc(). */
@@ -124,7 +125,7 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
                (*rxq_ctrl->rxq.elts)[i] = buf;
        }
        /* If Rx vector is activated. */
-       if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
+       if (mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
                struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
                struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
                int j;
@@ -145,20 +146,24 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
                for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
                        (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
        }
-       DEBUG("%p: allocated and configured %u segments (max %u packets)",
-             (void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n));
-       assert(ret == 0);
+       DRV_LOG(DEBUG,
+               "port %u Rx queue %u allocated and configured %u segments"
+               " (max %u packets)",
+               PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx, elts_n,
+               elts_n / (1 << rxq_ctrl->rxq.sges_n));
        return 0;
 error:
+       err = rte_errno; /* Save rte_errno before cleanup. */
        elts_n = i;
        for (i = 0; (i != elts_n); ++i) {
                if ((*rxq_ctrl->rxq.elts)[i] != NULL)
                        rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
                (*rxq_ctrl->rxq.elts)[i] = NULL;
        }
-       DEBUG("%p: failed, freed everything", (void *)rxq_ctrl);
-       assert(ret > 0);
-       return ret;
+       DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
+               PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
+       rte_errno = err; /* Restore rte_errno. */
+       return -rte_errno;
 }
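
The error path above introduces a save/restore idiom used throughout this
series: cleanup code may itself clobber rte_errno, so the original cause
is stashed first and restored before returning. A self-contained sketch,
where cleanup() stands in for the mbuf-freeing loop:

    #include <errno.h>
    #include <rte_errno.h>

    static void
    cleanup(void)
    {
            rte_errno = 0; /* Stand-in for cleanup that clobbers rte_errno. */
    }

    static int
    fail_with_cause(void)
    {
            int err;

            rte_errno = ENOMEM;
            err = rte_errno; /* Save rte_errno before cleanup. */
            cleanup();
            rte_errno = err; /* Restore rte_errno. */
            return -rte_errno;
    }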
 
 /**
@@ -176,14 +181,15 @@ rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
        uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
        uint16_t i;
 
-       DEBUG("%p: freeing WRs", (void *)rxq_ctrl);
+       DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs",
+               PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
        if (rxq->elts == NULL)
                return;
        /*
         * Some mbufs in the ring belong to the application.  They cannot
         * be freed.
         */
-       if (rxq_check_vec_support(rxq) > 0) {
+       if (mlx5_rxq_check_vec_support(rxq) > 0) {
                for (i = 0; i < used; ++i)
                        (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
                rxq->rq_pi = rxq->rq_ci;
@@ -206,9 +212,10 @@ rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
 void
 mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
-       DEBUG("cleaning up %p", (void *)rxq_ctrl);
+       DRV_LOG(DEBUG, "port %u cleaning up Rx queue %u",
+               PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx);
        if (rxq_ctrl->ibv)
-               mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
+               mlx5_rxq_ibv_release(rxq_ctrl->ibv);
        memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
 }
 
@@ -228,55 +235,52 @@ mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
  *   Memory pool for buffer allocations.
  *
  * @return
- *   0 on success, negative errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
-                   unsigned int socket, const struct rte_eth_rxconf *conf,
+                   unsigned int socket,
+                   const struct rte_eth_rxconf *conf __rte_unused,
                    struct rte_mempool *mp)
 {
        struct priv *priv = dev->data->dev_private;
        struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
        struct mlx5_rxq_ctrl *rxq_ctrl =
                container_of(rxq, struct mlx5_rxq_ctrl, rxq);
-       int ret = 0;
 
-       (void)conf;
-       priv_lock(priv);
        if (!rte_is_power_of_2(desc)) {
                desc = 1 << log2above(desc);
-               WARN("%p: increased number of descriptors in RX queue %u"
-                    " to the next power of two (%d)",
-                    (void *)dev, idx, desc);
+               DRV_LOG(WARNING,
+                       "port %u increased number of descriptors in Rx queue %u"
+                       " to the next power of two (%d)",
+                       dev->data->port_id, idx, desc);
        }
-       DEBUG("%p: configuring queue %u for %u descriptors",
-             (void *)dev, idx, desc);
+       DRV_LOG(DEBUG, "port %u configuring Rx queue %u for %u descriptors",
+               dev->data->port_id, idx, desc);
        if (idx >= priv->rxqs_n) {
-               ERROR("%p: queue index out of range (%u >= %u)",
-                     (void *)dev, idx, priv->rxqs_n);
-               priv_unlock(priv);
-               return -EOVERFLOW;
+               DRV_LOG(ERR, "port %u Rx queue index out of range (%u >= %u)",
+                       dev->data->port_id, idx, priv->rxqs_n);
+               rte_errno = EOVERFLOW;
+               return -rte_errno;
        }
-       if (!mlx5_priv_rxq_releasable(priv, idx)) {
-               ret = EBUSY;
-               ERROR("%p: unable to release queue index %u",
-                     (void *)dev, idx);
-               goto out;
+       if (!mlx5_rxq_releasable(dev, idx)) {
+               DRV_LOG(ERR, "port %u unable to release queue index %u",
+                       dev->data->port_id, idx);
+               rte_errno = EBUSY;
+               return -rte_errno;
        }
-       mlx5_priv_rxq_release(priv, idx);
-       rxq_ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, mp);
+       mlx5_rxq_release(dev, idx);
+       rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, mp);
        if (!rxq_ctrl) {
-               ERROR("%p: unable to allocate queue index %u",
-                     (void *)dev, idx);
-               ret = ENOMEM;
-               goto out;
+               DRV_LOG(ERR, "port %u unable to allocate queue index %u",
+                       dev->data->port_id, idx);
+               rte_errno = ENOMEM;
+               return -rte_errno;
        }
-       DEBUG("%p: adding RX queue %p to list",
-             (void *)dev, (void *)rxq_ctrl);
+       DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
+               dev->data->port_id, idx);
        (*priv->rxqs)[idx] = &rxq_ctrl->rxq;
-out:
-       priv_unlock(priv);
-       return -ret;
+       return 0;
 }
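
mlx5_rx_queue_setup() silently rounds the descriptor count up to a power
of two via log2above(); an equivalent open-coded helper, for readers
without the mlx5 utilities at hand (valid for desc <= 32768):

    #include <stdint.h>

    /* Round up to the next power of two, matching the warning above. */
    static uint16_t
    roundup_pow2(uint16_t desc)
    {
            uint16_t n = 1;

            while (n < desc)
                    n <<= 1;
            return n;
    }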
 
 /**
@@ -296,45 +300,48 @@ mlx5_rx_queue_release(void *dpdk_rxq)
                return;
        rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
        priv = rxq_ctrl->priv;
-       priv_lock(priv);
-       if (!mlx5_priv_rxq_releasable(priv, rxq_ctrl->rxq.stats.idx))
-               rte_panic("Rx queue %p is still used by a flow and cannot be"
-                         " removed\n", (void *)rxq_ctrl);
-       mlx5_priv_rxq_release(priv, rxq_ctrl->rxq.stats.idx);
-       priv_unlock(priv);
+       if (!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx))
+               rte_panic("port %u Rx queue %u is still used by a flow and"
+                         " cannot be removed\n",
+                         PORT_ID(priv), rxq_ctrl->idx);
+       mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx);
 }
 
 /**
  * Allocate queue vector and fill epoll fd list for Rx interrupts.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  *
  * @return
- *   0 on success, negative on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-priv_rx_intr_vec_enable(struct priv *priv)
+mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        unsigned int i;
        unsigned int rxqs_n = priv->rxqs_n;
        unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
        unsigned int count = 0;
-       struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
+       struct rte_intr_handle *intr_handle = dev->intr_handle;
 
-       if (!priv->dev->data->dev_conf.intr_conf.rxq)
+       if (!dev->data->dev_conf.intr_conf.rxq)
                return 0;
-       priv_rx_intr_vec_disable(priv);
+       mlx5_rx_intr_vec_disable(dev);
        intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0]));
        if (intr_handle->intr_vec == NULL) {
-               ERROR("failed to allocate memory for interrupt vector,"
-                     " Rx interrupts will not be supported");
-               return -ENOMEM;
+               DRV_LOG(ERR,
+                       "port %u failed to allocate memory for interrupt"
+                       " vector, Rx interrupts will not be supported",
+                       dev->data->port_id);
+               rte_errno = ENOMEM;
+               return -rte_errno;
        }
        intr_handle->type = RTE_INTR_HANDLE_EXT;
        for (i = 0; i != n; ++i) {
                /* This rxq ibv must not be released in this function. */
-               struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, i);
+               struct mlx5_rxq_ibv *rxq_ibv = mlx5_rxq_ibv_get(dev, i);
                int fd;
                int flags;
                int rc;
@@ -348,27 +355,34 @@ priv_rx_intr_vec_enable(struct priv *priv)
                        continue;
                }
                if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
-                       ERROR("too many Rx queues for interrupt vector size"
-                             " (%d), Rx interrupts cannot be enabled",
-                             RTE_MAX_RXTX_INTR_VEC_ID);
-                       priv_rx_intr_vec_disable(priv);
-                       return -1;
+                       DRV_LOG(ERR,
+                               "port %u too many Rx queues for interrupt"
+                               " vector size (%d), Rx interrupts cannot be"
+                               " enabled",
+                               dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID);
+                       mlx5_rx_intr_vec_disable(dev);
+                       rte_errno = ENOMEM;
+                       return -rte_errno;
                }
                fd = rxq_ibv->channel->fd;
                flags = fcntl(fd, F_GETFL);
                rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
                if (rc < 0) {
-                       ERROR("failed to make Rx interrupt file descriptor"
-                             " %d non-blocking for queue index %d", fd, i);
-                       priv_rx_intr_vec_disable(priv);
-                       return -1;
+                       rte_errno = errno;
+                       DRV_LOG(ERR,
+                               "port %u failed to make Rx interrupt file"
+                               " descriptor %d non-blocking for queue index"
+                               " %d",
+                               dev->data->port_id, fd, i);
+                       mlx5_rx_intr_vec_disable(dev);
+                       return -rte_errno;
                }
                intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
                intr_handle->efds[count] = fd;
                count++;
        }
        if (!count)
-               priv_rx_intr_vec_disable(priv);
+               mlx5_rx_intr_vec_disable(dev);
        else
                intr_handle->nb_efd = count;
        return 0;
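
The enable path above puts each Verbs completion channel fd into
non-blocking mode before registering it for epoll. The fcntl() sequence,
isolated into a standalone helper that preserves errno as the cause:

    #include <errno.h>
    #include <fcntl.h>

    /* Make an event fd non-blocking, as done per Rx queue above. */
    static int
    set_fd_nonblock(int fd)
    {
            int flags = fcntl(fd, F_GETFL);

            if (flags < 0)
                    return -errno;
            if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0)
                    return -errno;
            return 0;
    }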
@@ -377,18 +391,19 @@ priv_rx_intr_vec_enable(struct priv *priv)
 /**
  * Clean up Rx interrupts handler.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  */
 void
-priv_rx_intr_vec_disable(struct priv *priv)
+mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev)
 {
-       struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
+       struct priv *priv = dev->data->dev_private;
+       struct rte_intr_handle *intr_handle = dev->intr_handle;
        unsigned int i;
        unsigned int rxqs_n = priv->rxqs_n;
        unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
 
-       if (!priv->dev->data->dev_conf.intr_conf.rxq)
+       if (!dev->data->dev_conf.intr_conf.rxq)
                return;
        if (!intr_handle->intr_vec)
                goto free;
@@ -405,7 +420,7 @@ priv_rx_intr_vec_disable(struct priv *priv)
                 */
                rxq_data = (*priv->rxqs)[i];
                rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
-               mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
+               mlx5_rxq_ibv_release(rxq_ctrl->ibv);
        }
 free:
        rte_intr_free_epoll_fd(intr_handle);
@@ -449,39 +464,33 @@ mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
  *   Rx queue number.
  *
  * @return
- *   0 on success, negative on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-       struct priv *priv = mlx5_get_priv(dev);
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_rxq_data *rxq_data;
        struct mlx5_rxq_ctrl *rxq_ctrl;
-       int ret = 0;
 
-       priv_lock(priv);
        rxq_data = (*priv->rxqs)[rx_queue_id];
        if (!rxq_data) {
-               ret = EINVAL;
-               goto exit;
+               rte_errno = EINVAL;
+               return -rte_errno;
        }
        rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
        if (rxq_ctrl->irq) {
                struct mlx5_rxq_ibv *rxq_ibv;
 
-               rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
+               rxq_ibv = mlx5_rxq_ibv_get(dev, rx_queue_id);
                if (!rxq_ibv) {
-                       ret = EINVAL;
-                       goto exit;
+                       rte_errno = EINVAL;
+                       return -rte_errno;
                }
                mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
-               mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
+               mlx5_rxq_ibv_release(rxq_ibv);
        }
-exit:
-       priv_unlock(priv);
-       if (ret)
-               WARN("unable to arm interrupt on rx queue %d", rx_queue_id);
-       return -ret;
+       return 0;
 }
 
 /**
@@ -493,64 +502,65 @@ exit:
  *   Rx queue number.
  *
  * @return
- *   0 on success, negative on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-       struct priv *priv = mlx5_get_priv(dev);
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_rxq_data *rxq_data;
        struct mlx5_rxq_ctrl *rxq_ctrl;
        struct mlx5_rxq_ibv *rxq_ibv = NULL;
        struct ibv_cq *ev_cq;
        void *ev_ctx;
-       int ret = 0;
+       int ret;
 
-       priv_lock(priv);
        rxq_data = (*priv->rxqs)[rx_queue_id];
        if (!rxq_data) {
-               ret = EINVAL;
-               goto exit;
+               rte_errno = EINVAL;
+               return -rte_errno;
        }
        rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
        if (!rxq_ctrl->irq)
-               goto exit;
-       rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
+               return 0;
+       rxq_ibv = mlx5_rxq_ibv_get(dev, rx_queue_id);
        if (!rxq_ibv) {
-               ret = EINVAL;
-               goto exit;
+               rte_errno = EINVAL;
+               return -rte_errno;
        }
        ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx);
        if (ret || ev_cq != rxq_ibv->cq) {
-               ret = EINVAL;
+               rte_errno = EINVAL;
                goto exit;
        }
        rxq_data->cq_arm_sn++;
        ibv_ack_cq_events(rxq_ibv->cq, 1);
+       return 0;
 exit:
+       ret = rte_errno; /* Save rte_errno before cleanup. */
        if (rxq_ibv)
-               mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
-       priv_unlock(priv);
-       if (ret)
-               WARN("unable to disable interrupt on rx queue %d",
-                    rx_queue_id);
-       return -ret;
+               mlx5_rxq_ibv_release(rxq_ibv);
+       DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d",
+               dev->data->port_id, rx_queue_id);
+       rte_errno = ret; /* Restore rte_errno. */
+       return -rte_errno;
 }
 
 /**
  * Create the Rx queue Verbs object.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param idx
  *   Queue index in DPDK Rx queue array
  *
  * @return
- *   The Verbs object initialised if it can be created.
+ *   The Verbs object initialised, NULL otherwise and rte_errno is set.
  */
-struct mlx5_rxq_ibv*
-mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
+struct mlx5_rxq_ibv *
+mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
        struct mlx5_rxq_ctrl *rxq_ctrl =
                container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
@@ -573,28 +583,34 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 
        assert(rxq_data);
        assert(!rxq_ctrl->ibv);
+       priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE;
+       priv->verbs_alloc_ctx.obj = rxq_ctrl;
        tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
                                 rxq_ctrl->socket);
        if (!tmpl) {
-               ERROR("%p: cannot allocate verbs resources",
-                      (void *)rxq_ctrl);
+               DRV_LOG(ERR,
+                       "port %u Rx queue %u cannot allocate verbs resources",
+                       dev->data->port_id, rxq_ctrl->idx);
+               rte_errno = ENOMEM;
                goto error;
        }
        tmpl->rxq_ctrl = rxq_ctrl;
        /* Use the entire RX mempool as the memory region. */
-       tmpl->mr = priv_mr_get(priv, rxq_data->mp);
+       tmpl->mr = mlx5_mr_get(dev, rxq_data->mp);
        if (!tmpl->mr) {
-               tmpl->mr = priv_mr_new(priv, rxq_data->mp);
+               tmpl->mr = mlx5_mr_new(dev, rxq_data->mp);
                if (!tmpl->mr) {
-                       ERROR("%p: MR creation failure", (void *)rxq_ctrl);
+                       DRV_LOG(ERR, "port %u: memory region creation failure",
+                               dev->data->port_id);
                        goto error;
                }
        }
        if (rxq_ctrl->irq) {
                tmpl->channel = ibv_create_comp_channel(priv->ctx);
                if (!tmpl->channel) {
-                       ERROR("%p: Comp Channel creation failure",
-                             (void *)rxq_ctrl);
+                       DRV_LOG(ERR, "port %u: comp channel creation failure",
+                               dev->data->port_id);
+                       rte_errno = ENOMEM;
                        goto error;
                }
        }
@@ -614,21 +630,26 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
                 * For vectorized Rx, it must not be doubled in order to
                 * make cq_ci and rq_ci aligned.
                 */
-               if (rxq_check_vec_support(rxq_data) < 0)
+               if (mlx5_rxq_check_vec_support(rxq_data) < 0)
                        attr.cq.ibv.cqe *= 2;
        } else if (priv->cqe_comp && rxq_data->hw_timestamp) {
-               DEBUG("Rx CQE compression is disabled for HW timestamp");
+               DRV_LOG(DEBUG,
+                       "port %u Rx CQE compression is disabled for HW"
+                       " timestamp",
+                       dev->data->port_id);
        }
        tmpl->cq = ibv_cq_ex_to_cq(mlx5dv_create_cq(priv->ctx, &attr.cq.ibv,
                                                    &attr.cq.mlx5));
        if (tmpl->cq == NULL) {
-               ERROR("%p: CQ creation failure", (void *)rxq_ctrl);
+               DRV_LOG(ERR, "port %u Rx queue %u CQ creation failure",
+                       dev->data->port_id, idx);
+               rte_errno = ENOMEM;
                goto error;
        }
-       DEBUG("priv->device_attr.max_qp_wr is %d",
-             priv->device_attr.orig_attr.max_qp_wr);
-       DEBUG("priv->device_attr.max_sge is %d",
-             priv->device_attr.orig_attr.max_sge);
+       DRV_LOG(DEBUG, "port %u priv->device_attr.max_qp_wr is %d",
+               dev->data->port_id, priv->device_attr.orig_attr.max_qp_wr);
+       DRV_LOG(DEBUG, "port %u priv->device_attr.max_sge is %d",
+               dev->data->port_id, priv->device_attr.orig_attr.max_sge);
        attr.wq = (struct ibv_wq_init_attr){
                .wq_context = NULL, /* Could be useful in the future. */
                .wq_type = IBV_WQT_RQ,
@@ -658,7 +679,9 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
 #endif
        tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq);
        if (tmpl->wq == NULL) {
-               ERROR("%p: WQ creation failure", (void *)rxq_ctrl);
+               DRV_LOG(ERR, "port %u Rx queue %u WQ creation failure",
+                       dev->data->port_id, idx);
+               rte_errno = ENOMEM;
                goto error;
        }
        /*
@@ -668,11 +691,14 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
        if (((int)attr.wq.max_wr !=
             ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
            ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
-               ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
-                     (void *)rxq_ctrl,
-                     ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
-                     (1 << rxq_data->sges_n),
-                     attr.wq.max_wr, attr.wq.max_sge);
+               DRV_LOG(ERR,
+                       "port %u Rx queue %u requested %u*%u but got %u*%u"
+                       " WRs*SGEs",
+                       dev->data->port_id, idx,
+                       ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
+                       (1 << rxq_data->sges_n),
+                       attr.wq.max_wr, attr.wq.max_sge);
+               rte_errno = EINVAL;
                goto error;
        }
        /* Change queue state to ready. */
@@ -682,8 +708,10 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
        };
        ret = ibv_modify_wq(tmpl->wq, &mod);
        if (ret) {
-               ERROR("%p: WQ state to IBV_WQS_RDY failed",
-                     (void *)rxq_ctrl);
+               DRV_LOG(ERR,
+                       "port %u Rx queue %u WQ state to IBV_WQS_RDY failed",
+                       dev->data->port_id, idx);
+               rte_errno = ret;
                goto error;
        }
        obj.cq.in = tmpl->cq;
@@ -691,11 +719,16 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
        obj.rwq.in = tmpl->wq;
        obj.rwq.out = &rwq;
        ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
-       if (ret != 0)
+       if (ret) {
+               rte_errno = ret;
                goto error;
+       }
        if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
-               ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
-                     "it should be set to %u", RTE_CACHE_LINE_SIZE);
+               DRV_LOG(ERR,
+                       "port %u wrong MLX5_CQE_SIZE environment variable"
+                       " value: it should be set to %u",
+                       dev->data->port_id, RTE_CACHE_LINE_SIZE);
+               rte_errno = EINVAL;
                goto error;
        }
        /* Fill the rings. */
@@ -731,13 +764,16 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
        rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n;
        rte_wmb();
        *rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
-       DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
+       DRV_LOG(DEBUG, "port %u rxq %u updated with %p", dev->data->port_id,
+               idx, (void *)&tmpl);
        rte_atomic32_inc(&tmpl->refcnt);
-       DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
-             (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
+       DRV_LOG(DEBUG, "port %u Verbs Rx queue %u: refcnt %d",
+               dev->data->port_id, idx, rte_atomic32_read(&tmpl->refcnt));
        LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
+       priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
        return tmpl;
 error:
+       ret = rte_errno; /* Save rte_errno before cleanup. */
        if (tmpl->wq)
                claim_zero(ibv_destroy_wq(tmpl->wq));
        if (tmpl->cq)
@@ -745,24 +781,27 @@ error:
        if (tmpl->channel)
                claim_zero(ibv_destroy_comp_channel(tmpl->channel));
        if (tmpl->mr)
-               priv_mr_release(priv, tmpl->mr);
+               mlx5_mr_release(tmpl->mr);
+       priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
+       rte_errno = ret; /* Restore rte_errno. */
        return NULL;
 }
 
 /**
  * Get an Rx queue Verbs object.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param idx
  *   Queue index in DPDK Rx queue array
  *
  * @return
  *   The Verbs object if it exists.
  */
-struct mlx5_rxq_ibv*
-mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
+struct mlx5_rxq_ibv *
+mlx5_rxq_ibv_get(struct rte_eth_dev *dev, uint16_t idx)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
        struct mlx5_rxq_ctrl *rxq_ctrl;
 
@@ -772,11 +811,11 @@ mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
                return NULL;
        rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
        if (rxq_ctrl->ibv) {
-               priv_mr_get(priv, rxq_data->mp);
+               mlx5_mr_get(dev, rxq_data->mp);
                rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
-               DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
-                     (void *)rxq_ctrl->ibv,
-                     rte_atomic32_read(&rxq_ctrl->ibv->refcnt));
+               DRV_LOG(DEBUG, "port %u Verbs Rx queue %u: refcnt %d",
+                       dev->data->port_id, rxq_ctrl->idx,
+                       rte_atomic32_read(&rxq_ctrl->ibv->refcnt));
        }
        return rxq_ctrl->ibv;
 }
@@ -784,16 +823,14 @@ mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
 /**
  * Release an Rx verbs queue object.
  *
- * @param priv
- *   Pointer to private structure.
  * @param rxq_ibv
  *   Verbs Rx queue object.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   1 while a reference on it exists, 0 when freed.
  */
 int
-mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
+mlx5_rxq_ibv_release(struct mlx5_rxq_ibv *rxq_ibv)
 {
        int ret;
 
@@ -801,11 +838,12 @@ mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
        assert(rxq_ibv->wq);
        assert(rxq_ibv->cq);
        assert(rxq_ibv->mr);
-       ret = priv_mr_release(priv, rxq_ibv->mr);
+       ret = mlx5_mr_release(rxq_ibv->mr);
        if (!ret)
                rxq_ibv->mr = NULL;
-       DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
-             (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt));
+       DRV_LOG(DEBUG, "port %u Verbs Rx queue %u: refcnt %d",
+               PORT_ID(rxq_ibv->rxq_ctrl->priv),
+               rxq_ibv->rxq_ctrl->idx, rte_atomic32_read(&rxq_ibv->refcnt));
        if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
                rxq_free_elts(rxq_ibv->rxq_ctrl);
                claim_zero(ibv_destroy_wq(rxq_ibv->wq));
@@ -816,26 +854,28 @@ mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
                rte_free(rxq_ibv);
                return 0;
        }
-       return EBUSY;
+       return 1;
 }
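
The release functions in this file drop the old EBUSY return in favour of
a count-like contract: 1 while other references remain, 0 once the object
is actually freed. A sketch of that contract with DPDK atomics (the
helper name is invented):

    #include <rte_atomic.h>

    /* Returns 1 while a reference remains, 0 when freed, mirroring
     * mlx5_rxq_ibv_release() above. */
    static int
    obj_release(rte_atomic32_t *refcnt)
    {
            if (rte_atomic32_dec_and_test(refcnt)) {
                    /* Last reference: destroy the underlying resources. */
                    return 0;
            }
            return 1;
    }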
 
 /**
  * Verify the Verbs Rx queue list is empty
  *
- * @param priv
- *  Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  *
- * @return the number of object not released.
+ * @return
+ *   The number of objects not released.
  */
 int
-mlx5_priv_rxq_ibv_verify(struct priv *priv)
+mlx5_rxq_ibv_verify(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        int ret = 0;
        struct mlx5_rxq_ibv *rxq_ibv;
 
        LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
-               DEBUG("%p: Verbs Rx queue %p still referenced", (void *)priv,
-                     (void *)rxq_ibv);
+               DRV_LOG(DEBUG, "port %u Verbs Rx queue %u still referenced",
+                       dev->data->port_id, rxq_ibv->rxq_ctrl->idx);
                ++ret;
        }
        return ret;
@@ -844,15 +884,12 @@ mlx5_priv_rxq_ibv_verify(struct priv *priv)
 /**
  * Return true if a single reference exists on the object.
  *
- * @param priv
- *   Pointer to private structure.
  * @param rxq_ibv
  *   Verbs Rx queue object.
  */
 int
-mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
+mlx5_rxq_ibv_releasable(struct mlx5_rxq_ibv *rxq_ibv)
 {
-       (void)priv;
        assert(rxq_ibv);
        return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
 }
@@ -860,8 +897,8 @@ mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
 /**
  * Create a DPDK Rx queue.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param idx
 *   Rx queue index.
  * @param desc
@@ -870,13 +907,13 @@ mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
  *   NUMA socket on which memory must be allocated.
  *
  * @return
- *   A DPDK queue object on success.
+ *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
  */
-struct mlx5_rxq_ctrl*
-mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
-                 unsigned int socket, struct rte_mempool *mp)
+struct mlx5_rxq_ctrl *
+mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+            unsigned int socket, struct rte_mempool *mp)
 {
-       struct rte_eth_dev *dev = priv->dev;
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_rxq_ctrl *tmpl;
        const uint16_t desc_n =
                desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
@@ -886,10 +923,12 @@ mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
                                 sizeof(*tmpl) +
                                 desc_n * sizeof(struct rte_mbuf *),
                                 0, socket);
-       if (!tmpl)
+       if (!tmpl) {
+               rte_errno = ENOMEM;
                return NULL;
+       }
        tmpl->socket = socket;
-       if (priv->dev->data->dev_conf.intr_conf.rxq)
+       if (dev->data->dev_conf.intr_conf.rxq)
                tmpl->irq = 1;
        /* Enable scattered packets support for this queue if necessary. */
        assert(mb_len >= RTE_PKTMBUF_HEADROOM);
@@ -912,29 +951,34 @@ mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
                size = mb_len * (1 << tmpl->rxq.sges_n);
                size -= RTE_PKTMBUF_HEADROOM;
                if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
-                       ERROR("%p: too many SGEs (%u) needed to handle"
-                             " requested maximum packet size %u",
-                             (void *)dev,
-                             1 << sges_n,
-                             dev->data->dev_conf.rxmode.max_rx_pkt_len);
+                       DRV_LOG(ERR,
+                               "port %u too many SGEs (%u) needed to handle"
+                               " requested maximum packet size %u",
+                               dev->data->port_id,
+                               1 << sges_n,
+                               dev->data->dev_conf.rxmode.max_rx_pkt_len);
+                       rte_errno = EOVERFLOW;
                        goto error;
                }
        } else {
-               WARN("%p: the requested maximum Rx packet size (%u) is"
-                    " larger than a single mbuf (%u) and scattered"
-                    " mode has not been requested",
-                    (void *)dev,
-                    dev->data->dev_conf.rxmode.max_rx_pkt_len,
-                    mb_len - RTE_PKTMBUF_HEADROOM);
+               DRV_LOG(WARNING,
+                       "port %u the requested maximum Rx packet size (%u) is"
+                       " larger than a single mbuf (%u) and scattered mode has"
+                       " not been requested",
+                       dev->data->port_id,
+                       dev->data->dev_conf.rxmode.max_rx_pkt_len,
+                       mb_len - RTE_PKTMBUF_HEADROOM);
        }
-       DEBUG("%p: maximum number of segments per packet: %u",
-             (void *)dev, 1 << tmpl->rxq.sges_n);
+       DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u",
+               dev->data->port_id, 1 << tmpl->rxq.sges_n);
        if (desc % (1 << tmpl->rxq.sges_n)) {
-               ERROR("%p: number of RX queue descriptors (%u) is not a"
-                     " multiple of SGEs per packet (%u)",
-                     (void *)dev,
-                     desc,
-                     1 << tmpl->rxq.sges_n);
+               DRV_LOG(ERR,
+                       "port %u number of Rx queue descriptors (%u) is not a"
+                       " multiple of SGEs per packet (%u)",
+                       dev->data->port_id,
+                       desc,
+                       1 << tmpl->rxq.sges_n);
+               rte_errno = EINVAL;
                goto error;
        }
        /* Toggle RX checksum offload if hardware supports it. */
@@ -954,19 +998,22 @@ mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
        } else if (priv->hw_fcs_strip) {
                tmpl->rxq.crc_present = 1;
        } else {
-               WARN("%p: CRC stripping has been disabled but will still"
-                    " be performed by hardware, make sure MLNX_OFED and"
-                    " firmware are up to date",
-                    (void *)dev);
+               DRV_LOG(WARNING,
+                       "port %u CRC stripping has been disabled but will"
+                       " still be performed by hardware, make sure MLNX_OFED"
+                       " and firmware are up to date",
+                       dev->data->port_id);
                tmpl->rxq.crc_present = 0;
        }
-       DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
-             " incoming frames to hide it",
-             (void *)dev,
-             tmpl->rxq.crc_present ? "disabled" : "enabled",
-             tmpl->rxq.crc_present << 2);
+       DRV_LOG(DEBUG,
+               "port %u CRC stripping is %s, %u bytes will be subtracted from"
+               " incoming frames to hide it",
+               dev->data->port_id,
+               tmpl->rxq.crc_present ? "disabled" : "enabled",
+               tmpl->rxq.crc_present << 2);
        /* Save port ID. */
-       tmpl->rxq.rss_hash = priv->rxqs_n > 1;
+       tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf &&
+               (!!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS));
        tmpl->rxq.port_id = dev->data->port_id;
        tmpl->priv = priv;
        tmpl->rxq.mp = mp;
@@ -974,9 +1021,10 @@ mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
        tmpl->rxq.elts_n = log2above(desc);
        tmpl->rxq.elts =
                (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
+       tmpl->idx = idx;
        rte_atomic32_inc(&tmpl->refcnt);
-       DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
-             (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
+       DRV_LOG(DEBUG, "port %u Rx queue %u: refcnt %d", dev->data->port_id,
+               idx, rte_atomic32_read(&tmpl->refcnt));
        LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
        return tmpl;
 error:
@@ -987,28 +1035,29 @@ error:
 /**
  * Get a Rx queue.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param idx
 *   Rx queue index.
  *
  * @return
- *   A pointer to the queue if it exists.
+ *   A pointer to the queue if it exists, NULL otherwise.
  */
-struct mlx5_rxq_ctrl*
-mlx5_priv_rxq_get(struct priv *priv, uint16_t idx)
+struct mlx5_rxq_ctrl *
+mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
 
        if ((*priv->rxqs)[idx]) {
                rxq_ctrl = container_of((*priv->rxqs)[idx],
                                        struct mlx5_rxq_ctrl,
                                        rxq);
-
-               mlx5_priv_rxq_ibv_get(priv, idx);
+               mlx5_rxq_ibv_get(dev, idx);
                rte_atomic32_inc(&rxq_ctrl->refcnt);
-               DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
-                     (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
+               DRV_LOG(DEBUG, "port %u Rx queue %u: refcnt %d",
+                       dev->data->port_id, rxq_ctrl->idx,
+                       rte_atomic32_read(&rxq_ctrl->refcnt));
        }
        return rxq_ctrl;
 }
@@ -1016,59 +1065,59 @@ mlx5_priv_rxq_get(struct priv *priv, uint16_t idx)
 /**
  * Release a Rx queue.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param idx
 *   Rx queue index.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   1 while a reference on it exists, 0 when freed.
  */
 int
-mlx5_priv_rxq_release(struct priv *priv, uint16_t idx)
+mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_rxq_ctrl *rxq_ctrl;
 
        if (!(*priv->rxqs)[idx])
                return 0;
        rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
        assert(rxq_ctrl->priv);
-       if (rxq_ctrl->ibv) {
-               int ret;
-
-               ret = mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
-               if (!ret)
-                       rxq_ctrl->ibv = NULL;
-       }
-       DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
-             (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
+       if (rxq_ctrl->ibv && !mlx5_rxq_ibv_release(rxq_ctrl->ibv))
+               rxq_ctrl->ibv = NULL;
+       DRV_LOG(DEBUG, "port %u Rx queue %u: refcnt %d", dev->data->port_id,
+               rxq_ctrl->idx, rte_atomic32_read(&rxq_ctrl->refcnt));
        if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
                LIST_REMOVE(rxq_ctrl, next);
                rte_free(rxq_ctrl);
                (*priv->rxqs)[idx] = NULL;
                return 0;
        }
-       return EBUSY;
+       return 1;
 }
 
 /**
  * Verify if the queue can be released.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param idx
 *   Rx queue index.
  *
  * @return
- *   1 if the queue can be released.
+ *   1 if the queue can be released, a negative errno value otherwise and
+ *   rte_errno is set.
  */
 int
-mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx)
+mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_rxq_ctrl *rxq_ctrl;
 
-       if (!(*priv->rxqs)[idx])
-               return -1;
+       if (!(*priv->rxqs)[idx]) {
+               rte_errno = EINVAL;
+               return -rte_errno;
+       }
        rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
        return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
 }
@@ -1076,20 +1125,22 @@ mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx)
 /**
  * Verify the Rx Queue list is empty
  *
- * @param priv
- *  Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  *
- * @return the number of object not released.
+ * @return
+ *   The number of objects not released.
  */
 int
-mlx5_priv_rxq_verify(struct priv *priv)
+mlx5_rxq_verify(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_rxq_ctrl *rxq_ctrl;
        int ret = 0;
 
        LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
-               DEBUG("%p: Rx Queue %p still referenced", (void *)priv,
-                     (void *)rxq_ctrl);
+               DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced",
+                       dev->data->port_id, rxq_ctrl->idx);
                ++ret;
        }
        return ret;
@@ -1098,20 +1149,21 @@ mlx5_priv_rxq_verify(struct priv *priv)
 /**
  * Create an indirection table.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param queues
  *   Queues entering in the indirection table.
  * @param queues_n
  *   Number of queues in the array.
  *
  * @return
- *   A new indirection table.
+ *   The Verbs object initialised, NULL otherwise and rte_errno is set.
  */
-struct mlx5_ind_table_ibv*
-mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
-                           uint16_t queues_n)
+struct mlx5_ind_table_ibv *
+mlx5_ind_table_ibv_new(struct rte_eth_dev *dev, uint16_t queues[],
+                      uint16_t queues_n)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_ind_table_ibv *ind_tbl;
        const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
                log2above(queues_n) :
@@ -1122,11 +1174,12 @@ mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
 
        ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
                             queues_n * sizeof(uint16_t), 0);
-       if (!ind_tbl)
+       if (!ind_tbl) {
+               rte_errno = ENOMEM;
                return NULL;
+       }
        for (i = 0; i != queues_n; ++i) {
-               struct mlx5_rxq_ctrl *rxq =
-                       mlx5_priv_rxq_get(priv, queues[i]);
+               struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev, queues[i]);
 
                if (!rxq)
                        goto error;
@@ -1144,24 +1197,28 @@ mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
                        .ind_tbl = wq,
                        .comp_mask = 0,
                });
-       if (!ind_tbl->ind_table)
+       if (!ind_tbl->ind_table) {
+               rte_errno = errno;
                goto error;
+       }
        rte_atomic32_inc(&ind_tbl->refcnt);
        LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
-       DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
-             (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+       DRV_LOG(DEBUG, "port %u indirection table %p: refcnt %d",
+               dev->data->port_id, (void *)ind_tbl,
+               rte_atomic32_read(&ind_tbl->refcnt));
        return ind_tbl;
 error:
        rte_free(ind_tbl);
-       DEBUG("%p cannot create indirection table", (void *)priv);
+       DRV_LOG(DEBUG, "port %u cannot create indirection table",
+               dev->data->port_id);
        return NULL;
 }
 
 /**
  * Get an indirection table.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param queues
  *   Queues entering in the indirection table.
  * @param queues_n
@@ -1170,10 +1227,11 @@ error:
  * @return
  *   An indirection table if found.
  */
-struct mlx5_ind_table_ibv*
-mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
-                           uint16_t queues_n)
+struct mlx5_ind_table_ibv *
+mlx5_ind_table_ibv_get(struct rte_eth_dev *dev, uint16_t queues[],
+                      uint16_t queues_n)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_ind_table_ibv *ind_tbl;
 
        LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
@@ -1187,10 +1245,11 @@ mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
                unsigned int i;
 
                rte_atomic32_inc(&ind_tbl->refcnt);
-               DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
-                     (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+               DRV_LOG(DEBUG, "port %u indirection table %p: refcnt %d",
+                       dev->data->port_id, (void *)ind_tbl,
+                       rte_atomic32_read(&ind_tbl->refcnt));
                for (i = 0; i != ind_tbl->queues_n; ++i)
-                       mlx5_priv_rxq_get(priv, ind_tbl->queues[i]);
+                       mlx5_rxq_get(dev, ind_tbl->queues[i]);
        }
        return ind_tbl;
 }
@@ -1198,51 +1257,55 @@ mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
 /**
  * Release an indirection table.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param ind_table
  *   Indirection table to release.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   1 while a reference on it exists, 0 when freed.
  */
 int
-mlx5_priv_ind_table_ibv_release(struct priv *priv,
-                               struct mlx5_ind_table_ibv *ind_tbl)
+mlx5_ind_table_ibv_release(struct rte_eth_dev *dev,
+                          struct mlx5_ind_table_ibv *ind_tbl)
 {
        unsigned int i;
 
-       DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
-             (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+       DRV_LOG(DEBUG, "port %u indirection table %p: refcnt %d",
+               dev->data->port_id, (void *)ind_tbl,
+               rte_atomic32_read(&ind_tbl->refcnt));
        if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
                claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table));
        for (i = 0; i != ind_tbl->queues_n; ++i)
-               claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i]));
+               claim_nonzero(mlx5_rxq_release(dev, ind_tbl->queues[i]));
        if (!rte_atomic32_read(&ind_tbl->refcnt)) {
                LIST_REMOVE(ind_tbl, next);
                rte_free(ind_tbl);
                return 0;
        }
-       return EBUSY;
+       return 1;
 }
 
 /**
 * Verify the indirection table list is empty
  *
- * @param priv
- *  Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  *
- * @return the number of object not released.
+ * @return
+ *   The number of objects not released.
  */
 int
-mlx5_priv_ind_table_ibv_verify(struct priv *priv)
+mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_ind_table_ibv *ind_tbl;
        int ret = 0;
 
        LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
-               DEBUG("%p: Verbs indirection table %p still referenced",
-                     (void *)priv, (void *)ind_tbl);
+               DRV_LOG(DEBUG,
+                       "port %u Verbs indirection table %p still referenced",
+                       dev->data->port_id, (void *)ind_tbl);
                ++ret;
        }
        return ret;
@@ -1251,8 +1314,8 @@ mlx5_priv_ind_table_ibv_verify(struct priv *priv)
 /**
  * Create an Rx Hash queue.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param rss_key
  *   RSS key for the Rx hash queue.
  * @param rss_key_len
@@ -1266,22 +1329,26 @@ mlx5_priv_ind_table_ibv_verify(struct priv *priv)
  *   Number of queues.
  *
  * @return
- *   An hash Rx queue on success.
+ *   The Verbs object initialised, NULL otherwise and rte_errno is set.
  */
-struct mlx5_hrxq*
-mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
-                  uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
+struct mlx5_hrxq *
+mlx5_hrxq_new(struct rte_eth_dev *dev, uint8_t *rss_key, uint8_t rss_key_len,
+             uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_hrxq *hrxq;
        struct mlx5_ind_table_ibv *ind_tbl;
        struct ibv_qp *qp;
+       int err;
 
        queues_n = hash_fields ? queues_n : 1;
-       ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
-       if (!ind_tbl)
-               ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n);
+       ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n);
        if (!ind_tbl)
+               ind_tbl = mlx5_ind_table_ibv_new(dev, queues, queues_n);
+       if (!ind_tbl) {
+               rte_errno = ENOMEM;
                return NULL;
+       }
        qp = ibv_create_qp_ex(
                priv->ctx,
                &(struct ibv_qp_init_attr_ex){
@@ -1299,8 +1366,10 @@ mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
                        .rwq_ind_tbl = ind_tbl->ind_table,
                        .pd = priv->pd,
                });
-       if (!qp)
+       if (!qp) {
+               rte_errno = errno;
                goto error;
+       }
        hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
        if (!hrxq)
                goto error;
@@ -1311,21 +1380,24 @@ mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
        memcpy(hrxq->rss_key, rss_key, rss_key_len);
        rte_atomic32_inc(&hrxq->refcnt);
        LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
-       DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
-             (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+       DRV_LOG(DEBUG, "port %u hash Rx queue %p: refcnt %d",
+               dev->data->port_id, (void *)hrxq,
+               rte_atomic32_read(&hrxq->refcnt));
        return hrxq;
 error:
-       mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
+       err = rte_errno; /* Save rte_errno before cleanup. */
+       mlx5_ind_table_ibv_release(dev, ind_tbl);
        if (qp)
                claim_zero(ibv_destroy_qp(qp));
+       rte_errno = err; /* Restore rte_errno. */
        return NULL;
 }
 
 /**
  * Get an Rx Hash queue.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param rss_conf
  *   RSS configuration for the Rx hash queue.
  * @param queues
@@ -1337,10 +1409,11 @@ error:
  * @return
 *   A hash Rx queue on success.
  */
-struct mlx5_hrxq*
-mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
-                  uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
+struct mlx5_hrxq *
+mlx5_hrxq_get(struct rte_eth_dev *dev, uint8_t *rss_key, uint8_t rss_key_len,
+             uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_hrxq *hrxq;
 
        queues_n = hash_fields ? queues_n : 1;
@@ -1353,16 +1426,17 @@ mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
                        continue;
                if (hrxq->hash_fields != hash_fields)
                        continue;
-               ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
+               ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n);
                if (!ind_tbl)
                        continue;
                if (ind_tbl != hrxq->ind_table) {
-                       mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
+                       mlx5_ind_table_ibv_release(dev, ind_tbl);
                        continue;
                }
                rte_atomic32_inc(&hrxq->refcnt);
-               DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
-                     (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+               DRV_LOG(DEBUG, "port %u hash Rx queue %p: refcnt %d",
+                       dev->data->port_id, (void *)hrxq,
+                       rte_atomic32_read(&hrxq->refcnt));
                return hrxq;
        }
        return NULL;
@@ -1371,47 +1445,51 @@ mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
 /**
  * Release the hash Rx queue.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param hrxq
  *   Pointer to Hash Rx queue to release.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   1 while a reference on it exists, 0 when freed.
  */
 int
-mlx5_priv_hrxq_release(struct priv *priv, struct mlx5_hrxq *hrxq)
+mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq)
 {
-       DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
-             (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+       DRV_LOG(DEBUG, "port %u hash Rx queue %p: refcnt %d",
+               dev->data->port_id, (void *)hrxq,
+               rte_atomic32_read(&hrxq->refcnt));
        if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
                claim_zero(ibv_destroy_qp(hrxq->qp));
-               mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table);
+               mlx5_ind_table_ibv_release(dev, hrxq->ind_table);
                LIST_REMOVE(hrxq, next);
                rte_free(hrxq);
                return 0;
        }
-       claim_nonzero(mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table));
-       return EBUSY;
+       claim_nonzero(mlx5_ind_table_ibv_release(dev, hrxq->ind_table));
+       return 1;
 }
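
Note the changed return convention: instead of EBUSY, mlx5_hrxq_release() now reports 1 while another reference remains and 0 once the object is actually freed. A hedged sketch of how a caller consumes this, assuming the declarations from mlx5_rxtx.h are in scope:

    /* Sketch only: drop one reference and react to the new return value. */
    static void
    drop_hrxq_ref(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq)
    {
            if (mlx5_hrxq_release(dev, hrxq) == 0) {
                    /* Last reference: QP destroyed, indirection table released. */
                    return;
            }
            /* Still referenced elsewhere; this was the former EBUSY case. */
    }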
 
 /**
  * Verify the Rx queue list is empty.
  *
- * @param priv
- *  Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  *
- * @return the number of object not released.
+ * @return
+ *   The number of objects not released.
  */
 int
-mlx5_priv_hrxq_ibv_verify(struct priv *priv)
+mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_hrxq *hrxq;
        int ret = 0;
 
        LIST_FOREACH(hrxq, &priv->hrxqs, next) {
-               DEBUG("%p: Verbs Hash Rx queue %p still referenced",
-                     (void *)priv, (void *)hrxq);
+               DRV_LOG(DEBUG,
+                       "port %u Verbs hash Rx queue %p still referenced",
+                       dev->data->port_id, (void *)hrxq);
                ++ret;
        }
        return ret;
index 32bfa30..2e003ae 100644
@@ -114,6 +114,14 @@ mlx5_set_ptype_table(void)
                     RTE_PTYPE_L4_TCP;
        (*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_L4_TCP;
+       (*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_L4_TCP;
+       (*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_L4_TCP;
+       (*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_L4_TCP;
+       (*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_L4_TCP;
        /* UDP */
        (*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_L4_UDP;
@@ -132,6 +140,14 @@ mlx5_set_ptype_table(void)
                     RTE_PTYPE_L4_TCP;
        (*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_L4_TCP;
+       (*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_L4_TCP;
+       (*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_L4_TCP;
+       (*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_L4_TCP;
+       (*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_L4_TCP;
        (*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_L4_UDP;
        (*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
@@ -169,12 +185,36 @@ mlx5_set_ptype_table(void)
        (*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L4_TCP;
+       (*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_TCP;
+       (*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_TCP;
+       (*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_TCP;
+       (*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_TCP;
        (*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L4_TCP;
        (*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L4_TCP;
+       (*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_TCP;
+       (*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_TCP;
+       (*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_TCP;
+       (*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                    RTE_PTYPE_INNER_L4_TCP;
        /* Tunneled - UDP */
        (*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
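
The new 0x0d/0x0e and 0x11/0x12 entries (and their 0x8x/0xcx/0xdx tunneled counterparts) fill CQE index values that previously fell through to an unknown type; assuming, as the surrounding entries suggest, that the low bits of the index encode the L3/L4 header type reported by the CQE, these variants correspond to TCP completions carrying extra flag bits and now resolve to RTE_PTYPE_L4_TCP as well. The table is consumed as a single lookup on the hot path; an illustrative sketch (the index derivation shown is simplified, not the exact mlx5 bit layout):

    #include <stdint.h>
    #include <rte_mbuf.h>

    extern uint32_t mlx5_ptype_table[]; /* 256-entry CQE-index -> ptype map */

    /* Illustrative only: one table lookup translates the 8-bit index
     * derived from CQE flags into an mbuf packet type. */
    static inline void
    set_packet_type(struct rte_mbuf *m, uint8_t cqe_flags_idx)
    {
            m->packet_type = mlx5_ptype_table[cqe_flags_idx];
    }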
@@ -370,7 +410,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                unsigned int ds = 0;
                unsigned int sg = 0; /* counter of additional segs attached. */
                uintptr_t addr;
-               uint64_t naddr;
                uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2;
                uint16_t tso_header_sz = 0;
                uint16_t ehdr;
@@ -594,12 +633,12 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        ds = 3;
 use_dseg:
                        /* Add the remaining packet as a simple ds. */
-                       naddr = rte_cpu_to_be_64(addr);
+                       addr = rte_cpu_to_be_64(addr);
                        *dseg = (rte_v128u32_t){
                                rte_cpu_to_be_32(length),
                                mlx5_tx_mb2mr(txq, buf),
-                               naddr,
-                               naddr >> 32,
+                               addr,
+                               addr >> 32,
                        };
                        ++ds;
                        if (!segs_n)
@@ -633,12 +672,12 @@ next_seg:
                total_length += length;
 #endif
                /* Store segment information. */
-               naddr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
+               addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
                *dseg = (rte_v128u32_t){
                        rte_cpu_to_be_32(length),
                        mlx5_tx_mb2mr(txq, buf),
-                       naddr,
-                       naddr >> 32,
+                       addr,
+                       addr >> 32,
                };
                (*txq->elts)[++elts_head & elts_m] = buf;
                ++sg;
@@ -1339,8 +1378,6 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
        do {
                struct rte_mbuf *buf = *(pkts++);
                uintptr_t addr;
-               uint64_t naddr;
-               unsigned int n;
                unsigned int do_inline = 0; /* Whether inline is possible. */
                uint32_t length;
                unsigned int segs_n = buf->nb_segs;
@@ -1459,7 +1496,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                max_wqe--;
                        else
                                max_wqe -= 2;
-               } else if (do_inline) {
+               } else if (max_inline && do_inline) {
                        /* Inline packet into WQE. */
                        unsigned int max;
 
@@ -1517,16 +1554,13 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                        ((uintptr_t)mpw.data.raw +
                                         inl_pad);
                        (*txq->elts)[elts_head++ & elts_m] = buf;
-                       addr = rte_pktmbuf_mtod(buf, uintptr_t);
-                       for (n = 0; n * RTE_CACHE_LINE_SIZE < length; n++)
-                               rte_prefetch2((void *)(addr +
-                                               n * RTE_CACHE_LINE_SIZE));
-                       naddr = rte_cpu_to_be_64(addr);
+                       addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
+                                                                uintptr_t));
                        *dseg = (rte_v128u32_t) {
                                rte_cpu_to_be_32(length),
                                mlx5_tx_mb2mr(txq, buf),
-                               naddr,
-                               naddr >> 32,
+                               addr,
+                               addr >> 32,
                        };
                        mpw.data.raw = (volatile void *)(dseg + 1);
                        mpw.total_len += (inl_pad + sizeof(*dseg));
@@ -1677,6 +1711,7 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
                        return 0;
                ++rxq->cq_ci;
                op_own = cqe->op_own;
+               rte_io_rmb();
                if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
                        volatile struct mlx5_mini_cqe8 (*mc)[8] =
                                (volatile struct mlx5_mini_cqe8 (*)[8])
@@ -1934,11 +1969,10 @@ skip:
  *   Number of packets successfully transmitted (<= pkts_n).
  */
 uint16_t
-removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+removed_tx_burst(void *dpdk_txq __rte_unused,
+                struct rte_mbuf **pkts __rte_unused,
+                uint16_t pkts_n __rte_unused)
 {
-       (void)dpdk_txq;
-       (void)pkts;
-       (void)pkts_n;
        return 0;
 }
 
@@ -1959,11 +1993,10 @@ removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
  *   Number of packets successfully received (<= pkts_n).
  */
 uint16_t
-removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+removed_rx_burst(void *dpdk_rxq __rte_unused,
+                struct rte_mbuf **pkts __rte_unused,
+                uint16_t pkts_n __rte_unused)
 {
-       (void)dpdk_rxq;
-       (void)pkts;
-       (void)pkts_n;
        return 0;
 }
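
Both stubs switch from the `(void)arg;` idiom to the __rte_unused parameter attribute, which silences unused-parameter warnings at the declaration itself and leaves the body empty. The two idioms side by side, as a generic sketch:

    #include <stdint.h>
    #include <rte_common.h> /* __rte_unused */

    /* Old idiom: consume the parameters explicitly in the body. */
    static uint16_t
    stub_old(void *q, uint16_t n)
    {
            (void)q;
            (void)n;
            return 0;
    }

    /* New idiom: annotate the parameters, keep the body minimal. */
    static uint16_t
    stub_new(void *q __rte_unused, uint16_t n __rte_unused)
    {
            return 0;
    }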
 
@@ -1975,56 +2008,49 @@ removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
  */
 
 uint16_t __attribute__((weak))
-mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused,
+                     struct rte_mbuf **pkts __rte_unused,
+                     uint16_t pkts_n __rte_unused)
 {
-       (void)dpdk_txq;
-       (void)pkts;
-       (void)pkts_n;
        return 0;
 }
 
 uint16_t __attribute__((weak))
-mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+mlx5_tx_burst_vec(void *dpdk_txq __rte_unused,
+                 struct rte_mbuf **pkts __rte_unused,
+                 uint16_t pkts_n __rte_unused)
 {
-       (void)dpdk_txq;
-       (void)pkts;
-       (void)pkts_n;
        return 0;
 }
 
 uint16_t __attribute__((weak))
-mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+mlx5_rx_burst_vec(void *dpdk_rxq __rte_unused,
+                 struct rte_mbuf **pkts __rte_unused,
+                 uint16_t pkts_n __rte_unused)
 {
-       (void)dpdk_rxq;
-       (void)pkts;
-       (void)pkts_n;
        return 0;
 }
 
 int __attribute__((weak))
-priv_check_raw_vec_tx_support(struct priv *priv)
+mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
 {
-       (void)priv;
        return -ENOTSUP;
 }
 
 int __attribute__((weak))
-priv_check_vec_tx_support(struct priv *priv)
+mlx5_check_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
 {
-       (void)priv;
        return -ENOTSUP;
 }
 
 int __attribute__((weak))
-rxq_check_vec_support(struct mlx5_rxq_data *rxq)
+mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
 {
-       (void)rxq;
        return -ENOTSUP;
 }
 
 int __attribute__((weak))
-priv_check_vec_rx_support(struct priv *priv)
+mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)
 {
-       (void)priv;
        return -ENOTSUP;
 }
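
These -ENOTSUP stubs are weak on purpose: when a build includes the vectorized Rx/Tx objects (SSE or NEON), their strong definitions override the stubs at link time; otherwise the stubs keep the datapath-selection code linkable. A minimal illustration of the mechanism with a placeholder symbol foo:

    /* stub.c -- fallback, linked into every build. */
    int __attribute__((weak))
    foo(void)
    {
            return -1; /* "not supported" */
    }

    /* fast.c -- only compiled when the feature is enabled; this strong
     * definition silently replaces the weak stub at link time. */
    int
    foo(void)
    {
            return 0; /* real implementation */
    }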
index de5b769..29019f7 100644
@@ -154,6 +154,7 @@ struct mlx5_rxq_ctrl {
        struct mlx5_rxq_data rxq; /* Data path structure. */
        unsigned int socket; /* CPU socket ID for allocations. */
        unsigned int irq:1; /* Whether IRQ is enabled. */
+       uint16_t idx; /* Queue index. */
 };
 
 /* Indirection table. */
@@ -204,7 +205,7 @@ struct mlx5_txq_data {
        volatile void *wqes; /* Work queue (use volatile to write into). */
        volatile uint32_t *qp_db; /* Work queue doorbell. */
        volatile uint32_t *cq_db; /* Completion queue doorbell. */
-       volatile void *bf_reg; /* Blueflame register. */
+       volatile void *bf_reg; /* Blueflame register remapped. */
        struct mlx5_mr *mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MR translation table. */
        struct rte_mbuf *(*elts)[]; /* TX elements. */
        struct mlx5_txq_stats stats; /* TX queue counters. */
@@ -214,6 +215,7 @@ struct mlx5_txq_data {
 struct mlx5_txq_ibv {
        LIST_ENTRY(mlx5_txq_ibv) next; /* Pointer to the next element. */
        rte_atomic32_t refcnt; /* Reference counter. */
+       struct mlx5_txq_ctrl *txq_ctrl; /* Pointer to the Tx queue control. */
        struct ibv_cq *cq; /* Completion Queue. */
        struct ibv_qp *qp; /* Queue Pair. */
 };
@@ -229,6 +231,8 @@ struct mlx5_txq_ctrl {
        struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
        struct mlx5_txq_data txq; /* Data path structure. */
        off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
+       volatile void *bf_reg_orig; /* Blueflame register from verbs. */
+       uint16_t idx; /* Queue index. */
 };
 
 /* mlx5_rxq.c */
@@ -236,93 +240,104 @@ struct mlx5_txq_ctrl {
 extern uint8_t rss_hash_default_key[];
 extern const size_t rss_hash_default_key_len;
 
-void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *);
-int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
-                       const struct rte_eth_rxconf *, struct rte_mempool *);
-void mlx5_rx_queue_release(void *);
-int priv_rx_intr_vec_enable(struct priv *priv);
-void priv_rx_intr_vec_disable(struct priv *priv);
+void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl);
+int mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+                       unsigned int socket, const struct rte_eth_rxconf *conf,
+                       struct rte_mempool *mp);
+void mlx5_rx_queue_release(void *dpdk_rxq);
+int mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev);
+void mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev);
 int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
-struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_new(struct priv *, uint16_t);
-struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_get(struct priv *, uint16_t);
-int mlx5_priv_rxq_ibv_release(struct priv *, struct mlx5_rxq_ibv *);
-int mlx5_priv_rxq_ibv_releasable(struct priv *, struct mlx5_rxq_ibv *);
-int mlx5_priv_rxq_ibv_verify(struct priv *);
-struct mlx5_rxq_ctrl *mlx5_priv_rxq_new(struct priv *, uint16_t,
-                                       uint16_t, unsigned int,
-                                       struct rte_mempool *);
-struct mlx5_rxq_ctrl *mlx5_priv_rxq_get(struct priv *, uint16_t);
-int mlx5_priv_rxq_release(struct priv *, uint16_t);
-int mlx5_priv_rxq_releasable(struct priv *, uint16_t);
-int mlx5_priv_rxq_verify(struct priv *);
-int rxq_alloc_elts(struct mlx5_rxq_ctrl *);
-struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_new(struct priv *,
-                                                      uint16_t [],
-                                                      uint16_t);
-struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_get(struct priv *,
-                                                      uint16_t [],
-                                                      uint16_t);
-int mlx5_priv_ind_table_ibv_release(struct priv *, struct mlx5_ind_table_ibv *);
-int mlx5_priv_ind_table_ibv_verify(struct priv *);
-struct mlx5_hrxq *mlx5_priv_hrxq_new(struct priv *, uint8_t *, uint8_t,
-                                    uint64_t, uint16_t [], uint16_t);
-struct mlx5_hrxq *mlx5_priv_hrxq_get(struct priv *, uint8_t *, uint8_t,
-                                    uint64_t, uint16_t [], uint16_t);
-int mlx5_priv_hrxq_release(struct priv *, struct mlx5_hrxq *);
-int mlx5_priv_hrxq_ibv_verify(struct priv *);
+struct mlx5_rxq_ibv *mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx);
+struct mlx5_rxq_ibv *mlx5_rxq_ibv_get(struct rte_eth_dev *dev, uint16_t idx);
+int mlx5_rxq_ibv_release(struct mlx5_rxq_ibv *rxq_ibv);
+int mlx5_rxq_ibv_releasable(struct mlx5_rxq_ibv *rxq_ibv);
+int mlx5_rxq_ibv_verify(struct rte_eth_dev *dev);
+struct mlx5_rxq_ctrl *mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx,
+                                  uint16_t desc, unsigned int socket,
+                                  struct rte_mempool *mp);
+struct mlx5_rxq_ctrl *mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx);
+int mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx);
+int mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx);
+int mlx5_rxq_verify(struct rte_eth_dev *dev);
+int rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl);
+struct mlx5_ind_table_ibv *mlx5_ind_table_ibv_new(struct rte_eth_dev *dev,
+                                                 uint16_t queues[],
+                                                 uint16_t queues_n);
+struct mlx5_ind_table_ibv *mlx5_ind_table_ibv_get(struct rte_eth_dev *dev,
+                                                 uint16_t queues[],
+                                                 uint16_t queues_n);
+int mlx5_ind_table_ibv_release(struct rte_eth_dev *dev,
+                              struct mlx5_ind_table_ibv *ind_tbl);
+int mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev);
+struct mlx5_hrxq *mlx5_hrxq_new(struct rte_eth_dev *dev, uint8_t *rss_key,
+                               uint8_t rss_key_len, uint64_t hash_fields,
+                               uint16_t queues[], uint16_t queues_n);
+struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev, uint8_t *rss_key,
+                               uint8_t rss_key_len, uint64_t hash_fields,
+                               uint16_t queues[], uint16_t queues_n);
+int mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq);
+int mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev);
 
 /* mlx5_txq.c */
 
-int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
-                       const struct rte_eth_txconf *);
-void mlx5_tx_queue_release(void *);
-int priv_tx_uar_remap(struct priv *priv, int fd);
-struct mlx5_txq_ibv *mlx5_priv_txq_ibv_new(struct priv *, uint16_t);
-struct mlx5_txq_ibv *mlx5_priv_txq_ibv_get(struct priv *, uint16_t);
-int mlx5_priv_txq_ibv_release(struct priv *, struct mlx5_txq_ibv *);
-int mlx5_priv_txq_ibv_releasable(struct priv *, struct mlx5_txq_ibv *);
-int mlx5_priv_txq_ibv_verify(struct priv *);
-struct mlx5_txq_ctrl *mlx5_priv_txq_new(struct priv *, uint16_t,
-                                       uint16_t, unsigned int,
-                                       const struct rte_eth_txconf *);
-struct mlx5_txq_ctrl *mlx5_priv_txq_get(struct priv *, uint16_t);
-int mlx5_priv_txq_release(struct priv *, uint16_t);
-int mlx5_priv_txq_releasable(struct priv *, uint16_t);
-int mlx5_priv_txq_verify(struct priv *);
-void txq_alloc_elts(struct mlx5_txq_ctrl *);
+int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+                       unsigned int socket, const struct rte_eth_txconf *conf);
+void mlx5_tx_queue_release(void *dpdk_txq);
+int mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
+struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx);
+struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx);
+int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv);
+int mlx5_txq_ibv_releasable(struct mlx5_txq_ibv *txq_ibv);
+int mlx5_txq_ibv_verify(struct rte_eth_dev *dev);
+struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx,
+                                  uint16_t desc, unsigned int socket,
+                                  const struct rte_eth_txconf *conf);
+struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx);
+int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx);
+int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx);
+int mlx5_txq_verify(struct rte_eth_dev *dev);
+void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl);
 
 /* mlx5_rxtx.c */
 
 extern uint32_t mlx5_ptype_table[];
 
 void mlx5_set_ptype_table(void);
-uint16_t mlx5_tx_burst(void *, struct rte_mbuf **, uint16_t);
-uint16_t mlx5_tx_burst_mpw(void *, struct rte_mbuf **, uint16_t);
-uint16_t mlx5_tx_burst_mpw_inline(void *, struct rte_mbuf **, uint16_t);
-uint16_t mlx5_tx_burst_empw(void *, struct rte_mbuf **, uint16_t);
-uint16_t mlx5_rx_burst(void *, struct rte_mbuf **, uint16_t);
-uint16_t removed_tx_burst(void *, struct rte_mbuf **, uint16_t);
-uint16_t removed_rx_burst(void *, struct rte_mbuf **, uint16_t);
-int mlx5_rx_descriptor_status(void *, uint16_t);
-int mlx5_tx_descriptor_status(void *, uint16_t);
+uint16_t mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts,
+                      uint16_t pkts_n);
+uint16_t mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts,
+                          uint16_t pkts_n);
+uint16_t mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
+                                 uint16_t pkts_n);
+uint16_t mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts,
+                           uint16_t pkts_n);
+uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
+uint16_t removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts,
+                         uint16_t pkts_n);
+uint16_t removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts,
+                         uint16_t pkts_n);
+int mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset);
+int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset);
 
 /* Vectorized version of mlx5_rxtx.c */
-int priv_check_raw_vec_tx_support(struct priv *);
-int priv_check_vec_tx_support(struct priv *);
-int rxq_check_vec_support(struct mlx5_rxq_data *);
-int priv_check_vec_rx_support(struct priv *);
-uint16_t mlx5_tx_burst_raw_vec(void *, struct rte_mbuf **, uint16_t);
-uint16_t mlx5_tx_burst_vec(void *, struct rte_mbuf **, uint16_t);
-uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
+int mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev);
+int mlx5_check_vec_tx_support(struct rte_eth_dev *dev);
+int mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq_data);
+int mlx5_check_vec_rx_support(struct rte_eth_dev *dev);
+uint16_t mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
+                              uint16_t pkts_n);
+uint16_t mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
+                          uint16_t pkts_n);
+uint16_t mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts,
+                          uint16_t pkts_n);
 
 /* mlx5_mr.c */
 
-void mlx5_mp2mr_iter(struct rte_mempool *, void *);
-struct mlx5_mr *priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *,
-                                  struct rte_mempool *, unsigned int);
-struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
-                                  unsigned int);
+void mlx5_mp2mr_iter(struct rte_mempool *mp, void *arg);
+struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq,
+                                  struct rte_mempool *mp, unsigned int idx);
 
 #ifndef NDEBUG
 /**
@@ -385,9 +400,10 @@ check_cqe(volatile struct mlx5_cqe *cqe,
                    (syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR))
                        return 0;
                if (!check_cqe_seen(cqe)) {
-                       ERROR("unexpected CQE error %u (0x%02x)"
-                             " syndrome 0x%02x",
-                             op_code, op_code, syndrome);
+                       DRV_LOG(ERR,
+                               "unexpected CQE error %u (0x%02x) syndrome"
+                               " 0x%02x",
+                               op_code, op_code, syndrome);
                        rte_hexdump(stderr, "MLX5 Error CQE:",
                                    (const void *)((uintptr_t)err_cqe),
                                    sizeof(*err_cqe));
@@ -396,8 +412,8 @@ check_cqe(volatile struct mlx5_cqe *cqe,
        } else if ((op_code != MLX5_CQE_RESP_SEND) &&
                   (op_code != MLX5_CQE_REQ)) {
                if (!check_cqe_seen(cqe)) {
-                       ERROR("unexpected CQE opcode %u (0x%02x)",
-                             op_code, op_code);
+                       DRV_LOG(ERR, "unexpected CQE opcode %u (0x%02x)",
+                               op_code, op_code);
                        rte_hexdump(stderr, "MLX5 CQE:",
                                    (const void *)((uintptr_t)cqe),
                                    sizeof(*cqe));
@@ -457,7 +473,7 @@ mlx5_tx_complete(struct mlx5_txq_data *txq)
        if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) ||
            (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) {
                if (!check_cqe_seen(cqe)) {
-                       ERROR("unexpected error CQE, TX stopped");
+                       DRV_LOG(ERR, "unexpected error CQE, Tx stopped");
                        rte_hexdump(stderr, "MLX5 TXQ:",
                                    (const void *)((uintptr_t)txq->wqes),
                                    ((1 << txq->wqe_n) *
@@ -558,8 +574,6 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
                if (txq->mp2mr[i]->start <= addr &&
                    txq->mp2mr[i]->end > addr) {
                        assert(txq->mp2mr[i]->lkey != (uint32_t)-1);
-                       assert(rte_cpu_to_be_32(txq->mp2mr[i]->mr->lkey) ==
-                              txq->mp2mr[i]->lkey);
                        txq->mr_cache_idx = i;
                        return txq->mp2mr[i]->lkey;
                }
@@ -573,6 +587,11 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
                rte_atomic32_inc(&mr->refcnt);
                txq->mr_cache_idx = i >= RTE_DIM(txq->mp2mr) ? i - 1 : i;
                return mr->lkey;
+       } else {
+               struct rte_mempool *mp = mlx5_tx_mb2mp(mb);
+
+               DRV_LOG(WARNING, "failed to register mempool %p (%s)",
+                       (void *)mp, mp->name);
        }
        return (uint32_t)-1;
 }
index 101aa15..982b8f1 100644
@@ -276,15 +276,16 @@ mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 /**
  * Check whether Tx queue flags are set for raw vectorized Tx.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  *
  * @return
  *   1 if supported, negative errno value if not.
  */
 int __attribute__((cold))
-priv_check_raw_vec_tx_support(struct priv *priv)
+mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        uint16_t i;
 
        /* All the configured queues should support it. */
@@ -303,15 +304,17 @@ priv_check_raw_vec_tx_support(struct priv *priv)
 /**
  * Check whether a device can support vectorized Tx.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  *
  * @return
  *   1 if supported, negative errno value if not.
  */
 int __attribute__((cold))
-priv_check_vec_tx_support(struct priv *priv)
+mlx5_check_vec_tx_support(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
+
        if (!priv->tx_vec_en ||
            priv->txqs_n > MLX5_VPMD_MIN_TXQS ||
            priv->mps != MLX5_MPW_ENHANCED ||
@@ -330,7 +333,7 @@ priv_check_vec_tx_support(struct priv *priv)
  *   1 if supported, negative errno value if not.
  */
 int __attribute__((cold))
-rxq_check_vec_support(struct mlx5_rxq_data *rxq)
+mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
 {
        struct mlx5_rxq_ctrl *ctrl =
                container_of(rxq, struct mlx5_rxq_ctrl, rxq);
@@ -343,15 +346,16 @@ rxq_check_vec_support(struct mlx5_rxq_data *rxq)
 /**
  * Check whether a device can support vectorized Rx.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  *
  * @return
  *   1 if supported, negative errno value if not.
  */
 int __attribute__((cold))
-priv_check_vec_rx_support(struct priv *priv)
+mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        uint16_t i;
 
        if (!priv->rx_vec_en)
@@ -362,7 +366,7 @@ priv_check_vec_rx_support(struct priv *priv)
 
                if (!rxq)
                        continue;
-               if (rxq_check_vec_support(rxq) < 0)
+               if (mlx5_rxq_check_vec_support(rxq) < 0)
                        break;
        }
        if (i != priv->rxqs_n)
index 06f83ef..cf42477 100644
@@ -193,8 +193,8 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
                vst1q_u8((void *)t_wqe, ctrl);
                /* Fill ESEG in the header. */
                vst1q_u16((void *)(t_wqe + 1),
-                         (uint16x8_t) { 0, 0, cs_flags, rte_cpu_to_be_16(len),
-                                        0, 0, 0, 0 });
+                         ((uint16x8_t) { 0, 0, cs_flags, rte_cpu_to_be_16(len),
+                                         0, 0, 0, 0 }));
                txq->wqe_ci = wqe_ci;
        }
        if (!n)
@@ -320,10 +320,10 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
        vst1q_u8((void *)t_wqe, ctrl);
        /* Fill ESEG in the header. */
        vst1q_u8((void *)(t_wqe + 1),
-                (uint8x16_t) { 0, 0, 0, 0,
-                               cs_flags, 0, 0, 0,
-                               0, 0, 0, 0,
-                               0, 0, 0, 0 });
+                ((uint8x16_t) { 0, 0, 0, 0,
+                                cs_flags, 0, 0, 0,
+                                0, 0, 0, 0,
+                                0, 0, 0, 0 }));
 #ifdef MLX5_PMD_SOFT_COUNTERS
        txq->stats.opackets += pkts_n;
 #endif
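
The extra parentheses around the NEON compound literals matter because intrinsics such as vst1q_u16() may be implemented as macros on some toolchains; the preprocessor splits macro arguments on commas it sees outside parentheses, so the commas inside the braced initializer would otherwise be taken as argument separators. A generic sketch of the failure mode, using a hypothetical SHOW macro:

    #include <stdio.h>

    struct pair { int a, b; };

    /* If SHOW were a function, braces alone would be fine; as a macro,
     * the commas inside a bare compound literal split it into several
     * arguments and break the expansion. */
    #define SHOW(v) printf("%d %d\n", (v).a, (v).b)

    int
    main(void)
    {
            /* SHOW((struct pair){ 1, 2 });  -- would not preprocess */
            SHOW(((struct pair){ 1, 2 })); /* extra parens: one argument */
            return 0;
    }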
@@ -806,6 +806,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
                uint16x4_t mask;
                uint16x4_t byte_cnt;
                uint32x4_t ptype_info, flow_tag;
+               register uint64x2_t c0, c1, c2, c3;
                uint8_t *p0, *p1, *p2, *p3;
                uint8_t *e0 = (void *)&elts[pos]->pkt_len;
                uint8_t *e1 = (void *)&elts[pos + 1]->pkt_len;
@@ -822,6 +823,16 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
                p1 = p0 + (pkts_n - pos > 1) * sizeof(struct mlx5_cqe);
                p2 = p1 + (pkts_n - pos > 2) * sizeof(struct mlx5_cqe);
                p3 = p2 + (pkts_n - pos > 3) * sizeof(struct mlx5_cqe);
+               /* B.0 (CQE 3) load a block having op_own. */
+               c3 = vld1q_u64((uint64_t *)(p3 + 48));
+               /* B.0 (CQE 2) load a block having op_own. */
+               c2 = vld1q_u64((uint64_t *)(p2 + 48));
+               /* B.0 (CQE 1) load a block having op_own. */
+               c1 = vld1q_u64((uint64_t *)(p1 + 48));
+               /* B.0 (CQE 0) load a block having op_own. */
+               c0 = vld1q_u64((uint64_t *)(p0 + 48));
+               /* Synchronize for loading the rest of blocks. */
+               rte_io_rmb();
                /* Prefetch next 4 CQEs. */
                if (pkts_n - pos >= 2 * MLX5_VPMD_DESCS_PER_LOOP) {
                        unsigned int next = pos + MLX5_VPMD_DESCS_PER_LOOP;
@@ -831,50 +842,46 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
                        rte_prefetch_non_temporal(&cq[next + 3]);
                }
                __asm__ volatile (
-               /* B.1 (CQE 3) load a block having op_own. */
-               "ld1 {v19.16b}, [%[p3]] \n\t"
-               "sub %[p3], %[p3], #48 \n\t"
-               /* B.2 (CQE 3) load the rest blocks. */
+               /* B.1 (CQE 3) load the rest of blocks. */
                "ld1 {v16.16b - v18.16b}, [%[p3]] \n\t"
+               /* B.2 (CQE 3) move the block having op_own. */
+               "mov v19.16b, %[c3].16b \n\t"
                /* B.3 (CQE 3) extract 16B fields. */
                "tbl v23.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t"
+               /* B.1 (CQE 2) load the rest of blocks. */
+               "ld1 {v16.16b - v18.16b}, [%[p2]] \n\t"
                /* B.4 (CQE 3) adjust CRC length. */
                "sub v23.8h, v23.8h, %[crc_adj].8h \n\t"
-               /* B.1 (CQE 2) load a block having op_own. */
-               "ld1 {v19.16b}, [%[p2]] \n\t"
-               "sub %[p2], %[p2], #48 \n\t"
                /* C.1 (CQE 3) generate final structure for mbuf. */
                "tbl v15.16b, {v23.16b}, %[mb_shuf_m].16b \n\t"
-               /* B.2 (CQE 2) load the rest blocks. */
-               "ld1 {v16.16b - v18.16b}, [%[p2]] \n\t"
+               /* B.2 (CQE 2) move the block having op_own. */
+               "mov v19.16b, %[c2].16b \n\t"
                /* B.3 (CQE 2) extract 16B fields. */
                "tbl v22.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t"
+               /* B.1 (CQE 1) load the rest of blocks. */
+               "ld1 {v16.16b - v18.16b}, [%[p1]] \n\t"
                /* B.4 (CQE 2) adjust CRC length. */
                "sub v22.8h, v22.8h, %[crc_adj].8h \n\t"
-               /* B.1 (CQE 1) load a block having op_own. */
-               "ld1 {v19.16b}, [%[p1]] \n\t"
-               "sub %[p1], %[p1], #48 \n\t"
                /* C.1 (CQE 2) generate final structure for mbuf. */
                "tbl v14.16b, {v22.16b}, %[mb_shuf_m].16b \n\t"
-               /* B.2 (CQE 1) load the rest blocks. */
-               "ld1 {v16.16b - v18.16b}, [%[p1]] \n\t"
+               /* B.2 (CQE 1) move the block having op_own. */
+               "mov v19.16b, %[c1].16b \n\t"
                /* B.3 (CQE 1) extract 16B fields. */
                "tbl v21.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t"
+               /* B.1 (CQE 0) load the rest of blocks. */
+               "ld1 {v16.16b - v18.16b}, [%[p0]] \n\t"
                /* B.4 (CQE 1) adjust CRC length. */
                "sub v21.8h, v21.8h, %[crc_adj].8h \n\t"
-               /* B.1 (CQE 0) load a block having op_own. */
-               "ld1 {v19.16b}, [%[p0]] \n\t"
-               "sub %[p0], %[p0], #48 \n\t"
                /* C.1 (CQE 1) generate final structure for mbuf. */
                "tbl v13.16b, {v21.16b}, %[mb_shuf_m].16b \n\t"
-               /* B.2 (CQE 0) load the rest blocks. */
-               "ld1 {v16.16b - v18.16b}, [%[p0]] \n\t"
+               /* B.2 (CQE 0) move the block having op_own. */
+               "mov v19.16b, %[c0].16b \n\t"
+               /* A.1 load mbuf pointers. */
+               "ld1 {v24.2d - v25.2d}, [%[elts_p]] \n\t"
                /* B.3 (CQE 0) extract 16B fields. */
                "tbl v20.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t"
                /* B.4 (CQE 0) adjust CRC length. */
                "sub v20.8h, v20.8h, %[crc_adj].8h \n\t"
-               /* A.1 load mbuf pointers. */
-               "ld1 {v24.2d - v25.2d}, [%[elts_p]] \n\t"
                /* D.1 extract op_own byte. */
                "tbl %[op_own].8b, {v20.16b - v23.16b}, %[owner_shuf_m].8b \n\t"
                /* C.2 (CQE 3) adjust flow mark. */
@@ -909,9 +916,9 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
                 [byte_cnt]"=&w"(byte_cnt),
                 [ptype_info]"=&w"(ptype_info),
                 [flow_tag]"=&w"(flow_tag)
-               :[p3]"r"(p3 + 48), [p2]"r"(p2 + 48),
-                [p1]"r"(p1 + 48), [p0]"r"(p0 + 48),
+               :[p3]"r"(p3), [p2]"r"(p2), [p1]"r"(p1), [p0]"r"(p0),
                 [e3]"r"(e3), [e2]"r"(e2), [e1]"r"(e1), [e0]"r"(e0),
+                [c3]"w"(c3), [c2]"w"(c2), [c1]"w"(c1), [c0]"w"(c0),
                 [elts_p]"r"(elts_p),
                 [pkts_p]"r"(pkts_p),
                 [cqe_shuf_m]"w"(cqe_shuf_m),
index 7ef2c59..7931429 100644
@@ -825,7 +825,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
                /* B.2 copy mbuf pointers. */
                _mm_storeu_si128((__m128i *)&pkts[pos], mbp1);
                _mm_storeu_si128((__m128i *)&pkts[pos + 2], mbp2);
-               rte_compiler_barrier();
+               rte_io_rmb();
                /* C.1 load remained CQE data and extract necessary fields. */
                cqe_tmp2 = _mm_load_si128((__m128i *)&cq[pos + p3]);
                cqe_tmp1 = _mm_load_si128((__m128i *)&cq[pos + p2]);
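
Replacing rte_compiler_barrier() with rte_io_rmb() mirrors the barrier added to the scalar poll loop earlier in this series: a compiler barrier only stops the compiler from reordering loads, while an I/O read barrier also orders them on weakly ordered CPUs, so the CQE ownership check is guaranteed to be observed before the remaining CQE fields are read. A hedged sketch of the intended ordering, with simplified field names:

    #include <rte_atomic.h> /* rte_io_rmb() */

    struct cqe {
            volatile unsigned char op_own;  /* ownership/opcode byte */
            volatile unsigned int byte_cnt; /* packet length */
    };

    /* Simplified polling sketch: no load of byte_cnt may be hoisted
     * above the ownership check. */
    static int
    poll_one(struct cqe *cqe, unsigned int *len)
    {
            unsigned char op_own = cqe->op_own;

            if (op_own & 1) /* hypothetical "hardware still owns it" bit */
                    return 0;
            rte_io_rmb();   /* order the check before the field loads */
            *len = cqe->byte_cnt;
            return 1;
    }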
index 5cd1ab8..7ab3100 100644
 /**
  * Initialise the socket to communicate with the secondary process.
  *
- * @param[in] priv
- *   Pointer to private structure.
+ * @param[in] dev
+ *   Pointer to Ethernet device.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-priv_socket_init(struct priv *priv)
+mlx5_socket_init(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        struct sockaddr_un sun = {
                .sun_family = AF_UNIX,
        };
        int ret;
        int flags;
-       struct stat file_stat;
 
        /*
         * Initialise the socket to communicate with the secondary
@@ -67,70 +67,77 @@ priv_socket_init(struct priv *priv)
         */
        ret = socket(AF_UNIX, SOCK_STREAM, 0);
        if (ret < 0) {
-               WARN("secondary process not supported: %s", strerror(errno));
-               return ret;
+               rte_errno = errno;
+               DRV_LOG(WARNING, "port %u secondary process not supported: %s",
+                       dev->data->port_id, strerror(errno));
+               goto error;
        }
        priv->primary_socket = ret;
        flags = fcntl(priv->primary_socket, F_GETFL, 0);
-       if (flags == -1)
-               goto out;
+       if (flags == -1) {
+               rte_errno = errno;
+               goto error;
+       }
        ret = fcntl(priv->primary_socket, F_SETFL, flags | O_NONBLOCK);
-       if (ret < 0)
-               goto out;
+       if (ret < 0) {
+               rte_errno = errno;
+               goto error;
+       }
        snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
                 MLX5_DRIVER_NAME, priv->primary_socket);
-       ret = stat(sun.sun_path, &file_stat);
-       if (!ret)
-               claim_zero(remove(sun.sun_path));
+       remove(sun.sun_path);
        ret = bind(priv->primary_socket, (const struct sockaddr *)&sun,
                   sizeof(sun));
        if (ret < 0) {
-               WARN("cannot bind socket, secondary process not supported: %s",
-                    strerror(errno));
+               rte_errno = errno;
+               DRV_LOG(WARNING,
+                       "port %u cannot bind socket, secondary process not"
+                       " supported: %s",
+                       dev->data->port_id, strerror(errno));
                goto close;
        }
        ret = listen(priv->primary_socket, 0);
        if (ret < 0) {
-               WARN("Secondary process not supported: %s", strerror(errno));
+               rte_errno = errno;
+               DRV_LOG(WARNING, "port %u secondary process not supported: %s",
+                       dev->data->port_id, strerror(errno));
                goto close;
        }
-       return ret;
+       return 0;
 close:
        remove(sun.sun_path);
-out:
+error:
        claim_zero(close(priv->primary_socket));
        priv->primary_socket = 0;
-       return -(ret);
+       return -rte_errno;
 }
 
 /**
  * Un-initialise the socket to communicate with the secondary process.
  *
- * @param[in] priv
- *   Pointer to private structure.
- *
- * @return
- *   0 on success, errno value on failure.
+ * @param[in] dev
  */
-int
-priv_socket_uninit(struct priv *priv)
+void
+mlx5_socket_uninit(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
+
        MKSTR(path, "/var/tmp/%s_%d", MLX5_DRIVER_NAME, priv->primary_socket);
        claim_zero(close(priv->primary_socket));
        priv->primary_socket = 0;
        claim_zero(remove(path));
-       return 0;
 }
 
 /**
  * Handle socket interrupts.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  */
 void
-priv_socket_handle(struct priv *priv)
+mlx5_socket_handle(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        int conn_sock;
        int ret = 0;
        struct cmsghdr *cmsg = NULL;
@@ -152,25 +159,30 @@ priv_socket_handle(struct priv *priv)
        /* Accept the connection from the client. */
        conn_sock = accept(priv->primary_socket, NULL, NULL);
        if (conn_sock < 0) {
-               WARN("connection failed: %s", strerror(errno));
+               DRV_LOG(WARNING, "port %u connection failed: %s",
+                       dev->data->port_id, strerror(errno));
                return;
        }
        ret = setsockopt(conn_sock, SOL_SOCKET, SO_PASSCRED, &(int){1},
                                         sizeof(int));
        if (ret < 0) {
-               WARN("cannot change socket options");
-               goto out;
+               ret = errno;
+               DRV_LOG(WARNING, "port %u cannot change socket options: %s",
+                       dev->data->port_id, strerror(ret));
+               goto error;
        }
        ret = recvmsg(conn_sock, &msg, MSG_WAITALL);
        if (ret < 0) {
-               WARN("received an empty message: %s", strerror(errno));
-               goto out;
+               ret = errno;
+               DRV_LOG(WARNING, "port %u received an empty message: %s",
+                       dev->data->port_id, strerror(ret));
+               goto error;
        }
        /* Expect to receive credentials only. */
        cmsg = CMSG_FIRSTHDR(&msg);
        if (cmsg == NULL) {
-               WARN("no message");
-               goto out;
+               DRV_LOG(WARNING, "port %u no message", dev->data->port_id);
+               goto error;
        }
        if ((cmsg->cmsg_type == SCM_CREDENTIALS) &&
                (cmsg->cmsg_len >= sizeof(*cred))) {
@@ -179,14 +191,16 @@ priv_socket_handle(struct priv *priv)
        }
        cmsg = CMSG_NXTHDR(&msg, cmsg);
        if (cmsg != NULL) {
-               WARN("Message wrongly formatted");
-               goto out;
+               DRV_LOG(WARNING, "port %u message wrongly formatted",
+                       dev->data->port_id);
+               goto error;
        }
        /* Make sure all the ancillary data was received and valid. */
        if ((cred == NULL) || (cred->uid != getuid()) ||
            (cred->gid != getgid())) {
-               WARN("wrong credentials");
-               goto out;
+               DRV_LOG(WARNING, "port %u wrong credentials",
+                       dev->data->port_id);
+               goto error;
        }
        /* Set-up the ancillary data. */
        cmsg = CMSG_FIRSTHDR(&msg);
@@ -198,27 +212,29 @@ priv_socket_handle(struct priv *priv)
        *fd = priv->ctx->cmd_fd;
        ret = sendmsg(conn_sock, &msg, 0);
        if (ret < 0)
-               WARN("cannot send response");
-out:
+               DRV_LOG(WARNING, "port %u cannot send response",
+                       dev->data->port_id);
+error:
        close(conn_sock);
 }
 
 /**
  * Connect to the primary process.
  *
- * @param[in] priv
- *   Pointer to private structure.
+ * @param[in] dev
+ *   Pointer to Ethernet device.
  *
  * @return
- *   fd on success, negative errno value on failure.
+ *   fd on success, negative errno value otherwise and rte_errno is set.
  */
 int
-priv_socket_connect(struct priv *priv)
+mlx5_socket_connect(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        struct sockaddr_un sun = {
                .sun_family = AF_UNIX,
        };
-       int socket_fd;
+       int socket_fd = -1;
        int *fd = NULL;
        int ret;
        struct ucred *cred;
@@ -238,57 +254,75 @@ priv_socket_connect(struct priv *priv)
 
        ret = socket(AF_UNIX, SOCK_STREAM, 0);
        if (ret < 0) {
-               WARN("cannot connect to primary");
-               return ret;
+               rte_errno = errno;
+               DRV_LOG(WARNING, "port %u cannot connect to primary",
+                       dev->data->port_id);
+               goto error;
        }
        socket_fd = ret;
        snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
                 MLX5_DRIVER_NAME, priv->primary_socket);
        ret = connect(socket_fd, (const struct sockaddr *)&sun, sizeof(sun));
        if (ret < 0) {
-               WARN("cannot connect to primary");
-               goto out;
+               rte_errno = errno;
+               DRV_LOG(WARNING, "port %u cannot connect to primary",
+                       dev->data->port_id);
+               goto error;
        }
        cmsg = CMSG_FIRSTHDR(&msg);
        if (cmsg == NULL) {
-               DEBUG("cannot get first message");
-               goto out;
+               rte_errno = EINVAL;
+               DRV_LOG(DEBUG, "port %u cannot get first message",
+                       dev->data->port_id);
+               goto error;
        }
        cmsg->cmsg_level = SOL_SOCKET;
        cmsg->cmsg_type = SCM_CREDENTIALS;
        cmsg->cmsg_len = CMSG_LEN(sizeof(*cred));
        cred = (struct ucred *)CMSG_DATA(cmsg);
        if (cred == NULL) {
-               DEBUG("no credentials received");
-               goto out;
+               rte_errno = EINVAL;
+               DRV_LOG(DEBUG, "port %u no credentials received",
+                       dev->data->port_id);
+               goto error;
        }
        cred->pid = getpid();
        cred->uid = getuid();
        cred->gid = getgid();
        ret = sendmsg(socket_fd, &msg, MSG_DONTWAIT);
        if (ret < 0) {
-               WARN("cannot send credentials to primary: %s",
-                    strerror(errno));
-               goto out;
+               rte_errno = errno;
+               DRV_LOG(WARNING,
+                       "port %u cannot send credentials to primary: %s",
+                       dev->data->port_id, strerror(errno));
+               goto error;
        }
        ret = recvmsg(socket_fd, &msg, MSG_WAITALL);
        if (ret <= 0) {
-               WARN("no message from primary: %s", strerror(errno));
-               goto out;
+               rte_errno = errno;
+               DRV_LOG(WARNING, "port %u no message from primary: %s",
+                       dev->data->port_id, strerror(errno));
+               goto error;
        }
        cmsg = CMSG_FIRSTHDR(&msg);
        if (cmsg == NULL) {
-               WARN("No file descriptor received");
-               goto out;
+               rte_errno = EINVAL;
+               DRV_LOG(WARNING, "port %u no file descriptor received",
+                       dev->data->port_id);
+               goto error;
        }
        fd = (int *)CMSG_DATA(cmsg);
-       if (*fd <= 0) {
-               WARN("no file descriptor received: %s", strerror(errno));
-               ret = *fd;
-               goto out;
+       if (*fd < 0) {
+               rte_errno = EINVAL;
+               DRV_LOG(WARNING, "port %u no valid file descriptor received",
+                       dev->data->port_id);
+               goto error;
        }
        ret = *fd;
-out:
        close(socket_fd);
        return ret;
+error:
+       if (socket_fd != -1)
+               close(socket_fd);
+       return -rte_errno;
 }
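
The whole primary/secondary exchange above rests on AF_UNIX ancillary data: credentials travel as SCM_CREDENTIALS and the Verbs command file descriptor as SCM_RIGHTS. A minimal, self-contained sketch of the fd-sending half (error handling elided; not driver code):

    #include <string.h>
    #include <sys/socket.h>
    #include <sys/uio.h>

    /* Send one file descriptor over a connected AF_UNIX socket. */
    static int
    send_fd(int sock, int fd)
    {
            union {
                    char buf[CMSG_SPACE(sizeof(int))];
                    struct cmsghdr align; /* force proper alignment */
            } control;
            char dummy = '\0';
            struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
            struct msghdr msg = {
                    .msg_iov = &iov,
                    .msg_iovlen = 1,
                    .msg_control = control.buf,
                    .msg_controllen = sizeof(control.buf),
            };
            struct cmsghdr *cmsg;

            memset(&control, 0, sizeof(control));
            cmsg = CMSG_FIRSTHDR(&msg);
            cmsg->cmsg_level = SOL_SOCKET;
            cmsg->cmsg_type = SCM_RIGHTS;
            cmsg->cmsg_len = CMSG_LEN(sizeof(int));
            memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
            return sendmsg(sock, &msg, 0) < 0 ? -1 : 0;
    }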
index 2427585..345ed70 100644
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <inttypes.h>
 #include <linux/sockios.h>
 #include <linux/ethtool.h>
+#include <stdint.h>
+#include <stdio.h>
 
 #include <rte_ethdev.h>
 #include <rte_common.h>
@@ -47,6 +50,7 @@ struct mlx5_counter_ctrl {
        char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
        /* Name of the counter on the device table. */
        char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
+       uint32_t ib:1; /**< Nonzero for IB counters. */
 };
 
 static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
@@ -121,6 +125,7 @@ static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
        {
                .dpdk_name = "rx_out_of_buffer",
                .ctr_name = "out_of_buffer",
+               .ib = 1,
        },
 };
 
@@ -129,39 +134,56 @@ static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init);
 /**
  * Read device counters table.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param[out] stats
  *   Counters table output buffer.
  *
  * @return
- *   0 on success and stats is filled, negative on error.
+ *   0 on success and stats is filled, negative errno value otherwise and
+ *   rte_errno is set.
  */
 static int
-priv_read_dev_counters(struct priv *priv, uint64_t *stats)
+mlx5_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
        unsigned int i;
        struct ifreq ifr;
        unsigned int stats_sz = xstats_ctrl->stats_n * sizeof(uint64_t);
        unsigned char et_stat_buf[sizeof(struct ethtool_stats) + stats_sz];
        struct ethtool_stats *et_stats = (struct ethtool_stats *)et_stat_buf;
+       int ret;
 
        et_stats->cmd = ETHTOOL_GSTATS;
        et_stats->n_stats = xstats_ctrl->stats_n;
        ifr.ifr_data = (caddr_t)et_stats;
-       if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
-               WARN("unable to read statistic values from device");
-               return -1;
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
+       if (ret) {
+               DRV_LOG(WARNING,
+                       "port %u unable to read statistic values from device",
+                       dev->data->port_id);
+               return ret;
        }
        for (i = 0; i != xstats_n; ++i) {
-               if (priv_is_ib_cntr(mlx5_counters_init[i].ctr_name))
-                       priv_get_cntr_sysfs(priv,
-                                           mlx5_counters_init[i].ctr_name,
-                                           &stats[i]);
-               else
+               if (mlx5_counters_init[i].ib) {
+                       FILE *file;
+                       MKSTR(path, "%s/ports/1/hw_counters/%s",
+                             priv->ibdev_path,
+                             mlx5_counters_init[i].ctr_name);
+
+                       file = fopen(path, "rb");
+                       if (file) {
+                               int n = fscanf(file, "%" SCNu64, &stats[i]);
+
+                               fclose(file);
+                               if (n != 1)
+                                       stats[i] = 0;
+                       }
+               } else {
                        stats[i] = (uint64_t)
                                et_stats->data[xstats_ctrl->dev_table_idx[i]];
+               }
        }
        return 0;
 }
@@ -169,22 +191,26 @@ priv_read_dev_counters(struct priv *priv, uint64_t *stats)
 /**
  * Query the number of statistics provided by ETHTOOL.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  *
  * @return
- *   Number of statistics on success, -1 on error.
+ *   Number of statistics on success, negative errno value otherwise and
+ *   rte_errno is set.
  */
 static int
-priv_ethtool_get_stats_n(struct priv *priv) {
+mlx5_ethtool_get_stats_n(struct rte_eth_dev *dev)
+{
        struct ethtool_drvinfo drvinfo;
        struct ifreq ifr;
+       int ret;
 
        drvinfo.cmd = ETHTOOL_GDRVINFO;
        ifr.ifr_data = (caddr_t)&drvinfo;
-       if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
-               WARN("unable to query number of statistics");
-               return -1;
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
+       if (ret) {
+               DRV_LOG(WARNING, "port %u unable to query number of statistics",
+                       dev->data->port_id);
+               return ret;
        }
        return drvinfo.n_stats;
 }
@@ -192,12 +218,13 @@ priv_ethtool_get_stats_n(struct priv *priv) {
 /**
  * Init the structures to read device counters.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  */
 void
-priv_xstats_init(struct priv *priv)
+mlx5_xstats_init(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
        unsigned int i;
        unsigned int j;
@@ -205,12 +232,15 @@ priv_xstats_init(struct priv *priv)
        struct ethtool_gstrings *strings = NULL;
        unsigned int dev_stats_n;
        unsigned int str_sz;
+       int ret;
 
-       dev_stats_n = priv_ethtool_get_stats_n(priv);
-       if (dev_stats_n < 1) {
-               WARN("no extended statistics available");
+       ret = mlx5_ethtool_get_stats_n(dev);
+       if (ret < 0) {
+               DRV_LOG(WARNING, "port %u no extended statistics available",
+                       dev->data->port_id);
                return;
        }
+       dev_stats_n = ret;
        xstats_ctrl->stats_n = dev_stats_n;
        /* Allocate memory to grab stat names and values. */
        str_sz = dev_stats_n * ETH_GSTRING_LEN;
@@ -218,15 +248,18 @@ priv_xstats_init(struct priv *priv)
                  rte_malloc("xstats_strings",
                             str_sz + sizeof(struct ethtool_gstrings), 0);
        if (!strings) {
-               WARN("unable to allocate memory for xstats");
+               DRV_LOG(WARNING, "port %u unable to allocate memory for xstats",
+                       dev->data->port_id);
                return;
        }
        strings->cmd = ETHTOOL_GSTRINGS;
        strings->string_set = ETH_SS_STATS;
        strings->len = dev_stats_n;
        ifr.ifr_data = (caddr_t)strings;
-       if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) {
-               WARN("unable to get statistic names");
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
+       if (ret) {
+               DRV_LOG(WARNING, "port %u unable to get statistic names",
+                       dev->data->port_id);
                goto free;
        }
        for (j = 0; j != xstats_n; ++j)
@@ -244,68 +277,67 @@ priv_xstats_init(struct priv *priv)
                }
        }
        for (j = 0; j != xstats_n; ++j) {
-               if (priv_is_ib_cntr(mlx5_counters_init[j].ctr_name))
+               if (mlx5_counters_init[j].ib)
                        continue;
                if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) {
-                       WARN("counter \"%s\" is not recognized",
-                            mlx5_counters_init[j].dpdk_name);
+                       DRV_LOG(WARNING,
+                               "port %u counter \"%s\" is not recognized",
+                               dev->data->port_id,
+                               mlx5_counters_init[j].dpdk_name);
                        goto free;
                }
        }
        /* Copy to base at first time. */
        assert(xstats_n <= MLX5_MAX_XSTATS);
-       priv_read_dev_counters(priv, xstats_ctrl->base);
+       ret = mlx5_read_dev_counters(dev, xstats_ctrl->base);
+       if (ret)
+               DRV_LOG(ERR, "port %u cannot read device counters: %s",
+                       dev->data->port_id, strerror(rte_errno));
 free:
        rte_free(strings);
 }
 
 /**
- * Get device extended statistics.
+ * DPDK callback to get extended device statistics.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param[out] stats
  *   Pointer to rte extended stats table.
+ * @param n
+ *   The size of the stats table.
  *
  * @return
  *   Number of extended stats on success and stats is filled,
- *   negative on error.
+ *   negative on error and rte_errno is set.
  */
-static int
-priv_xstats_get(struct priv *priv, struct rte_eth_xstat *stats)
+int
+mlx5_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *stats,
+               unsigned int n)
 {
-       struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
+       struct priv *priv = dev->data->dev_private;
        unsigned int i;
-       unsigned int n = xstats_n;
        uint64_t counters[n];
 
-       if (priv_read_dev_counters(priv, counters) < 0)
-               return -1;
-       for (i = 0; i != xstats_n; ++i) {
-               stats[i].id = i;
-               stats[i].value = (counters[i] - xstats_ctrl->base[i]);
-       }
-       return n;
-}
-
-/**
- * Reset device extended statistics.
- *
- * @param priv
- *   Pointer to private structure.
- */
-static void
-priv_xstats_reset(struct priv *priv)
-{
-       struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
-       unsigned int i;
-       unsigned int n = xstats_n;
-       uint64_t counters[n];
+       if (n >= xstats_n && stats) {
+               struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
+               int stats_n;
+               int ret;
 
-       if (priv_read_dev_counters(priv, counters) < 0)
-               return;
-       for (i = 0; i != n; ++i)
-               xstats_ctrl->base[i] = counters[i];
+               stats_n = mlx5_ethtool_get_stats_n(dev);
+               if (stats_n < 0)
+                       return stats_n;
+               if (xstats_ctrl->stats_n != stats_n)
+                       mlx5_xstats_init(dev);
+               ret = mlx5_read_dev_counters(dev, counters);
+               if (ret)
+                       return ret;
+               for (i = 0; i != xstats_n; ++i) {
+                       stats[i].id = i;
+                       stats[i].value = (counters[i] - xstats_ctrl->base[i]);
+               }
+       }
+       return xstats_n;
 }
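
The `n >= xstats_n && stats` guard implements the usual ethdev xstats contract: with a NULL or undersized table only the required count is returned. A hedged application-side sketch of the resulting two-call pattern (standard rte_ethdev API, error handling trimmed):

    #include <stdlib.h>
    #include <rte_ethdev.h>

    static void
    dump_xstats(uint16_t port_id)
    {
    	int n = rte_eth_xstats_get(port_id, NULL, 0); /* size query */
    	struct rte_eth_xstat *xs;

    	if (n <= 0)
    		return;
    	xs = calloc(n, sizeof(*xs));
    	if (xs != NULL && rte_eth_xstats_get(port_id, xs, n) == n) {
    		/* xs[i].id / xs[i].value are valid here. */
    	}
    	free(xs);
    }
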
 
 /**
@@ -315,16 +347,19 @@ priv_xstats_reset(struct priv *priv)
  *   Pointer to Ethernet device structure.
  * @param[out] stats
  *   Stats structure output buffer.
+ *
+ * @return
+ *   0 on success and stats is filled, negative errno value otherwise and
+ *   rte_errno is set.
  */
 int
 mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-       struct priv *priv = mlx5_get_priv(dev);
+       struct priv *priv = dev->data->dev_private;
        struct rte_eth_stats tmp = {0};
        unsigned int i;
        unsigned int idx;
 
-       priv_lock(priv);
        /* Add software counters. */
        for (i = 0; (i != priv->rxqs_n); ++i) {
                struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
@@ -370,7 +405,6 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
        /* FIXME: retrieve and add hardware counters. */
 #endif
        *stats = tmp;
-       priv_unlock(priv);
        return 0;
 }
 
@@ -387,7 +421,6 @@ mlx5_stats_reset(struct rte_eth_dev *dev)
        unsigned int i;
        unsigned int idx;
 
-       priv_lock(priv);
        for (i = 0; (i != priv->rxqs_n); ++i) {
                if ((*priv->rxqs)[i] == NULL)
                        continue;
@@ -405,45 +438,6 @@ mlx5_stats_reset(struct rte_eth_dev *dev)
 #ifndef MLX5_PMD_SOFT_COUNTERS
        /* FIXME: reset hardware counters. */
 #endif
-       priv_unlock(priv);
-}
-
-/**
- * DPDK callback to get extended device statistics.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param[out] stats
- *   Stats table output buffer.
- * @param n
- *   The size of the stats table.
- *
- * @return
- *   Number of xstats on success, negative on failure.
- */
-int
-mlx5_xstats_get(struct rte_eth_dev *dev,
-               struct rte_eth_xstat *stats, unsigned int n)
-{
-       struct priv *priv = mlx5_get_priv(dev);
-       int ret = xstats_n;
-
-       if (n >= xstats_n && stats) {
-               struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
-               int stats_n;
-
-               priv_lock(priv);
-               stats_n = priv_ethtool_get_stats_n(priv);
-               if (stats_n < 0) {
-                       priv_unlock(priv);
-                       return -1;
-               }
-               if (xstats_ctrl->stats_n != stats_n)
-                       priv_xstats_init(priv);
-               ret = priv_xstats_get(priv, stats);
-               priv_unlock(priv);
-       }
-       return ret;
 }
 
 /**
@@ -455,19 +449,30 @@ mlx5_xstats_get(struct rte_eth_dev *dev,
 void
 mlx5_xstats_reset(struct rte_eth_dev *dev)
 {
-       struct priv *priv = mlx5_get_priv(dev);
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
        int stats_n;
+       unsigned int i;
+       unsigned int n = xstats_n;
+       uint64_t counters[n];
+       int ret;
 
-       priv_lock(priv);
-       stats_n = priv_ethtool_get_stats_n(priv);
-       if (stats_n < 0)
-               goto unlock;
+       stats_n = mlx5_ethtool_get_stats_n(dev);
+       if (stats_n < 0) {
+               DRV_LOG(ERR, "port %u cannot get stats: %s", dev->data->port_id,
+                       strerror(-stats_n));
+               return;
+       }
        if (xstats_ctrl->stats_n != stats_n)
-               priv_xstats_init(priv);
-       priv_xstats_reset(priv);
-unlock:
-       priv_unlock(priv);
+               mlx5_xstats_init(dev);
+       ret = mlx5_read_dev_counters(dev, counters);
+       if (ret) {
+               DRV_LOG(ERR, "port %u cannot read device counters: %s",
+                       dev->data->port_id, strerror(rte_errno));
+               return;
+       }
+       for (i = 0; i != n; ++i)
+               xstats_ctrl->base[i] = counters[i];
 }
 
 /**
@@ -484,21 +489,18 @@ unlock:
  *   Number of xstats names.
  */
 int
-mlx5_xstats_get_names(struct rte_eth_dev *dev,
-               struct rte_eth_xstat_name *xstats_names, unsigned int n)
+mlx5_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
+                     struct rte_eth_xstat_name *xstats_names, unsigned int n)
 {
-       struct priv *priv = mlx5_get_priv(dev);
        unsigned int i;
 
        if (n >= xstats_n && xstats_names) {
-               priv_lock(priv);
                for (i = 0; i != xstats_n; ++i) {
                        strncpy(xstats_names[i].name,
                                mlx5_counters_init[i].dpdk_name,
                                RTE_ETH_XSTATS_NAME_SIZE);
                        xstats_names[i].name[RTE_ETH_XSTATS_NAME_SIZE - 1] = 0;
                }
-               priv_unlock(priv);
        }
        return xstats_n;
 }
index d682ea2..214543f 100644
 #include "mlx5_rxtx.h"
 #include "mlx5_utils.h"
 
+/**
+ * Stop traffic on Tx queues.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
 static void
-priv_txq_stop(struct priv *priv)
+mlx5_txq_stop(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        unsigned int i;
 
        for (i = 0; i != priv->txqs_n; ++i)
-               mlx5_priv_txq_release(priv, i);
+               mlx5_txq_release(dev, i);
 }
 
+/**
+ * Start traffic on Tx queues.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
 static int
-priv_txq_start(struct priv *priv)
+mlx5_txq_start(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        unsigned int i;
-       int ret = 0;
+       int ret;
 
        /* Add memory regions to Tx queues. */
        for (i = 0; i != priv->txqs_n; ++i) {
                unsigned int idx = 0;
                struct mlx5_mr *mr;
-               struct mlx5_txq_ctrl *txq_ctrl = mlx5_priv_txq_get(priv, i);
+               struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
 
                if (!txq_ctrl)
                        continue;
                LIST_FOREACH(mr, &priv->mr, next) {
-                       priv_txq_mp2mr_reg(priv, &txq_ctrl->txq, mr->mp, idx++);
+                       mlx5_txq_mp2mr_reg(&txq_ctrl->txq, mr->mp, idx++);
                        if (idx == MLX5_PMD_TX_MP_CACHE)
                                break;
                }
                txq_alloc_elts(txq_ctrl);
-               txq_ctrl->ibv = mlx5_priv_txq_ibv_new(priv, i);
+               txq_ctrl->ibv = mlx5_txq_ibv_new(dev, i);
                if (!txq_ctrl->ibv) {
-                       ret = ENOMEM;
+                       rte_errno = ENOMEM;
                        goto error;
                }
        }
-       return -ret;
+       ret = mlx5_tx_uar_remap(dev, priv->ctx->cmd_fd);
+       if (ret)
+               goto error;
+       return 0;
 error:
-       priv_txq_stop(priv);
-       return -ret;
+       ret = rte_errno; /* Save rte_errno before cleanup. */
+       mlx5_txq_stop(dev);
+       rte_errno = ret; /* Restore rte_errno. */
+       return -rte_errno;
 }
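
The save/restore pair on the error path above is the idiom used throughout this rework, since rollback helpers may themselves fail and clobber rte_errno. As a standalone sketch (cleanup() is a hypothetical stand-in for mlx5_txq_stop() and friends):

    #include <rte_errno.h>

    static void
    cleanup(void)
    {
    	/* Rollback work that may overwrite rte_errno. */
    }

    static int
    fail_and_rollback(void)
    {
    	int ret = rte_errno; /* Save rte_errno before cleanup. */

    	cleanup();
    	rte_errno = ret; /* Restore rte_errno. */
    	return -rte_errno; /* Report the original failure. */
    }
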
 
+/**
+ * Stop traffic on Rx queues.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
 static void
-priv_rxq_stop(struct priv *priv)
+mlx5_rxq_stop(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        unsigned int i;
 
        for (i = 0; i != priv->rxqs_n; ++i)
-               mlx5_priv_rxq_release(priv, i);
+               mlx5_rxq_release(dev, i);
 }
 
+/**
+ * Start traffic on Rx queues.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
 static int
-priv_rxq_start(struct priv *priv)
+mlx5_rxq_start(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        unsigned int i;
        int ret = 0;
 
        for (i = 0; i != priv->rxqs_n; ++i) {
-               struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_priv_rxq_get(priv, i);
+               struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
 
                if (!rxq_ctrl)
                        continue;
                ret = rxq_alloc_elts(rxq_ctrl);
                if (ret)
                        goto error;
-               rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, i);
-               if (!rxq_ctrl->ibv) {
-                       ret = ENOMEM;
+               rxq_ctrl->ibv = mlx5_rxq_ibv_new(dev, i);
+               if (!rxq_ctrl->ibv)
                        goto error;
-               }
        }
-       return -ret;
+       return 0;
 error:
-       priv_rxq_stop(priv);
-       return -ret;
+       ret = rte_errno; /* Save rte_errno before cleanup. */
+       mlx5_rxq_stop(dev);
+       rte_errno = ret; /* Restore rte_errno. */
+       return -rte_errno;
 }
 
 /**
@@ -126,68 +165,73 @@ error:
  *   Pointer to Ethernet device structure.
  *
  * @return
- *   0 on success, negative errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_dev_start(struct rte_eth_dev *dev)
 {
        struct priv *priv = dev->data->dev_private;
        struct mlx5_mr *mr = NULL;
-       int err;
+       int ret;
 
        dev->data->dev_started = 1;
-       priv_lock(priv);
-       err = priv_flow_create_drop_queue(priv);
-       if (err) {
-               ERROR("%p: Drop queue allocation failed: %s",
-                     (void *)dev, strerror(err));
+       ret = mlx5_flow_create_drop_queue(dev);
+       if (ret) {
+               DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
+                       dev->data->port_id, strerror(rte_errno));
                goto error;
        }
-       DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
+       DRV_LOG(DEBUG, "port %u allocating and configuring hash Rx queues",
+               dev->data->port_id);
        rte_mempool_walk(mlx5_mp2mr_iter, priv);
-       err = priv_txq_start(priv);
-       if (err) {
-               ERROR("%p: TXQ allocation failed: %s",
-                     (void *)dev, strerror(err));
+       ret = mlx5_txq_start(dev);
+       if (ret) {
+               DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
+                       dev->data->port_id, strerror(rte_errno));
+               goto error;
+       }
+       ret = mlx5_rxq_start(dev);
+       if (ret) {
+               DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
+                       dev->data->port_id, strerror(rte_errno));
                goto error;
        }
-       err = priv_rxq_start(priv);
-       if (err) {
-               ERROR("%p: RXQ allocation failed: %s",
-                     (void *)dev, strerror(err));
+       ret = mlx5_rx_intr_vec_enable(dev);
+       if (ret) {
+               DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
+                       dev->data->port_id);
                goto error;
        }
-       err = priv_rx_intr_vec_enable(priv);
-       if (err) {
-               ERROR("%p: RX interrupt vector creation failed",
-                     (void *)priv);
+       mlx5_xstats_init(dev);
+       ret = mlx5_traffic_enable(dev);
+       if (ret) {
+               DRV_LOG(DEBUG, "port %u failed to set default flows",
+                       dev->data->port_id);
                goto error;
        }
-       priv_xstats_init(priv);
-       /* Update link status and Tx/Rx callbacks for the first time. */
-       memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link));
-       INFO("Forcing port %u link to be up", dev->data->port_id);
-       err = priv_force_link_status_change(priv, ETH_LINK_UP);
-       if (err) {
-               DEBUG("Failed to set port %u link to be up",
-                     dev->data->port_id);
+       ret = mlx5_flow_start(dev, &priv->flows);
+       if (ret) {
+               DRV_LOG(DEBUG, "port %u failed to set flows",
+                       dev->data->port_id);
                goto error;
        }
-       priv_dev_interrupt_handler_install(priv, dev);
-       priv_unlock(priv);
+       dev->tx_pkt_burst = mlx5_select_tx_function(dev);
+       dev->rx_pkt_burst = mlx5_select_rx_function(dev);
+       mlx5_dev_interrupt_handler_install(dev);
        return 0;
 error:
+       ret = rte_errno; /* Save rte_errno before cleanup. */
        /* Rollback. */
        dev->data->dev_started = 0;
        for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
-               priv_mr_release(priv, mr);
-       priv_flow_stop(priv, &priv->flows);
-       priv_dev_traffic_disable(priv, dev);
-       priv_txq_stop(priv);
-       priv_rxq_stop(priv);
-       priv_flow_delete_drop_queue(priv);
-       priv_unlock(priv);
-       return err;
+               mlx5_mr_release(mr);
+       mlx5_flow_stop(dev, &priv->flows);
+       mlx5_traffic_disable(dev);
+       mlx5_txq_stop(dev);
+       mlx5_rxq_stop(dev);
+       mlx5_flow_delete_drop_queue(dev);
+       rte_errno = ret; /* Restore rte_errno. */
+       return -rte_errno;
 }
 
 /**
@@ -204,40 +248,40 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
        struct priv *priv = dev->data->dev_private;
        struct mlx5_mr *mr;
 
-       priv_lock(priv);
        dev->data->dev_started = 0;
        /* Prevent crashes when queues are still in use. */
        dev->rx_pkt_burst = removed_rx_burst;
        dev->tx_pkt_burst = removed_tx_burst;
        rte_wmb();
        usleep(1000 * priv->rxqs_n);
-       DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
-       priv_flow_stop(priv, &priv->flows);
-       priv_dev_traffic_disable(priv, dev);
-       priv_rx_intr_vec_disable(priv);
-       priv_dev_interrupt_handler_uninstall(priv, dev);
-       priv_txq_stop(priv);
-       priv_rxq_stop(priv);
+       DRV_LOG(DEBUG, "port %u cleaning up and destroying hash Rx queues",
+               dev->data->port_id);
+       mlx5_flow_stop(dev, &priv->flows);
+       mlx5_traffic_disable(dev);
+       mlx5_rx_intr_vec_disable(dev);
+       mlx5_dev_interrupt_handler_uninstall(dev);
+       mlx5_txq_stop(dev);
+       mlx5_rxq_stop(dev);
        for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
-               priv_mr_release(priv, mr);
-       priv_flow_delete_drop_queue(priv);
-       priv_unlock(priv);
+               mlx5_mr_release(mr);
+       mlx5_flow_delete_drop_queue(dev);
 }
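
The stop path keeps the pre-existing quiesce trick: stub burst callbacks are published before anything is torn down. A self-contained sketch of that step (quiesce_data_path is a hypothetical wrapper; the stubs correspond to removed_rx_burst/removed_tx_burst above):

    #include <unistd.h>
    #include <rte_atomic.h>
    #include <rte_ethdev.h>

    static void
    quiesce_data_path(struct rte_eth_dev *dev, unsigned int rxqs_n,
    		  eth_rx_burst_t rx_stub, eth_tx_burst_t tx_stub)
    {
    	dev->rx_pkt_burst = rx_stub;
    	dev->tx_pkt_burst = tx_stub;
    	rte_wmb(); /* publish the stubs to all lcores */
    	usleep(1000 * rxqs_n); /* grace period for in-flight bursts */
    }
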
 
 /**
  * Enable traffic flows configured by control plane
  *
- * @param priv
+ * @param dev
- *   Pointer to Ethernet device private data.
- * @param dev
  *   Pointer to Ethernet device structure.
  *
  * @return
- *   0 on success.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-priv_dev_traffic_enable(struct priv *priv, struct rte_eth_dev *dev)
+mlx5_traffic_enable(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        struct rte_flow_item_eth bcast = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
        };
@@ -270,8 +314,9 @@ priv_dev_traffic_enable(struct priv *priv, struct rte_eth_dev *dev)
                        .type = 0,
                };
 
-               claim_zero(mlx5_ctrl_flow(dev, &promisc, &promisc));
-               return 0;
+               ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
+               if (ret)
+                       goto error;
        }
        if (dev->data->all_multicast) {
                struct rte_flow_item_eth multicast = {
@@ -280,7 +325,9 @@ priv_dev_traffic_enable(struct priv *priv, struct rte_eth_dev *dev)
                        .type = 0,
                };
 
-               claim_zero(mlx5_ctrl_flow(dev, &multicast, &multicast));
+               ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
+               if (ret)
+                       goto error;
        } else {
                /* Add broadcast/multicast flows. */
                for (i = 0; i != vlan_filter_n; ++i) {
@@ -340,74 +387,49 @@ priv_dev_traffic_enable(struct priv *priv, struct rte_eth_dev *dev)
                                goto error;
                }
                if (!vlan_filter_n) {
-                       ret = mlx5_ctrl_flow(dev, &unicast,
-                                            &unicast_mask);
+                       ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
                        if (ret)
                                goto error;
                }
        }
        return 0;
 error:
-       return rte_errno;
+       ret = rte_errno; /* Save rte_errno before cleanup. */
+       mlx5_flow_list_flush(dev, &priv->ctrl_flows);
+       rte_errno = ret; /* Restore rte_errno. */
+       return -rte_errno;
 }
 
 
 /**
  * Disable traffic flows configured by control plane
  *
- * @param priv
- *   Pointer to Ethernet device private data.
  * @param dev
 *   Pointer to Ethernet device structure.
- *
- * @return
- *   0 on success.
- */
-int
-priv_dev_traffic_disable(struct priv *priv, struct rte_eth_dev *dev)
-{
-       (void)dev;
-       priv_flow_flush(priv, &priv->ctrl_flows);
-       return 0;
-}
-
-/**
- * Restart traffic flows configured by control plane
- *
- * @param priv
- *   Pointer to Ethernet device private data.
- * @param dev
- *   Pointer to Ethernet device structure.
- *
- * @return
- *   0 on success.
  */
-int
-priv_dev_traffic_restart(struct priv *priv, struct rte_eth_dev *dev)
+void
+mlx5_traffic_disable(struct rte_eth_dev *dev)
 {
-       if (dev->data->dev_started) {
-               priv_dev_traffic_disable(priv, dev);
-               priv_dev_traffic_enable(priv, dev);
-       }
-       return 0;
+       struct priv *priv = dev->data->dev_private;
+
+       mlx5_flow_list_flush(dev, &priv->ctrl_flows);
 }
 
 /**
  * Restart traffic flows configured by control plane
  *
  * @param dev
 *   Pointer to Ethernet device structure.
  *
  * @return
- *   0 on success.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_traffic_restart(struct rte_eth_dev *dev)
 {
-       struct priv *priv = dev->data->dev_private;
-
-       priv_lock(priv);
-       priv_dev_traffic_restart(priv, dev);
-       priv_unlock(priv);
+       if (dev->data->dev_started) {
+               mlx5_traffic_disable(dev);
+               return mlx5_traffic_enable(dev);
+       }
        return 0;
 }
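
mlx5_traffic_restart() is what the control-path callbacks are expected to call after flipping a mode flag; it rebuilds the control flows only when the port is started. A hedged caller-side sketch (example_promiscuous_enable is hypothetical; the real promiscuous callbacks live in mlx5_rxmode.c, assuming mlx5.h for the prototype):

    #include "mlx5.h" /* mlx5_traffic_restart() */

    static int
    example_promiscuous_enable(struct rte_eth_dev *dev)
    {
    	dev->data->promiscuous = 1;
    	return mlx5_traffic_restart(dev); /* no-op if not started */
    }
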
index 7ca99f5..a5c6b58 100644
@@ -74,7 +74,8 @@ txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
 
        for (i = 0; (i != elts_n); ++i)
                (*txq_ctrl->txq.elts)[i] = NULL;
-       DEBUG("%p: allocated and configured %u WRs", (void *)txq_ctrl, elts_n);
+       DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs",
+               PORT_ID(txq_ctrl->priv), txq_ctrl->idx, elts_n);
        txq_ctrl->txq.elts_head = 0;
        txq_ctrl->txq.elts_tail = 0;
        txq_ctrl->txq.elts_comp = 0;
@@ -95,7 +96,8 @@ txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
        uint16_t elts_tail = txq_ctrl->txq.elts_tail;
        struct rte_mbuf *(*elts)[elts_n] = txq_ctrl->txq.elts;
 
-       DEBUG("%p: freeing WRs", (void *)txq_ctrl);
+       DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs",
+               PORT_ID(txq_ctrl->priv), txq_ctrl->idx);
        txq_ctrl->txq.elts_head = 0;
        txq_ctrl->txq.elts_tail = 0;
        txq_ctrl->txq.elts_comp = 0;
@@ -130,7 +132,7 @@ txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
  *   Thresholds parameters.
  *
  * @return
- *   0 on success, negative errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
@@ -140,50 +142,47 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
        struct mlx5_txq_data *txq = (*priv->txqs)[idx];
        struct mlx5_txq_ctrl *txq_ctrl =
                container_of(txq, struct mlx5_txq_ctrl, txq);
-       int ret = 0;
 
-       priv_lock(priv);
        if (desc <= MLX5_TX_COMP_THRESH) {
-               WARN("%p: number of descriptors requested for TX queue %u"
-                    " must be higher than MLX5_TX_COMP_THRESH, using"
-                    " %u instead of %u",
-                    (void *)dev, idx, MLX5_TX_COMP_THRESH + 1, desc);
+               DRV_LOG(WARNING,
+                       "port %u number of descriptors requested for Tx queue"
+                       " %u must be higher than MLX5_TX_COMP_THRESH, using %u"
+                       " instead of %u",
+                       dev->data->port_id, idx, MLX5_TX_COMP_THRESH + 1, desc);
                desc = MLX5_TX_COMP_THRESH + 1;
        }
        if (!rte_is_power_of_2(desc)) {
                desc = 1 << log2above(desc);
-               WARN("%p: increased number of descriptors in TX queue %u"
-                    " to the next power of two (%d)",
-                    (void *)dev, idx, desc);
+               DRV_LOG(WARNING,
+                       "port %u increased number of descriptors in Tx queue"
+                       " %u to the next power of two (%d)",
+                       dev->data->port_id, idx, desc);
        }
-       DEBUG("%p: configuring queue %u for %u descriptors",
-             (void *)dev, idx, desc);
+       DRV_LOG(DEBUG, "port %u configuring queue %u for %u descriptors",
+               dev->data->port_id, idx, desc);
        if (idx >= priv->txqs_n) {
-               ERROR("%p: queue index out of range (%u >= %u)",
-                     (void *)dev, idx, priv->txqs_n);
-               priv_unlock(priv);
-               return -EOVERFLOW;
+               DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)",
+                       dev->data->port_id, idx, priv->txqs_n);
+               rte_errno = EOVERFLOW;
+               return -rte_errno;
        }
-       if (!mlx5_priv_txq_releasable(priv, idx)) {
-               ret = EBUSY;
-               ERROR("%p: unable to release queue index %u",
-                     (void *)dev, idx);
-               goto out;
+       if (!mlx5_txq_releasable(dev, idx)) {
+               rte_errno = EBUSY;
+               DRV_LOG(ERR, "port %u unable to release queue index %u",
+                       dev->data->port_id, idx);
+               return -rte_errno;
        }
-       mlx5_priv_txq_release(priv, idx);
-       txq_ctrl = mlx5_priv_txq_new(priv, idx, desc, socket, conf);
+       mlx5_txq_release(dev, idx);
+       txq_ctrl = mlx5_txq_new(dev, idx, desc, socket, conf);
        if (!txq_ctrl) {
-               ERROR("%p: unable to allocate queue index %u",
-                     (void *)dev, idx);
-               ret = ENOMEM;
-               goto out;
+               DRV_LOG(ERR, "port %u unable to allocate queue index %u",
+                       dev->data->port_id, idx);
+               return -rte_errno;
        }
-       DEBUG("%p: adding TX queue %p to list",
-             (void *)dev, (void *)txq_ctrl);
+       DRV_LOG(DEBUG, "port %u adding Tx queue %u to list",
+               dev->data->port_id, idx);
        (*priv->txqs)[idx] = &txq_ctrl->txq;
-out:
-       priv_unlock(priv);
-       return -ret;
+       return 0;
 }
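
The rounding above relies on log2above() returning ceil(log2(v)), so `1 << log2above(desc)` is the next power of two at or above desc. A quick check (log2above() comes from mlx5_utils.h):

    #include <assert.h>
    #include "mlx5_utils.h" /* log2above() */

    static void
    check_rounding(void)
    {
    	assert((1 << log2above(512)) == 512);   /* already a power of two */
    	assert((1 << log2above(1000)) == 1024); /* rounded up */
    }
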
 
 /**
@@ -204,37 +203,40 @@ mlx5_tx_queue_release(void *dpdk_txq)
                return;
        txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
        priv = txq_ctrl->priv;
-       priv_lock(priv);
        for (i = 0; (i != priv->txqs_n); ++i)
                if ((*priv->txqs)[i] == txq) {
-                       DEBUG("%p: removing TX queue %p from list",
-                             (void *)priv->dev, (void *)txq_ctrl);
-                       mlx5_priv_txq_release(priv, i);
+                       mlx5_txq_release(ETH_DEV(priv), i);
+                       DRV_LOG(DEBUG, "port %u removing Tx queue %u from list",
+                               PORT_ID(priv), txq_ctrl->idx);
                        break;
                }
-       priv_unlock(priv);
 }
 
 
 /**
- * Map locally UAR used in Tx queues for BlueFlame doorbell.
+ * Mmap Tx UAR (HW doorbell) pages into the reserved UAR address space.
+ * Both the primary and secondary processes mmap them so that the UAR
+ * addresses match in both processes.
  *
- * @param[in] priv
- *   Pointer to private structure.
+ * @param[in] dev
+ *   Pointer to Ethernet device.
  * @param fd
  *   Verbs file descriptor to map UAR pages.
  *
  * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-priv_tx_uar_remap(struct priv *priv, int fd)
+mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
 {
+       struct priv *priv = dev->data->dev_private;
        unsigned int i, j;
        uintptr_t pages[priv->txqs_n];
        unsigned int pages_n = 0;
        uintptr_t uar_va;
+       uintptr_t off;
        void *addr;
+       void *ret;
        struct mlx5_txq_data *txq;
        struct mlx5_txq_ctrl *txq_ctrl;
        int already_mapped;
@@ -251,8 +253,11 @@ priv_tx_uar_remap(struct priv *priv, int fd)
                        continue;
                txq = (*priv->txqs)[i];
                txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
-               uar_va = (uintptr_t)txq_ctrl->txq.bf_reg;
-               uar_va = RTE_ALIGN_FLOOR(uar_va, page_size);
+               assert(txq_ctrl->idx == (uint16_t)i);
+               /* UAR addr from Verbs, used to find dup and offset in page. */
+               uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
+               off = uar_va & (page_size - 1); /* offset in page. */
+               uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */
                already_mapped = 0;
                for (j = 0; j != pages_n; ++j) {
                        if (pages[j] == uar_va) {
@@ -260,16 +265,32 @@ priv_tx_uar_remap(struct priv *priv, int fd)
                                break;
                        }
                }
-               if (already_mapped)
-                       continue;
-               pages[pages_n++] = uar_va;
-               addr = mmap((void *)uar_va, page_size,
-                           PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-                           txq_ctrl->uar_mmap_offset);
-               if (addr != (void *)uar_va) {
-                       ERROR("call to mmap failed on UAR for txq %d\n", i);
-                       return -1;
+               /* new address in reserved UAR address space. */
+               addr = RTE_PTR_ADD(priv->uar_base,
+                                  uar_va & (MLX5_UAR_SIZE - 1));
+               if (!already_mapped) {
+                       pages[pages_n++] = uar_va;
+                       /* fixed mmap to specified address in reserved
+                        * address space.
+                        */
+                       ret = mmap(addr, page_size,
+                                  PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
+                                  txq_ctrl->uar_mmap_offset);
+                       if (ret != addr) {
+                               /* fixed mmap has to return the same address */
+                               DRV_LOG(ERR,
+                                       "port %u call to mmap failed on UAR"
+                                       " for txq %u",
+                                       dev->data->port_id, txq_ctrl->idx);
+                               rte_errno = ENXIO;
+                               return -rte_errno;
+                       }
                }
+               if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once */
+                       txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off);
+               else
+                       assert(txq_ctrl->txq.bf_reg ==
+                              RTE_PTR_ADD((void *)addr, off));
        }
        return 0;
 }
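
The loop above splits each doorbell address into a page base (what is actually mmap'ed) and an in-page offset that is re-applied once the fixed mapping exists. A self-contained sketch of the arithmetic, assuming a power-of-two page_size, with region_sz standing in for MLX5_UAR_SIZE:

    #include <stddef.h>
    #include <stdint.h>

    static void *
    remap_target(void *base, uintptr_t uar_va, size_t region_sz,
    	     size_t page_size, uintptr_t *off)
    {
    	*off = uar_va & (page_size - 1); /* offset within the page */
    	uar_va &= ~(uintptr_t)(page_size - 1); /* page-aligned address */
    	/* Same page, relocated into the reserved window at base. */
    	return (char *)base + (uar_va & (region_sz - 1));
    }
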
@@ -277,17 +298,18 @@ priv_tx_uar_remap(struct priv *priv, int fd)
 /**
  * Create the Tx queue Verbs object.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param idx
  *   Queue index in DPDK Rx queue array
  *
  * @return
- *   The Verbs object initialised if it can be created.
+ *   The Verbs object initialised, NULL otherwise and rte_errno is set.
  */
-struct mlx5_txq_ibv*
-mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
+struct mlx5_txq_ibv *
+mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
        struct mlx5_txq_ctrl *txq_ctrl =
                container_of(txq_data, struct mlx5_txq_ctrl, txq);
@@ -307,9 +329,14 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
        int ret = 0;
 
        assert(txq_data);
+       priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_TX_QUEUE;
+       priv->verbs_alloc_ctx.obj = txq_ctrl;
        if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
-               ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set");
-               goto error;
+               DRV_LOG(ERR,
+                       "port %u MLX5_ENABLE_CQE_COMPRESSION must never be set",
+                       dev->data->port_id);
+               rte_errno = EINVAL;
+               return NULL;
        }
        memset(&tmpl, 0, sizeof(struct mlx5_txq_ibv));
        /* MRs will be registered in mp2mr[] later. */
@@ -322,7 +349,9 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
                cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV;
        tmpl.cq = ibv_create_cq(priv->ctx, cqe_n, NULL, NULL, 0);
        if (tmpl.cq == NULL) {
-               ERROR("%p: CQ creation failure", (void *)txq_ctrl);
+               DRV_LOG(ERR, "port %u Tx queue %u CQ creation failure",
+                       dev->data->port_id, idx);
+               rte_errno = errno;
                goto error;
        }
        attr.init = (struct ibv_qp_init_attr_ex){
@@ -363,7 +392,9 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
        }
        tmpl.qp = ibv_create_qp_ex(priv->ctx, &attr.init);
        if (tmpl.qp == NULL) {
-               ERROR("%p: QP creation failure", (void *)txq_ctrl);
+               DRV_LOG(ERR, "port %u Tx queue %u QP creation failure",
+                       dev->data->port_id, idx);
+               rte_errno = errno;
                goto error;
        }
        attr.mod = (struct ibv_qp_attr){
@@ -374,7 +405,10 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
        };
        ret = ibv_modify_qp(tmpl.qp, &attr.mod, (IBV_QP_STATE | IBV_QP_PORT));
        if (ret) {
-               ERROR("%p: QP state to IBV_QPS_INIT failed", (void *)txq_ctrl);
+               DRV_LOG(ERR,
+                       "port %u Tx queue %u QP state to IBV_QPS_INIT failed",
+                       dev->data->port_id, idx);
+               rte_errno = errno;
                goto error;
        }
        attr.mod = (struct ibv_qp_attr){
@@ -382,19 +416,27 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
        };
        ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
        if (ret) {
-               ERROR("%p: QP state to IBV_QPS_RTR failed", (void *)txq_ctrl);
+               DRV_LOG(ERR,
+                       "port %u Tx queue %u QP state to IBV_QPS_RTR failed",
+                       dev->data->port_id, idx);
+               rte_errno = errno;
                goto error;
        }
        attr.mod.qp_state = IBV_QPS_RTS;
        ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
        if (ret) {
-               ERROR("%p: QP state to IBV_QPS_RTS failed", (void *)txq_ctrl);
+               DRV_LOG(ERR,
+                       "port %u Tx queue %u QP state to IBV_QPS_RTS failed",
+                       dev->data->port_id, idx);
+               rte_errno = errno;
                goto error;
        }
        txq_ibv = rte_calloc_socket(__func__, 1, sizeof(struct mlx5_txq_ibv), 0,
                                    txq_ctrl->socket);
        if (!txq_ibv) {
-               ERROR("%p: cannot allocate memory", (void *)txq_ctrl);
+               DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory",
+                       dev->data->port_id, idx);
+               rte_errno = ENOMEM;
                goto error;
        }
        obj.cq.in = tmpl.cq;
@@ -402,11 +444,16 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
        obj.qp.in = tmpl.qp;
        obj.qp.out = &qp;
        ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
-       if (ret != 0)
+       if (ret != 0) {
+               rte_errno = errno;
                goto error;
+       }
        if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
-               ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
-                     "it should be set to %u", RTE_CACHE_LINE_SIZE);
+               DRV_LOG(ERR,
+                       "port %u wrong MLX5_CQE_SIZE environment variable"
+                       " value: it should be set to %u",
+                       dev->data->port_id, RTE_CACHE_LINE_SIZE);
+               rte_errno = EINVAL;
                goto error;
        }
        txq_data->cqe_n = log2above(cq_info.cqe_cnt);
@@ -414,7 +461,7 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
        txq_data->wqes = qp.sq.buf;
        txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
        txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
-       txq_data->bf_reg = qp.bf.reg;
+       txq_ctrl->bf_reg_orig = qp.bf.reg;
        txq_data->cq_db = cq_info.dbrec;
        txq_data->cqes =
                (volatile struct mlx5_cqe (*)[])
@@ -429,35 +476,45 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
        if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
                txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
        } else {
-               ERROR("Failed to retrieve UAR info, invalid libmlx5.so version");
+               DRV_LOG(ERR,
+                       "port %u failed to retrieve UAR info, invalid"
+                       " libmlx5.so",
+                       dev->data->port_id);
+               rte_errno = EINVAL;
                goto error;
        }
-       DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
-             (void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt));
+       DRV_LOG(DEBUG, "port %u Verbs Tx queue %u: refcnt %d",
+               dev->data->port_id, idx, rte_atomic32_read(&txq_ibv->refcnt));
        LIST_INSERT_HEAD(&priv->txqsibv, txq_ibv, next);
+       txq_ibv->txq_ctrl = txq_ctrl;
+       priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
        return txq_ibv;
 error:
+       ret = rte_errno; /* Save rte_errno before cleanup. */
        if (tmpl.cq)
                claim_zero(ibv_destroy_cq(tmpl.cq));
        if (tmpl.qp)
                claim_zero(ibv_destroy_qp(tmpl.qp));
+       priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
+       rte_errno = ret; /* Restore rte_errno. */
        return NULL;
 }
 
 /**
  * Get a Tx queue Verbs object.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param idx
  *   Queue index in DPDK Rx queue array
  *
  * @return
  *   The Verbs object if it exists.
  */
-struct mlx5_txq_ibv*
-mlx5_priv_txq_ibv_get(struct priv *priv, uint16_t idx)
+struct mlx5_txq_ibv *
+mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_txq_ctrl *txq_ctrl;
 
        if (idx >= priv->txqs_n)
@@ -467,8 +524,8 @@ mlx5_priv_txq_ibv_get(struct priv *priv, uint16_t idx)
        txq_ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
        if (txq_ctrl->ibv) {
                rte_atomic32_inc(&txq_ctrl->ibv->refcnt);
-               DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
-                     (void *)txq_ctrl->ibv,
+               DRV_LOG(DEBUG, "port %u Verbs Tx queue %u: refcnt %d",
+                       dev->data->port_id, txq_ctrl->idx,
                      rte_atomic32_read(&txq_ctrl->ibv->refcnt));
        }
        return txq_ctrl->ibv;
@@ -477,21 +534,19 @@ mlx5_priv_txq_ibv_get(struct priv *priv, uint16_t idx)
 /**
  * Release a Tx Verbs queue object.
  *
- * @param priv
- *   Pointer to private structure.
  * @param txq_ibv
  *   Verbs Tx queue object.
  *
  * @return
- *   0 on success, errno on failure.
+ *   1 while a reference on it exists, 0 when freed.
  */
 int
-mlx5_priv_txq_ibv_release(struct priv *priv, struct mlx5_txq_ibv *txq_ibv)
+mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv)
 {
-       (void)priv;
        assert(txq_ibv);
-       DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
-             (void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt));
+       DRV_LOG(DEBUG, "port %u Verbs Tx queue %u: refcnt %d",
+               PORT_ID(txq_ibv->txq_ctrl->priv),
+               txq_ibv->txq_ctrl->idx, rte_atomic32_read(&txq_ibv->refcnt));
        if (rte_atomic32_dec_and_test(&txq_ibv->refcnt)) {
                claim_zero(ibv_destroy_qp(txq_ibv->qp));
                claim_zero(ibv_destroy_cq(txq_ibv->cq));
@@ -499,21 +554,18 @@ mlx5_priv_txq_ibv_release(struct priv *priv, struct mlx5_txq_ibv *txq_ibv)
                rte_free(txq_ibv);
                return 0;
        }
-       return EBUSY;
+       return 1;
 }
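
Note the changed contract: release helpers now return 1 while references remain and 0 once the object is freed, replacing the old 0/EBUSY scheme. Caller-side shape, as used by mlx5_txq_release() later in this file:

    if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
    	txq->ibv = NULL; /* freed, drop the dangling pointer */
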
 
 /**
  * Return true if a single reference exists on the object.
  *
- * @param priv
- *   Pointer to private structure.
  * @param txq_ibv
  *   Verbs Tx queue object.
  */
 int
-mlx5_priv_txq_ibv_releasable(struct priv *priv, struct mlx5_txq_ibv *txq_ibv)
+mlx5_txq_ibv_releasable(struct mlx5_txq_ibv *txq_ibv)
 {
-       (void)priv;
        assert(txq_ibv);
        return (rte_atomic32_read(&txq_ibv->refcnt) == 1);
 }
@@ -521,20 +573,22 @@ mlx5_priv_txq_ibv_releasable(struct priv *priv, struct mlx5_txq_ibv *txq_ibv)
 /**
  * Verify the Verbs Tx queue list is empty
  *
- * @param priv
- *  Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  *
- * @return the number of object not released.
+ * @return
+ *   The number of objects not released.
  */
 int
-mlx5_priv_txq_ibv_verify(struct priv *priv)
+mlx5_txq_ibv_verify(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        int ret = 0;
        struct mlx5_txq_ibv *txq_ibv;
 
        LIST_FOREACH(txq_ibv, &priv->txqsibv, next) {
-               DEBUG("%p: Verbs Tx queue %p still referenced", (void *)priv,
-                     (void *)txq_ibv);
+               DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced",
+                       dev->data->port_id, txq_ibv->txq_ctrl->idx);
                ++ret;
        }
        return ret;
@@ -543,8 +597,8 @@ mlx5_priv_txq_ibv_verify(struct priv *priv)
 /**
  * Create a DPDK Tx queue.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param idx
  *   TX queue index.
  * @param desc
@@ -555,13 +609,13 @@ mlx5_priv_txq_ibv_verify(struct priv *priv)
  *  Thresholds parameters.
  *
  * @return
- *   A DPDK queue object on success.
+ *   A DPDK queue object on success, NULL otherwise and rte_errno is set.
  */
-struct mlx5_txq_ctrl*
-mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc,
-                 unsigned int socket,
-                 const struct rte_eth_txconf *conf)
+struct mlx5_txq_ctrl *
+mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+            unsigned int socket, const struct rte_eth_txconf *conf)
 {
+       struct priv *priv = dev->data->dev_private;
        const unsigned int max_tso_inline =
                ((MLX5_MAX_TSO_HEADER + (RTE_CACHE_LINE_SIZE - 1)) /
                 RTE_CACHE_LINE_SIZE);
@@ -571,20 +625,23 @@ mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc,
                                 sizeof(*tmpl) +
                                 desc * sizeof(struct rte_mbuf *),
                                 0, socket);
-       if (!tmpl)
+       if (!tmpl) {
+               rte_errno = ENOMEM;
                return NULL;
+       }
        assert(desc > MLX5_TX_COMP_THRESH);
        tmpl->txq.flags = conf->txq_flags;
        tmpl->priv = priv;
        tmpl->socket = socket;
        tmpl->txq.elts_n = log2above(desc);
+       tmpl->idx = idx;
        if (priv->mps == MLX5_MPW_ENHANCED)
                tmpl->txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
        /* MRs will be registered in mp2mr[] later. */
-       DEBUG("priv->device_attr.max_qp_wr is %d",
-             priv->device_attr.orig_attr.max_qp_wr);
-       DEBUG("priv->device_attr.max_sge is %d",
-             priv->device_attr.orig_attr.max_sge);
+       DRV_LOG(DEBUG, "port %u priv->device_attr.max_qp_wr is %d",
+               dev->data->port_id, priv->device_attr.orig_attr.max_qp_wr);
+       DRV_LOG(DEBUG, "port %u priv->device_attr.max_sge is %d",
+               dev->data->port_id, priv->device_attr.orig_attr.max_sge);
        if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
                unsigned int ds_cnt;
 
@@ -605,17 +662,6 @@ mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc,
                                          priv->inline_max_packet_sz) +
                                  (RTE_CACHE_LINE_SIZE - 1)) /
                                 RTE_CACHE_LINE_SIZE) * RTE_CACHE_LINE_SIZE;
-               } else if (priv->tso) {
-                       int inline_diff = tmpl->txq.max_inline - max_tso_inline;
-
-                       /*
-                        * Adjust inline value as Verbs aggregates
-                        * tso_inline and txq_inline fields.
-                        */
-                       tmpl->max_inline_data = inline_diff > 0 ?
-                                              inline_diff *
-                                              RTE_CACHE_LINE_SIZE :
-                                              0;
                } else {
                        tmpl->max_inline_data =
                                tmpl->txq.max_inline * RTE_CACHE_LINE_SIZE;
@@ -635,9 +681,10 @@ mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc,
 
                        max_inline = max_inline - (max_inline %
                                                   RTE_CACHE_LINE_SIZE);
-                       WARN("txq inline is too large (%d) setting it to "
-                            "the maximum possible: %d\n",
-                            priv->txq_inline, max_inline);
+                       DRV_LOG(WARNING,
+                               "port %u txq inline is too large (%d), setting"
+                               " it to the maximum possible: %d",
+                               PORT_ID(priv), priv->txq_inline, max_inline);
                        tmpl->txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
                }
        }
@@ -653,8 +700,8 @@ mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc,
                (struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])(tmpl + 1);
        tmpl->txq.stats.idx = idx;
        rte_atomic32_inc(&tmpl->refcnt);
-       DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
-             (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
+       DRV_LOG(DEBUG, "port %u Tx queue %u: refcnt %d", dev->data->port_id,
+               idx, rte_atomic32_read(&tmpl->refcnt));
        LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
        return tmpl;
 }
@@ -662,17 +709,18 @@ mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc,
 /**
  * Get a Tx queue.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param idx
  *   TX queue index.
  *
  * @return
  *   A pointer to the queue if it exists.
  */
-struct mlx5_txq_ctrl*
-mlx5_priv_txq_get(struct priv *priv, uint16_t idx)
+struct mlx5_txq_ctrl *
+mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_txq_ctrl *ctrl = NULL;
 
        if ((*priv->txqs)[idx]) {
@@ -680,19 +728,17 @@ mlx5_priv_txq_get(struct priv *priv, uint16_t idx)
                                    txq);
                unsigned int i;
 
-               mlx5_priv_txq_ibv_get(priv, idx);
+               mlx5_txq_ibv_get(dev, idx);
                for (i = 0; i != MLX5_PMD_TX_MP_CACHE; ++i) {
-                       struct mlx5_mr *mr = NULL;
-
-                       (void)mr;
-                       if (ctrl->txq.mp2mr[i]) {
-                               mr = priv_mr_get(priv, ctrl->txq.mp2mr[i]->mp);
-                               assert(mr);
-                       }
+                       if (ctrl->txq.mp2mr[i])
+                               claim_nonzero
+                                       (mlx5_mr_get(dev,
+                                                    ctrl->txq.mp2mr[i]->mp));
                }
                rte_atomic32_inc(&ctrl->refcnt);
-               DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
-                     (void *)ctrl, rte_atomic32_read(&ctrl->refcnt));
+               DRV_LOG(DEBUG, "port %u Tx queue %u refcnt %d",
+                       dev->data->port_id,
+                       ctrl->idx, rte_atomic32_read(&ctrl->refcnt));
        }
        return ctrl;
 }
@@ -700,38 +746,38 @@ mlx5_priv_txq_get(struct priv *priv, uint16_t idx)
 /**
  * Release a Tx queue.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param idx
  *   TX queue index.
  *
  * @return
- *   0 on success, errno on failure.
+ *   1 while a reference on it exists, 0 when freed.
  */
 int
-mlx5_priv_txq_release(struct priv *priv, uint16_t idx)
+mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
 {
+       struct priv *priv = dev->data->dev_private;
        unsigned int i;
        struct mlx5_txq_ctrl *txq;
+       size_t page_size = sysconf(_SC_PAGESIZE);
 
        if (!(*priv->txqs)[idx])
                return 0;
        txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
-       DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
-             (void *)txq, rte_atomic32_read(&txq->refcnt));
-       if (txq->ibv) {
-               int ret;
-
-               ret = mlx5_priv_txq_ibv_release(priv, txq->ibv);
-               if (!ret)
-                       txq->ibv = NULL;
-       }
+       DRV_LOG(DEBUG, "port %u Tx queue %u: refcnt %d", dev->data->port_id,
+               txq->idx, rte_atomic32_read(&txq->refcnt));
+       if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
+               txq->ibv = NULL;
        for (i = 0; i != MLX5_PMD_TX_MP_CACHE; ++i) {
                if (txq->txq.mp2mr[i]) {
-                       priv_mr_release(priv, txq->txq.mp2mr[i]);
+                       mlx5_mr_release(txq->txq.mp2mr[i]);
                        txq->txq.mp2mr[i] = NULL;
                }
        }
+       if (priv->uar_base)
+               munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg,
+                      page_size), page_size);
        if (rte_atomic32_dec_and_test(&txq->refcnt)) {
                txq_free_elts(txq);
                LIST_REMOVE(txq, next);
@@ -739,14 +785,14 @@ mlx5_priv_txq_release(struct priv *priv, uint16_t idx)
                (*priv->txqs)[idx] = NULL;
                return 0;
        }
-       return EBUSY;
+       return 1;
 }
 
 /**
  * Verify if the queue can be released.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param idx
  *   TX queue index.
  *
@@ -754,8 +800,9 @@ mlx5_priv_txq_release(struct priv *priv, uint16_t idx)
  *   1 if the queue can be released.
  */
 int
-mlx5_priv_txq_releasable(struct priv *priv, uint16_t idx)
+mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_txq_ctrl *txq;
 
        if (!(*priv->txqs)[idx])
@@ -767,20 +814,22 @@ mlx5_priv_txq_releasable(struct priv *priv, uint16_t idx)
 /**
  * Verify the Tx Queue list is empty
  *
- * @param priv
- *  Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  *
- * @return the number of object not released.
+ * @return
+ *   The number of objects not released.
  */
 int
-mlx5_priv_txq_verify(struct priv *priv)
+mlx5_txq_verify(struct rte_eth_dev *dev)
 {
+       struct priv *priv = dev->data->dev_private;
        struct mlx5_txq_ctrl *txq;
        int ret = 0;
 
        LIST_FOREACH(txq, &priv->txqsctrl, next) {
-               DEBUG("%p: Tx Queue %p still referenced", (void *)priv,
-                     (void *)txq);
+               DRV_LOG(DEBUG, "port %u Tx queue %u still referenced",
+                       dev->data->port_id, txq->idx);
                ++ret;
        }
        return ret;
index 2fbd10b..e48352f 100644
@@ -89,14 +89,21 @@ pmd_drv_log_basename(const char *s)
        return s;
 }
 
+extern int mlx5_logtype;
+
+#define PMD_DRV_LOG___(level, ...) \
+       rte_log(RTE_LOG_ ## level, \
+               mlx5_logtype, \
+               RTE_FMT(MLX5_DRIVER_NAME ": " \
+                       RTE_FMT_HEAD(__VA_ARGS__,), \
+               RTE_FMT_TAIL(__VA_ARGS__,)))
+
 /*
  * When debugging is enabled (NDEBUG not defined), file, line and function
  * information replaces the driver name (MLX5_DRIVER_NAME) in log messages.
  */
 #ifndef NDEBUG
 
-#define PMD_DRV_LOG___(level, ...) \
-       ERRNO_SAFE(RTE_LOG(level, PMD, __VA_ARGS__))
 #define PMD_DRV_LOG__(level, ...) \
        PMD_DRV_LOG___(level, "%s:%u: %s(): " __VA_ARGS__)
 #define PMD_DRV_LOG_(level, s, ...) \
@@ -108,9 +115,6 @@ pmd_drv_log_basename(const char *s)
                __VA_ARGS__)
 
 #else /* NDEBUG */
-
-#define PMD_DRV_LOG___(level, ...) \
-       ERRNO_SAFE(RTE_LOG(level, PMD, MLX5_DRIVER_NAME ": " __VA_ARGS__))
 #define PMD_DRV_LOG__(level, ...) \
        PMD_DRV_LOG___(level, __VA_ARGS__)
 #define PMD_DRV_LOG_(level, s, ...) \
@@ -119,18 +123,15 @@ pmd_drv_log_basename(const char *s)
 #endif /* NDEBUG */
 
 /* Generic printf()-like logging macro with automatic line feed. */
-#define PMD_DRV_LOG(level, ...) \
+#define DRV_LOG(level, ...) \
        PMD_DRV_LOG_(level, \
                __VA_ARGS__ PMD_DRV_LOG_STRIP PMD_DRV_LOG_OPAREN, \
                PMD_DRV_LOG_CPAREN)
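
DRV_LOG() now routes through the mlx5_logtype dynamic log type declared above. A hedged sketch of the registration side (the actual call site is in mlx5.c, outside this excerpt; "pmd.net.mlx5" is the conventional name):

    #include <rte_log.h>

    int mlx5_logtype;

    static void __attribute__((constructor))
    mlx5_log_init(void)
    {
    	mlx5_logtype = rte_log_register("pmd.net.mlx5");
    	if (mlx5_logtype >= 0)
    		rte_log_set_level(mlx5_logtype, RTE_LOG_NOTICE);
    }
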
 
-/*
- * Like assert(), DEBUG() becomes a no-op and claim_zero() does not perform
- * any check when debugging is disabled.
- */
+/* claim_zero() does not perform any check when debugging is disabled. */
 #ifndef NDEBUG
 
-#define DEBUG(...) PMD_DRV_LOG(DEBUG, __VA_ARGS__)
+#define DEBUG(...) DRV_LOG(DEBUG, __VA_ARGS__)
 #define claim_zero(...) assert((__VA_ARGS__) == 0)
 #define claim_nonzero(...) assert((__VA_ARGS__) != 0)
 
@@ -142,9 +143,9 @@ pmd_drv_log_basename(const char *s)
 
 #endif /* NDEBUG */
 
-#define INFO(...) PMD_DRV_LOG(INFO, __VA_ARGS__)
-#define WARN(...) PMD_DRV_LOG(WARNING, __VA_ARGS__)
-#define ERROR(...) PMD_DRV_LOG(ERR, __VA_ARGS__)
+#define INFO(...) DRV_LOG(INFO, __VA_ARGS__)
+#define WARN(...) DRV_LOG(WARNING, __VA_ARGS__)
+#define ERROR(...) DRV_LOG(ERR, __VA_ARGS__)
 
 /* Convenience macros for accessing mbuf fields. */
 #define NEXT(m) ((m)->next)
index 198a69e..dbfa8a0 100644
  *   Toggle filter.
  *
  * @return
- *   0 on success, negative errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 {
        struct priv *priv = dev->data->dev_private;
        unsigned int i;
-       int ret = 0;
 
-       priv_lock(priv);
-       DEBUG("%p: %s VLAN filter ID %" PRIu16,
-             (void *)dev, (on ? "enable" : "disable"), vlan_id);
+       DRV_LOG(DEBUG, "port %u %s VLAN filter ID %" PRIu16,
+               dev->data->port_id, (on ? "enable" : "disable"), vlan_id);
        assert(priv->vlan_filter_n <= RTE_DIM(priv->vlan_filter));
        for (i = 0; (i != priv->vlan_filter_n); ++i)
                if (priv->vlan_filter[i] == vlan_id)
                        break;
        /* Check if there's room for another VLAN filter. */
        if (i == RTE_DIM(priv->vlan_filter)) {
-               ret = -ENOMEM;
-               goto out;
+               rte_errno = ENOMEM;
+               return -rte_errno;
        }
        if (i < priv->vlan_filter_n) {
                assert(priv->vlan_filter_n != 0);
@@ -96,37 +94,49 @@ mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
                priv->vlan_filter[priv->vlan_filter_n] = vlan_id;
                ++priv->vlan_filter_n;
        }
-       if (dev->data->dev_started)
-               priv_dev_traffic_restart(priv, dev);
 out:
-       priv_unlock(priv);
-       return ret;
+       if (dev->data->dev_started)
+               return mlx5_traffic_restart(dev);
+       return 0;
 }
 
 /**
- * Set/reset VLAN stripping for a specific queue.
+ * Callback to set/reset VLAN stripping for a specific queue.
  *
- * @param priv
- *   Pointer to private structure.
- * @param idx
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param queue
  *   RX queue index.
  * @param on
  *   Enable/disable VLAN stripping.
  */
-static void
-priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on)
+void
+mlx5_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue, int on)
 {
-       struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
+       struct priv *priv = dev->data->dev_private;
+       struct mlx5_rxq_data *rxq = (*priv->rxqs)[queue];
        struct mlx5_rxq_ctrl *rxq_ctrl =
                container_of(rxq, struct mlx5_rxq_ctrl, rxq);
        struct ibv_wq_attr mod;
        uint16_t vlan_offloads =
                (on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0) |
                0;
-       int err;
+       int ret;
 
-       DEBUG("set VLAN offloads 0x%x for port %d queue %d",
-             vlan_offloads, rxq->port_id, idx);
+       /* Validate hw support */
+       if (!priv->hw_vlan_strip) {
+               DRV_LOG(ERR, "port %u VLAN stripping is not supported",
+                       dev->data->port_id);
+               return;
+       }
+       /* Validate queue number */
+       if (queue >= priv->rxqs_n) {
+               DRV_LOG(ERR, "port %u VLAN stripping, invalid queue number %d",
+                       dev->data->port_id, queue);
+               return;
+       }
+       DRV_LOG(DEBUG, "port %u set VLAN offloads 0x%x for port %u queue %d",
+               dev->data->port_id, vlan_offloads, rxq->port_id, queue);
        if (!rxq_ctrl->ibv) {
                /* Update related bits in RX queue. */
                rxq->vlan_strip = !!on;
@@ -137,50 +147,16 @@ priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on)
                .flags_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING,
                .flags = vlan_offloads,
        };
-
-       err = ibv_modify_wq(rxq_ctrl->ibv->wq, &mod);
-       if (err) {
-               ERROR("%p: failed to modified stripping mode: %s",
-                     (void *)priv, strerror(err));
+       ret = ibv_modify_wq(rxq_ctrl->ibv->wq, &mod);
+       if (ret) {
+               DRV_LOG(ERR, "port %u failed to modified stripping mode: %s",
+                       dev->data->port_id, strerror(rte_errno));
                return;
        }
-
        /* Update related bits in RX queue. */
        rxq->vlan_strip = !!on;
 }
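
Since the per-queue function is now the ethdev callback itself, applications reach it through rte_eth_dev_set_vlan_strip_on_queue(), and the added checks make a missing HW capability or an out-of-range queue fail with a log instead of a crash. Minimal sketch (port 0 and queue 0 are hypothetical):

    #include <rte_ethdev.h>

    /* Sketch only: enable stripping on one Rx queue of port 0. */
    static void strip_queue_example(void)
    {
            rte_eth_dev_set_vlan_strip_on_queue(0 /* port */, 0 /* queue */, 1);
    }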
 
-/**
- * Callback to set/reset VLAN stripping for a specific queue.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param queue
- *   RX queue index.
- * @param on
- *   Enable/disable VLAN stripping.
- */
-void
-mlx5_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue, int on)
-{
-       struct priv *priv = dev->data->dev_private;
-
-       /* Validate hw support */
-       if (!priv->hw_vlan_strip) {
-               ERROR("VLAN stripping is not supported");
-               return;
-       }
-
-       /* Validate queue number */
-       if (queue >= priv->rxqs_n) {
-               ERROR("VLAN stripping, invalid queue number %d", queue);
-               return;
-       }
-
-       priv_lock(priv);
-       priv_vlan_strip_queue_set(priv, queue, on);
-       priv_unlock(priv);
-}
-
 /**
  * Callback to set/reset VLAN offloads for a port.
  *
@@ -188,6 +164,9 @@ mlx5_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue, int on)
  *   Pointer to Ethernet device structure.
  * @param mask
  *   VLAN offload bit mask.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
 mlx5_vlan_offload_set(struct rte_eth_dev *dev, int mask)
@@ -199,16 +178,13 @@ mlx5_vlan_offload_set(struct rte_eth_dev *dev, int mask)
                int hw_vlan_strip = !!dev->data->dev_conf.rxmode.hw_vlan_strip;
 
                if (!priv->hw_vlan_strip) {
-                       ERROR("VLAN stripping is not supported");
+                       DRV_LOG(ERR, "port %u VLAN stripping is not supported",
+                               dev->data->port_id);
                        return 0;
                }
-
                /* Run on every RX queue and set/reset VLAN stripping. */
-               priv_lock(priv);
                for (i = 0; (i != priv->rxqs_n); i++)
-                       priv_vlan_strip_queue_set(priv, i, hw_vlan_strip);
-               priv_unlock(priv);
+                       mlx5_vlan_strip_queue_set(dev, i, hw_vlan_strip);
        }
-
        return 0;
 }
index 9a35819..d7e9cef 100644 (file)
@@ -674,7 +674,8 @@ mrvl_dev_stop(struct rte_eth_dev *dev)
                pp2_cls_qos_tbl_deinit(priv->qos_tbl);
                priv->qos_tbl = NULL;
        }
-       pp2_ppio_deinit(priv->ppio);
+       if (priv->ppio)
+               pp2_ppio_deinit(priv->ppio);
        priv->ppio = NULL;
 }
 
@@ -1217,8 +1218,8 @@ mrvl_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 static int
 mrvl_fill_bpool(struct mrvl_rxq *rxq, int num)
 {
-       struct buff_release_entry entries[MRVL_PP2_TXD_MAX];
-       struct rte_mbuf *mbufs[MRVL_PP2_TXD_MAX];
+       struct buff_release_entry entries[MRVL_PP2_RXD_MAX];
+       struct rte_mbuf *mbufs[MRVL_PP2_RXD_MAX];
        int i, ret;
        unsigned int core_id;
        struct pp2_hif *hif;
index fa8ff3c..d9cd047 100644 (file)
@@ -301,7 +301,7 @@ nfp_net_tx_queue_release_mbufs(struct nfp_net_txq *txq)
 
        for (i = 0; i < txq->tx_count; i++) {
                if (txq->txbufs[i].mbuf) {
-                       rte_pktmbuf_free(txq->txbufs[i].mbuf);
+                       rte_pktmbuf_free_seg(txq->txbufs[i].mbuf);
                        txq->txbufs[i].mbuf = NULL;
                }
        }
@@ -1244,9 +1244,9 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
        dev_info->reta_size = NFP_NET_CFG_RSS_ITBL_SZ;
        dev_info->hash_key_size = NFP_NET_CFG_RSS_KEY_SZ;
 
-       dev_info->speed_capa = ETH_SPEED_NUM_1G | ETH_LINK_SPEED_10G |
-                              ETH_SPEED_NUM_25G | ETH_SPEED_NUM_40G |
-                              ETH_SPEED_NUM_50G | ETH_LINK_SPEED_100G;
+       dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G |
+                              ETH_LINK_SPEED_25G | ETH_LINK_SPEED_40G |
+                              ETH_LINK_SPEED_50G | ETH_LINK_SPEED_100G;
 
        if (hw->cap & NFP_NET_CFG_CTRL_LSO)
                dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
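
The fix above matters because dev_info->speed_capa is a bitmask of ETH_LINK_SPEED_* capability flags, whereas ETH_SPEED_NUM_* are plain Mb/s values (ETH_SPEED_NUM_10G is simply 10000), so OR-ing them in produces a meaningless mask. A small sketch of the correct usage:

    #include <rte_ethdev.h>

    /* Sketch only: build and test a speed capability mask. */
    static int supports_40g(void)
    {
            uint32_t speed_capa = ETH_LINK_SPEED_10G | ETH_LINK_SPEED_40G;

            return (speed_capa & ETH_LINK_SPEED_40G) != 0;
    }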
@@ -1995,16 +1995,16 @@ nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                        break;
                }
 
+               rxds = &rxq->rxds[rxq->rd_p];
+               if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
+                       break;
+
                /*
                 * Memory barrier to ensure that we won't do other
                 * reads before the DD bit.
                 */
                rte_rmb();
 
-               rxds = &rxq->rxds[rxq->rd_p];
-               if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
-                       break;
-
                /*
                 * We got a packet. Let's alloc a new mbuff for refilling the
                 * free descriptor ring as soon as possible
@@ -2065,6 +2065,8 @@ nfp_net_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                mb->nb_segs = 1;
                mb->next = NULL;
 
+               mb->port = rxq->port_id;
+
                /* Checking the RSS flag */
                nfp_net_set_hash(rxq, rxds, mb);
 
index f11afef..c003640 100644 (file)
 #define NFP_CFG_EXP_BAR         7
 
 #define NFP_CFG_EXP_BAR_CFG_BASE       0x30000
+#define NFP_LOCKFILE_PATH_FMT "%s/nfp%d"
+
+/* get nfp lock file path (/var/lock if root, $HOME otherwise) */
+static void
+nspu_get_lockfile_path(char *buffer, int bufsz, nfpu_desc_t *desc)
+{
+       const char *dir = "/var/lock";
+       const char *home_dir = getenv("HOME");
+
+       if (getuid() != 0 && home_dir != NULL)
+               dir = home_dir;
+
+       /* use current prefix as file path */
+       snprintf(buffer, bufsz, NFP_LOCKFILE_PATH_FMT, dir,
+                       desc->nfp);
+}
 
 /* There could be other NFP userspace tools using the NSP interface.
  * Make sure there is no other process using it and locking the access for
@@ -30,9 +46,7 @@ nspv_aquire_process_lock(nfpu_desc_t *desc)
        struct flock lock;
        char lockname[30];
 
-       memset(&lock, 0, sizeof(lock));
-
-       snprintf(lockname, sizeof(lockname), "/var/lock/nfp%d", desc->nfp);
+       nspu_get_lockfile_path(lockname, sizeof(lockname), desc);
 
        /* Using S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH */
        desc->lock = open(lockname, O_RDWR | O_CREAT, 0666);
@@ -101,8 +115,12 @@ nfpu_open(struct rte_pci_device *pci_dev, nfpu_desc_t *desc, int nfp)
 int
 nfpu_close(nfpu_desc_t *desc)
 {
+       char lockname[30];
+
        rte_free(desc->nspu);
        close(desc->lock);
-       unlink("/var/lock/nfp0");
+
+       nspu_get_lockfile_path(lockname, sizeof(lockname), desc);
+       unlink(lockname);
        return 0;
 }
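
The lock path helper above lets non-root users fall back from /var/lock to $HOME; pairing it with an fcntl() write lock gives the mutual exclusion the comment describes. A self-contained sketch of the same scheme (the path format mirrors NFP_LOCKFILE_PATH_FMT; error handling is minimal):

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    /* Sketch only: take an advisory write lock on the per-device file. */
    static int acquire_nfp_lock(int nfp_id)
    {
            const char *dir = "/var/lock";
            const char *home = getenv("HOME");
            struct flock lock = { .l_type = F_WRLCK, .l_whence = SEEK_SET };
            char path[64];
            int fd;

            if (getuid() != 0 && home != NULL)
                    dir = home;
            snprintf(path, sizeof(path), "%s/nfp%d", dir, nfp_id);
            fd = open(path, O_RDWR | O_CREAT, 0666);
            if (fd < 0)
                    return -1;
            if (fcntl(fd, F_SETLK, &lock) < 0) {
                    close(fd);      /* someone else holds the lock */
                    return -1;
            }
            return fd;
    }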
index 726a5c5..7a5f125 100644 (file)
@@ -91,7 +91,7 @@ static struct rte_eth_link pmd_link = {
        .link_speed = ETH_SPEED_NUM_10G,
        .link_duplex = ETH_LINK_FULL_DUPLEX,
        .link_status = ETH_LINK_DOWN,
-       .link_autoneg = ETH_LINK_AUTONEG,
+       .link_autoneg = ETH_LINK_FIXED,
 };
 
 static uint16_t
index eca3a39..33c6e78 100644 (file)
@@ -127,7 +127,7 @@ octeontx_port_open(struct octeontx_nic *nic)
        int res;
 
        res = 0;
-
+       memset(&bgx_port_conf, 0x0, sizeof(bgx_port_conf));
        PMD_INIT_FUNC_TRACE();
 
        res = octeontx_bgx_port_open(nic->port_id, &bgx_port_conf);
@@ -537,7 +537,6 @@ octeontx_dev_link_update(struct rte_eth_dev *dev,
        struct rte_eth_link link;
        int res;
 
-       res = 0;
        PMD_INIT_FUNC_TRACE();
 
        res = octeontx_port_link_status(nic);
@@ -571,6 +570,7 @@ octeontx_dev_link_update(struct rte_eth_dev *dev,
        case OCTEONTX_LINK_SPEED_RESERVE1:
        case OCTEONTX_LINK_SPEED_RESERVE2:
        default:
+               link.link_speed = ETH_SPEED_NUM_NONE;
                octeontx_log_err("incorrect link speed %d", nic->speed);
                break;
        }
@@ -1142,7 +1142,7 @@ octeontx_create(struct rte_vdev_device *dev, int port, uint8_t evdev,
        return data->port_id;
 
 err:
-       if (port)
+       if (nic)
                octeontx_port_close(nic);
 
        if (eth_dev != NULL) {
index 3385d04..3994710 100644 (file)
@@ -124,7 +124,7 @@ static struct rte_eth_link pmd_link = {
                .link_speed = ETH_SPEED_NUM_10G,
                .link_duplex = ETH_LINK_FULL_DUPLEX,
                .link_status = ETH_LINK_DOWN,
-               .link_autoneg = ETH_LINK_AUTONEG,
+               .link_autoneg = ETH_LINK_FIXED,
 };
 
 static int
index fe42f32..2b9a523 100644 (file)
@@ -133,7 +133,7 @@ void *osal_dma_alloc_coherent(struct ecore_dev *p_dev,
        snprintf(mz_name, sizeof(mz_name) - 1, "%lx",
                                        (unsigned long)rte_get_timer_cycles());
        if (core_id == (unsigned int)LCORE_ID_ANY)
-               core_id = 0;
+               core_id = rte_get_master_lcore();
        socket_id = rte_lcore_to_socket_id(core_id);
        mz = rte_memzone_reserve_aligned(mz_name, size,
                                         socket_id, 0, RTE_CACHE_LINE_SIZE);
@@ -172,7 +172,7 @@ void *osal_dma_alloc_coherent_aligned(struct ecore_dev *p_dev,
        snprintf(mz_name, sizeof(mz_name) - 1, "%lx",
                                        (unsigned long)rte_get_timer_cycles());
        if (core_id == (unsigned int)LCORE_ID_ANY)
-               core_id = 0;
+               core_id = rte_get_master_lcore();
        socket_id = rte_lcore_to_socket_id(core_id);
        mz = rte_memzone_reserve_aligned(mz_name, size, socket_id, 0, align);
        if (!mz) {
@@ -200,6 +200,11 @@ void osal_dma_free_mem(struct ecore_dev *p_dev, dma_addr_t phys)
                        DP_VERBOSE(p_dev, ECORE_MSG_SP,
                                "Free memzone %s\n", ecore_mz_mapping[j]->name);
                        rte_memzone_free(ecore_mz_mapping[j]);
+                       while (j < ecore_mz_count - 1) {
+                               ecore_mz_mapping[j] = ecore_mz_mapping[j + 1];
+                               j++;
+                       }
+                       ecore_mz_count--;
                        return;
                }
        }
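
The loop added above keeps ecore_mz_mapping dense: after freeing entry j, every later pointer shifts down one slot and the count drops, so lookups never hit a stale hole. Generic form of the compaction:

    #include <stddef.h>

    /* Sketch only: remove arr[j] from a packed pointer array. */
    static void array_remove_at(const void *arr[], size_t *count, size_t j)
    {
            while (j + 1 < *count) {
                    arr[j] = arr[j + 1];
                    j++;
            }
            (*count)--;
    }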
index ce5f3a9..a44ce51 100644 (file)
@@ -534,6 +534,12 @@ enum ecore_mf_mode_bit {
        ECORE_MF_UFP_SPECIFIC,
 
        ECORE_MF_DISABLE_ARFS,
+
+       /* Use vlan for steering */
+       ECORE_MF_8021Q_TAGGING,
+
+       /* Use stag for steering */
+       ECORE_MF_8021AD_TAGGING,
 };
 
 enum ecore_ufp_mode {
index 21ddda9..157a605 100644 (file)
@@ -149,6 +149,10 @@ ecore_dcbx_set_params(struct ecore_dcbx_results *p_data,
        }
        p_data->arr[type].update = UPDATE_DCB_DSCP;
 
+       /* Do not add vlan tag 0 when DCB is enabled and port is in UFP mode */
+       if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits))
+               p_data->arr[type].dont_add_vlan0 = true;
+
        /* QM reconf data */
        if (p_hwfn->hw_info.personality == personality)
                p_hwfn->hw_info.offload_tc = tc;
@@ -935,6 +939,7 @@ static void ecore_dcbx_update_protocol_data(struct protocol_dcb_data *p_data,
        p_data->dcb_tc = p_src->arr[type].tc;
        p_data->dscp_enable_flag = p_src->arr[type].dscp_enable;
        p_data->dscp_val = p_src->arr[type].dscp_val;
+       p_data->dcb_dont_add_vlan0 = p_src->arr[type].dont_add_vlan0;
 }
 
 /* Set pf update ramrod command params */
index 9ff4df4..4df99ae 100644 (file)
@@ -29,6 +29,7 @@ struct ecore_dcbx_app_data {
        u8 tc;                  /* Traffic Class */
        bool dscp_enable;       /* DSCP enabled */
        u8 dscp_val;            /* DSCP value */
+       bool dont_add_vlan0;    /* Do not insert a vlan tag with id 0 */
 };
 
 #ifndef __EXTRACT__LINUX__
index da1830c..9affcbc 100644 (file)
@@ -3496,9 +3496,14 @@ ecore_hw_get_nvm_info(struct ecore_hwfn *p_hwfn,
                break;
        case NVM_CFG1_GLOB_MF_MODE_UFP:
                p_hwfn->p_dev->mf_bits = 1 << ECORE_MF_OVLAN_CLSS |
-                                        1 << ECORE_MF_UFP_SPECIFIC;
+                                        1 << ECORE_MF_UFP_SPECIFIC |
+                                        1 << ECORE_MF_8021Q_TAGGING;
+               break;
+       case NVM_CFG1_GLOB_MF_MODE_BD:
+               p_hwfn->p_dev->mf_bits = 1 << ECORE_MF_OVLAN_CLSS |
+                                        1 << ECORE_MF_LLH_PROTO_CLSS |
+                                        1 << ECORE_MF_8021AD_TAGGING;
                break;
-
        case NVM_CFG1_GLOB_MF_MODE_NPAR1_0:
                p_hwfn->p_dev->mf_bits = 1 << ECORE_MF_LLH_MAC_CLSS |
                                         1 << ECORE_MF_LLH_PROTO_CLSS |
@@ -3527,6 +3532,7 @@ ecore_hw_get_nvm_info(struct ecore_hwfn *p_hwfn,
         */
        switch (mf_mode) {
        case NVM_CFG1_GLOB_MF_MODE_MF_ALLOWED:
+       case NVM_CFG1_GLOB_MF_MODE_BD:
                p_hwfn->p_dev->mf_mode = ECORE_MF_OVLAN;
                break;
        case NVM_CFG1_GLOB_MF_MODE_NPAR1_0:
index d8abd60..31ae2a0 100644 (file)
@@ -1515,7 +1515,10 @@ struct protocol_dcb_data {
        u8 dcb_priority /* dcbPri flag value */;
        u8 dcb_tc /* dcb TC value */;
        u8 dscp_val /* dscp value to write if dscp_enable_flag is set */;
-       u8 reserved0;
+/* When DCB is enabled - if this flag is set, don't add VLAN 0 tag to untagged
+ * frames
+ */
+       u8 dcb_dont_add_vlan0;
 };
 
 /*
index e3afc8a..1f16697 100644 (file)
@@ -687,7 +687,7 @@ ecore_sp_update_mcast_bin(struct vport_update_ramrod_data *p_ramrod,
 
        p_ramrod->common.update_approx_mcast_flg = 1;
        for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) {
-               u32 *p_bins = (u32 *)p_params->bins;
+               u32 *p_bins = p_params->bins;
 
                p_ramrod->approx_mcast.bins[i] = OSAL_CPU_TO_LE32(p_bins[i]);
        }
@@ -1556,8 +1556,8 @@ ecore_sp_eth_filter_mcast(struct ecore_hwfn *p_hwfn,
                          enum spq_mode comp_mode,
                          struct ecore_spq_comp_cb *p_comp_data)
 {
-       unsigned long bins[ETH_MULTICAST_MAC_BINS_IN_REGS];
        struct vport_update_ramrod_data *p_ramrod = OSAL_NULL;
+       u32 bins[ETH_MULTICAST_MAC_BINS_IN_REGS];
        struct ecore_spq_entry *p_ent = OSAL_NULL;
        struct ecore_sp_init_data init_data;
        u8 abs_vport_id = 0;
@@ -1596,8 +1596,7 @@ ecore_sp_eth_filter_mcast(struct ecore_hwfn *p_hwfn,
        /* explicitly clear out the entire vector */
        OSAL_MEMSET(&p_ramrod->approx_mcast.bins,
                    0, sizeof(p_ramrod->approx_mcast.bins));
-       OSAL_MEMSET(bins, 0, sizeof(unsigned long) *
-                   ETH_MULTICAST_MAC_BINS_IN_REGS);
+       OSAL_MEMSET(bins, 0, sizeof(u32) * ETH_MULTICAST_MAC_BINS_IN_REGS);
        /* filter ADD op is explicit set op and it removes
        *  any existing filters for the vport.
        */
@@ -1606,16 +1605,15 @@ ecore_sp_eth_filter_mcast(struct ecore_hwfn *p_hwfn,
                        u32 bit;
 
                        bit = ecore_mcast_bin_from_mac(p_filter_cmd->mac[i]);
-                       OSAL_SET_BIT(bit, bins);
+                       bins[bit / 32] |= 1 << (bit % 32);
                }
 
                /* Convert to correct endianity */
                for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) {
                        struct vport_update_ramrod_mcast *p_ramrod_bins;
-                       u32 *p_bins = (u32 *)bins;
 
                        p_ramrod_bins = &p_ramrod->approx_mcast;
-                       p_ramrod_bins->bins[i] = OSAL_CPU_TO_LE32(p_bins[i]);
+                       p_ramrod_bins->bins[i] = OSAL_CPU_TO_LE32(bins[i]);
                }
        }
 
index ed9837b..8cbe8dd 100644 (file)
@@ -332,7 +332,7 @@ struct ecore_sp_vport_update_params {
        u8                      anti_spoofing_en;
        u8                      update_accept_any_vlan_flg;
        u8                      accept_any_vlan;
-       unsigned long           bins[8];
+       u32                     bins[8];
        struct ecore_rss_params *rss_params;
        struct ecore_filter_accept_flags accept_flags;
        struct ecore_sge_tpa_params *sge_tpa_params;
index 7598e7a..83705b8 100644 (file)
@@ -295,6 +295,7 @@ ecore_tunn_set_pf_start_params(struct ecore_hwfn *p_hwfn,
 }
 
 #define ETH_P_8021Q 0x8100
+#define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN         */
 
 enum _ecore_status_t ecore_sp_pf_start(struct ecore_hwfn *p_hwfn,
                                       struct ecore_ptt *p_ptt,
@@ -308,7 +309,7 @@ enum _ecore_status_t ecore_sp_pf_start(struct ecore_hwfn *p_hwfn,
        struct ecore_sp_init_data init_data;
        enum _ecore_status_t rc = ECORE_NOTIMPL;
        u8 page_cnt;
-       int i;
+       u8 i;
 
        /* update initial eq producer */
        ecore_eq_prod_update(p_hwfn,
@@ -343,18 +344,27 @@ enum _ecore_status_t ecore_sp_pf_start(struct ecore_hwfn *p_hwfn,
 
        p_ramrod->outer_tag_config.outer_tag.tci =
                OSAL_CPU_TO_LE16(p_hwfn->hw_info.ovlan);
+       if (OSAL_TEST_BIT(ECORE_MF_8021Q_TAGGING, &p_hwfn->p_dev->mf_bits)) {
+               p_ramrod->outer_tag_config.outer_tag.tpid = ETH_P_8021Q;
+       } else if (OSAL_TEST_BIT(ECORE_MF_8021AD_TAGGING,
+                &p_hwfn->p_dev->mf_bits)) {
+               p_ramrod->outer_tag_config.outer_tag.tpid = ETH_P_8021AD;
+               p_ramrod->outer_tag_config.enable_stag_pri_change = 1;
+       }
+
+       p_ramrod->outer_tag_config.pri_map_valid = 1;
+       for (i = 0; i < ECORE_MAX_PFC_PRIORITIES; i++)
+               p_ramrod->outer_tag_config.inner_to_outer_pri_map[i] = i;
 
+       /* enable_stag_pri_change should be set if port is in BD mode or,
+        * UFP with Host Control mode or, UFP with DCB over base interface.
+        */
        if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits)) {
-               p_ramrod->outer_tag_config.outer_tag.tpid =
-                       OSAL_CPU_TO_LE16(ETH_P_8021Q);
-               if (p_hwfn->ufp_info.pri_type == ECORE_UFP_PRI_OS)
+               if ((p_hwfn->ufp_info.pri_type == ECORE_UFP_PRI_OS) ||
+                   (p_hwfn->p_dcbx_info->results.dcbx_enabled))
                        p_ramrod->outer_tag_config.enable_stag_pri_change = 1;
                else
                        p_ramrod->outer_tag_config.enable_stag_pri_change = 0;
-               p_ramrod->outer_tag_config.pri_map_valid = 1;
-               for (i = 0; i < 8; i++)
-                       p_ramrod->outer_tag_config.inner_to_outer_pri_map[i] =
-                                                                         (u8)i;
        }
 
        /* Place EQ address in RAMROD */
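
The tagging logic above picks the outer TPID from the multi-function mode: 0x8100 (ETH_P_8021Q) for VLAN-based steering, 0x88A8 (ETH_P_8021AD) for service-tag steering, with the priority map now programmed unconditionally. Reduced to its core:

    #include <stdint.h>

    /* Sketch only: TPID choice mirroring the mf_bits test above. */
    static uint16_t outer_tpid(int use_8021ad_tagging)
    {
            return use_8021ad_tagging ? 0x88A8 /* ETH_P_8021AD */
                                      : 0x8100 /* ETH_P_8021Q */;
    }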
@@ -451,7 +461,8 @@ enum _ecore_status_t ecore_sp_pf_update_ufp(struct ecore_hwfn *p_hwfn)
                return rc;
 
        p_ent->ramrod.pf_update.update_enable_stag_pri_change = true;
-       if (p_hwfn->ufp_info.pri_type == ECORE_UFP_PRI_OS)
+       if ((p_hwfn->ufp_info.pri_type == ECORE_UFP_PRI_OS) ||
+           (p_hwfn->p_dcbx_info->results.dcbx_enabled))
                p_ent->ramrod.pf_update.enable_stag_pri_change = 1;
        else
                p_ent->ramrod.pf_update.enable_stag_pri_change = 0;
index b1e26d6..68f40f8 100644 (file)
@@ -2975,8 +2975,7 @@ ecore_iov_vp_update_mcast_bin_param(struct ecore_hwfn *p_hwfn,
 
        p_data->update_approx_mcast_flg = 1;
        OSAL_MEMCPY(p_data->bins, p_mcast_tlv->bins,
-                   sizeof(unsigned long) *
-                   ETH_MULTICAST_MAC_BINS_IN_REGS);
+                   sizeof(u32) * ETH_MULTICAST_MAC_BINS_IN_REGS);
        *tlvs_mask |= 1 << ECORE_IOV_VP_UPDATE_MCAST;
 }
 
index e0f2dd5..8a08911 100644 (file)
@@ -1275,8 +1275,7 @@ ecore_vf_pf_vport_update(struct ecore_hwfn *p_hwfn,
                resp_size += sizeof(struct pfvf_def_resp_tlv);
 
                OSAL_MEMCPY(p_mcast_tlv->bins, p_params->bins,
-                           sizeof(unsigned long) *
-                           ETH_MULTICAST_MAC_BINS_IN_REGS);
+                           sizeof(u32) * ETH_MULTICAST_MAC_BINS_IN_REGS);
        }
 
        update_rx = p_params->accept_flags.update_rx_mode_config;
@@ -1473,7 +1472,7 @@ void ecore_vf_pf_filter_mcast(struct ecore_hwfn *p_hwfn,
                        u32 bit;
 
                        bit = ecore_mcast_bin_from_mac(p_filter_cmd->mac[i]);
-                       OSAL_SET_BIT(bit, sp_params.bins);
+                       sp_params.bins[bit / 32] |= 1 << (bit % 32);
                }
        }
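
Both bins hunks above replace OSAL_SET_BIT on unsigned-long words with explicit 32-bit math, so PF and VF agree on the layout regardless of the host's long size: 256 approximate multicast bins live in eight u32 words. The arithmetic, isolated:

    #include <stdint.h>

    /* Sketch only: set one of 256 approx-mcast bins in u32 words. */
    static void set_mcast_bin(uint32_t bins[8], uint32_t bit)
    {
            bins[bit / 32] |= 1u << (bit % 32);
    }
    /* e.g. bit 37 lands in bins[1], position 5 (37/32 == 1, 37%32 == 5). */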
 
index ecb0064..c6af9ca 100644 (file)
@@ -396,7 +396,13 @@ struct vfpf_vport_update_mcast_bin_tlv {
        struct channel_tlv      tl;
        u8                      padding[4];
 
-       u64             bins[8];
+       /* This was a mistake; there are only 256 approx bins,
+        * and in HSI they're divided into 32-bit values.
+        * As old VFs used to set bits in these words on their side,
+        * the upper half of the array is never expected to contain any data.
+        */
+       u64             bins[4];
+       u64             obsolete_bins[4];
 };
 
 struct vfpf_vport_update_accept_param_tlv {
index 73764e9..7462f1a 100644 (file)
@@ -443,55 +443,59 @@ static void qede_reset_queue_stats(struct qede_dev *qdev, bool xstats)
 }
 
 static int
-qede_start_vport(struct qede_dev *qdev, uint16_t mtu)
+qede_stop_vport(struct ecore_dev *edev)
 {
-       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-       struct ecore_sp_vport_start_params params;
        struct ecore_hwfn *p_hwfn;
+       uint8_t vport_id;
        int rc;
        int i;
 
-       memset(&params, 0, sizeof(params));
-       params.vport_id = 0;
-       params.mtu = mtu;
-       /* @DPDK - Disable FW placement */
-       params.zero_placement_offset = 1;
+       vport_id = 0;
        for_each_hwfn(edev, i) {
                p_hwfn = &edev->hwfns[i];
-               params.concrete_fid = p_hwfn->hw_info.concrete_fid;
-               params.opaque_fid = p_hwfn->hw_info.opaque_fid;
-               rc = ecore_sp_vport_start(p_hwfn, &params);
+               rc = ecore_sp_vport_stop(p_hwfn, p_hwfn->hw_info.opaque_fid,
+                                        vport_id);
                if (rc != ECORE_SUCCESS) {
-                       DP_ERR(edev, "Start V-PORT failed %d\n", rc);
+                       DP_ERR(edev, "Stop V-PORT failed rc = %d\n", rc);
                        return rc;
                }
        }
-       ecore_reset_vport_stats(edev);
-       if (IS_PF(edev))
-               qede_reset_queue_stats(qdev, true);
-       DP_INFO(edev, "VPORT started with MTU = %u\n", mtu);
+
+       DP_INFO(edev, "vport stopped\n");
 
        return 0;
 }
 
 static int
-qede_stop_vport(struct ecore_dev *edev)
+qede_start_vport(struct qede_dev *qdev, uint16_t mtu)
 {
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       struct ecore_sp_vport_start_params params;
        struct ecore_hwfn *p_hwfn;
-       uint8_t vport_id;
        int rc;
        int i;
 
-       vport_id = 0;
+       if (qdev->vport_started)
+               qede_stop_vport(edev);
+
+       memset(&params, 0, sizeof(params));
+       params.vport_id = 0;
+       params.mtu = mtu;
+       /* @DPDK - Disable FW placement */
+       params.zero_placement_offset = 1;
        for_each_hwfn(edev, i) {
                p_hwfn = &edev->hwfns[i];
-               rc = ecore_sp_vport_stop(p_hwfn, p_hwfn->hw_info.opaque_fid,
-                                        vport_id);
+               params.concrete_fid = p_hwfn->hw_info.concrete_fid;
+               params.opaque_fid = p_hwfn->hw_info.opaque_fid;
+               rc = ecore_sp_vport_start(p_hwfn, &params);
                if (rc != ECORE_SUCCESS) {
-                       DP_ERR(edev, "Stop V-PORT failed rc = %d\n", rc);
+                       DP_ERR(edev, "Start V-PORT failed %d\n", rc);
                        return rc;
                }
        }
+       ecore_reset_vport_stats(edev);
+       qdev->vport_started = true;
+       DP_INFO(edev, "VPORT started with MTU = %u\n", mtu);
 
        return 0;
 }
@@ -735,10 +739,10 @@ qede_ucast_filter(struct rte_eth_dev *eth_dev, struct ecore_filter_ucast *ucast,
                                    ETHER_ADDR_LEN) == 0) &&
                             ucast->vni == tmp->vni &&
                             ucast->vlan == tmp->vlan) {
-                               DP_ERR(edev, "Unicast MAC is already added"
-                                      " with vlan = %u, vni = %u\n",
-                                      ucast->vlan,  ucast->vni);
-                                       return -EEXIST;
+                               DP_INFO(edev, "Unicast MAC is already added"
+                                       " with vlan = %u, vni = %u\n",
+                                       ucast->vlan,  ucast->vni);
+                                       return 0;
                        }
                }
                u = rte_malloc(NULL, sizeof(struct qede_ucast_entry),
@@ -772,110 +776,95 @@ qede_ucast_filter(struct rte_eth_dev *eth_dev, struct ecore_filter_ucast *ucast,
 }
 
 static int
-qede_mcast_filter(struct rte_eth_dev *eth_dev, struct ecore_filter_ucast *mcast,
-                 bool add)
+qede_add_mcast_filters(struct rte_eth_dev *eth_dev, struct ether_addr *mc_addrs,
+                      uint32_t mc_addrs_num)
 {
        struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
        struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-       struct ether_addr *mac_addr;
-       struct qede_mcast_entry *tmp = NULL;
-       struct qede_mcast_entry *m;
+       struct ecore_filter_mcast mcast;
+       struct qede_mcast_entry *m = NULL;
+       uint8_t i;
+       int rc;
 
-       mac_addr  = (struct ether_addr *)mcast->mac;
-       if (add) {
-               SLIST_FOREACH(tmp, &qdev->mc_list_head, list) {
-                       if (memcmp(mac_addr, &tmp->mac, ETHER_ADDR_LEN) == 0) {
-                               DP_ERR(edev,
-                                       "Multicast MAC is already added\n");
-                               return -EEXIST;
-                       }
-               }
+       for (i = 0; i < mc_addrs_num; i++) {
                m = rte_malloc(NULL, sizeof(struct qede_mcast_entry),
-                       RTE_CACHE_LINE_SIZE);
+                              RTE_CACHE_LINE_SIZE);
                if (!m) {
-                       DP_ERR(edev,
-                               "Did not allocate memory for mcast\n");
+                       DP_ERR(edev, "Did not allocate memory for mcast\n");
                        return -ENOMEM;
                }
-               ether_addr_copy(mac_addr, &m->mac);
+               ether_addr_copy(&mc_addrs[i], &m->mac);
                SLIST_INSERT_HEAD(&qdev->mc_list_head, m, list);
-               qdev->num_mc_addr++;
-       } else {
-               SLIST_FOREACH(tmp, &qdev->mc_list_head, list) {
-                       if (memcmp(mac_addr, &tmp->mac, ETHER_ADDR_LEN) == 0)
-                               break;
-               }
-               if (tmp == NULL) {
-                       DP_INFO(edev, "Multicast mac is not found\n");
-                       return -EINVAL;
-               }
-               SLIST_REMOVE(&qdev->mc_list_head, tmp,
-                            qede_mcast_entry, list);
-               qdev->num_mc_addr--;
+       }
+       memset(&mcast, 0, sizeof(mcast));
+       mcast.num_mc_addrs = mc_addrs_num;
+       mcast.opcode = ECORE_FILTER_ADD;
+       for (i = 0; i < mc_addrs_num; i++)
+               ether_addr_copy(&mc_addrs[i], (struct ether_addr *)
+                                                       &mcast.mac[i]);
+       rc = ecore_filter_mcast_cmd(edev, &mcast, ECORE_SPQ_MODE_CB, NULL);
+       if (rc != ECORE_SUCCESS) {
+               DP_ERR(edev, "Failed to add multicast filter (rc = %d\n)", rc);
+               return -1;
        }
 
        return 0;
 }
 
+static int qede_del_mcast_filters(struct rte_eth_dev *eth_dev)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       struct qede_mcast_entry *tmp = NULL;
+       struct ecore_filter_mcast mcast;
+       int j;
+       int rc;
+
+       memset(&mcast, 0, sizeof(mcast));
+       mcast.num_mc_addrs = qdev->num_mc_addr;
+       mcast.opcode = ECORE_FILTER_REMOVE;
+       j = 0;
+       SLIST_FOREACH(tmp, &qdev->mc_list_head, list) {
+               ether_addr_copy(&tmp->mac, (struct ether_addr *)&mcast.mac[j]);
+               j++;
+       }
+       rc = ecore_filter_mcast_cmd(edev, &mcast, ECORE_SPQ_MODE_CB, NULL);
+       if (rc != ECORE_SUCCESS) {
+               DP_ERR(edev, "Failed to delete multicast filter\n");
+               return -1;
+       }
+       /* Init the list */
+       while (!SLIST_EMPTY(&qdev->mc_list_head)) {
+               tmp = SLIST_FIRST(&qdev->mc_list_head);
+               SLIST_REMOVE_HEAD(&qdev->mc_list_head, list);
+       }
+       SLIST_INIT(&qdev->mc_list_head);
+
+       return 0;
+}
+
 static enum _ecore_status_t
 qede_mac_int_ops(struct rte_eth_dev *eth_dev, struct ecore_filter_ucast *ucast,
                 bool add)
 {
        struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
        struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-       enum _ecore_status_t rc;
-       struct ecore_filter_mcast mcast;
-       struct qede_mcast_entry *tmp;
-       uint16_t j = 0;
+       enum _ecore_status_t rc = ECORE_INVAL;
 
-       /* Multicast */
-       if (is_multicast_ether_addr((struct ether_addr *)ucast->mac)) {
-               if (add) {
-                       if (qdev->num_mc_addr >= ECORE_MAX_MC_ADDRS) {
-                               DP_ERR(edev,
-                                      "Mcast filter table limit exceeded, "
-                                      "Please enable mcast promisc mode\n");
-                               return -ECORE_INVAL;
-                       }
-               }
-               rc = qede_mcast_filter(eth_dev, ucast, add);
-               if (rc == 0) {
-                       DP_INFO(edev, "num_mc_addrs = %u\n", qdev->num_mc_addr);
-                       memset(&mcast, 0, sizeof(mcast));
-                       mcast.num_mc_addrs = qdev->num_mc_addr;
-                       mcast.opcode = ECORE_FILTER_ADD;
-                       SLIST_FOREACH(tmp, &qdev->mc_list_head, list) {
-                               ether_addr_copy(&tmp->mac,
-                                       (struct ether_addr *)&mcast.mac[j]);
-                               j++;
-                       }
-                       rc = ecore_filter_mcast_cmd(edev, &mcast,
-                                                   ECORE_SPQ_MODE_CB, NULL);
-               }
-               if (rc != ECORE_SUCCESS) {
-                       DP_ERR(edev, "Failed to add multicast filter"
-                              " rc = %d, op = %d\n", rc, add);
-               }
-       } else { /* Unicast */
-               if (add) {
-                       if (qdev->num_uc_addr >=
-                           qdev->dev_info.num_mac_filters) {
-                               DP_ERR(edev,
-                                      "Ucast filter table limit exceeded,"
-                                      " Please enable promisc mode\n");
-                               return -ECORE_INVAL;
-                       }
-               }
-               rc = qede_ucast_filter(eth_dev, ucast, add);
-               if (rc == 0)
-                       rc = ecore_filter_ucast_cmd(edev, ucast,
-                                                   ECORE_SPQ_MODE_CB, NULL);
-               if (rc != ECORE_SUCCESS) {
-                       DP_ERR(edev, "MAC filter failed, rc = %d, op = %d\n",
-                              rc, add);
-               }
+       if (add && (qdev->num_uc_addr >= qdev->dev_info.num_mac_filters)) {
+               DP_ERR(edev, "Ucast filter table limit exceeded,"
+                             " Please enable promisc mode\n");
+                       return ECORE_INVAL;
        }
 
+       rc = qede_ucast_filter(eth_dev, ucast, add);
+       if (rc == 0)
+               rc = ecore_filter_ucast_cmd(edev, ucast,
+                                           ECORE_SPQ_MODE_CB, NULL);
+       if (rc != ECORE_SUCCESS)
+               DP_ERR(edev, "MAC filter failed, rc = %d, op = %d\n",
+                      rc, add);
+
        return rc;
 }
 
@@ -916,7 +905,7 @@ qede_mac_addr_remove(struct rte_eth_dev *eth_dev, uint32_t index)
        ether_addr_copy(&eth_dev->data->mac_addrs[index],
                        (struct ether_addr *)&ucast.mac);
 
-       ecore_filter_ucast_cmd(edev, &ucast, ECORE_SPQ_MODE_CB, NULL);
+       qede_mac_int_ops(eth_dev, &ucast, false);
 }
 
 static void
@@ -1011,9 +1000,9 @@ static int qede_vlan_filter_set(struct rte_eth_dev *eth_dev,
 
                SLIST_FOREACH(tmp, &qdev->vlan_list_head, list) {
                        if (tmp->vid == vlan_id) {
-                               DP_ERR(edev, "VLAN %u already configured\n",
-                                      vlan_id);
-                               return -EEXIST;
+                               DP_INFO(edev, "VLAN %u already configured\n",
+                                       vlan_id);
+                               return 0;
                        }
                }
 
@@ -1116,6 +1105,8 @@ static int qede_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
                DP_INFO(edev, "No offloads are supported with VLAN Q-in-Q"
                        " and classification is based on outer tag only\n");
 
+       qdev->vlan_offload_mask = mask;
+
        DP_INFO(edev, "vlan offload mask %d vlan-strip %d vlan-filter %d\n",
                mask, rxmode->hw_vlan_strip, rxmode->hw_vlan_filter);
 
@@ -1191,13 +1182,6 @@ static int qede_dev_start(struct rte_eth_dev *eth_dev)
 
        PMD_INIT_FUNC_TRACE(edev);
 
-       /* Update MTU only if it has changed */
-       if (qdev->mtu != qdev->new_mtu) {
-               if (qede_update_mtu(eth_dev, qdev->new_mtu))
-                       goto err;
-               qdev->mtu = qdev->new_mtu;
-       }
-
        /* Configure TPA parameters */
        if (rxmode->enable_lro) {
                if (qede_enable_tpa(eth_dev, true))
@@ -1211,6 +1195,9 @@ static int qede_dev_start(struct rte_eth_dev *eth_dev)
        if (qede_start_queues(eth_dev))
                goto err;
 
+       if (IS_PF(edev))
+               qede_reset_queue_stats(qdev, true);
+
        /* Newer SR-IOV PF driver expects RX/TX queues to be started before
         * enabling RSS. Hence RSS configuration is deferred upto this point.
         * Also, we would like to retain similar behavior in PF case, so we
@@ -1224,9 +1211,6 @@ static int qede_dev_start(struct rte_eth_dev *eth_dev)
        if (qede_activate_vport(eth_dev, true))
                goto err;
 
-       /* Bring-up the link */
-       qede_dev_set_link_state(eth_dev, true);
-
        /* Update link status */
        qede_link_update(eth_dev, 0);
 
@@ -1261,8 +1245,8 @@ static void qede_dev_stop(struct rte_eth_dev *eth_dev)
        /* Disable traffic */
        ecore_hw_stop_fastpath(edev); /* TBD - loop */
 
-       /* Bring the link down */
-       qede_dev_set_link_state(eth_dev, false);
+       if (IS_PF(edev))
+               qede_mac_addr_remove(eth_dev, 0);
 
        DP_INFO(edev, "Device is stopped\n");
 }
@@ -1390,20 +1374,11 @@ static int qede_dev_configure(struct rte_eth_dev *eth_dev)
        if (qede_check_fdir_support(eth_dev))
                return -ENOTSUP;
 
-       /* Deallocate resources if held previously. It is needed only if the
-        * queue count has been changed from previous configuration. If its
-        * going to change then it means RX/TX queue setup will be called
-        * again and the fastpath pointers will be reinitialized there.
-        */
-       if (qdev->num_tx_queues != eth_dev->data->nb_tx_queues ||
-           qdev->num_rx_queues != eth_dev->data->nb_rx_queues) {
-               qede_dealloc_fp_resc(eth_dev);
-               /* Proceed with updated queue count */
-               qdev->num_tx_queues = eth_dev->data->nb_tx_queues;
-               qdev->num_rx_queues = eth_dev->data->nb_rx_queues;
-               if (qede_alloc_fp_resc(qdev))
-                       return -ENOMEM;
-       }
+       qede_dealloc_fp_resc(eth_dev);
+       qdev->num_tx_queues = eth_dev->data->nb_tx_queues;
+       qdev->num_rx_queues = eth_dev->data->nb_rx_queues;
+       if (qede_alloc_fp_resc(qdev))
+               return -ENOMEM;
 
        /* If jumbo enabled adjust MTU */
        if (eth_dev->data->dev_conf.rxmode.jumbo_frame)
@@ -1411,19 +1386,9 @@ static int qede_dev_configure(struct rte_eth_dev *eth_dev)
                                eth_dev->data->dev_conf.rxmode.max_rx_pkt_len -
                                ETHER_HDR_LEN - ETHER_CRC_LEN;
 
-       /* VF's MTU has to be set using vport-start where as
-        * PF's MTU can be updated via vport-update.
-        */
-       if (IS_VF(edev)) {
-               if (qede_start_vport(qdev, eth_dev->data->mtu))
-                       return -1;
-       } else {
-               if (qede_update_mtu(eth_dev, eth_dev->data->mtu))
-                       return -1;
-       }
-
+       if (qede_start_vport(qdev, eth_dev->data->mtu))
+               return -1;
        qdev->mtu = eth_dev->data->mtu;
-       qdev->new_mtu = qdev->mtu;
 
        /* Enable VLAN offloads by default */
        ret = qede_vlan_offload_set(eth_dev, ETH_VLAN_STRIP_MASK  |
@@ -1636,12 +1601,15 @@ static void qede_dev_close(struct rte_eth_dev *eth_dev)
                qede_dev_stop(eth_dev);
 
        qede_stop_vport(edev);
+       qdev->vport_started = false;
        qede_fdir_dealloc_resc(eth_dev);
        qede_dealloc_fp_resc(eth_dev);
 
        eth_dev->data->nb_rx_queues = 0;
        eth_dev->data->nb_tx_queues = 0;
 
+       /* Bring the link down */
+       qede_dev_set_link_state(eth_dev, false);
        qdev->ops->common->slowpath_stop(edev);
        qdev->ops->common->remove(edev);
        rte_intr_disable(&pci_dev->intr_handle);
@@ -1935,6 +1903,35 @@ static void qede_allmulticast_disable(struct rte_eth_dev *eth_dev)
                                QED_FILTER_RX_MODE_TYPE_REGULAR);
 }
 
+static int
+qede_set_mc_addr_list(struct rte_eth_dev *eth_dev, struct ether_addr *mc_addrs,
+                     uint32_t mc_addrs_num)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       uint8_t i;
+
+       if (mc_addrs_num > ECORE_MAX_MC_ADDRS) {
+               DP_ERR(edev, "Reached max multicast filters limit,"
+                            "Please enable multicast promisc mode\n");
+               return -ENOSPC;
+       }
+
+       for (i = 0; i < mc_addrs_num; i++) {
+               if (!is_multicast_ether_addr(&mc_addrs[i])) {
+                       DP_ERR(edev, "Not a valid multicast MAC\n");
+                       return -EINVAL;
+               }
+       }
+
+       /* Flush all existing entries */
+       if (qede_del_mcast_filters(eth_dev))
+               return -1;
+
+       /* Set new mcast list */
+       return qede_add_mcast_filters(eth_dev, mc_addrs, mc_addrs_num);
+}
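
With the new set_mc_addr_list op wired into both the PF and VF ops tables further below, applications can replace a port's whole multicast list in one call. Minimal sketch (port 0 and the address are hypothetical):

    #include <rte_ethdev.h>
    #include <rte_ether.h>

    /* Sketch only: install a one-entry multicast list on port 0. */
    static int mc_list_example(void)
    {
            struct ether_addr mc = {
                    .addr_bytes = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 }
            };

            return rte_eth_dev_set_mc_addr_list(0, &mc, 1);
    }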
+
 static int qede_flow_ctrl_set(struct rte_eth_dev *eth_dev,
                              struct rte_eth_fc_conf *fc_conf)
 {
@@ -2087,7 +2084,7 @@ int qede_rss_hash_update(struct rte_eth_dev *eth_dev,
        vport_update_params.vport_id = 0;
        /* pass the L2 handles instead of qids */
        for (i = 0 ; i < ECORE_RSS_IND_TABLE_SIZE ; i++) {
-               idx = qdev->rss_ind_table[i];
+               idx = i % QEDE_RSS_COUNT(qdev);
                rss_params.rss_ind_table[i] = qdev->fp_array[idx].rxq->handle;
        }
        vport_update_params.rss_params = &rss_params;
@@ -2311,8 +2308,6 @@ static int qede_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
        int i;
 
        PMD_INIT_FUNC_TRACE(edev);
-       if (IS_VF(edev))
-               return -ENOTSUP;
        qede_dev_info_get(dev, &dev_info);
        max_rx_pkt_len = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
        frame_size = max_rx_pkt_len + QEDE_ETH_OVERHEAD;
@@ -2337,9 +2332,14 @@ static int qede_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
                dev->data->dev_started = 0;
                qede_dev_stop(dev);
                restart = true;
+       } else {
+               if (IS_PF(edev))
+                       qede_mac_addr_remove(dev, 0);
        }
        rte_delay_ms(1000);
-       qdev->new_mtu = mtu;
+       qede_start_vport(qdev, mtu); /* Recreate vport */
+       qdev->mtu = mtu;
+
        /* Fix up RX buf size for all queues of the port */
        for_each_rss(i) {
                fp = &qdev->fp_array[i];
@@ -2353,17 +2353,35 @@ static int qede_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
                                rx_buf_size = frame_size;
                        rx_buf_size = QEDE_CEIL_TO_CACHE_LINE_SIZE(rx_buf_size);
                        fp->rxq->rx_buf_size = rx_buf_size;
-                       DP_INFO(edev, "buf_size adjusted to %u\n", rx_buf_size);
+                       DP_INFO(edev, "RX buffer size %u\n", rx_buf_size);
                }
        }
        if (max_rx_pkt_len > ETHER_MAX_LEN)
                dev->data->dev_conf.rxmode.jumbo_frame = 1;
        else
                dev->data->dev_conf.rxmode.jumbo_frame = 0;
+
+       /* Restore config lost due to vport stop */
+       if (IS_PF(edev))
+               qede_mac_addr_set(dev, &qdev->primary_mac);
+
+       if (dev->data->promiscuous)
+               qede_promiscuous_enable(dev);
+       else
+               qede_promiscuous_disable(dev);
+
+       if (dev->data->all_multicast)
+               qede_allmulticast_enable(dev);
+       else
+               qede_allmulticast_disable(dev);
+
+       qede_vlan_offload_set(dev, qdev->vlan_offload_mask);
+
        if (!dev->data->dev_started && restart) {
                qede_dev_start(dev);
                dev->data->dev_started = 1;
        }
+
        /* update max frame size */
        dev->data->dev_conf.rxmode.max_rx_pkt_len = max_rx_pkt_len;
        /* Reassign back */
@@ -2715,6 +2733,7 @@ static const struct eth_dev_ops qede_eth_dev_ops = {
        .promiscuous_disable = qede_promiscuous_disable,
        .allmulticast_enable = qede_allmulticast_enable,
        .allmulticast_disable = qede_allmulticast_disable,
+       .set_mc_addr_list = qede_set_mc_addr_list,
        .dev_stop = qede_dev_stop,
        .dev_close = qede_dev_close,
        .stats_get = qede_get_stats,
@@ -2755,6 +2774,7 @@ static const struct eth_dev_ops qede_eth_vf_dev_ops = {
        .promiscuous_disable = qede_promiscuous_disable,
        .allmulticast_enable = qede_allmulticast_enable,
        .allmulticast_disable = qede_allmulticast_disable,
+       .set_mc_addr_list = qede_set_mc_addr_list,
        .dev_stop = qede_dev_stop,
        .dev_close = qede_dev_close,
        .stats_get = qede_get_stats,
@@ -2957,16 +2977,24 @@ static int qede_common_dev_init(struct rte_eth_dev *eth_dev, bool is_vf)
                do_once = false;
        }
 
+       /* Bring-up the link */
+       qede_dev_set_link_state(eth_dev, true);
+
        adapter->num_tx_queues = 0;
        adapter->num_rx_queues = 0;
        SLIST_INIT(&adapter->fdir_info.fdir_list_head);
        SLIST_INIT(&adapter->vlan_list_head);
        SLIST_INIT(&adapter->uc_list_head);
+       SLIST_INIT(&adapter->mc_list_head);
        adapter->mtu = ETHER_MTU;
-       adapter->new_mtu = ETHER_MTU;
-       if (!is_vf)
-               if (qede_start_vport(adapter, adapter->mtu))
-                       return -1;
+       adapter->vport_started = false;
+
+       /* VF tunnel offloads is enabled by default in PF driver */
+       adapter->vxlan.enable = true;
+       adapter->vxlan.num_filters = 0;
+       adapter->vxlan.filter_type = ETH_TUNNEL_FILTER_IMAC |
+                                    ETH_TUNNEL_FILTER_IVLAN;
+       adapter->vxlan.udp_port = QEDE_VXLAN_DEF_PORT;
 
        DP_INFO(edev, "MAC address : %02x:%02x:%02x:%02x:%02x:%02x\n",
                adapter->primary_mac.addr_bytes[0],
index 8f21b33..2145aa6 100644 (file)
@@ -184,7 +184,6 @@ struct qede_dev {
        struct ecore_sb_info *sb_array;
        struct qede_fastpath *fp_array;
        uint16_t mtu;
-       uint16_t new_mtu;
        bool enable_tx_switching;
        bool rss_enable;
        struct rte_eth_rss_conf rss_conf;
@@ -207,6 +206,8 @@ struct qede_dev {
        struct qede_fdir_info fdir_info;
        bool vlan_strip_flg;
        char drv_ver[QEDE_PMD_DRV_VER_STR_SIZE];
+       bool vport_started;
+       int vlan_offload_mask;
        void *ethdev;
 };
 
index da6364e..153ef96 100644 (file)
@@ -141,8 +141,8 @@ qede_config_cmn_fdir_filter(struct rte_eth_dev *eth_dev,
        if (add) {
                SLIST_FOREACH(tmp, &qdev->fdir_info.fdir_list_head, list) {
                        if (memcmp(tmp->mz->addr, pkt, pkt_len) == 0) {
-                               DP_ERR(edev, "flowdir filter exist\n");
-                               rc = -EEXIST;
+                               DP_INFO(edev, "flowdir filter exist\n");
+                               rc = 0;
                                goto err2;
                        }
                }
index 31132ce..ffe196a 100644 (file)
@@ -192,9 +192,15 @@ static void qede_rx_queue_release_mbufs(struct qede_rx_queue *rxq)
 void qede_rx_queue_release(void *rx_queue)
 {
        struct qede_rx_queue *rxq = rx_queue;
+       struct qede_dev *qdev = rxq->qdev;
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+
+       PMD_INIT_FUNC_TRACE(edev);
 
        if (rxq) {
                qede_rx_queue_release_mbufs(rxq);
+               qdev->ops->common->chain_free(edev, &rxq->rx_bd_ring);
+               qdev->ops->common->chain_free(edev, &rxq->rx_comp_ring);
                rte_free(rxq->sw_rx_ring);
                rte_free(rxq);
        }
@@ -350,9 +356,14 @@ static void qede_tx_queue_release_mbufs(struct qede_tx_queue *txq)
 void qede_tx_queue_release(void *tx_queue)
 {
        struct qede_tx_queue *txq = tx_queue;
+       struct qede_dev *qdev = txq->qdev;
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+
+       PMD_INIT_FUNC_TRACE(edev);
 
        if (txq) {
                qede_tx_queue_release_mbufs(txq);
+               qdev->ops->common->chain_free(edev, &txq->tx_pbl);
                rte_free(txq->sw_tx_ring);
                rte_free(txq);
        }
@@ -417,6 +428,8 @@ int qede_alloc_fp_resc(struct qede_dev *qdev)
 
        for (sb_idx = 0; sb_idx < QEDE_RXTX_MAX(qdev); sb_idx++) {
                fp = &qdev->fp_array[sb_idx];
+               if (!fp)
+                       continue;
                fp->sb_info = rte_calloc("sb", 1, sizeof(struct ecore_sb_info),
                                RTE_CACHE_LINE_SIZE);
                if (!fp->sb_info) {
@@ -439,8 +452,6 @@ void qede_dealloc_fp_resc(struct rte_eth_dev *eth_dev)
        struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
        struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
        struct qede_fastpath *fp;
-       struct qede_rx_queue *rxq;
-       struct qede_tx_queue *txq;
        uint16_t sb_idx;
        uint8_t i;
 
@@ -448,6 +459,8 @@ void qede_dealloc_fp_resc(struct rte_eth_dev *eth_dev)
 
        for (sb_idx = 0; sb_idx < QEDE_RXTX_MAX(qdev); sb_idx++) {
                fp = &qdev->fp_array[sb_idx];
+               if (!fp)
+                       continue;
                DP_INFO(edev, "Free sb_info index 0x%x\n",
                                fp->sb_info->igu_sb_id);
                if (fp->sb_info) {
@@ -463,21 +476,13 @@ void qede_dealloc_fp_resc(struct rte_eth_dev *eth_dev)
        for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
                if (eth_dev->data->rx_queues[i]) {
                        qede_rx_queue_release(eth_dev->data->rx_queues[i]);
-                       rxq = eth_dev->data->rx_queues[i];
-                       qdev->ops->common->chain_free(edev,
-                                                     &rxq->rx_bd_ring);
-                       qdev->ops->common->chain_free(edev,
-                                                     &rxq->rx_comp_ring);
                        eth_dev->data->rx_queues[i] = NULL;
                }
        }
 
        for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
                if (eth_dev->data->tx_queues[i]) {
-                       txq = eth_dev->data->tx_queues[i];
                        qede_tx_queue_release(eth_dev->data->tx_queues[i]);
-                       qdev->ops->common->chain_free(edev,
-                                                     &txq->tx_pbl);
                        eth_dev->data->tx_queues[i] = NULL;
                }
        }
@@ -1462,6 +1467,8 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                         */
                        rx_mb->data_off = offset + RTE_PKTMBUF_HEADROOM;
                        packet_type |= qede_rx_cqe_to_pkt_type_outer(rx_mb);
+               } else {
+                       packet_type |= qede_rx_cqe_to_pkt_type(parse_flag);
                }
 
                /* Common handling for non-tunnel packets and for inner
@@ -1483,7 +1490,6 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                        ol_flags |= PKT_RX_IP_CKSUM_BAD;
                } else {
                        ol_flags |= PKT_RX_IP_CKSUM_GOOD;
-                       packet_type |= qede_rx_cqe_to_pkt_type(parse_flag);
                }
 
                if (CQE_HAS_VLAN(parse_flag) ||
@@ -1627,6 +1633,7 @@ qede_encode_sg_bd(struct qede_tx_queue *p_txq, struct rte_mbuf *m_seg,
                        QEDE_BD_SET_ADDR_LEN(tx_bd, mapping, m_seg->data_len);
                        PMD_TX_LOG(DEBUG, txq, "BD len %04x", m_seg->data_len);
                }
+               start_seg++;
                m_seg = m_seg->next;
        }
 
index 8583a67..dbd350e 100644 (file)
@@ -89,7 +89,7 @@ static struct rte_eth_link pmd_link = {
                .link_speed = ETH_SPEED_NUM_10G,
                .link_duplex = ETH_LINK_FULL_DUPLEX,
                .link_status = ETH_LINK_DOWN,
-               .link_autoneg = ETH_LINK_AUTONEG
+               .link_autoneg = ETH_LINK_FIXED,
 };
 
 static uint16_t
index 518c2a2..ec8a157 100644 (file)
@@ -145,7 +145,7 @@ efx_port_loopback_set(
        EFSYS_ASSERT(link_mode < EFX_LINK_NMODES);
 
        if (EFX_TEST_QWORD_BIT(encp->enc_loopback_types[link_mode],
-               loopback_type) == 0) {
+               (int)loopback_type) == 0) {
                rc = ENOTSUP;
                goto fail1;
        }
index f428b62..5b52c22 100644 (file)
@@ -141,6 +141,8 @@ prefetch_read_once(const volatile void *addr)
 #define __out_ecount_opt(_n)
 #define __out_bcount(_n)
 #define __out_bcount_opt(_n)
+#define __out_bcount_part(_n, _l)
+#define __out_bcount_part_opt(_n, _l)
 
 #define __deref_out
 
index 49d7e93..2196c3f 100644 (file)
@@ -548,6 +548,8 @@ sfc_set_rss_defaults(struct sfc_adapter *sa)
 
 fail_hash_support_get:
 fail_scale_support_get:
+       efx_rx_fini(sa->nic);
+
 fail_rx_init:
        efx_ev_fini(sa->nic);
 
index 5fbebbf..ba22f7e 100644 (file)
@@ -859,7 +859,7 @@ static int
 sfc_kvarg_perf_profile_handler(__rte_unused const char *key,
                               const char *value_str, void *opaque)
 {
-       uint64_t *value = opaque;
+       uint32_t *value = opaque;
 
        if (strcasecmp(value_str, SFC_KVARG_PERF_PROFILE_THROUGHPUT) == 0)
                *value = EFX_EVQ_FLAGS_TYPE_THROUGHPUT;
index e770b98..fddc670 100644 (file)
@@ -107,7 +107,6 @@ sfc_flow_parse_init(const struct rte_flow_item *item,
        const uint8_t *spec;
        const uint8_t *mask;
        const uint8_t *last;
-       uint8_t match;
        uint8_t supp;
        unsigned int i;
 
@@ -168,12 +167,11 @@ sfc_flow_parse_init(const struct rte_flow_item *item,
                return -rte_errno;
        }
 
-       /* Check that mask and spec not asks for more match than supp_mask */
+       /* Check that mask does not ask for more match than supp_mask */
        for (i = 0; i < size; i++) {
-               match = spec[i] | mask[i];
                supp = ((const uint8_t *)supp_mask)[i];
 
-               if ((match | supp) != supp) {
+               if (~supp & mask[i]) {
                        rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
                                           "Item's field is not supported");
index 7816393..8fc93d6 100644 (file)
@@ -814,7 +814,7 @@ sfc_rx_mbuf_data_alignment(struct rte_mempool *mb_pool)
 
        order = MIN(order, rte_bsf32(data_off));
 
-       return 1u << (order - 1);
+       return 1u << order;
 }
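
The one-line fix above stops halving the reported alignment: the helper should return 2^order bytes, not 2^(order-1). Worked numbers:

    #include <assert.h>

    /* Sketch only: order 11 means a 2048-byte boundary. */
    static void alignment_example(void)
    {
            unsigned int order = 11;        /* hypothetical */

            assert((1u << order) == 2048u);         /* fixed */
            assert((1u << (order - 1)) == 1024u);   /* old, halved */
    }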
 
 static uint16_t
@@ -1095,6 +1095,41 @@ sfc_efx_to_rte_hash_type(efx_rx_hash_type_t efx_hash_types)
 #endif
 
 #if EFSYS_OPT_RX_SCALE
+static int
+sfc_rx_process_adv_conf_rss(struct sfc_adapter *sa,
+                           struct rte_eth_rss_conf *conf)
+{
+       efx_rx_hash_type_t efx_hash_types = sa->rss_hash_types;
+
+       if (sa->rss_support != EFX_RX_SCALE_EXCLUSIVE) {
+               if ((conf->rss_hf != 0 && conf->rss_hf != SFC_RSS_OFFLOADS) ||
+                   conf->rss_key != NULL)
+                       return EINVAL;
+       }
+
+       if (conf->rss_hf != 0) {
+               if ((conf->rss_hf & ~SFC_RSS_OFFLOADS) != 0) {
+                       sfc_err(sa, "unsupported hash functions requested");
+                       return EINVAL;
+               }
+
+               efx_hash_types = sfc_rte_to_efx_hash_type(conf->rss_hf);
+       }
+
+       if (conf->rss_key != NULL) {
+               if (conf->rss_key_len != sizeof(sa->rss_key)) {
+                       sfc_err(sa, "RSS key size is wrong (should be %lu)",
+                               sizeof(sa->rss_key));
+                       return EINVAL;
+               }
+               rte_memcpy(sa->rss_key, conf->rss_key, sizeof(sa->rss_key));
+       }
+
+       sa->rss_hash_types = efx_hash_types;
+
+       return 0;
+}
+
 static int
 sfc_rx_rss_config(struct sfc_adapter *sa)
 {
@@ -1347,16 +1382,23 @@ sfc_rx_configure(struct sfc_adapter *sa)
                           MIN(sa->rxq_count, EFX_MAXRSS) : 0;
 
        if (sa->rss_channels > 0) {
+               struct rte_eth_rss_conf *adv_conf_rss;
                unsigned int sw_index;
 
                for (sw_index = 0; sw_index < EFX_RSS_TBL_SIZE; ++sw_index)
                        sa->rss_tbl[sw_index] = sw_index % sa->rss_channels;
+
+               adv_conf_rss = &dev_conf->rx_adv_conf.rss_conf;
+               rc = sfc_rx_process_adv_conf_rss(sa, adv_conf_rss);
+               if (rc != 0)
+                       goto fail_rx_process_adv_conf_rss;
        }
 #endif
 
 done:
        return 0;
 
+fail_rx_process_adv_conf_rss:
 fail_rx_qinit_info:
 fail_rxqs_realloc:
 fail_rxqs_alloc:
index c8f918d..3c695e4 100644 (file)
@@ -551,7 +551,7 @@ pmd_ethdev_register(struct rte_vdev_device *vdev,
        soft_dev->data->dev_private = dev_private;
        soft_dev->data->dev_link.link_speed = hard_speed;
        soft_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
-       soft_dev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
+       soft_dev->data->dev_link.link_autoneg = ETH_LINK_FIXED;
        soft_dev->data->dev_link.link_status = ETH_LINK_DOWN;
        soft_dev->data->mac_addrs = &eth_addr;
        soft_dev->data->promiscuous = 1;
index 45aebed..88f5fb8 100644 (file)
@@ -1054,22 +1054,29 @@ eth_stats_get(struct rte_eth_dev *dev,
        uint64_t tx_err_total = 0;
        uint64_t rx_total_bytes = 0;
        uint64_t tx_total_bytes = 0;
-       const struct pmd_internals *internals = dev->data->dev_private;
 
-       for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS && i < nb_rx; i++) {
-               stats->q_ipackets[i] = internals->rx_queue[i].rx_pkts;
-               stats->q_ibytes[i] = internals->rx_queue[i].rx_bytes;
-               rx_total += stats->q_ipackets[i];
-               rx_total_bytes += stats->q_ibytes[i];
+       for (i = 0; i < nb_rx; i++) {
+               struct szedata2_rx_queue *rxq = dev->data->rx_queues[i];
+
+               if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+                       stats->q_ipackets[i] = rxq->rx_pkts;
+                       stats->q_ibytes[i] = rxq->rx_bytes;
+               }
+               rx_total += rxq->rx_pkts;
+               rx_total_bytes += rxq->rx_bytes;
        }
 
-       for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS && i < nb_tx; i++) {
-               stats->q_opackets[i] = internals->tx_queue[i].tx_pkts;
-               stats->q_obytes[i] = internals->tx_queue[i].tx_bytes;
-               stats->q_errors[i] = internals->tx_queue[i].err_pkts;
-               tx_total += stats->q_opackets[i];
-               tx_total_bytes += stats->q_obytes[i];
-               tx_err_total += stats->q_errors[i];
+       for (i = 0; i < nb_tx; i++) {
+               struct szedata2_tx_queue *txq = dev->data->tx_queues[i];
+
+               if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+                       stats->q_opackets[i] = txq->tx_pkts;
+                       stats->q_obytes[i] = txq->tx_bytes;
+                       stats->q_errors[i] = txq->err_pkts;
+               }
+               tx_total += txq->tx_pkts;
+               tx_total_bytes += txq->tx_bytes;
+               tx_err_total += txq->err_pkts;
        }
 
        stats->ipackets = rx_total;
@@ -1417,9 +1424,9 @@ get_szedata2_index(const struct rte_pci_addr *pcislot_addr, uint32_t *index)
        FILE *fd;
        char pcislot_path[PATH_MAX];
        uint32_t domain;
-       uint32_t bus;
-       uint32_t devid;
-       uint32_t function;
+       uint8_t bus;
+       uint8_t devid;
+       uint8_t function;
 
        dir = opendir("/sys/class/combo");
        if (dir == NULL)
@@ -1444,7 +1451,7 @@ get_szedata2_index(const struct rte_pci_addr *pcislot_addr, uint32_t *index)
                if (fd == NULL)
                        continue;
 
-               ret = fscanf(fd, "%4" PRIx16 ":%2" PRIx8 ":%2" PRIx8 ".%" PRIx8,
+               ret = fscanf(fd, "%8" SCNx32 ":%2" SCNx8 ":%2" SCNx8 ".%" SCNx8,
                                &domain, &bus, &devid, &function);
                fclose(fd);
                if (ret != 4)
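
The old format string used PRIx16/PRIx8, which are printf() macros; with
fscanf() the matching SCN macros and correctly sized variables are required,
otherwise the conversion can write past the target object. A standalone
illustration of the rule (not part of the patch):

    #include <inttypes.h>
    #include <stdio.h>

    int
    main(void)
    {
            uint32_t domain;
            uint8_t bus, devid, function;

            /* SCNx32/SCNx8 select conversions matching each variable's
             * width; a PRIx* macro here could let sscanf() overrun a
             * uint8_t. */
            int ret = sscanf("0000:3b:00.1",
                            "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8,
                            &domain, &bus, &devid, &function);

            return ret == 4 ? 0 : 1;
    }
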
index b8187f9..c38c02e 100644
@@ -95,7 +95,7 @@ static struct rte_eth_link pmd_link = {
        .link_speed = ETH_SPEED_NUM_10G,
        .link_duplex = ETH_LINK_FULL_DUPLEX,
        .link_status = ETH_LINK_DOWN,
-       .link_autoneg = ETH_LINK_AUTONEG
+       .link_autoneg = ETH_LINK_FIXED,
 };
 
 static void
@@ -544,7 +544,9 @@ apply:
        case SIOCSIFMTU:
                break;
        default:
-               RTE_ASSERT(!"unsupported request type: must not happen");
+               RTE_LOG(WARNING, PMD, "%s: ioctl() called with wrong arg\n",
+                       pmd->name);
+               return -EINVAL;
        }
        if (ioctl(pmd->ioctl_sock, request, ifr) < 0)
                goto error;
index 2536ee4..6091b32 100644
@@ -117,6 +117,7 @@ struct pmd_internal {
        char *dev_name;
        char *iface_name;
        uint16_t max_queues;
+       int vid;
        rte_atomic32_t started;
 };
 
@@ -527,6 +528,9 @@ update_queuing_status(struct rte_eth_dev *dev)
        unsigned int i;
        int allow_queuing = 1;
 
+       if (!dev->data->rx_queues || !dev->data->tx_queues)
+               return;
+
        if (rte_atomic32_read(&internal->started) == 0 ||
            rte_atomic32_read(&internal->dev_attached) == 0)
                allow_queuing = 0;
@@ -551,13 +555,36 @@ update_queuing_status(struct rte_eth_dev *dev)
        }
 }
 
+static void
+queue_setup(struct rte_eth_dev *eth_dev, struct pmd_internal *internal)
+{
+       struct vhost_queue *vq;
+       int i;
+
+       for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+               vq = eth_dev->data->rx_queues[i];
+               if (!vq)
+                       continue;
+               vq->vid = internal->vid;
+               vq->internal = internal;
+               vq->port = eth_dev->data->port_id;
+       }
+       for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
+               vq = eth_dev->data->tx_queues[i];
+               if (!vq)
+                       continue;
+               vq->vid = internal->vid;
+               vq->internal = internal;
+               vq->port = eth_dev->data->port_id;
+       }
+}
+
 static int
 new_device(int vid)
 {
        struct rte_eth_dev *eth_dev;
        struct internal_list *list;
        struct pmd_internal *internal;
-       struct vhost_queue *vq;
        unsigned i;
        char ifname[PATH_MAX];
 #ifdef RTE_LIBRTE_VHOST_NUMA
@@ -580,22 +607,11 @@ new_device(int vid)
                eth_dev->data->numa_node = newnode;
 #endif
 
-       for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
-               vq = eth_dev->data->rx_queues[i];
-               if (vq == NULL)
-                       continue;
-               vq->vid = vid;
-               vq->internal = internal;
-               vq->port = eth_dev->data->port_id;
-       }
-       for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
-               vq = eth_dev->data->tx_queues[i];
-               if (vq == NULL)
-                       continue;
-               vq->vid = vid;
-               vq->internal = internal;
-               vq->port = eth_dev->data->port_id;
-       }
+       internal->vid = vid;
+       if (rte_atomic32_read(&internal->started) == 1)
+               queue_setup(eth_dev, internal);
+       else
+               RTE_LOG(INFO, PMD, "RX/TX queues do not exist yet\n");
 
        for (i = 0; i < rte_vhost_get_vring_num(vid); i++)
                rte_vhost_enable_guest_notification(vid, i, 0);
@@ -640,17 +656,19 @@ destroy_device(int vid)
 
        eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
 
-       for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
-               vq = eth_dev->data->rx_queues[i];
-               if (vq == NULL)
-                       continue;
-               vq->vid = -1;
-       }
-       for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
-               vq = eth_dev->data->tx_queues[i];
-               if (vq == NULL)
-                       continue;
-               vq->vid = -1;
+       if (eth_dev->data->rx_queues && eth_dev->data->tx_queues) {
+               for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+                       vq = eth_dev->data->rx_queues[i];
+                       if (!vq)
+                               continue;
+                       vq->vid = -1;
+               }
+               for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
+                       vq = eth_dev->data->tx_queues[i];
+                       if (!vq)
+                               continue;
+                       vq->vid = -1;
+               }
        }
 
        state = vring_states[eth_dev->data->port_id];
@@ -773,12 +791,13 @@ rte_eth_vhost_get_vid_from_port_id(uint16_t port_id)
 }
 
 static int
-eth_dev_start(struct rte_eth_dev *dev)
+eth_dev_start(struct rte_eth_dev *eth_dev)
 {
-       struct pmd_internal *internal = dev->data->dev_private;
+       struct pmd_internal *internal = eth_dev->data->dev_private;
 
+       queue_setup(eth_dev, internal);
        rte_atomic32_set(&internal->started, 1);
-       update_queuing_status(dev);
+       update_queuing_status(eth_dev);
 
        return 0;
 }
@@ -816,10 +835,13 @@ eth_dev_close(struct rte_eth_dev *dev)
        pthread_mutex_unlock(&internal_list_lock);
        rte_free(list);
 
-       for (i = 0; i < dev->data->nb_rx_queues; i++)
-               rte_free(dev->data->rx_queues[i]);
-       for (i = 0; i < dev->data->nb_tx_queues; i++)
-               rte_free(dev->data->tx_queues[i]);
+       if (dev->data->rx_queues)
+               for (i = 0; i < dev->data->nb_rx_queues; i++)
+                       rte_free(dev->data->rx_queues[i]);
+
+       if (dev->data->tx_queues)
+               for (i = 0; i < dev->data->nb_tx_queues; i++)
+                       rte_free(dev->data->tx_queues[i]);
 
        rte_free(dev->data->mac_addrs);
        free(internal->dev_name);
@@ -1086,6 +1108,7 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
        data->nb_rx_queues = queues;
        data->nb_tx_queues = queues;
        internal->max_queues = queues;
+       internal->vid = -1;
        data->dev_link = pmd_link;
        data->mac_addrs = eth_addr;
        data->dev_flags = RTE_ETH_DEV_INTR_LSC;
index 93d9649..7669c64 100644
@@ -670,6 +670,8 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev)
                Vmxnet3_TxQueueDesc *tqd = &hw->tqd_start[i];
                vmxnet3_tx_queue_t *txq  = dev->data->tx_queues[i];
 
+               txq->shared = &hw->tqd_start[i];
+
                tqd->ctrl.txNumDeferred  = 0;
                tqd->ctrl.txThreshold    = 1;
                tqd->conf.txRingBasePA   = txq->cmd_ring.basePA;
@@ -690,6 +692,8 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev)
                Vmxnet3_RxQueueDesc *rqd  = &hw->rqd_start[i];
                vmxnet3_rx_queue_t *rxq   = dev->data->rx_queues[i];
 
+               rxq->shared = &hw->rqd_start[i];
+
                rqd->conf.rxRingBasePA[0] = rxq->cmd_ring[0].basePA;
                rqd->conf.rxRingBasePA[1] = rxq->cmd_ring[1].basePA;
                rqd->conf.compRingBasePA  = rxq->comp_ring.basePA;
index aa396ab..940068f 100644
@@ -937,7 +937,7 @@ vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
 
        txq->queue_id = queue_idx;
        txq->port_id = dev->data->port_id;
-       txq->shared = &hw->tqd_start[queue_idx];
+       txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
        txq->hw = hw;
        txq->qid = queue_idx;
        txq->stopped = TRUE;
@@ -1040,7 +1040,7 @@ vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
        rxq->mp = mp;
        rxq->queue_id = queue_idx;
        rxq->port_id = dev->data->port_id;
-       rxq->shared = &hw->rqd_start[queue_idx];
+       rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
        rxq->hw = hw;
        rxq->qid1 = queue_idx;
        rxq->qid2 = queue_idx + hw->num_rx_queues;
index f8f5bbd..dc1d964 100644
 #include <rte_string_fns.h>
 #include <rte_cycles.h>
 
+#ifndef APP_MAX_LCORE
+#if (RTE_MAX_LCORE > 64)
+#define APP_MAX_LCORE 64
+#else
+#define APP_MAX_LCORE RTE_MAX_LCORE
+#endif
+#endif
+
 /* Macros for printing using RTE_LOG */
 #define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
 #define FATAL_ERROR(fmt, args...)       rte_exit(EXIT_FAILURE, fmt "\n", ##args)
 #define PRINT_INFO(fmt, args...)        RTE_LOG(INFO, APP, fmt "\n", ##args)
 
 /* Max ports that can be used (each port is associated with two lcores) */
-#define MAX_PORTS               (RTE_MAX_LCORE / 2)
+#define MAX_PORTS               (APP_MAX_LCORE / 2)
 
 /* Max size of a single packet */
 #define MAX_PACKET_SZ (2048)
@@ -133,7 +141,7 @@ static uint64_t input_cores_mask = 0;
 static uint64_t output_cores_mask = 0;
 
 /* Array storing port_id that is associated with each lcore */
-static uint16_t port_ids[RTE_MAX_LCORE];
+static uint16_t port_ids[APP_MAX_LCORE];
 
 /* Structure type for recording lcore-specific stats */
 struct stats {
@@ -143,7 +151,7 @@ struct stats {
 };
 
 /* Array of lcore-specific stats */
-static struct stats lcore_stats[RTE_MAX_LCORE];
+static struct stats lcore_stats[APP_MAX_LCORE];
 
 /* Print out statistics on packets handled */
 static void
@@ -362,7 +370,9 @@ setup_port_lcore_affinities(void)
        uint16_t rx_port = 0;
 
        /* Setup port_ids[] array, and check masks were ok */
-       RTE_LCORE_FOREACH(i) {
+       for (i = 0; i < APP_MAX_LCORE; i++) {
+               if (!rte_lcore_is_enabled(i))
+                       continue;
                if (input_cores_mask & (1ULL << i)) {
                        /* Skip ports that are not enabled */
                        while ((ports_mask & (1 << rx_port)) == 0) {
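
The sample's core masks are uint64_t, so only lcores 0-63 can ever be
selected; APP_MAX_LCORE caps the arrays and loops accordingly when
RTE_MAX_LCORE is larger. A minimal sketch of why the cap is required
(generic helper, not from the patch):

    #include <stdint.h>

    /* (1ULL << lcore) is undefined behaviour once lcore reaches 64, so a
     * 64-bit mask simply cannot describe more than 64 lcores. */
    static inline int
    lcore_in_mask(uint64_t mask, unsigned int lcore)
    {
            if (lcore >= 64)
                    return 0;
            return (mask & (1ULL << lcore)) != 0;
    }
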
index d4e1682..ff57522 100644
@@ -1506,8 +1506,8 @@ l2fwd_crypto_default_options(struct l2fwd_crypto_options *options)
        options->aead_iv_random_size = -1;
        options->aead_iv.length = 0;
 
-       options->auth_xform.aead.algo = RTE_CRYPTO_AEAD_AES_GCM;
-       options->auth_xform.aead.op = RTE_CRYPTO_AEAD_OP_ENCRYPT;
+       options->aead_xform.aead.algo = RTE_CRYPTO_AEAD_AES_GCM;
+       options->aead_xform.aead.op = RTE_CRYPTO_AEAD_OP_ENCRYPT;
 
        options->aad_param = 0;
        options->aad_random_size = -1;
index 7d76c8c..0b60a42 100644
@@ -320,13 +320,14 @@ struct lthread *lthread_current(void)
 /*
  * Tasklet to cancel a thread
  */
-static void
+static void *
 _cancel(void *arg)
 {
        struct lthread *lt = (struct lthread *) arg;
 
        lt->state |= BIT(ST_LT_CANCELLED);
        lthread_detach();
+       return NULL;
 }
 
 
index ff245a0..a74680b 100644
@@ -143,7 +143,7 @@ struct lthread_mutex;
 struct lthread_condattr;
 struct lthread_mutexattr;
 
-typedef void (*lthread_func_t) (void *);
+typedef void *(*lthread_func_t) (void *);
 
 /*
  * Define the size of stack for an lthread
index fa65234..2ac1aab 100644
@@ -2021,17 +2021,18 @@ cpu_load_collector(__rte_unused void *arg) {
  *
  * This loop is used to start empty scheduler on lcore.
  */
-static void
+static void *
 lthread_null(__rte_unused void *args)
 {
        int lcore_id = rte_lcore_id();
 
        RTE_LOG(INFO, L3FWD, "Starting scheduler on lcore %d.\n", lcore_id);
        lthread_exit(NULL);
+       return NULL;
 }
 
 /* main processing loop */
-static void
+static void *
 lthread_tx_per_ring(void *dummy)
 {
        int nb_rx;
@@ -2076,6 +2077,7 @@ lthread_tx_per_ring(void *dummy)
                        lthread_cond_wait(ready, 0);
 
        }
+       return NULL;
 }
 
 /*
@@ -2084,7 +2086,7 @@ lthread_tx_per_ring(void *dummy)
  * This lthread is used to spawn one new lthread per ring from producers.
  *
  */
-static void
+static void *
 lthread_tx(void *args)
 {
        struct lthread *lt;
@@ -2129,9 +2131,10 @@ lthread_tx(void *args)
                }
 
        }
+       return NULL;
 }
 
-static void
+static void *
 lthread_rx(void *dummy)
 {
        int ret;
@@ -2155,7 +2158,7 @@ lthread_rx(void *dummy)
 
        if (rx_conf->n_rx_queue == 0) {
                RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", rte_lcore_id());
-               return;
+               return NULL;
        }
 
        RTE_LOG(INFO, L3FWD, "Entering main Rx loop on lcore %u\n", rte_lcore_id());
@@ -2227,6 +2230,7 @@ lthread_rx(void *dummy)
                        lthread_yield();
                }
        }
+       return NULL;
 }
 
 /*
@@ -2235,8 +2239,9 @@ lthread_rx(void *dummy)
  * This lthread loop spawns all rx and tx lthreads on master lcore
  */
 
-static void
-lthread_spawner(__rte_unused void *arg) {
+static void *
+lthread_spawner(__rte_unused void *arg)
+{
        struct lthread *lt[MAX_THREAD];
        int i;
        int n_thread = 0;
@@ -2277,6 +2282,7 @@ lthread_spawner(__rte_unused void *arg) {
        for (i = 0; i < n_thread; i++)
                lthread_join(lt[i], NULL);
 
+       return NULL;
 }
 
 /*
index febae39..5811cff 100644
@@ -149,8 +149,7 @@ void *helloworld_pthread(void *arg)
  */
 __thread pthread_t tid[HELLOW_WORLD_MAX_LTHREADS];
 
-static void initial_lthread(void *args);
-static void initial_lthread(void *args __attribute__((unused)))
+static void *initial_lthread(void *args __attribute__((unused)))
 {
        int lcore = (int) rte_lcore_id();
        /*
@@ -225,6 +224,7 @@ static void initial_lthread(void *args __attribute__((unused)))
        /* shutdown the lthread scheduler */
        lthread_scheduler_shutdown(rte_lcore_id());
        lthread_detach();
+       return NULL;
 }
 
 
@@ -235,8 +235,6 @@ static void initial_lthread(void *args __attribute__((unused)))
  * in the core mask
  */
 static int
-lthread_scheduler(void *args);
-static int
 lthread_scheduler(void *args __attribute__((unused)))
 {
        /* create initial thread  */
index bc7cf2b..24cc389 100644
@@ -394,7 +394,7 @@ int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
 int
 pthread_create(pthread_t *__restrict tid,
                const pthread_attr_t *__restrict attr,
-               void *(func) (void *),
+               lthread_func_t func,
               void *__restrict arg)
 {
        if (override) {
@@ -419,7 +419,7 @@ pthread_create(pthread_t *__restrict tid,
                        }
                }
                return lthread_create((struct lthread **)tid, lcore,
-                                     (void (*)(void *))func, arg);
+                                     func, arg);
        }
        return _sys_pthread_funcs.f_pthread_create(tid, attr, func, arg);
 }
index fe17452..a9a89b9 100644
@@ -210,7 +210,7 @@ receive_stage(__attribute__((unused)) void *args)
        }
 }
 
-static void
+static int
 pipeline_stage(__attribute__((unused)) void *args)
 {
        int i, ret;
@@ -272,9 +272,11 @@ pipeline_stage(__attribute__((unused)) void *args)
                        }
                }
        }
+
+       return 0;
 }
 
-static void
+static int
 send_stage(__attribute__((unused)) void *args)
 {
        uint16_t nb_dq_pkts;
@@ -316,6 +318,8 @@ send_stage(__attribute__((unused)) void *args)
                        /* TODO: Check if nb_dq_pkts == nb_tx_pkts? */
                }
        }
+
+       return 0;
 }
 
 int
@@ -375,15 +379,13 @@ main(int argc, char **argv)
                                if (is_bit_set(port_id, portmask))
                                        init_ring(lcore_id, port_id);
 
-                       /* typecast is a workaround for GCC 4.3 bug */
-                       rte_eal_remote_launch((int (*)(void *))pipeline_stage,
+                       rte_eal_remote_launch(pipeline_stage,
                                        NULL, lcore_id);
                }
        }
 
        /* Start send_stage() on the last slave core */
-       /* typecast is a workaround for GCC 4.3 bug */
-       rte_eal_remote_launch((int (*)(void *))send_stage, NULL, last_lcore_id);
+       rte_eal_remote_launch(send_stage, NULL, last_lcore_id);
 
        /* Start receive_stage() on the master core */
        receive_stage(NULL);
index 9fe0d9d..b8e14b8 100644
@@ -290,19 +290,40 @@ rte_cryptodev_sym_capability_get(uint8_t dev_id,
 
 }
 
-#define param_range_check(x, y) \
-       (((x < y.min) || (x > y.max)) || \
-       (y.increment != 0 && (x % y.increment) != 0))
+static int
+param_range_check(uint16_t size, const struct rte_crypto_param_range *range)
+{
+       unsigned int next_size;
+
+       /* Check lower/upper bounds */
+       if (size < range->min)
+               return -1;
+
+       if (size > range->max)
+               return -1;
+
+       /* If range is actually only one value, size is correct */
+       if (range->increment == 0)
+               return 0;
+
+       /* Check if value is one of the supported sizes */
+       for (next_size = range->min; next_size <= range->max;
+                       next_size += range->increment)
+               if (size == next_size)
+                       return 0;
+
+       return -1;
+}
 
 int
 rte_cryptodev_sym_capability_check_cipher(
                const struct rte_cryptodev_symmetric_capability *capability,
                uint16_t key_size, uint16_t iv_size)
 {
-       if (param_range_check(key_size, capability->cipher.key_size))
+       if (param_range_check(key_size, &capability->cipher.key_size) != 0)
                return -1;
 
-       if (param_range_check(iv_size, capability->cipher.iv_size))
+       if (param_range_check(iv_size, &capability->cipher.iv_size) != 0)
                return -1;
 
        return 0;
@@ -313,13 +334,13 @@ rte_cryptodev_sym_capability_check_auth(
                const struct rte_cryptodev_symmetric_capability *capability,
                uint16_t key_size, uint16_t digest_size, uint16_t iv_size)
 {
-       if (param_range_check(key_size, capability->auth.key_size))
+       if (param_range_check(key_size, &capability->auth.key_size) != 0)
                return -1;
 
-       if (param_range_check(digest_size, capability->auth.digest_size))
+       if (param_range_check(digest_size, &capability->auth.digest_size) != 0)
                return -1;
 
-       if (param_range_check(iv_size, capability->auth.iv_size))
+       if (param_range_check(iv_size, &capability->auth.iv_size) != 0)
                return -1;
 
        return 0;
@@ -331,16 +352,16 @@ rte_cryptodev_sym_capability_check_aead(
                uint16_t key_size, uint16_t digest_size, uint16_t aad_size,
                uint16_t iv_size)
 {
-       if (param_range_check(key_size, capability->aead.key_size))
+       if (param_range_check(key_size, &capability->aead.key_size) != 0)
                return -1;
 
-       if (param_range_check(digest_size, capability->aead.digest_size))
+       if (param_range_check(digest_size, &capability->aead.digest_size) != 0)
                return -1;
 
-       if (param_range_check(aad_size, capability->aead.aad_size))
+       if (param_range_check(aad_size, &capability->aead.aad_size) != 0)
                return -1;
 
-       if (param_range_check(iv_size, capability->aead.iv_size))
+       if (param_range_check(iv_size, &capability->aead.iv_size) != 0)
                return -1;
 
        return 0;
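
Callers typically pair these helpers with a capability lookup and validate
session parameters before creating a session. A usage sketch with assumed
sizes (16-byte AES-CBC key and IV):

    #include <rte_cryptodev.h>

    static int
    check_aes_cbc_support(uint8_t dev_id)
    {
            const struct rte_cryptodev_symmetric_capability *cap;
            struct rte_cryptodev_sym_capability_idx idx = {
                    .type = RTE_CRYPTO_SYM_XFORM_CIPHER,
                    .algo.cipher = RTE_CRYPTO_CIPHER_AES_CBC,
            };

            cap = rte_cryptodev_sym_capability_get(dev_id, &idx);
            if (cap == NULL)
                    return -1;	/* algorithm not offered by this device */

            /* Returns -1 for sizes outside [min, max] or off the increment
             * grid, via the param_range_check() rewritten above. */
            return rte_cryptodev_sym_capability_check_cipher(cap, 16, 16);
    }
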
index 6993dd2..3a80311 100644
@@ -55,7 +55,7 @@ extern "C" {
  * Guarantees that the LOAD and STORE operations generated before the
  * barrier occur before the LOAD and STORE operations generated after.
  */
-#define        rte_mb()  {asm volatile("sync" : : : "memory"); }
+#define        rte_mb()  asm volatile("sync" : : : "memory")
 
 /**
  * Write memory barrier.
index 4616a08..99ae35e 100644
@@ -66,7 +66,7 @@ extern "C" {
 /**
  * Patch level number i.e. the z in yy.mm.z
  */
-#define RTE_VER_MINOR 2
+#define RTE_VER_MINOR 3
 
 /**
  * Extra string to be appended to version number
index 16a181c..17c20d4 100644
@@ -491,6 +491,9 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
                        hugepg_tbl[i].orig_va = virtaddr;
                }
                else {
+                       /* rewrite physical addresses in IOVA as VA mode */
+                       if (rte_eal_iova_mode() == RTE_IOVA_VA)
+                               hugepg_tbl[i].physaddr = (uintptr_t)virtaddr;
                        hugepg_tbl[i].final_va = virtaddr;
                }
 
@@ -1109,7 +1112,8 @@ rte_eal_hugepage_init(void)
                                continue;
                }
 
-               if (phys_addrs_available) {
+               if (phys_addrs_available &&
+                               rte_eal_iova_mode() != RTE_IOVA_VA) {
                        /* find physical addresses for each hugepage */
                        if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
                                RTE_LOG(DEBUG, EAL, "Failed to find phys addr "
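
In IOVA-as-VA mode a hugepage's IOVA is simply its mapped virtual address, so
the table entry is rewritten instead of resolving physical addresses. Code
that must branch on the runtime mode can use the same query; a small sketch
(the log text is illustrative only):

    #include <stdio.h>
    #include <rte_eal.h>

    static void
    report_iova_mode(void)
    {
            /* After rte_eal_init(), rte_eal_iova_mode() reports how IOVAs
             * are derived: RTE_IOVA_VA means IOVA == virtual address. */
            if (rte_eal_iova_mode() == RTE_IOVA_VA)
                    printf("IOVA as VA: no physical address lookup needed\n");
            else
                    printf("IOVA as PA: hugepages need physical addresses\n");
    }
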
index fb1a622..a75ef5a 100644
@@ -710,10 +710,7 @@ vfio_type1_dma_map(int vfio_container_fd)
                dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
                dma_map.vaddr = ms[i].addr_64;
                dma_map.size = ms[i].len;
-               if (rte_eal_iova_mode() == RTE_IOVA_VA)
-                       dma_map.iova = dma_map.vaddr;
-               else
-                       dma_map.iova = ms[i].iova;
+               dma_map.iova = ms[i].iova;
                dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
 
                ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
@@ -813,10 +810,7 @@ vfio_spapr_dma_map(int vfio_container_fd)
                dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
                dma_map.vaddr = ms[i].addr_64;
                dma_map.size = ms[i].len;
-               if (rte_eal_iova_mode() == RTE_IOVA_VA)
-                       dma_map.iova = dma_map.vaddr;
-               else
-                       dma_map.iova = ms[i].iova;
+               dma_map.iova = ms[i].iova;
                dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
                                 VFIO_DMA_MAP_FLAG_WRITE;
 
index 3f8c0bc..6a6968d 100644
 #undef NET_NAME_UNKNOWN
 #endif
 
+#if (defined(RHEL_RELEASE_CODE) && \
+       (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5)))
+#define ndo_change_mtu ndo_change_mtu_rh74
+#endif
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
 #define HAVE_SIGNAL_FUNCTIONS_OWN_HEADER
 #endif
index 443a3f2..6691edf 100644
@@ -3915,7 +3915,8 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
 #define HAVE_NDO_BRIDGE_GETLINK_NLFLAGS
 #endif /* >= 4.1.0 */
 
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0) )
+#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0) ) \
+    || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,4) ))
 /* ndo_bridge_getlink adds new filter_mask and vlan_fill parameters */
 #define HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL
 #endif /* >= 4.2.0 */
@@ -3933,9 +3934,15 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
 #endif
 
 #if ((LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)) || \
-    (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(12, 3, 0)))
+     (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(12, 3, 0)) || \
+     (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 4)))
 #define HAVE_VF_VLAN_PROTO
-#endif /* >= 4.9.0, >= SLES12SP3 */
+#if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 4))
+/* In RHEL/CentOS 7.4, the "new" version of ndo_set_vf_vlan
+ * is in the struct net_device_ops_extended */
+#define ndo_set_vf_vlan extended.ndo_set_vf_vlan
+#endif
+#endif
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)
 #define HAVE_PCI_ENABLE_MSIX
index 4d23bc1..eea11d0 100644
@@ -533,6 +533,12 @@ rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id)
        RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
 
        dev = &rte_eth_devices[port_id];
+       if (!dev->data->dev_started) {
+               RTE_PMD_DEBUG_TRACE(
+                   "port %d must be started before starting any queue\n", port_id);
+               return -EINVAL;
+       }
+
        if (rx_queue_id >= dev->data->nb_rx_queues) {
                RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
                return -EINVAL;
@@ -585,6 +591,12 @@ rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id)
        RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
 
        dev = &rte_eth_devices[port_id];
+       if (!dev->data->dev_started) {
+               RTE_PMD_DEBUG_TRACE(
+                   "port %d must be started before starting any queue\n", port_id);
+               return -EINVAL;
+       }
+
        if (tx_queue_id >= dev->data->nb_tx_queues) {
                RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id);
                return -EINVAL;
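
With this check in place, per-queue start/stop is only legal on a started
port. The intended sequence for a deferred-start Rx queue looks like the
sketch below (queue id, descriptor count and mempool are assumptions):

    #include <rte_ethdev.h>

    static int
    start_port_with_deferred_rxq(uint16_t port_id, struct rte_mempool *mp)
    {
            struct rte_eth_rxconf rxconf = {
                    .rx_deferred_start = 1,	/* skip on rte_eth_dev_start() */
            };
            int ret;

            ret = rte_eth_rx_queue_setup(port_id, 0, 512,
                            rte_eth_dev_socket_id(port_id), &rxconf, mp);
            if (ret != 0)
                    return ret;

            ret = rte_eth_dev_start(port_id);	/* port must start first... */
            if (ret != 0)
                    return ret;

            /* ...only then may the queue be started individually. */
            return rte_eth_dev_rx_queue_start(port_id, 0);
    }
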
index 2cc2eed..eba11ca 100644
@@ -3125,6 +3125,9 @@ static inline int rte_eth_tx_descriptor_status(uint16_t port_id,
  * invoke this function concurrently on the same tx queue without SW lock.
  * @see rte_eth_dev_info_get, struct rte_eth_txconf::txq_flags
  *
+ * @see rte_eth_tx_prepare to perform some prior checks or adjustments
+ * for offloads.
+ *
  * @param port_id
  *   The port identifier of the Ethernet device.
  * @param queue_id
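
The cross-reference added above reflects the usual pairing: run a burst
through rte_eth_tx_prepare() so the PMD can verify or fix up offload metadata
(pseudo-header checksums, TSO limits) before rte_eth_tx_burst(). A hedged
sketch:

    #include <rte_ethdev.h>

    static uint16_t
    send_with_prepare(uint16_t port_id, uint16_t queue_id,
                    struct rte_mbuf **pkts, uint16_t nb_pkts)
    {
            /* Returns the count of leading packets that are ready to send;
             * rte_errno says why the first rejected packet failed. */
            uint16_t nb_ok = rte_eth_tx_prepare(port_id, queue_id,
                            pkts, nb_pkts);

            return rte_eth_tx_burst(port_id, queue_id, pkts, nb_ok);
    }
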
index ad64a16..f69316d 100644
@@ -123,9 +123,6 @@ rte_eth_dev_pci_allocate(struct rte_pci_device *dev, size_t private_data_size)
 static inline void
 rte_eth_dev_pci_release(struct rte_eth_dev *eth_dev)
 {
-       /* free ether device */
-       rte_eth_dev_release_port(eth_dev);
-
        if (rte_eal_process_type() == RTE_PROC_PRIMARY)
                rte_free(eth_dev->data->dev_private);
 
@@ -139,6 +136,9 @@ rte_eth_dev_pci_release(struct rte_eth_dev *eth_dev)
 
        eth_dev->device = NULL;
        eth_dev->intr_handle = NULL;
+
+       /* free ether device */
+       rte_eth_dev_release_port(eth_dev);
 }
 
 typedef int (*eth_dev_pci_callback_t)(struct rte_eth_dev *eth_dev);
index 55fd7bd..cbf78fa 100644
@@ -573,7 +573,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
                                 * Return index where key is stored,
                                 * subtracting the first dummy index
                                 */
-                               return prim_bkt->key_idx[i] - 1;
+                               ret = prim_bkt->key_idx[i] - 1;
+                               goto failure;
                        }
                }
        }
@@ -593,7 +594,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
                                 * Return index where key is stored,
                                 * subtracting the first dummy index
                                 */
-                               return sec_bkt->key_idx[i] - 1;
+                               ret = sec_bkt->key_idx[i] - 1;
+                               goto failure;
                        }
                }
        }
index 040bd70..3b84dca 100644
@@ -88,7 +88,9 @@ ipv4_frag_reassemble(struct ip_frag_pkt *fp)
        /* chain with the first fragment. */
        rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len));
        rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m);
+       fp->frags[curr_idx].mb = NULL;
        m = fp->frags[IP_FIRST_FRAG_IDX].mb;
+       fp->frags[IP_FIRST_FRAG_IDX].mb = NULL;
 
        /* update mbuf fields for reassembled packet. */
        m->ol_flags |= PKT_TX_IP_CKSUM;
index dde58cb..26b9a88 100644
@@ -111,7 +111,9 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
        /* chain with the first fragment. */
        rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len));
        rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m);
+       fp->frags[curr_idx].mb = NULL;
        m = fp->frags[IP_FIRST_FRAG_IDX].mb;
+       fp->frags[IP_FIRST_FRAG_IDX].mb = NULL;
 
        /* update mbuf fields for reassembled packet. */
        m->ol_flags |= PKT_TX_IP_CKSUM;
index 16a6048..807a8d4 100644
@@ -222,14 +222,17 @@ extern "C" {
 
 /**
  * Bits 45:48 used for the tunnel type.
- * When doing Tx offload like TSO or checksum, the HW needs to configure the
- * tunnel type into the HW descriptors.
+ * The tunnel type must be specified for TSO or checksum on the inner part
+ * of tunnel packets.
+ * These flags can be used with PKT_TX_TCP_SEG for TSO, or PKT_TX_xxx_CKSUM.
+ * The mbuf fields for inner and outer header lengths are required:
+ * outer_l2_len, outer_l3_len, l2_len, l3_len, l4_len and tso_segsz for TSO.
  */
 #define PKT_TX_TUNNEL_VXLAN   (0x1ULL << 45)
 #define PKT_TX_TUNNEL_GRE     (0x2ULL << 45)
 #define PKT_TX_TUNNEL_IPIP    (0x3ULL << 45)
 #define PKT_TX_TUNNEL_GENEVE  (0x4ULL << 45)
-/**< TX packet with MPLS-in-UDP RFC 7510 header. */
+/** TX packet with MPLS-in-UDP RFC 7510 header. */
 #define PKT_TX_TUNNEL_MPLSINUDP (0x5ULL << 45)
 /* add new TX TUNNEL type here */
 #define PKT_TX_TUNNEL_MASK    (0xFULL << 45)
@@ -245,12 +248,8 @@ extern "C" {
  *  - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
  *    PKT_TX_TCP_CKSUM)
  *  - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
- *  - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum
- *    to 0 in the packet
+ *  - if it's IPv4, set the PKT_TX_IP_CKSUM flag
  *  - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
- *  - calculate the pseudo header checksum without taking ip_len in account,
- *    and set it in the TCP header. Refer to rte_ipv4_phdr_cksum() and
- *    rte_ipv6_phdr_cksum() that can be used as helpers.
  */
 #define PKT_TX_TCP_SEG       (1ULL << 50)
 
@@ -263,9 +262,6 @@ extern "C" {
  *  - fill l2_len and l3_len in mbuf
  *  - set the flags PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM or PKT_TX_UDP_CKSUM
  *  - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
- *  - calculate the pseudo header checksum and set it in the L4 header (only
- *    for TCP or UDP). See rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum().
- *    For SCTP, set the crc field to 0.
  */
 #define PKT_TX_L4_NO_CKSUM   (0ULL << 52) /**< Disable L4 cksum of TX pkt. */
 #define PKT_TX_TCP_CKSUM     (1ULL << 52) /**< TCP cksum of TX pkt. computed by NIC. */
@@ -277,7 +273,6 @@ extern "C" {
  * Offload the IP checksum in the hardware. The flag PKT_TX_IPV4 should
  * also be set by the application, although a PMD will only check
  * PKT_TX_IP_CKSUM.
- *  - set the IP checksum field in the packet to 0
  *  - fill the mbuf offload information: l2_len, l3_len
  */
 #define PKT_TX_IP_CKSUM      (1ULL << 54)
@@ -302,10 +297,8 @@ extern "C" {
 
 /**
  * Offload the IP checksum of an external header in the hardware. The
- * flag PKT_TX_OUTER_IPV4 should also be set by the application, alto ugh
- * a PMD will only check PKT_TX_IP_CKSUM.  The IP checksum field in the
- * packet must be set to 0.
- *  - set the outer IP checksum field in the packet to 0
+ * flag PKT_TX_OUTER_IPV4 should also be set by the application, although
+ * a PMD will only check PKT_TX_OUTER_IP_CKSUM.
  *  - fill the mbuf offload information: outer_l2_len, outer_l3_len
  */
 #define PKT_TX_OUTER_IP_CKSUM   (1ULL << 58)
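
With the doc fix above, zeroing checksum fields and computing pseudo-header
checksums is no longer an application duty (rte_eth_tx_prepare() or the PMD
handles it), so TSO setup reduces to flags plus lengths. A sketch for an
IPv4/TCP packet, header lengths assumed:

    #include <rte_mbuf.h>

    static void
    request_tso_ipv4(struct rte_mbuf *m)
    {
            /* Per the flag list above, PKT_TX_TCP_SEG implies TCP cksum. */
            m->ol_flags |= PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_IP_CKSUM;

            /* Assumed Ethernet + 20-byte IPv4 + 20-byte TCP headers and a
             * 1448-byte TSO segment payload. */
            m->l2_len = 14;
            m->l3_len = 20;
            m->l4_len = 20;
            m->tso_segsz = 1448;
    }
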
index 5bd74ea..ad17458 100644
@@ -437,12 +437,18 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
        }
 
        /* not enough room to store one object */
-       if (i == 0)
-               return -EINVAL;
+       if (i == 0) {
+               ret = -EINVAL;
+               goto fail;
+       }
 
        STAILQ_INSERT_TAIL(&mp->mem_list, memhdr, next);
        mp->nb_mem_chunks++;
        return i;
+
+fail:
+       rte_free(memhdr);
+       return ret;
 }
 
 int
@@ -514,9 +520,6 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr,
        size_t off, phys_len;
        int ret, cnt = 0;
 
-       /* mempool must not be populated */
-       if (mp->nb_mem_chunks != 0)
-               return -EEXIST;
        /* address and len must be page-aligned */
        if (RTE_PTR_ALIGN_CEIL(addr, pg_sz) != addr)
                return -EINVAL;
@@ -685,7 +688,7 @@ rte_mempool_populate_anon(struct rte_mempool *mp)
        char *addr;
 
        /* mempool is already populated, error */
-       if (!STAILQ_EMPTY(&mp->mem_list)) {
+       if ((!STAILQ_EMPTY(&mp->mem_list)) || mp->nb_mem_chunks != 0) {
                rte_errno = EINVAL;
                return 0;
        }
index 73ec398..23468cb 100644
@@ -406,6 +406,12 @@ struct ipv6_hdr {
        uint8_t  dst_addr[16]; /**< IP address of destination host(s). */
 } __attribute__((__packed__));
 
+/* IPv6 vtc_flow: IP version / TC / flow label */
+#define IPV6_HDR_FL_SHIFT 0
+#define IPV6_HDR_TC_SHIFT 20
+#define IPV6_HDR_FL_MASK ((1u << IPV6_HDR_TC_SHIFT) - 1)
+#define IPV6_HDR_TC_MASK (0xff << IPV6_HDR_TC_SHIFT)
+
 /**
  * Process the pseudo-header checksum of an IPv6 header.
  *
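
A short sketch of how the masks added above are meant to be used: vtc_flow is
big-endian on the wire, so convert before shifting (illustrative helper, not
part of the patch; the traffic class occupies bits 20-27, the flow label
bits 0-19):

    #include <rte_byteorder.h>
    #include <rte_ip.h>

    static void
    split_vtc_flow(const struct ipv6_hdr *ip6, uint32_t *tc, uint32_t *fl)
    {
            uint32_t vtc_flow = rte_be_to_cpu_32(ip6->vtc_flow);

            *tc = (vtc_flow & IPV6_HDR_TC_MASK) >> IPV6_HDR_TC_SHIFT;
            *fl = (vtc_flow & IPV6_HDR_FL_MASK) >> IPV6_HDR_FL_SHIFT;
    }
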
index 15d93d9..c028027 100644
@@ -3,12 +3,11 @@ DPDK_17.11 {
 
        eal_parse_pci_BDF;
        eal_parse_pci_DomBDF;
-       rte_pci_addr_cmp;
-       rte_pci_addr_parse;
-       rte_pci_device_name;
        pci_map_resource;
        pci_unmap_resource;
        rte_eal_compare_pci_addr;
+       rte_pci_addr_cmp;
+       rte_pci_addr_parse;
        rte_pci_device_name;
 
        local: *;
index 4c6fed4..173ec75 100644
@@ -200,6 +200,38 @@ fdset_del(struct fdset *pfdset, int fd)
        return dat;
 }
 
+/**
+ * Unregister the fd from the fdset.
+ *
+ * Return -2 if the parameters are invalid, -1 if the fd is busy in a
+ * read/write callback, and 0 once the fd has been removed (or was not
+ * present in the fdset).
+ */
+int
+fdset_try_del(struct fdset *pfdset, int fd)
+{
+       int i;
+
+       if (pfdset == NULL || fd == -1)
+               return -2;
+
+       pthread_mutex_lock(&pfdset->fd_mutex);
+       i = fdset_find_fd(pfdset, fd);
+       if (i != -1 && pfdset->fd[i].busy) {
+               pthread_mutex_unlock(&pfdset->fd_mutex);
+               return -1;
+       }
+
+       if (i != -1) {
+               pfdset->fd[i].fd = -1;
+               pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
+               pfdset->fd[i].dat = NULL;
+       }
+
+       pthread_mutex_unlock(&pfdset->fd_mutex);
+       return 0;
+}
 
 /**
  * This function runs in an infinite blocking loop until there is no fd in
index 90d34db..c008838 100644
@@ -63,6 +63,7 @@ int fdset_add(struct fdset *pfdset, int fd,
        fd_cb rcb, fd_cb wcb, void *dat);
 
 void *fdset_del(struct fdset *pfdset, int fd);
+int fdset_try_del(struct fdset *pfdset, int fd);
 
 void *fdset_event_dispatch(void *arg);
 
index 811e6bf..f816601 100644
@@ -181,6 +181,11 @@ send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
                msgh.msg_control = control;
                msgh.msg_controllen = sizeof(control);
                cmsg = CMSG_FIRSTHDR(&msgh);
+               if (cmsg == NULL) {
+                       RTE_LOG(ERR, VHOST_CONFIG, "cmsg == NULL\n");
+                       errno = EINVAL;
+                       return -1;
+               }
                cmsg->cmsg_len = CMSG_LEN(fdsize);
                cmsg->cmsg_level = SOL_SOCKET;
                cmsg->cmsg_type = SCM_RIGHTS;
@@ -756,13 +761,25 @@ rte_vhost_driver_unregister(const char *path)
                                vhost_user_remove_reconnect(vsocket);
                        }
 
+again:
                        pthread_mutex_lock(&vsocket->conn_mutex);
                        for (conn = TAILQ_FIRST(&vsocket->conn_list);
                             conn != NULL;
                             conn = next) {
                                next = TAILQ_NEXT(conn, next);
 
-                               fdset_del(&vhost_user.fdset, conn->connfd);
+                               /*
+                                * If r/wcb is executing, release the
+                                * conn_mutex lock, and try again since
+                                * the r/wcb may use the conn_mutex lock.
+                                */
+                               if (fdset_try_del(&vhost_user.fdset,
+                                                 conn->connfd) == -1) {
+                                       pthread_mutex_unlock(
+                                                       &vsocket->conn_mutex);
+                                       goto again;
+                               }
+
                                RTE_LOG(INFO, VHOST_CONFIG,
                                        "free connfd = %d for device '%s'\n",
                                        conn->connfd, path);
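
The retry loop above is a general pattern: when a resource may be busy inside
a callback that can itself take the lock we are holding, drop the lock and
try again instead of blocking. A minimal sketch with generic, hypothetical
names (not the vhost code):

    #include <pthread.h>
    #include <sched.h>
    #include <stdbool.h>

    /* Hypothetical helper: returns false while the item is still busy
     * inside a callback. */
    extern bool try_remove(int id);

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    static void
    remove_when_idle(int id)
    {
            for (;;) {
                    pthread_mutex_lock(&list_lock);
                    if (try_remove(id)) {
                            pthread_mutex_unlock(&list_lock);
                            return;
                    }
                    /* The callback may need list_lock to finish, so
                     * release it before retrying. */
                    pthread_mutex_unlock(&list_lock);
                    sched_yield();
            }
    }
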
index 50e654d..c0efb31 100644
@@ -476,7 +476,7 @@ vhost_user_set_vring_addr(struct virtio_net **pdev, VhostUserMsg *msg)
 
        if (vq->enabled && (dev->features &
                                (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) {
-               dev = translate_ring_addresses(dev, msg->payload.state.index);
+               dev = translate_ring_addresses(dev, msg->payload.addr.index);
                if (!dev)
                        return -1;
 
@@ -501,7 +501,7 @@ vhost_user_set_vring_base(struct virtio_net *dev,
        return 0;
 }
 
-static void
+static int
 add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
                   uint64_t host_phys_addr, uint64_t size)
 {
@@ -511,6 +511,10 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
                dev->max_guest_pages *= 2;
                dev->guest_pages = realloc(dev->guest_pages,
                                        dev->max_guest_pages * sizeof(*page));
+               if (!dev->guest_pages) {
+                       RTE_LOG(ERR, VHOST_CONFIG, "cannot realloc guest_pages\n");
+                       return -1;
+               }
        }
 
        if (dev->nr_guest_pages > 0) {
@@ -519,7 +523,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
                if (host_phys_addr == last_page->host_phys_addr +
                                      last_page->size) {
                        last_page->size += size;
-                       return;
+                       return 0;
                }
        }
 
@@ -527,9 +531,11 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
        page->guest_phys_addr = guest_phys_addr;
        page->host_phys_addr  = host_phys_addr;
        page->size = size;
+
+       return 0;
 }
 
-static void
+static int
 add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
                uint64_t page_size)
 {
@@ -543,7 +549,9 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
        size = page_size - (guest_phys_addr & (page_size - 1));
        size = RTE_MIN(size, reg_size);
 
-       add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size);
+       if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0)
+               return -1;
+
        host_user_addr  += size;
        guest_phys_addr += size;
        reg_size -= size;
@@ -552,12 +560,16 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
                size = RTE_MIN(reg_size, page_size);
                host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
                                                  host_user_addr);
-               add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size);
+               if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
+                               size) < 0)
+                       return -1;
 
                host_user_addr  += size;
                guest_phys_addr += size;
                reg_size -= size;
        }
+
+       return 0;
 }
 
 #ifdef RTE_LIBRTE_VHOST_DEBUG
@@ -705,7 +717,12 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
                                      mmap_offset;
 
                if (dev->dequeue_zero_copy)
-                       add_guest_pages(dev, reg, alignment);
+                       if (add_guest_pages(dev, reg, alignment) < 0) {
+                               RTE_LOG(ERR, VHOST_CONFIG,
+                                       "adding guest pages to region %u failed.\n",
+                                       i);
+                               goto err_mmap;
+                       }
 
                RTE_LOG(INFO, VHOST_CONFIG,
                        "guest memory region %u, size: 0x%" PRIx64 "\n"
@@ -857,8 +874,8 @@ vhost_user_get_vring_base(struct virtio_net *dev,
 
        dev->flags &= ~VIRTIO_DEV_READY;
 
-       /* Here we are safe to get the last used index */
-       msg->payload.state.num = vq->last_used_idx;
+       /* Here we are safe to get the last avail index */
+       msg->payload.state.num = vq->last_avail_idx;
 
        RTE_LOG(INFO, VHOST_CONFIG,
                "vring base idx:%d file:%d\n", msg->payload.state.index,
@@ -873,6 +890,11 @@ vhost_user_get_vring_base(struct virtio_net *dev,
 
        vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
 
+       if (vq->callfd >= 0)
+               close(vq->callfd);
+
+       vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
+
        if (dev->dequeue_zero_copy)
                free_zmbufs(vq);
        rte_free(vq->shadow_used_ring);
@@ -967,7 +989,7 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg)
         * mmap from 0 to workaround a hugepage mmap bug: mmap will
         * fail when offset is not page size aligned.
         */
-       addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+       addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        close(fd);
        if (addr == MAP_FAILED) {
                RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
index a013c07..ecfabca 100644
@@ -297,7 +297,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
                        rte_memcpy((void *)(uintptr_t)dst,
                                        (void *)(uintptr_t)src, len);
 
-                       PRINT_PACKET(dev, (uintptr_t)dst, len, 0);
+                       PRINT_PACKET(dev, (uintptr_t)dst, (uint32_t)len, 0);
                        vhost_log_write(dev, guest_addr, len);
                        remain -= len;
                        guest_addr += len;
@@ -796,7 +796,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
                                                        len);
 
                                        PRINT_PACKET(dev, (uintptr_t)dst,
-                                                       len, 0);
+                                                       (uint32_t)len, 0);
                                        vhost_log_write(dev, guest_addr, len);
 
                                        remain -= len;
@@ -1234,7 +1234,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
        rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
 
        PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
-                       desc_chunck_len, 0);
+                       (uint32_t)desc_chunck_len, 0);
 
        mbuf_offset = 0;
        mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
@@ -1322,7 +1322,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
                        desc_avail  = desc->len;
 
                        PRINT_PACKET(dev, (uintptr_t)desc_addr,
-                                       desc_chunck_len, 0);
+                                       (uint32_t)desc_chunck_len, 0);
                } else if (unlikely(desc_chunck_len == 0)) {
                        desc_chunck_len = desc_avail;
                        desc_gaddr += desc_offset;
@@ -1337,7 +1337,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
                        desc_offset = 0;
 
                        PRINT_PACKET(dev, (uintptr_t)desc_addr,
-                                       desc_chunck_len, 0);
+                                       (uint32_t)desc_chunck_len, 0);
                }
 
                /*
index 9736341..59cf6b1 100644
@@ -64,7 +64,6 @@ notemplate:
        @echo "use T=template from the following list:"
        @$(MAKE) -rR showconfigs | sed 's,^,  ,'
 
-
 .PHONY: defconfig
 defconfig:
        @$(MAKE) config T=$(shell \
@@ -75,15 +74,25 @@ defconfig:
                         print "arm-armv7a"} \
                 else if ($$0 == "ppc64") { \
                         print "ppc_64-power8"} \
+                else if ($$0 == "amd64") { \
+                        print "x86_64-native"} \
                 else { \
-                        printf "%s-native", $$0} }')-$(shell \
+                        printf "%s-native", $$0} }' \
+               )-$(shell \
                 uname | awk '{ \
                 if ($$0 == "Linux") { \
                         print "linuxapp"} \
                 else { \
-                        print "bsdapp"} }')-$(shell \
-                ${CC} -v 2>&1 | \
-                grep " version " | cut -d ' ' -f 1)
+                        print "bsdapp"} }' \
+               )-$(shell \
+               ${CC} --version | grep -o 'cc\|gcc\|icc\|clang' | awk \
+               '{ \
+               if ($$1 == "cc") { \
+                       print "gcc" } \
+               else { \
+                       print $$1 } \
+               }' \
+               )
 
 .PHONY: config
 ifeq ($(RTE_CONFIG_TEMPLATE),)
index 8681e4c..1bd171e 100644
@@ -30,7 +30,7 @@
 # OF THE POSSIBILITY OF SUCH DAMAGE.
 
 Name: dpdk
-Version: 17.11.2
+Version: 17.11.3
 Release: 1
 Packager: packaging@6wind.com
 URL: http://dpdk.org
index 7997b48..83cf730 100644
@@ -6484,7 +6484,7 @@ test_multi_session_random_usage(void)
                sessions[i] = rte_cryptodev_sym_session_create(
                                ts_params->session_mpool);
 
-               rte_memcpy(&ut_paramz[i].ut_params, &testsuite_params,
+               rte_memcpy(&ut_paramz[i].ut_params, &unittest_params,
                                sizeof(struct crypto_unittest_params));
 
                test_AES_CBC_HMAC_SHA512_decrypt_create_session_params(
index 7d69887..4a62a92 100644
@@ -60,7 +60,7 @@ struct worker_stats worker_stats[RTE_MAX_LCORE];
  * worker thread used for testing the time to do a round-trip of a cache
  * line between two cores and back again
  */
-static void
+static int
 flip_bit(volatile uint64_t *arg)
 {
        uint64_t old_val = 0;
@@ -70,6 +70,7 @@ flip_bit(volatile uint64_t *arg)
                old_val = *arg;
                *arg = 0;
        }
+       return 0;
 }
 
 /*
index 310109e..473ea11 100644
@@ -61,7 +61,7 @@
 #define memtest "memtest"
 #define memtest1 "memtest1"
 #define memtest2 "memtest2"
-#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10)
+#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 20)
 #define launch_proc(ARGV) process_dup(ARGV, \
                sizeof(ARGV)/(sizeof(ARGV[0])), __func__)
 
@@ -1166,10 +1166,11 @@ test_memory_flags(void)
 #ifdef RTE_EXEC_ENV_BSDAPP
        int i, num_sockets = 1;
 #else
-       int i, num_sockets = get_number_of_sockets();
+       int i, num_sockets = RTE_MIN(get_number_of_sockets(),
+                       RTE_MAX_NUMA_NODES);
 #endif
 
-       if (num_sockets <= 0 || num_sockets > RTE_MAX_NUMA_NODES) {
+       if (num_sockets <= 0) {
                printf("Error - cannot get number of sockets!\n");
                return -1;
        }
index 37ead50..56b0fd0 100644
@@ -355,17 +355,17 @@ test_mempool_sp_sc(void)
        }
        if (rte_mempool_lookup("test_mempool_sp_sc") != mp_spsc) {
                printf("Cannot lookup mempool from its name\n");
-               rte_mempool_free(mp_spsc);
-               RET_ERR();
+               ret = -1;
+               goto err;
        }
        lcore_next = rte_get_next_lcore(lcore_id, 0, 1);
        if (lcore_next >= RTE_MAX_LCORE) {
-               rte_mempool_free(mp_spsc);
-               RET_ERR();
+               ret = -1;
+               goto err;
        }
        if (rte_eal_lcore_role(lcore_next) != ROLE_RTE) {
-               rte_mempool_free(mp_spsc);
-               RET_ERR();
+               ret = -1;
+               goto err;
        }
        rte_spinlock_init(&scsp_spinlock);
        memset(scsp_obj_table, 0, sizeof(scsp_obj_table));
@@ -376,7 +376,10 @@ test_mempool_sp_sc(void)
 
        if (rte_eal_wait_lcore(lcore_next) < 0)
                ret = -1;
+
+err:
        rte_mempool_free(mp_spsc);
+       mp_spsc = NULL;
 
        return ret;
 }
index 429f6eb..6f2c230 100644
@@ -175,11 +175,11 @@ test_reorder_insert(void)
        b = rte_reorder_create("test_insert", rte_socket_id(), size);
        TEST_ASSERT_NOT_NULL(b, "Failed to create reorder buffer");
 
-       ret = rte_mempool_get_bulk(p, (void *)bufs, num_bufs);
-       TEST_ASSERT_SUCCESS(ret, "Error getting mbuf from pool");
-
-       for (i = 0; i < num_bufs; i++)
+       for (i = 0; i < num_bufs; i++) {
+               bufs[i] = rte_pktmbuf_alloc(p);
+               TEST_ASSERT_NOT_NULL(bufs[i], "Packet allocation failed\n");
                bufs[i]->seqn = i;
+       }
 
        /* This should fill up order buffer:
         * reorder_seq = 0
@@ -194,6 +194,7 @@ test_reorder_insert(void)
                        ret = -1;
                        goto exit;
                }
+               bufs[i] = NULL;
        }
 
        /* early packet - should move mbufs to ready buf and move sequence window
@@ -208,6 +209,7 @@ test_reorder_insert(void)
                ret = -1;
                goto exit;
        }
+       bufs[4] = NULL;
 
        /* early packet from current sequence window - full ready buffer */
        bufs[5]->seqn = 2 * size;
@@ -218,6 +220,7 @@ test_reorder_insert(void)
                ret = -1;
                goto exit;
        }
+       bufs[5] = NULL;
 
        /* late packet */
        bufs[6]->seqn = 3 * size;
@@ -228,11 +231,15 @@ test_reorder_insert(void)
                ret = -1;
                goto exit;
        }
+       bufs[6] = NULL;
 
        ret = 0;
 exit:
-       rte_mempool_put_bulk(p, (void *)bufs, num_bufs);
        rte_reorder_free(b);
+       for (i = 0; i < num_bufs; i++) {
+               if (bufs[i] != NULL)
+                       rte_pktmbuf_free(bufs[i]);
+       }
        return ret;
 }
 
@@ -248,6 +255,10 @@ test_reorder_drain(void)
        int ret = 0;
        unsigned i, cnt;
 
+       /* initialize all robufs to NULL */
+       for (i = 0; i < num_bufs; i++)
+               robufs[i] = NULL;
+
        /* This would create a reorder buffer instance consisting of:
         * reorder_seq = 0
         * ready_buf: RB[size] = {NULL, NULL, NULL, NULL}
@@ -256,9 +267,6 @@ test_reorder_drain(void)
        b = rte_reorder_create("test_drain", rte_socket_id(), size);
        TEST_ASSERT_NOT_NULL(b, "Failed to create reorder buffer");
 
-       ret = rte_mempool_get_bulk(p, (void *)bufs, num_bufs);
-       TEST_ASSERT_SUCCESS(ret, "Error getting mbuf from pool");
-
        /* Check no drained packets if reorder is empty */
        cnt = rte_reorder_drain(b, robufs, 1);
        if (cnt != 0) {
@@ -268,8 +276,11 @@ test_reorder_drain(void)
                goto exit;
        }
 
-       for (i = 0; i < num_bufs; i++)
+       for (i = 0; i < num_bufs; i++) {
+               bufs[i] = rte_pktmbuf_alloc(p);
+               TEST_ASSERT_NOT_NULL(bufs[i], "Packet allocation failed\n");
                bufs[i]->seqn = i;
+       }
 
        /* Insert packet with seqn 1:
         * reorder_seq = 0
@@ -277,6 +288,7 @@ test_reorder_drain(void)
         * OB[] = {1, NULL, NULL, NULL}
         */
        rte_reorder_insert(b, bufs[1]);
+       bufs[1] = NULL;
 
        cnt = rte_reorder_drain(b, robufs, 1);
        if (cnt != 1) {
@@ -285,6 +297,8 @@ test_reorder_drain(void)
                ret = -1;
                goto exit;
        }
+       if (robufs[0] != NULL) {
+               rte_pktmbuf_free(robufs[0]);
+               robufs[0] = NULL;
+       }
 
        /* Insert more packets
         * RB[] = {NULL, NULL, NULL, NULL}
@@ -292,18 +306,22 @@ test_reorder_drain(void)
         */
        rte_reorder_insert(b, bufs[2]);
        rte_reorder_insert(b, bufs[3]);
+       bufs[2] = NULL;
+       bufs[3] = NULL;
 
        /* Insert more packets
         * RB[] = {NULL, NULL, NULL, NULL}
         * OB[] = {NULL, 2, 3, 4}
         */
        rte_reorder_insert(b, bufs[4]);
+       bufs[4] = NULL;
 
        /* Insert more packets
         * RB[] = {2, 3, 4, NULL}
         * OB[] = {NULL, NULL, 7, NULL}
         */
        rte_reorder_insert(b, bufs[7]);
+       bufs[7] = NULL;
 
        /* drained expected packets */
        cnt = rte_reorder_drain(b, robufs, 4);
@@ -313,6 +331,10 @@ test_reorder_drain(void)
                ret = -1;
                goto exit;
        }
+       for (i = 0; i < 3; i++) {
+               if (robufs[i] != NULL) {
+                       rte_pktmbuf_free(robufs[i]);
+                       robufs[i] = NULL;
+               }
+       }
 
        /*
         * RB[] = {NULL, NULL, NULL, NULL}
@@ -327,8 +349,13 @@ test_reorder_drain(void)
        }
        ret = 0;
 exit:
-       rte_mempool_put_bulk(p, (void *)bufs, num_bufs);
        rte_reorder_free(b);
+       for (i = 0; i < num_bufs; i++) {
+               if (bufs[i] != NULL)
+                       rte_pktmbuf_free(bufs[i]);
+               if (robufs[i] != NULL)
+                       rte_pktmbuf_free(robufs[i]);
+       }
        return ret;
 }
 
index a6fef72..fcd3f37 100644
@@ -98,9 +98,9 @@ rte_pipeline_table_action_handler_hit
 table_action_stub_hit(struct rte_pipeline *p, struct rte_mbuf **pkts,
        uint64_t pkts_mask, struct rte_pipeline_table_entry **entry, void *arg);
 
-rte_pipeline_table_action_handler_miss
+static int
 table_action_stub_miss(struct rte_pipeline *p, struct rte_mbuf **pkts,
-       uint64_t pkts_mask, struct rte_pipeline_table_entry **entry, void *arg);
+       uint64_t pkts_mask, struct rte_pipeline_table_entry *entry, void *arg);
 
 rte_pipeline_table_action_handler_hit
 table_action_0x00(__attribute__((unused)) struct rte_pipeline *p,
@@ -130,11 +130,11 @@ table_action_stub_hit(__attribute__((unused)) struct rte_pipeline *p,
        return 0;
 }
 
-rte_pipeline_table_action_handler_miss
+static int
 table_action_stub_miss(struct rte_pipeline *p,
        __attribute__((unused)) struct rte_mbuf **pkts,
        uint64_t pkts_mask,
-       __attribute__((unused)) struct rte_pipeline_table_entry **entry,
+       __attribute__((unused)) struct rte_pipeline_table_entry *entry,
        __attribute__((unused)) void *arg)
 {
        printf("STUB Table Action Miss - setting mask to 0x%"PRIx64"\n",
@@ -546,8 +546,7 @@ test_table_pipeline(void)
 
        /* TEST - one packet per port */
        action_handler_hit = NULL;
-       action_handler_miss =
-               (rte_pipeline_table_action_handler_miss) table_action_stub_miss;
+       action_handler_miss = table_action_stub_miss;
        table_entry_default_action = RTE_PIPELINE_ACTION_PORT;
        override_miss_mask = 0x01; /* one packet per port */
        setup_pipeline(e_TEST_STUB);
@@ -582,8 +581,7 @@ test_table_pipeline(void)
 
        printf("TEST - two tables, hitmask override to 0x01\n");
        connect_miss_action_to_table = 1;
-       action_handler_miss =
-               (rte_pipeline_table_action_handler_miss)table_action_stub_miss;
+       action_handler_miss = table_action_stub_miss;
        override_miss_mask = 0x01;
        setup_pipeline(e_TEST_STUB);
        if (test_pipeline_single_filter(e_TEST_STUB, 2) < 0)