New upstream version 18.11.1 85/18885/1
author     Christian Ehrhardt <christian.ehrhardt@canonical.com>
Mon, 15 Apr 2019 12:36:48 +0000 (14:36 +0200)
committer  Christian Ehrhardt <christian.ehrhardt@canonical.com>
Mon, 15 Apr 2019 12:39:46 +0000 (14:39 +0200)
Change-Id: Ic52e74a9ed6f3ae06acea4a27357bd7153efc2a3
Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com>
206 files changed:
app/pdump/main.c
app/test-bbdev/test_bbdev_perf.c
app/test-eventdev/test_perf_common.c
app/test-pmd/cmdline.c
app/test-pmd/cmdline_flow.c
app/test-pmd/cmdline_tm.c
app/test-pmd/testpmd.c
config/arm/meson.build
config/x86/meson.build
devtools/check-git-log.sh
devtools/check-symbol-change.sh
devtools/checkpatches.sh
devtools/test-meson-builds.sh
doc/guides/conf.py
doc/guides/contributing/documentation.rst
doc/guides/cryptodevs/aesni_mb.rst
doc/guides/cryptodevs/qat.rst
doc/guides/linux_gsg/sys_reqs.rst
doc/guides/nics/i40e.rst
doc/guides/nics/ifc.rst
doc/guides/nics/mlx5.rst
doc/guides/prog_guide/env_abstraction_layer.rst
doc/guides/prog_guide/generic_receive_offload_lib.rst
doc/guides/prog_guide/power_man.rst
doc/guides/prog_guide/rte_flow.rst
doc/guides/rel_notes/known_issues.rst
doc/guides/rel_notes/release_18_11.rst
doc/guides/sample_app_ug/flow_filtering.rst
doc/guides/testpmd_app_ug/run_app.rst
doc/guides/testpmd_app_ug/testpmd_funcs.rst
drivers/baseband/turbo_sw/Makefile
drivers/bus/dpaa/dpaa_bus.c
drivers/bus/fslmc/fslmc_bus.c
drivers/bus/fslmc/fslmc_vfio.c
drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
drivers/bus/fslmc/qbman/qbman_portal.c
drivers/bus/fslmc/qbman/qbman_sys.h
drivers/bus/ifpga/ifpga_bus.c
drivers/bus/ifpga/rte_bus_ifpga.h
drivers/bus/pci/linux/pci_vfio.c
drivers/bus/vmbus/linux/vmbus_uio.c
drivers/common/qat/qat_qp.c
drivers/common/qat/qat_qp.h
drivers/compress/qat/qat_comp.c
drivers/compress/qat/qat_comp.h
drivers/crypto/aesni_gcm/aesni_gcm_pmd_ops.c
drivers/crypto/caam_jr/caam_jr.c
drivers/crypto/caam_jr/caam_jr_uio.c
drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c
drivers/crypto/dpaa_sec/dpaa_sec.c
drivers/crypto/null/null_crypto_pmd_ops.c
drivers/crypto/openssl/rte_openssl_pmd_ops.c
drivers/crypto/qat/qat_sym.c
drivers/crypto/qat/qat_sym_session.c
drivers/crypto/snow3g/rte_snow3g_pmd_ops.c
drivers/crypto/zuc/rte_zuc_pmd_ops.c
drivers/net/af_packet/rte_eth_af_packet.c
drivers/net/atlantic/atl_rxtx.c
drivers/net/avf/base/avf_adminq_cmd.h
drivers/net/bnx2x/bnx2x.c
drivers/net/bnx2x/bnx2x.h
drivers/net/bnx2x/bnx2x_ethdev.c
drivers/net/bnx2x/bnx2x_ethdev.h
drivers/net/bnx2x/ecore_sp.c
drivers/net/bonding/rte_eth_bond_api.c
drivers/net/bonding/rte_eth_bond_pmd.c
drivers/net/cxgbe/cxgbe_flow.c
drivers/net/cxgbe/cxgbe_main.c
drivers/net/dpaa/dpaa_ethdev.c
drivers/net/dpaa2/dpaa2_ethdev.c
drivers/net/e1000/em_rxtx.c
drivers/net/e1000/igb_ethdev.c
drivers/net/ena/ena_ethdev.c
drivers/net/enic/enic_clsf.c
drivers/net/enic/enic_main.c
drivers/net/fm10k/fm10k_ethdev.c
drivers/net/fm10k/fm10k_rxtx.c
drivers/net/i40e/base/i40e_type.h
drivers/net/i40e/i40e_ethdev.c
drivers/net/i40e/i40e_ethdev.h
drivers/net/i40e/i40e_ethdev_vf.c
drivers/net/i40e/i40e_pf.c
drivers/net/i40e/i40e_rxtx.c
drivers/net/i40e/rte_pmd_i40e.c
drivers/net/ifc/base/ifcvf.h
drivers/net/ifc/ifcvf_vdpa.c
drivers/net/ixgbe/base/ixgbe_82599.c
drivers/net/ixgbe/base/ixgbe_phy.c
drivers/net/ixgbe/base/ixgbe_phy.h
drivers/net/ixgbe/base/ixgbe_type.h
drivers/net/ixgbe/base/ixgbe_x550.c
drivers/net/ixgbe/ixgbe_ethdev.c
drivers/net/ixgbe/ixgbe_ethdev.h
drivers/net/ixgbe/ixgbe_pf.c
drivers/net/ixgbe/ixgbe_rxtx.c
drivers/net/mlx5/Makefile
drivers/net/mlx5/meson.build
drivers/net/mlx5/mlx5.c
drivers/net/mlx5/mlx5_flow.c
drivers/net/mlx5/mlx5_flow_tcf.c
drivers/net/mlx5/mlx5_flow_verbs.c
drivers/net/mlx5/mlx5_rxq.c
drivers/net/mlx5/mlx5_rxtx.h
drivers/net/netvsc/hn_ethdev.c
drivers/net/netvsc/hn_nvs.c
drivers/net/netvsc/hn_rxtx.c
drivers/net/netvsc/hn_var.h
drivers/net/qede/qede_rxtx.c
drivers/net/qede/qede_rxtx.h
drivers/net/sfc/base/efx_tx.c
drivers/net/sfc/efsys.h
drivers/net/sfc/sfc.h
drivers/net/sfc/sfc_ef10_tx.c
drivers/net/sfc/sfc_ethdev.c
drivers/net/sfc/sfc_log.h
drivers/net/sfc/sfc_port.c
drivers/net/sfc/sfc_tso.h
drivers/net/sfc/sfc_tx.c
drivers/net/tap/rte_eth_tap.c
drivers/net/tap/tap_tcmsgs.c
drivers/net/vhost/rte_eth_vhost.c
drivers/net/virtio/virtio_user/virtio_user_dev.c
drivers/net/virtio/virtqueue.h
drivers/net/vmxnet3/vmxnet3_rxtx.c
drivers/raw/dpaa2_qdma/dpaa2_qdma.c
drivers/raw/ifpga_rawdev/base/opae_hw_api.c
drivers/raw/ifpga_rawdev/base/opae_hw_api.h
drivers/raw/ifpga_rawdev/ifpga_rawdev.c
examples/bond/main.c
examples/flow_filtering/flow_blocks.c
examples/ipsec-secgw/ipsec-secgw.c
examples/ipsec-secgw/ipsec.c
examples/ipsec-secgw/ipsec.h
examples/ipsec-secgw/sa.c
examples/ipsec-secgw/sp4.c
examples/ipsec-secgw/sp6.c
examples/kni/main.c
examples/tep_termination/main.c
examples/vhost/main.c
examples/vhost_crypto/main.c
kernel/linux/kni/compat.h
kernel/linux/kni/ethtool/igb/igb_main.c
kernel/linux/kni/ethtool/igb/kcompat.h
kernel/linux/kni/ethtool/ixgbe/kcompat.h
lib/librte_compressdev/rte_comp.h
lib/librte_eal/bsdapp/eal/eal.c
lib/librte_eal/common/eal_common_memory.c
lib/librte_eal/common/eal_common_memzone.c
lib/librte_eal/common/eal_common_options.c
lib/librte_eal/common/eal_common_proc.c
lib/librte_eal/common/eal_filesystem.h
lib/librte_eal/common/eal_internal_cfg.h
lib/librte_eal/common/eal_options.h
lib/librte_eal/common/eal_private.h
lib/librte_eal/common/hotplug_mp.c
lib/librte_eal/common/include/generic/rte_atomic.h
lib/librte_eal/common/include/rte_malloc.h
lib/librte_eal/common/include/rte_version.h
lib/librte_eal/common/malloc_elem.c
lib/librte_eal/common/malloc_mp.c
lib/librte_eal/common/rte_malloc.c
lib/librte_eal/common/rte_option.c
lib/librte_eal/linuxapp/eal/eal.c
lib/librte_eal/linuxapp/eal/eal_memalloc.c
lib/librte_eal/linuxapp/eal/eal_memory.c
lib/librte_eal/linuxapp/eal/eal_vfio.c
lib/librte_eal/linuxapp/eal/eal_vfio.h
lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
lib/librte_efd/rte_efd.c
lib/librte_ethdev/rte_ethdev.c
lib/librte_ethdev/rte_ethdev.h
lib/librte_eventdev/rte_event_eth_rx_adapter.c
lib/librte_eventdev/rte_event_eth_tx_adapter.c
lib/librte_eventdev/rte_event_timer_adapter.h
lib/librte_eventdev/rte_eventdev.h
lib/librte_gro/gro_tcp4.c
lib/librte_gro/gro_tcp4.h
lib/librte_gro/gro_vxlan_tcp4.c
lib/librte_gso/gso_common.h
lib/librte_hash/rte_cuckoo_hash.c
lib/librte_hash/rte_cuckoo_hash.h
lib/librte_ip_frag/rte_ip_frag.h
lib/librte_ip_frag/rte_ipv6_fragmentation.c
lib/librte_net/rte_ip.h
lib/librte_sched/rte_sched.c
lib/librte_telemetry/rte_telemetry.c
lib/librte_telemetry/rte_telemetry_internal.h
lib/librte_timer/rte_timer.c
lib/librte_vhost/fd_man.c
lib/librte_vhost/fd_man.h
lib/librte_vhost/socket.c
lib/librte_vhost/vhost.h
lib/librte_vhost/vhost_crypto.c
lib/librte_vhost/vhost_user.c
lib/librte_vhost/virtio_net.c
lib/meson.build
meson.build
mk/rte.cpuflags.mk
mk/rte.sdkinstall.mk
mk/toolchain/gcc/rte.toolchain-compat.mk
pkg/dpdk.spec
test/test/meson.build
test/test/test_cryptodev_blockcipher.c
test/test/test_eal_flags.c
test/test/test_hash_readwrite.c
test/test/test_metrics.c

diff --git a/app/pdump/main.c b/app/pdump/main.c
index 9e86bf6..5e183ea 100644
@@ -494,6 +494,7 @@ cleanup_pdump_resources(void)
 {
        int i;
        struct pdump_tuples *pt;
+       char name[RTE_ETH_NAME_MAX_LEN];
 
        /* disable pdump and free the pdump_tuple resources */
        for (i = 0; i < num_tuples; i++) {
@@ -510,6 +511,14 @@ cleanup_pdump_resources(void)
                        free_ring_data(pt->rx_ring, pt->rx_vdev_id, &pt->stats);
                if (pt->dir & RTE_PDUMP_FLAG_TX)
                        free_ring_data(pt->tx_ring, pt->tx_vdev_id, &pt->stats);
+
+               /* Remove the vdev created */
+               rte_eth_dev_get_name_by_port(pt->rx_vdev_id, name);
+               rte_eal_hotplug_remove("vdev", name);
+
+               rte_eth_dev_get_name_by_port(pt->tx_vdev_id, name);
+               rte_eal_hotplug_remove("vdev", name);
+
        }
        cleanup_rings();
 }
diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index fbe6cc9..fa26deb 100644
@@ -1953,7 +1953,10 @@ offload_latency_test_dec(struct rte_mempool *mempool, struct test_buffers *bufs,
                if (unlikely(num_to_process - dequeued < burst_sz))
                        burst_sz = num_to_process - dequeued;
 
-               rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
+               ret = rte_bbdev_dec_op_alloc_bulk(mempool, ops_enq, burst_sz);
+               TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
+                               burst_sz);
+
                if (test_vector.op_type != RTE_BBDEV_OP_NONE)
                        copy_reference_dec_op(ops_enq, burst_sz, dequeued,
                                        bufs->inputs,
@@ -2035,7 +2038,10 @@ offload_latency_test_enc(struct rte_mempool *mempool, struct test_buffers *bufs,
                if (unlikely(num_to_process - dequeued < burst_sz))
                        burst_sz = num_to_process - dequeued;
 
-               rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
+               ret = rte_bbdev_enc_op_alloc_bulk(mempool, ops_enq, burst_sz);
+               TEST_ASSERT_SUCCESS(ret, "Allocation failed for %d ops",
+                               burst_sz);
+
                if (test_vector.op_type != RTE_BBDEV_OP_NONE)
                        copy_reference_enc_op(ops_enq, burst_sz, dequeued,
                                        bufs->inputs,
diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c
index 8618775..f99a6a6 100644
@@ -327,7 +327,8 @@ perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
                }
 
                if (new_cycles - dead_lock_cycles > dead_lock_sample &&
-                               opt->prod_type == EVT_PROD_TYPE_SYNT) {
+                   (opt->prod_type == EVT_PROD_TYPE_SYNT ||
+                    opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR)) {
                        remaining = t->outstand_pkts - processed_pkts(t);
                        if (dead_lock_remaining == remaining) {
                                rte_event_dev_dump(opt->dev_id, stdout);
diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 1275074..51704b5 100644
@@ -15567,10 +15567,9 @@ static void cmd_set_mplsogre_encap_parsed(void *parsed_result,
        struct cmd_set_mplsogre_encap_result *res = parsed_result;
        union {
                uint32_t mplsogre_label;
-               uint8_t label[3];
+               uint8_t label[4];
        } id = {
-               .mplsogre_label =
-                       rte_cpu_to_be_32(res->label) & RTE_BE32(0x00ffffff),
+               .mplsogre_label = rte_cpu_to_be_32(res->label<<12),
        };
 
        if (strcmp(res->mplsogre, "mplsogre_encap") == 0)
@@ -15583,7 +15582,7 @@ static void cmd_set_mplsogre_encap_parsed(void *parsed_result,
                mplsogre_encap_conf.select_ipv4 = 0;
        else
                return;
-       rte_memcpy(mplsogre_encap_conf.label, &id.label[1], 3);
+       rte_memcpy(mplsogre_encap_conf.label, &id.label, 3);
        if (mplsogre_encap_conf.select_ipv4) {
                IPV4_ADDR_TO_UINT(res->ip_src, mplsogre_encap_conf.ipv4_src);
                IPV4_ADDR_TO_UINT(res->ip_dst, mplsogre_encap_conf.ipv4_dst);
@@ -15804,10 +15803,9 @@ static void cmd_set_mplsoudp_encap_parsed(void *parsed_result,
        struct cmd_set_mplsoudp_encap_result *res = parsed_result;
        union {
                uint32_t mplsoudp_label;
-               uint8_t label[3];
+               uint8_t label[4];
        } id = {
-               .mplsoudp_label =
-                       rte_cpu_to_be_32(res->label) & RTE_BE32(0x00ffffff),
+               .mplsoudp_label = rte_cpu_to_be_32(res->label<<12),
        };
 
        if (strcmp(res->mplsoudp, "mplsoudp_encap") == 0)
@@ -15820,7 +15818,7 @@ static void cmd_set_mplsoudp_encap_parsed(void *parsed_result,
                mplsoudp_encap_conf.select_ipv4 = 0;
        else
                return;
-       rte_memcpy(mplsoudp_encap_conf.label, &id.label[1], 3);
+       rte_memcpy(mplsoudp_encap_conf.label, &id.label, 3);
        mplsoudp_encap_conf.udp_src = rte_cpu_to_be_16(res->udp_src);
        mplsoudp_encap_conf.udp_dst = rte_cpu_to_be_16(res->udp_dst);
        if (mplsoudp_encap_conf.select_ipv4) {
@@ -18457,7 +18455,7 @@ cmd_show_tx_metadata_parsed(void *parsed_result,
        }
        if (!strcmp(res->cmd_keyword, "tx_metadata")) {
                printf("Port %u tx_metadata: %u\n", res->cmd_pid,
-                               ports[res->cmd_pid].tx_metadata);
+                       rte_be_to_cpu_32(ports[res->cmd_pid].tx_metadata));
        }
 }
 
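For reference, the layout the corrected label handling above relies on: an MPLS label is 20 bits wide, so shifting it left by 12 before the big-endian conversion lines up label, TC and S in the first three bytes of the union, which is exactly what the 3-byte rte_memcpy then copies. A minimal standalone sketch (the label value and the main() wrapper are chosen arbitrarily for illustration):

    #include <stdio.h>
    #include <stdint.h>
    #include <rte_byteorder.h>

    int main(void)
    {
            uint32_t label = 0xABCDE;  /* arbitrary 20-bit MPLS label */
            union {
                    uint32_t word;
                    uint8_t bytes[4];
            } id = {
                    /* MPLS header: label(20) | TC(3) | S(1) | TTL(8);
                     * label << 12 places the label into bits 31..12 */
                    .word = rte_cpu_to_be_32(label << 12),
            };

            /* bytes[0..2] hold label+TC+S in wire order: ab cd e0 */
            printf("%02x %02x %02x\n", id.bytes[0], id.bytes[1], id.bytes[2]);
            /* OR-ing bytes[2] with 0x1 sets the bottom-of-stack (S) bit,
             * which the flow parser change below does via label_tc_s[2] |= 0x1 */
            return 0;
    }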
diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 23ea7cc..5c0108f 100644
@@ -3808,6 +3808,7 @@ parse_vc_action_mplsogre_encap(struct context *ctx, const struct token *token,
        header += sizeof(gre);
        memcpy(mpls.label_tc_s, mplsogre_encap_conf.label,
               RTE_DIM(mplsogre_encap_conf.label));
+       mpls.label_tc_s[2] |= 0x1;
        memcpy(header, &mpls, sizeof(mpls));
        header += sizeof(mpls);
        action_encap_data->conf.size = header -
@@ -3998,6 +3999,7 @@ parse_vc_action_mplsoudp_encap(struct context *ctx, const struct token *token,
        header += sizeof(udp);
        memcpy(mpls.label_tc_s, mplsoudp_encap_conf.label,
               RTE_DIM(mplsoudp_encap_conf.label));
+       mpls.label_tc_s[2] |= 0x1;
        memcpy(header, &mpls, sizeof(mpls));
        header += sizeof(mpls);
        action_encap_data->conf.size = header -
diff --git a/app/test-pmd/cmdline_tm.c b/app/test-pmd/cmdline_tm.c
index 4c76348..1012084 100644
@@ -1156,18 +1156,18 @@ struct cmd_add_port_tm_node_wred_profile_result {
        uint16_t port_id;
        uint32_t wred_profile_id;
        cmdline_fixed_string_t color_g;
-       uint16_t min_th_g;
-       uint16_t max_th_g;
+       uint64_t min_th_g;
+       uint64_t max_th_g;
        uint16_t maxp_inv_g;
        uint16_t wq_log2_g;
        cmdline_fixed_string_t color_y;
-       uint16_t min_th_y;
-       uint16_t max_th_y;
+       uint64_t min_th_y;
+       uint64_t max_th_y;
        uint16_t maxp_inv_y;
        uint16_t wq_log2_y;
        cmdline_fixed_string_t color_r;
-       uint16_t min_th_r;
-       uint16_t max_th_r;
+       uint64_t min_th_r;
+       uint64_t max_th_r;
        uint16_t maxp_inv_r;
        uint16_t wq_log2_r;
 };
@@ -1206,11 +1206,11 @@ cmdline_parse_token_string_t cmd_add_port_tm_node_wred_profile_color_g =
 cmdline_parse_token_num_t cmd_add_port_tm_node_wred_profile_min_th_g =
        TOKEN_NUM_INITIALIZER(
                struct cmd_add_port_tm_node_wred_profile_result,
-                       min_th_g, UINT16);
+                       min_th_g, UINT64);
 cmdline_parse_token_num_t cmd_add_port_tm_node_wred_profile_max_th_g =
        TOKEN_NUM_INITIALIZER(
                struct cmd_add_port_tm_node_wred_profile_result,
-                       max_th_g, UINT16);
+                       max_th_g, UINT64);
 cmdline_parse_token_num_t cmd_add_port_tm_node_wred_profile_maxp_inv_g =
        TOKEN_NUM_INITIALIZER(
                struct cmd_add_port_tm_node_wred_profile_result,
@@ -1226,11 +1226,11 @@ cmdline_parse_token_string_t cmd_add_port_tm_node_wred_profile_color_y =
 cmdline_parse_token_num_t cmd_add_port_tm_node_wred_profile_min_th_y =
        TOKEN_NUM_INITIALIZER(
                struct cmd_add_port_tm_node_wred_profile_result,
-                       min_th_y, UINT16);
+                       min_th_y, UINT64);
 cmdline_parse_token_num_t cmd_add_port_tm_node_wred_profile_max_th_y =
        TOKEN_NUM_INITIALIZER(
                struct cmd_add_port_tm_node_wred_profile_result,
-                       max_th_y, UINT16);
+                       max_th_y, UINT64);
 cmdline_parse_token_num_t cmd_add_port_tm_node_wred_profile_maxp_inv_y =
        TOKEN_NUM_INITIALIZER(
                struct cmd_add_port_tm_node_wred_profile_result,
@@ -1246,11 +1246,11 @@ cmdline_parse_token_string_t cmd_add_port_tm_node_wred_profile_color_r =
 cmdline_parse_token_num_t cmd_add_port_tm_node_wred_profile_min_th_r =
        TOKEN_NUM_INITIALIZER(
                struct cmd_add_port_tm_node_wred_profile_result,
-                       min_th_r, UINT16);
+                       min_th_r, UINT64);
 cmdline_parse_token_num_t cmd_add_port_tm_node_wred_profile_max_th_r =
        TOKEN_NUM_INITIALIZER(
                struct cmd_add_port_tm_node_wred_profile_result,
-                       max_th_r, UINT16);
+                       max_th_r, UINT64);
 cmdline_parse_token_num_t cmd_add_port_tm_node_wred_profile_maxp_inv_r =
        TOKEN_NUM_INITIALIZER(
                struct cmd_add_port_tm_node_wred_profile_result,
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 4c75587..7b0c8e6 100644
@@ -2406,9 +2406,13 @@ pmd_test_exit(void)
        if (ports != NULL) {
                no_link_check = 1;
                RTE_ETH_FOREACH_DEV(pt_id) {
-                       printf("\nShutting down port %d...\n", pt_id);
+                       printf("\nStopping port %d...\n", pt_id);
                        fflush(stdout);
                        stop_port(pt_id);
+               }
+               RTE_ETH_FOREACH_DEV(pt_id) {
+                       printf("\nShutting down port %d...\n", pt_id);
+                       fflush(stdout);
                        close_port(pt_id);
 
                        /*
diff --git a/config/arm/meson.build b/config/arm/meson.build
index 3f91725..dae55d6 100644
@@ -47,6 +47,7 @@ flags_common_default = [
 flags_generic = [
        ['RTE_MACHINE', '"armv8a"'],
        ['RTE_MAX_LCORE', 256],
+       ['RTE_USE_C11_MEM_MODEL', true],
        ['RTE_CACHE_LINE_SIZE', 128]]
 flags_cavium = [
        ['RTE_MACHINE', '"thunderx"'],
@@ -57,11 +58,13 @@ flags_cavium = [
        ['RTE_USE_C11_MEM_MODEL', false]]
 flags_dpaa = [
        ['RTE_MACHINE', '"dpaa"'],
+       ['RTE_USE_C11_MEM_MODEL', true],
        ['RTE_CACHE_LINE_SIZE', 64],
        ['RTE_MAX_NUMA_NODES', 1],
        ['RTE_MAX_LCORE', 16]]
 flags_dpaa2 = [
        ['RTE_MACHINE', '"dpaa2"'],
+       ['RTE_USE_C11_MEM_MODEL', true],
        ['RTE_CACHE_LINE_SIZE', 64],
        ['RTE_MAX_NUMA_NODES', 1],
        ['RTE_MAX_LCORE', 16],
diff --git a/config/x86/meson.build b/config/x86/meson.build
index 33efb5e..9e5952a 100644
@@ -2,7 +2,16 @@
 # Copyright(c) 2017 Intel Corporation
 
 # for checking defines we need to use the correct compiler flags
-march_opt = '-march=@0@'.format(machine)
+march_opt = ['-march=@0@'.format(machine)]
+
+# get binutils version for the workaround of Bug 97
+ldver = run_command('ld', '-v').stdout().strip()
+if ldver.contains('2.30')
+       if cc.has_argument('-mno-avx512f')
+               march_opt += '-mno-avx512f'
+               message('Binutils 2.30 detected, disabling AVX512 support as workaround for bug #97')
+       endif
+endif
 
 # we require SSE4.2 for DPDK
 sse_errormsg = '''SSE4.2 instruction set is required for DPDK.
diff --git a/devtools/check-git-log.sh b/devtools/check-git-log.sh
index 85d67fb..d39064f 100755
@@ -90,9 +90,10 @@ bad=$(echo "$headlines" | grep -E --color=always \
        -e ':.*\<[hsf]w\>' \
        -e ':.*\<l[234]\>' \
        -e ':.*\<api\>' \
-       -e ':.*\<arm\>' \
-       -e ':.*\<armv7\>' \
-       -e ':.*\<armv8\>' \
+       -e ':.*\<ARM\>' \
+       -e ':.*\<(Aarch64|AArch64|AARCH64|Aarch32|AArch32|AARCH32)\>' \
+       -e ':.*\<(Armv7|ARMv7|ArmV7|armV7|ARMV7)\>' \
+       -e ':.*\<(Armv8|ARMv8|ArmV8|armV8|ARMV8)\>' \
        -e ':.*\<crc\>' \
        -e ':.*\<dma\>' \
        -e ':.*\<eeprom\>' \
diff --git a/devtools/check-symbol-change.sh b/devtools/check-symbol-change.sh
index 1d21e91..8f986a5 100755
@@ -31,6 +31,7 @@ build_map_changes()
                # Triggering this rule sets in_sec to 1, which actives the
                # symbol rule below
                /^.*{/ {
+                       gsub("+", "");
                        if (in_map == 1) {
                                sec=$(NF-1); in_sec=1;
                        }
diff --git a/devtools/checkpatches.sh b/devtools/checkpatches.sh
index ee8debe..3b03b7e 100755
@@ -44,6 +44,8 @@ print_usage () {
 }
 
 check_forbidden_additions() { # <patch>
+       res=0
+
        # refrain from new additions of rte_panic() and rte_exit()
        # multiple folders and expressions are separated by spaces
        awk -v FOLDERS="lib drivers" \
@@ -51,7 +53,8 @@ check_forbidden_additions() { # <patch>
                -v RET_ON_FAIL=1 \
                -v MESSAGE='Using rte_panic/rte_exit' \
                -f $(dirname $(readlink -e $0))/check-forbidden-tokens.awk \
-               "$1"
+               "$1" || res=1
+
        # svg figures must be included with wildcard extension
        # because of png conversion for pdf docs
        awk -v FOLDERS='doc' \
@@ -59,7 +62,9 @@ check_forbidden_additions() { # <patch>
                -v RET_ON_FAIL=1 \
                -v MESSAGE='Using explicit .svg extension instead of .*' \
                -f $(dirname $(readlink -e $0))/check-forbidden-tokens.awk \
-               "$1"
+               "$1" || res = 1
+
+       return $res
 }
 
 number=0
diff --git a/devtools/test-meson-builds.sh b/devtools/test-meson-builds.sh
index 79109b7..3edc805 100755
@@ -24,7 +24,7 @@ build () # <directory> <meson options>
 {
        builddir=$1
        shift
-       if [ ! -d "$builddir" ] ; then
+       if [ ! -f "$builddir/build.ninja" ] ; then
                options="--werror -Dexamples=all $*"
                echo "$MESON $options $srcdir $builddir"
                $MESON $options $srcdir $builddir
diff --git a/doc/guides/conf.py b/doc/guides/conf.py
index c883306..da99a3f 100644
@@ -9,6 +9,7 @@ from sphinx import __version__ as sphinx_version
 from sphinx.highlighting import PygmentsBridge
 from pygments.formatters.latex import LatexFormatter
 from os import listdir
+from os import environ
 from os.path import basename
 from os.path import dirname
 from os.path import join as path_join
@@ -37,7 +38,9 @@ html_add_permalinks = ""
 html_show_copyright = False
 highlight_language = 'none'
 
-version = subprocess.check_output(['make', '-sRrC', '../../', 'showversion'])
+# If MAKEFLAGS is exported by the user, garbage text might end up in version
+version = subprocess.check_output(['make', '-sRrC', '../../', 'showversion'],
+                                  env=dict(environ, MAKEFLAGS=""))
 version = version.decode('utf-8').rstrip()
 release = version
 
diff --git a/doc/guides/contributing/documentation.rst b/doc/guides/contributing/documentation.rst
index c28a95c..c72280a 100644
@@ -204,6 +204,19 @@ The main required packages can be installed as follows:
    # Red Hat/Fedora, selective install.
    sudo dnf     -y install texlive-collection-latexextra
 
+`Latexmk <http://personal.psu.edu/jcc8/software/latexmk-jcc/>`_ is a perl script
+for running LaTeX for resolving cross references,
+and it also runs auxiliary programs like bibtex, makeindex if necessary, and dvips.
+It has also a number of other useful capabilities (see man 1 latexmk).
+
+.. code-block:: console
+
+   # Ubuntu/Debian.
+   sudo apt-get -y install latexmk
+
+   # Red Hat/Fedora.
+   sudo dnf     -y install latexmk
+
 
 Build commands
 ~~~~~~~~~~~~~~
diff --git a/doc/guides/cryptodevs/aesni_mb.rst b/doc/guides/cryptodevs/aesni_mb.rst
index 63e060d..d9fd41a 100644
@@ -1,5 +1,5 @@
 ..  SPDX-License-Identifier: BSD-3-Clause
-    Copyright(c) 2015-2017 Intel Corporation.
+    Copyright(c) 2015-2018 Intel Corporation.
 
 AESN-NI Multi Buffer Crypto Poll Mode Driver
 ============================================
@@ -51,6 +51,8 @@ Limitations
 
 * Chained mbufs are not supported.
 * Only in-place is currently supported (destination address is the same as source address).
+* RTE_CRYPTO_AEAD_AES_GCM only works properly when the multi-buffer library is
+  0.51.0 or newer.
 
 
 Installation
@@ -59,8 +61,8 @@ Installation
 To build DPDK with the AESNI_MB_PMD the user is required to download the multi-buffer
 library from `here <https://github.com/01org/intel-ipsec-mb>`_
 and compile it on their user system before building DPDK.
-The latest version of the library supported by this PMD is v0.50, which
-can be downloaded from `<https://github.com/01org/intel-ipsec-mb/archive/v0.50.zip>`_.
+The latest version of the library supported by this PMD is v0.51, which
+can be downloaded from `<https://github.com/01org/intel-ipsec-mb/archive/v0.51.zip>`.
 
 .. code-block:: console
 
diff --git a/doc/guides/cryptodevs/qat.rst b/doc/guides/cryptodevs/qat.rst
index 9fb9f01..3ca70bc 100644
@@ -79,10 +79,9 @@ Limitations
 * SNOW 3G (UIA2) and ZUC (EIA3) supported only if hash length and offset fields are byte-multiple.
 * No BSD support as BSD QAT kernel driver not available.
 * ZUC EEA3/EIA3 is not supported by dh895xcc devices
-* Maximum additional authenticated data (AAD) for GCM is 240 bytes long.
+* Maximum additional authenticated data (AAD) for GCM is 240 bytes long and must be passed to the device in a buffer rounded up to the nearest block-size multiple (x16) and padded with zeros.
 * Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported).
 
-
 Extra notes on KASUMI F9
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/doc/guides/linux_gsg/sys_reqs.rst b/doc/guides/linux_gsg/sys_reqs.rst
index 29c5f47..1c6f86a 100644
@@ -70,6 +70,15 @@ Compilation of the DPDK
 
     * libnuma-dev in Debian/Ubuntu;
 
+    .. note::
+
+        On systems with NUMA support, `libnuma-dev` (aka `numactl-devel`)
+        is a recommended dependency when `--legacy-mem` switch is used,
+        and a *required* dependency if default memory mode is used.
+        While DPDK will compile and run without `libnuma`
+        even on NUMA-enabled systems,
+        both usability and performance will be degraded.
+
 *   Python, version 2.7+ or 3.2+, to use various helper scripts included in the DPDK package.
 
 
diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
index bfacbd1..40bf0f1 100644
@@ -695,3 +695,9 @@ See :numref:`figure_intel_perf_test_setup` for the performance test setup.
    * Start creating a stream on packet generator.
 
    * Set the Ethernet II type to 0x0800.
+
+Tx bytes affected by the link status change
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For firmware versions prior to 6.01 for X710 series and 3.33 for X722 series, the tx_bytes statistics data is affected by
+the link down event. Each time the link status changes to down, the tx_bytes decreases 110 bytes.
diff --git a/doc/guides/nics/ifc.rst b/doc/guides/nics/ifc.rst
index 48f9adf..08fe1d2 100644
@@ -19,9 +19,9 @@ Config File Options
 
 The following option can be modified in the ``config`` file.
 
-- ``CONFIG_RTE_LIBRTE_IFCVF_VDPA_PMD`` (default ``y`` for linux)
+- ``CONFIG_RTE_LIBRTE_IFC_PMD`` (default ``y`` for linux)
 
-  Toggle compilation of the ``librte_ifcvf_vdpa`` driver.
+  Toggle compilation of the ``librte_pmd_ifc`` driver.
 
 
 IFCVF vDPA Implementation
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 23f0f57..436898a 100644
@@ -227,20 +227,6 @@ Environment variables
   enabled and most useful when ``CONFIG_RTE_EAL_PMD_PATH`` is also set,
   since ``LD_LIBRARY_PATH`` has no effect in this case.
 
-- ``MLX5_PMD_ENABLE_PADDING``
-
-  Enables HW packet padding in PCI bus transactions.
-
-  When packet size is cache aligned and CRC stripping is enabled, 4 fewer
-  bytes are written to the PCI bus. Enabling padding makes such packets
-  aligned again.
-
-  In cases where PCI bandwidth is the bottleneck, padding can improve
-  performance by 10%.
-
-  This is disabled by default since this can also decrease performance for
-  unaligned packet sizes.
-
 - ``MLX5_SHUT_UP_BF``
 
   Configures HW Tx doorbell register as IO-mapped.
@@ -295,6 +281,19 @@ Run-time configuration
 
   - CPU having 128B cacheline with ConnectX-5 and Bluefield.
 
+- ``rxq_pkt_pad_en`` parameter [int]
+
+  A nonzero value enables padding Rx packet to the size of cacheline on PCI
+  transaction. This feature would waste PCI bandwidth but could improve
+  performance by avoiding partial cacheline write which may cause costly
+  read-modify-copy in memory transaction on some architectures. Disabled by
+  default.
+
+  Supported on:
+
+  - x86_64 with ConnectX-4, ConnectX-4 LX, ConnectX-5, ConnectX-6 and Bluefield.
+  - POWER8 and ARMv8 with ConnectX-4 LX, ConnectX-5, ConnectX-6 and Bluefield.
+
 - ``mprq_en`` parameter [int]
 
   A nonzero value enables configuring Multi-Packet Rx queues. Rx queue is
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index 8b5d050..426acfc 100644
@@ -147,15 +147,6 @@ A default validator callback is provided by EAL, which can be enabled with a
 ``--socket-limit`` command-line option, for a simple way to limit maximum amount
 of memory that can be used by DPDK application.
 
-.. note::
-
-    In multiprocess scenario, all related processes (i.e. primary process, and
-    secondary processes running with the same prefix) must be in the same memory
-    modes. That is, if primary process is run in dynamic memory mode, all of its
-    secondary processes must be run in the same mode. The same is applicable to
-    ``--single-file-segments`` command-line option - both primary and secondary
-    processes must shared this mode.
-
 + Legacy memory mode
 
 This mode is enabled by specifying ``--legacy-mem`` command-line switch to the
diff --git a/doc/guides/prog_guide/generic_receive_offload_lib.rst b/doc/guides/prog_guide/generic_receive_offload_lib.rst
index 9c6a4d0..5b3fb91 100644
@@ -191,3 +191,22 @@ Header fields deciding if packets are neighbors include:
         ignore IPv4 ID fields for the packets whose DF bit is 1.
         Additionally, packets which have different value of DF bit can't
         be merged.
+
+GRO Library Limitations
+-----------------------
+
+- GRO library uses MBUF->l2_len/l3_len/l4_len/outer_l2_len/
+  outer_l3_len/packet_type to get protocol headers for the
+  input packet, rather than parsing the packet header. Therefore,
+  before call GRO APIs to merge packets, user applications
+  must set MBUF->l2_len/l3_len/l4_len/outer_l2_len/outer_l3_len/
+  packet_type to the same values as the protocol headers of the
+  packet.
+
+- GRO library doesn't support to process the packets with IPv4
+  Options or VLAN tagged.
+
+- GRO library just supports to process the packet organized
+  in a single MBUF. If the input packet consists of multiple
+  MBUFs (i.e. chained MBUFs), GRO reassembly behaviors are
+  unknown.
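The first limitation above means the caller must fill in the mbuf metadata itself before handing a burst to GRO. A minimal sketch of that preparation, assuming untagged TCP/IPv4 packets without IP or TCP options, the DPDK 18.11 header struct names, and illustrative flow-table sizes:

    #include <rte_mbuf.h>
    #include <rte_ether.h>
    #include <rte_ip.h>
    #include <rte_tcp.h>
    #include <rte_gro.h>

    static uint16_t
    gro_prepare_and_merge(struct rte_mbuf **pkts, uint16_t nb_pkts)
    {
            const struct rte_gro_param param = {
                    .gro_types = RTE_GRO_TCP_IPV4,
                    .max_flow_num = 4,          /* illustrative sizing */
                    .max_item_per_flow = 32,
            };
            uint16_t i;

            for (i = 0; i < nb_pkts; i++) {
                    /* GRO reads these fields instead of parsing the headers */
                    pkts[i]->l2_len = sizeof(struct ether_hdr);
                    pkts[i]->l3_len = sizeof(struct ipv4_hdr);
                    pkts[i]->l4_len = sizeof(struct tcp_hdr);
                    pkts[i]->packet_type = RTE_PTYPE_L2_ETHER |
                                           RTE_PTYPE_L3_IPV4 |
                                           RTE_PTYPE_L4_TCP;
            }
            /* lightweight mode: merge only within this burst */
            return rte_gro_reassemble_burst(pkts, nb_pkts, &param);
    }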
diff --git a/doc/guides/prog_guide/power_man.rst b/doc/guides/prog_guide/power_man.rst
index 68b7e8b..0a3755a 100644
@@ -184,7 +184,7 @@ API Overview for Empty Poll Power Management
 
 * **Update Valid Poll Counter**: update the valid poll counter.
 
-* **Set the Fequence Index**: update the power state/frequency mapping.
+* **Set the Frequency Index**: update the power state/frequency mapping.
 
 * **Detect empty poll state change**: empty poll state change detection algorithm then take action.
 
@@ -195,6 +195,8 @@ The mechanism can applied to any device which is based on polling. e.g. NIC, FPG
 References
 ----------
 
-*   l3fwd-power: The sample application in DPDK that performs L3 forwarding with power management.
+*   The :doc:`../sample_app_ug/l3_forward_power_man`
+    chapter in the :doc:`../sample_app_ug/index` section.
 
-*   The "L3 Forwarding with Power Management Sample Application" chapter in the *DPDK Sample Application's User Guide*.
+*   The :doc:`../sample_app_ug/vm_power_management`
+    chapter in the :doc:`../sample_app_ug/index` section.
diff --git a/doc/guides/prog_guide/rte_flow.rst b/doc/guides/prog_guide/rte_flow.rst
index c186375..dbf4999 100644
@@ -2314,7 +2314,10 @@ in pattern, Some PMDs will reject rule because behaviour will be undefined.
 Action: ``SET_MAC_SRC``
 ^^^^^^^^^^^^^^^^^^^^^^^
 
-Set source MAC address
+Set source MAC address.
+
+It must be used with a valid RTE_FLOW_ITEM_TYPE_ETH flow pattern item.
+Otherwise, RTE_FLOW_ERROR_TYPE_ACTION error will be returned.
 
 .. _table_rte_flow_action_set_mac_src:
 
@@ -2329,7 +2332,10 @@ Set source MAC address
 Action: ``SET_MAC_DST``
 ^^^^^^^^^^^^^^^^^^^^^^^
 
-Set source MAC address
+Set destination MAC address.
+
+It must be used with a valid RTE_FLOW_ITEM_TYPE_ETH flow pattern item.
+Otherwise, RTE_FLOW_ERROR_TYPE_ACTION error will be returned.
 
 .. _table_rte_flow_action_set_mac_dst:
 
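As a short illustration of the requirement stated above, a rule carrying SET_MAC_DST validates only when an ETH item is present in the pattern. A minimal sketch, with a hypothetical port id, an arbitrary locally administered MAC address, and validation only (a real rule would normally also carry a fate action such as QUEUE):

    #include <rte_flow.h>
    #include <rte_ether.h>

    static int
    validate_set_mac_dst(uint16_t port_id, struct rte_flow_error *error)
    {
            struct rte_flow_attr attr = { .ingress = 1 };
            struct rte_flow_item pattern[] = {
                    { .type = RTE_FLOW_ITEM_TYPE_ETH },  /* required by SET_MAC_* */
                    { .type = RTE_FLOW_ITEM_TYPE_END },
            };
            struct rte_flow_action_set_mac mac = {
                    .mac_addr = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 },
            };
            struct rte_flow_action actions[] = {
                    { .type = RTE_FLOW_ACTION_TYPE_SET_MAC_DST, .conf = &mac },
                    { .type = RTE_FLOW_ACTION_TYPE_END },
            };

            return rte_flow_validate(port_id, &attr, pattern, actions, error);
    }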
diff --git a/doc/guides/rel_notes/known_issues.rst b/doc/guides/rel_notes/known_issues.rst
index a1face9..358dfa3 100644
@@ -826,3 +826,38 @@ Kernel crash when hot-unplug igb_uio device while DPDK application is running
 
 **Driver/Module**:
    ``igb_uio`` module.
+
+
+AVX-512 support disabled
+------------------------
+
+**Description**:
+   ``AVX-512`` support has been disabled on some conditions.
+   This shouldn't be confused with ``CONFIG_RTE_ENABLE_AVX512`` config option which is already
+   disabled by default. This config option defines if ``AVX-512`` specific implementations of
+   some file to be used or not. What has been disabled is compiler feature to produce ``AVX-512``
+   instructions from any source code.
+
+   On DPDK v18.11 ``AVX-512`` is disabled for all ``GCC`` builds which reported to cause a performance
+   drop.
+
+   On DPDK v19.02 ``AVX-512`` disable scope is reduced to ``GCC`` and ``binutils version 2.30`` based
+   on information accured from the GCC community defect.
+
+**Reason**:
+   Generated ``AVX-512`` code cause crash:
+   https://bugs.dpdk.org/show_bug.cgi?id=97
+   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88096
+
+**Resolution/Workaround**:
+   * Update ``binutils`` to newer version than ``2.30``.
+
+   OR
+
+   * Use different compiler, like ``clang`` for this case.
+
+**Affected Environment/Platform**:
+    ``GCC`` and ``binutils version 2.30``.
+
+**Driver/Module**:
+    ALL.
diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst
index 65bab55..2f386fc 100644
@@ -861,3 +861,319 @@ Tested Platforms
 
      * Ubuntu 18.04.1 LTS with NXP QorIQ LSDK 1809 support packages
      * Ubuntu 16.04.3 LTS with NXP QorIQ LSDK 1803 support packages
+
+Fixes and Validation in 18.11 Stable Release
+--------------------------------------------
+
+18.11.1 Fixes
+~~~~~~~~~~~~~
+
+* app/bbdev: fix return value check
+* app/eventdev: detect deadlock for timer event producer
+* app/pdump: fix vdev cleanup
+* app/testpmd: expand RED queue thresholds to 64 bits
+* app/testpmd: fix MPLS BoS bit default value
+* app/testpmd: fix MPLSoGRE encapsulation
+* app/testpmd: fix MPLSoUDP encapsulation
+* app/testpmd: fix quit to stop all ports before close
+* app/testpmd: fix Tx metadata show command
+* bb/turbo_sw: fix dynamic linking
+* build: fix meson check for binutils 2.30
+* build: fix variable name in dependency error message
+* build: mention -march in pkg-config description
+* build: use static deps for pkg-config libs.private
+* bus/dpaa: do nothing if bus not present
+* bus/dpaa: fix logical to physical core affine logic
+* bus/fslmc: fix parse method for bus devices
+* bus/fslmc: fix ring mode to use correct cache settings
+* bus/fslmc: fix to convert error msg to warning
+* bus/fslmc: fix to reset portal memory before use
+* bus/fslmc: fix to use correct physical core for logical core
+* bus/ifpga: fix AFU probe failure handler
+* bus/ifpga: fix build for cpp applications
+* bus/ifpga: fix forcing optional devargs
+* bus/vmbus: fix race in subchannel creation
+* common/qat: remove check of valid firmware response
+* compressdev: fix structure comment
+* compress/qat: fix dequeue error counter
+* compress/qat: fix returned status on overflow
+* compress/qat: fix return on building request error
+* config: enable C11 memory model for armv8 with meson
+* crypto/dpaa2_sec: fix FLC address for physical mode
+* crypto/qat: fix block size error handling
+* crypto/qat: fix digest in wireless auth case
+* crypto/qat: fix message for CCM when setting unused counter
+* crypto/qat: fix message for NULL algo setting unused counter
+* devtools: fix build check for whether meson has run
+* devtools: fix return of forbidden addition checks
+* devtools: fix symbol check when adding experimental section
+* devtools: fix wrong headline lowercase for arm
+* doc: add dependency for PDF in contributing guide
+* doc: add GCM AAD limitation in qat guide
+* doc: add GRO limitations in programmers guide
+* doc: add missing loopback option in testpmd guide
+* doc: clarify libnuma requirement for NUMA systems
+* doc: fix AESNI_MB guide
+* doc: fix a parameter name in testpmd guide
+* doc: fix a typo in power management guide
+* doc: fix a typo in testpmd guide
+* doc: fix a typo in testpmd guide
+* doc: fix flow action command names in testpmd guide
+* doc: fix garbage text in generated HTML guides
+* doc: fix ifc naming
+* doc: fix MAC address rewrite actions in prog guide
+* doc: fix references in power management guide
+* doc: remove note on memory mode limitation in multi-process
+* drivers/crypto: fix PMDs memory leak
+* drivers: fix sprintf with snprintf
+* drivers/net: fix several Tx prepare functions
+* eal/bsd: remove clean up of files at startup
+* eal: check string parameter lengths
+* eal: clean up unused files on initialization
+* eal: close multi-process socket during cleanup
+* eal: fix build of external app with clang on armv8
+* eal: fix clang build with intrinsics forced
+* eal: fix core number validation
+* eal: fix detection of duplicate option register
+* eal: fix leak on multi-process request error
+* eal: fix log level of error in option register
+* eal: fix missing newline in a log
+* eal: fix out of bound access when no CPU available
+* eal: fix strdup usages in internal config
+* eal/linux: fix parsing zero socket memory and limits
+* efd: fix tail queue leak
+* ethdev: declare Tx prepare API as not experimental
+* ethdev: fix errno to have positive value
+* ethdev: fix typo in queue setup error log
+* eventdev: fix error log in eth Rx adapter
+* eventdev: fix eth Tx adapter queue count checks
+* eventdev: fix xstats documentation typo
+* eventdev: remove redundant timer adapter function prototypes
+* examples/bond: fix crash when there is no active slave
+* examples/bond: fix initialization order
+* examples/flow_filtering: fix example documentation
+* examples/ipsec-secgw: fix crypto-op might never get dequeued
+* examples/ipsec-secgw: fix inbound SA checking
+* examples/ipsec-secgw: fix outbound codepath for single SA
+* examples/ipsec-secgw: make local variables static
+* examples/kni: fix crash while handling userspace request
+* examples/tep_term: remove unused constant
+* examples/vhost_crypto: fix bracket
+* examples/vhost: fix a typo
+* examples/vhost: fix path allocation failure handling
+* gro: check invalid TCP header length
+* gro: fix overflow of payload length calculation
+* gso: fix VxLAN/GRE tunnel checks
+* hash: fix out-of-bound write while freeing key slot
+* hash: fix return of bulk lookup
+* ip_frag: fix IPv6 when MTU sizes not aligned to 8 bytes
+* kni: fix build for dev_open in Linux 5.0
+* kni: fix build for igb_ndo_bridge_setlink in Linux 5.0
+* kni: fix build on RHEL 8
+* kni: fix build on RHEL8 for arm and Power9
+* malloc: fix deadlock when reading stats
+* malloc: fix duplicate mem event notification
+* malloc: fix finding maximum contiguous IOVA size
+* malloc: make alignment requirements more stringent
+* malloc: notify primary process about hotplug in secondary
+* mem: check for memfd support in segment fd API
+* mem: fix segment fd API error code for external segment
+* mem: fix storing old policy
+* mem: fix variable shadowing
+* memzone: fix unlock on initialization failure
+* mk: do not install meson.build in usertools
+* mk: fix scope of disabling AVX512F support
+* net/af_packet: fix setting MTU decrements sockaddr twice
+* net/avf/base: fix comment referencing internal data
+* net/bnx2x: cleanup info logs
+* net/bnx2x: fix segfaults due to stale interrupt status
+* net/bonding: fix possible null pointer reference
+* net/bonding: fix reset active slave
+* net/cxgbe: fix control queue mbuf pool naming convention
+* net/cxgbe: fix overlapping regions in TID table
+* net/cxgbe: skip parsing match items with no spec
+* net/dpaa2: fix bad check for not-null
+* net/dpaa2: fix device init for secondary process
+* net/dpaa: fix secondary process
+* net/ena: add reset reason in Rx error
+* net/ena: add supported RSS offloads types
+* net/ena: destroy queues if start failed
+* net/ena: do not reconfigure queues on reset
+* net/ena: fix cleanup for out of order packets
+* net/ena: fix dev init with multi-process
+* net/ena: fix errno to positive value
+* net/ena: fix invalid reference to variable in union
+* net/ena: skip packet with wrong request id
+* net/ena: update completion queue after cleanup
+* net/enic: remove useless include
+* net: fix underflow for checksum of invalid IPv4 packets
+* net/fm10k: fix internal switch initial status
+* net/i40e: clear VF reset flags after reset
+* net/i40e: fix config name in comment
+* net/i40e: fix get RSS conf
+* net/i40e: fix getting RSS configuration
+* net/i40e: fix overwriting RSS RETA
+* net/i40e: fix port close
+* net/i40e: fix queue region DCB configure
+* net/i40e: fix statistics
+* net/i40e: fix statistics inconsistency
+* net/i40e: fix using recovery mode firmware
+* net/i40e: fix VF overwrite PF RSS LUT for X722
+* net/i40e: perform basic validation on VF messages
+* net/i40e: remove redundant reset of queue number
+* net/i40e: revert fix offload not supported mask
+* net/ifc: store only registered device instance
+* net/ifcvf: fix typo on struct name
+* net/igb: fix LSC interrupt when using MSI-X
+* net/ixgbe/base: add LHA ID
+* net/ixgbe: fix crash on remove
+* net/ixgbe: fix over using multicast table for VF
+* net/ixgbe: fix overwriting RSS RETA
+* net/ixgbe: fix Rx LRO capability offload for x550
+* net/mlx5: fix build for armv8
+* net/mlx5: fix deprecated library API for Rx padding
+* net/mlx5: fix function documentation
+* net/mlx5: fix instruction hotspot on replenishing Rx buffer
+* net/mlx5: fix Multi-Packet RQ mempool free
+* net/mlx5: fix Rx packet padding
+* net/mlx5: fix shared counter allocation logic
+* net/mlx5: fix TC rule handle assignment
+* net/mlx5: fix typos and code style
+* net/mlx5: fix validation of Rx queue number
+* net/mlx5: fix VXLAN port registration race condition
+* net/mlx5: fix VXLAN without decap action for E-Switch
+* net/mlx5: remove checks for outer tunnel items on E-Switch
+* net/mlx5: support ethernet type for tunnels on E-Switch
+* net/mlx5: support tunnel inner items on E-Switch
+* net/mlx5: validate ethernet type on E-Switch
+* net/mlx5: validate tunnel inner items on E-Switch
+* net/netvsc: disable multi-queue on older servers
+* net/netvsc: enable SR-IOV
+* net/netvsc: fix probe when VF not found
+* net/netvsc: fix transmit descriptor pool cleanup
+* net/qede: fix performance bottleneck in Rx path
+* net/qede: remove prefetch in Tx path
+* net/sfc: add missing header guard to TSO header file
+* net/sfc/base: fix Tx descriptor max number check
+* net/sfc: discard last seen VLAN TCI if Tx packet is dropped
+* net/sfc: fix crash in EF10 TSO if no payload
+* net/sfc: fix datapath name references in logs
+* net/sfc: fix port ID log
+* net/sfc: fix Rx packets counter
+* net/sfc: fix typo in preprocessor check
+* net/sfc: fix VF error/missed stats mapping
+* net/sfc: pass HW Tx queue index on creation
+* net/tap: add buffer overflow checks before checksum
+* net/tap: allow full length names
+* net/tap: fix possible uninitialized variable access
+* net/tap: let kernel choose tun device name
+* net/vhost: fix double free of MAC address
+* net/virtio: add barrier before reading the flags
+* net/virtio-user: fix used ring in cvq handling
+* raw/ifpga: fix memory leak
+* Revert "net/mlx5: fix instruction hotspot on replenishing Rx buffer"
+* sched: fix memory leak on init failure
+* telemetry: fix using ports of different types
+* test: check zero socket memory as valid
+* test/crypto: fix misleading trace message
+* test/fbarray: add to meson
+* test/hash: fix perf result
+* test/mem: add external mem autotest to meson
+* test/metrics: fix a negative case
+* timer: fix race condition
+* version: 18.11.1-rc1
+* version: 18.11.1-rc2
+* vfio: allow secondary process to query IOMMU type
+* vfio: do not unregister callback in secondary process
+* vfio: fix error message
+* vhost/crypto: fix possible dead loop
+* vhost/crypto: fix possible out of bound access
+* vhost: enforce avail index and desc read ordering
+* vhost: enforce desc flags and content read ordering
+* vhost: ensure event idx is mapped when negotiated
+* vhost: fix access for indirect descriptors
+* vhost: fix crash after mmap failure
+* vhost: fix deadlock in driver unregister
+* vhost: fix double read of descriptor flags
+* vhost: fix memory leak on realloc failure
+* vhost: fix possible dead loop in vector filling
+* vhost: fix possible out of bound access in vector filling
+* vhost: fix race condition when adding fd in the fdset
+
+18.11.1 Validation
+~~~~~~~~~~~~~~~~~~
+
+* Intel(R) Testing
+
+   * 18.11.1 LTS release passed the basic Intel(R) NIC(ixgbe and i40e) testing
+
+   * cryptodev
+   * virtio and Intel NIC/virtio performance
+   * vlan
+   * vxlan
+   * Jumbo frames
+   * Generic filter
+   * Flow director
+   * PF and VF
+
+* Mellanox(R) Testing
+
+   * Basic functionality
+
+      * Send and receive multiple types of traffic
+      * testpmd xstats counter test
+      * testpmd timestamp test
+
+   * Changing/checking link status through testpmd
+
+      * RTE flow and flow_director tests
+      * Some RSS tests
+      * VLAN stripping and insertion tests
+      * Checksum and TSO tests
+      * ptype tests
+      * Port interrupt testing
+      * Multi-process testing
+
+   * Drivers tested
+
+      * MLNX_OFED_LINUX-4.5-1.0.1.0
+      * MLNX_OFED_LINUX-4.4-2.0.1.0
+      * rdma-core upstream commit 0ea43f6
+
+   * Tested NICs
+
+      * ConnectX-4 Lx (fw 14.24.1000)
+      * ConnectX-5 (fw 16.24.1000)
+
+   * OSes tested
+
+      * RHEL7.4 (kernel 5.0.0)
+      * RHEL7.4 (kernel 3.10.0-693.el7.x86_64)
+
+
+* OVS Testing Intel(R)
+
+   * OVS testing against head OVS Master and OVS 2.11.0 with VSPERF
+   * Tested with i40e (X710), ixgbe (82599ES) and igb(I350) devices
+
+      * PVP
+      * P2P
+      * Multiqueue
+      * Vhostuserclient reconnect
+      * Vhost cross-NUMA awareness
+      * Jumbo frames
+      * Rate limiting
+      * QoS policer
+
+* Microsoft(R) Azure Testing
+
+   * SRIOV/Failsafe
+   * DPDK-OVS
+
+* Red Hat(R) Virtualization Testing
+
+   * PF
+   * VF
+   * vhost single/multi queues and cross-NUMA
+   * vhostclient reconnect
+   * vhost live migration with single/multi queues and cross-NUMA
diff --git a/doc/guides/sample_app_ug/flow_filtering.rst b/doc/guides/sample_app_ug/flow_filtering.rst
index 840d557..9dba85a 100644
@@ -53,7 +53,7 @@ applications and the Environment Abstraction Layer (EAL) options.
 Explanation
 -----------
 
-The example is build from 2 main files,
+The example is built from 2 files,
 ``main.c`` which holds the example logic and ``flow_blocks.c`` that holds the
 implementation for building the flow rule.
 
@@ -380,13 +380,9 @@ This function is located in the ``flow_blocks.c`` file.
    {
            struct rte_flow_attr attr;
            struct rte_flow_item pattern[MAX_PATTERN_NUM];
-           struct rte_flow_action action[MAX_PATTERN_NUM];
+           struct rte_flow_action action[MAX_ACTION_NUM];
            struct rte_flow *flow = NULL;
            struct rte_flow_action_queue queue = { .index = rx_q };
-           struct rte_flow_item_eth eth_spec;
-           struct rte_flow_item_eth eth_mask;
-           struct rte_flow_item_vlan vlan_spec;
-           struct rte_flow_item_vlan vlan_mask;
            struct rte_flow_item_ipv4 ip_spec;
            struct rte_flow_item_ipv4 ip_mask;
 
@@ -404,37 +400,19 @@ This function is located in the ``flow_blocks.c`` file.
             * create the action sequence.
             * one action only,  move packet to queue
             */
-
            action[0].type = RTE_FLOW_ACTION_TYPE_QUEUE;
            action[0].conf = &queue;
            action[1].type = RTE_FLOW_ACTION_TYPE_END;
 
            /*
-            * set the first level of the pattern (eth).
+            * set the first level of the pattern (ETH).
             * since in this example we just want to get the
             * ipv4 we set this level to allow all.
             */
-           memset(&eth_spec, 0, sizeof(struct rte_flow_item_eth));
-           memset(&eth_mask, 0, sizeof(struct rte_flow_item_eth));
-           eth_spec.type = 0;
-           eth_mask.type = 0;
            pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH;
-           pattern[0].spec = &eth_spec;
-           pattern[0].mask = &eth_mask;
-
-           /*
-            * setting the second level of the pattern (vlan).
-            * since in this example we just want to get the
-            * ipv4 we also set this level to allow all.
-            */
-           memset(&vlan_spec, 0, sizeof(struct rte_flow_item_vlan));
-           memset(&vlan_mask, 0, sizeof(struct rte_flow_item_vlan));
-           pattern[1].type = RTE_FLOW_ITEM_TYPE_VLAN;
-           pattern[1].spec = &vlan_spec;
-           pattern[1].mask = &vlan_mask;
 
            /*
-            * setting the third level of the pattern (ip).
+            * setting the second level of the pattern (IP).
             * in this example this is the level we care about
             * so we set it according to the parameters.
             */
@@ -444,12 +422,12 @@ This function is located in the ``flow_blocks.c`` file.
            ip_mask.hdr.dst_addr = dest_mask;
            ip_spec.hdr.src_addr = htonl(src_ip);
            ip_mask.hdr.src_addr = src_mask;
-           pattern[2].type = RTE_FLOW_ITEM_TYPE_IPV4;
-           pattern[2].spec = &ip_spec;
-           pattern[2].mask = &ip_mask;
+           pattern[1].type = RTE_FLOW_ITEM_TYPE_IPV4;
+           pattern[1].spec = &ip_spec;
+           pattern[1].mask = &ip_mask;
 
            /* the final level must be always type end */
-           pattern[3].type = RTE_FLOW_ITEM_TYPE_END;
+           pattern[2].type = RTE_FLOW_ITEM_TYPE_END;
 
            int res = rte_flow_validate(port_id, &attr, pattern, action, error);
            if(!res)
@@ -464,14 +442,10 @@ The first part of the function is declaring the structures that will be used.
 
    struct rte_flow_attr attr;
    struct rte_flow_item pattern[MAX_PATTERN_NUM];
-   struct rte_flow_action action[MAX_PATTERN_NUM];
+   struct rte_flow_action action[MAX_ACTION_NUM];
    struct rte_flow *flow;
    struct rte_flow_error error;
    struct rte_flow_action_queue queue = { .index = rx_q };
-   struct rte_flow_item_eth eth_spec;
-   struct rte_flow_item_eth eth_mask;
-   struct rte_flow_item_vlan vlan_spec;
-   struct rte_flow_item_vlan vlan_mask;
    struct rte_flow_item_ipv4 ip_spec;
    struct rte_flow_item_ipv4 ip_mask;
 
@@ -491,33 +465,17 @@ the rule. In this case send the packet to queue.
    action[0].conf = &queue;
    action[1].type = RTE_FLOW_ACTION_TYPE_END;
 
-The forth part is responsible for creating the pattern and is build from
-number of step. In each step we build one level of the pattern starting with
+The fourth part is responsible for creating the pattern and is built from
+number of steps. In each step we build one level of the pattern starting with
 the lowest one.
 
 Setting the first level of the pattern ETH:
 
 .. code-block:: c
 
-   memset(&eth_spec, 0, sizeof(struct rte_flow_item_eth));
-   memset(&eth_mask, 0, sizeof(struct rte_flow_item_eth));
-   eth_spec.type = 0;
-   eth_mask.type = 0;
    pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH;
-   pattern[0].spec = &eth_spec;
-   pattern[0].mask = &eth_mask;
-
-Setting the second level of the pattern VLAN:
-
-.. code-block:: c
-
-   memset(&vlan_spec, 0, sizeof(struct rte_flow_item_vlan));
-   memset(&vlan_mask, 0, sizeof(struct rte_flow_item_vlan));
-   pattern[1].type = RTE_FLOW_ITEM_TYPE_VLAN;
-   pattern[1].spec = &vlan_spec;
-   pattern[1].mask = &vlan_mask;
 
-Setting the third level ip:
+Setting the second level of the pattern IP:
 
 .. code-block:: c
 
@@ -527,15 +485,15 @@ Setting the third level ip:
    ip_mask.hdr.dst_addr = dest_mask;
    ip_spec.hdr.src_addr = htonl(src_ip);
    ip_mask.hdr.src_addr = src_mask;
-   pattern[2].type = RTE_FLOW_ITEM_TYPE_IPV4;
-   pattern[2].spec = &ip_spec;
-   pattern[2].mask = &ip_mask;
+   pattern[1].type = RTE_FLOW_ITEM_TYPE_IPV4;
+   pattern[1].spec = &ip_spec;
+   pattern[1].mask = &ip_mask;
 
 Closing the pattern part.
 
 .. code-block:: c
 
-   pattern[3].type = RTE_FLOW_ITEM_TYPE_END;
+   pattern[2].type = RTE_FLOW_ITEM_TYPE_END;
 
 The last part of the function is to validate the rule and create it.
 
diff --git a/doc/guides/testpmd_app_ug/run_app.rst b/doc/guides/testpmd_app_ug/run_app.rst
index f717bd3..4495ed0 100644
@@ -196,7 +196,7 @@ The commandline options are:
 
 *   ``--port-topology=mode``
 
-    Set port topology, where mode is ``paired`` (the default) or ``chained``.
+    Set port topology, where mode is ``paired`` (the default), ``chained`` or ``loop``.
 
     In ``paired`` mode, the forwarding is between pairs of ports, for example: (0,1), (2,3), (4,5).
 
@@ -204,6 +204,8 @@ The commandline options are:
 
     The ordering of the ports can be changed using the portlist testpmd runtime function.
 
+    In ``loop`` mode, ingress traffic is simply transmitted back on the same interface.
+
 *   ``--forward-mode=mode``
 
     Set the forwarding mode where ``mode`` is one of the following::
diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
index 056f8bb..854af2d 100644
@@ -304,7 +304,7 @@ The available information categories are:
 * ``mac``: Changes the source and the destination Ethernet addresses of packets before forwarding them.
   Default application behaviour is to set source Ethernet address to that of the transmitting interface, and destination
   address to a dummy value (set during init). The user may specify a target destination Ethernet address via the 'eth-peer' or
-  'eth-peer-configfile' command-line options. It is not currently possible to specify a specific source Ethernet address.
+  'eth-peers-configfile' command-line options. It is not currently possible to specify a specific source Ethernet address.
 
 * ``macswap``: MAC swap forwarding mode.
   Swaps the source and the destination Ethernet addresses of packets before forwarding them.
@@ -318,7 +318,7 @@ The available information categories are:
 
 * ``csum``: Changes the checksum field with hardware or software methods depending on the offload flags on the packet.
 
-* ``icmpecho``: Receives a burst of packets, lookup for IMCP echo requests and, if any, send back ICMP echo replies.
+* ``icmpecho``: Receives a burst of packets, lookup for ICMP echo requests and, if any, send back ICMP echo replies.
 
 * ``ieee1588``: Demonstrate L2 IEEE1588 V2 PTP timestamping for RX and TX. Requires ``CONFIG_RTE_LIBRTE_IEEE1588=y``.
 
@@ -1149,7 +1149,7 @@ set eth-peer
 
 Set the forwarding peer address for certain port::
 
-   testpmd> set eth-peer (port_id) (perr_addr)
+   testpmd> set eth-peer (port_id) (peer_addr)
 
 This is equivalent to the ``--eth-peer`` command-line option.
 
@@ -3931,12 +3931,12 @@ This section lists supported actions and their attributes, if any.
 
   - ``ipv6_addr``: New IPv6 destination address.
 
-- ``of_set_tp_src``: Set a new source port number in the outermost TCP/UDP
+- ``set_tp_src``: Set a new source port number in the outermost TCP/UDP
   header.
 
   - ``port``: New TCP/UDP source port number.
 
-- ``of_set_tp_dst``: Set a new destination port number in the outermost TCP/UDP
+- ``set_tp_dst``: Set a new destination port number in the outermost TCP/UDP
   header.
 
   - ``port``: New TCP/UDP destination port number.
diff --git a/drivers/baseband/turbo_sw/Makefile b/drivers/baseband/turbo_sw/Makefile
index 79eb554..d364677 100644
@@ -27,8 +27,8 @@ CFLAGS += -I$(FLEXRAN_SDK)/lib_turbo
 CFLAGS += -I$(FLEXRAN_SDK)/lib_crc
 CFLAGS += -I$(FLEXRAN_SDK)/lib_rate_matching
 
-LDLIBS += -L$(FLEXRAN_SDK)/lib_crc -lcrc
 LDLIBS += -L$(FLEXRAN_SDK)/lib_turbo -lturbo
+LDLIBS += -L$(FLEXRAN_SDK)/lib_crc -lcrc
 LDLIBS += -L$(FLEXRAN_SDK)/lib_rate_matching -lrate_matching
 LDLIBS += -L$(FLEXRAN_SDK)/lib_common -lcommon
 LDLIBS += -lstdc++ -lirc -limf -lipps
diff --git a/drivers/bus/dpaa/dpaa_bus.c b/drivers/bus/dpaa/dpaa_bus.c
index 203f60d..c7da96f 100644
@@ -250,52 +250,53 @@ dpaa_clean_device_list(void)
 
 int rte_dpaa_portal_init(void *arg)
 {
-       cpu_set_t cpuset;
        pthread_t id;
-       uint32_t cpu = rte_lcore_id();
+       unsigned int cpu, lcore = rte_lcore_id();
        int ret;
        struct dpaa_portal *dpaa_io_portal;
 
        BUS_INIT_FUNC_TRACE();
 
-       if ((size_t)arg == 1 || cpu == LCORE_ID_ANY)
-               cpu = rte_get_master_lcore();
-       /* if the core id is not supported */
+       if ((size_t)arg == 1 || lcore == LCORE_ID_ANY)
+               lcore = rte_get_master_lcore();
        else
-               if (cpu >= RTE_MAX_LCORE)
+               if (lcore >= RTE_MAX_LCORE)
                        return -1;
 
-       /* Set CPU affinity for this thread */
-       CPU_ZERO(&cpuset);
-       CPU_SET(cpu, &cpuset);
+       cpu = lcore_config[lcore].core_id;
+
+       /* Set CPU affinity for this thread.*/
        id = pthread_self();
-       ret = pthread_setaffinity_np(id, sizeof(cpu_set_t), &cpuset);
+       ret = pthread_setaffinity_np(id, sizeof(cpu_set_t),
+                       &lcore_config[lcore].cpuset);
        if (ret) {
-               DPAA_BUS_LOG(ERR, "pthread_setaffinity_np failed on "
-                       "core :%d with ret: %d", cpu, ret);
+               DPAA_BUS_LOG(ERR, "pthread_setaffinity_np failed on core :%u"
+                            " (lcore=%u) with ret: %d", cpu, lcore, ret);
                return ret;
        }
 
        /* Initialise bman thread portals */
        ret = bman_thread_init();
        if (ret) {
-               DPAA_BUS_LOG(ERR, "bman_thread_init failed on "
-                       "core %d with ret: %d", cpu, ret);
+               DPAA_BUS_LOG(ERR, "bman_thread_init failed on core %u"
+                            " (lcore=%u) with ret: %d", cpu, lcore, ret);
                return ret;
        }
 
-       DPAA_BUS_LOG(DEBUG, "BMAN thread initialized");
+       DPAA_BUS_LOG(DEBUG, "BMAN thread initialized - CPU=%d lcore=%d",
+                    cpu, lcore);
 
        /* Initialise qman thread portals */
        ret = qman_thread_init();
        if (ret) {
-               DPAA_BUS_LOG(ERR, "bman_thread_init failed on "
-                       "core %d with ret: %d", cpu, ret);
+               DPAA_BUS_LOG(ERR, "qman_thread_init failed on core %u"
+                           " (lcore=%u) with ret: %d", cpu, lcore, ret);
                bman_thread_finish();
                return ret;
        }
 
-       DPAA_BUS_LOG(DEBUG, "QMAN thread initialized");
+       DPAA_BUS_LOG(DEBUG, "QMAN thread initialized - CPU=%d lcore=%d",
+                    cpu, lcore);
 
        dpaa_io_portal = rte_malloc(NULL, sizeof(struct dpaa_portal),
                                    RTE_CACHE_LINE_SIZE);
@@ -312,8 +313,8 @@ int rte_dpaa_portal_init(void *arg)
 
        ret = pthread_setspecific(dpaa_portal_key, (void *)dpaa_io_portal);
        if (ret) {
-               DPAA_BUS_LOG(ERR, "pthread_setspecific failed on "
-                           "core %d with ret: %d", cpu, ret);
+               DPAA_BUS_LOG(ERR, "pthread_setspecific failed on core %u"
+                            " (lcore=%u) with ret: %d", cpu, lcore, ret);
                dpaa_portal_finish(NULL);
 
                return ret;
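
For reference, a minimal sketch (not the DPAA code itself) of the affinity pattern used in the hunk above: the thread pins itself to the cpuset the EAL already recorded for a given lcore, instead of building a fresh cpu_set_t from the lcore id. The helper name is illustrative and _GNU_SOURCE is assumed for pthread_setaffinity_np.

    #define _GNU_SOURCE
    #include <pthread.h>
    #include <rte_lcore.h>

    /* Illustrative helper: pin the calling thread to the cpuset that the EAL
     * associated with 'lcore', i.e. the mapping the portal init now reuses. */
    static int
    pin_to_lcore_cpuset(unsigned int lcore)
    {
            if (lcore >= RTE_MAX_LCORE)
                    return -1;
            return pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t),
                                          &lcore_config[lcore].cpuset);
    }
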
@@ -542,6 +543,10 @@ rte_dpaa_bus_probe(void)
        unsigned int svr_ver;
        int probe_all = rte_dpaa_bus.bus.conf.scan_mode != RTE_BUS_SCAN_WHITELIST;
 
+       /* If the DPAA bus is not present, nothing needs to be done */
+       if (TAILQ_EMPTY(&rte_dpaa_bus.device_list))
+               return 0;
+
        svr_file = fopen(DPAA_SOC_ID_FILE, "r");
        if (svr_file) {
                if (fscanf(svr_file, "svr:%x", &svr_ver) > 0)
@@ -586,8 +591,7 @@ rte_dpaa_bus_probe(void)
        /* Register DPAA mempool ops only if any DPAA device has
         * been detected.
         */
-       if (!TAILQ_EMPTY(&rte_dpaa_bus.device_list))
-               rte_mbuf_set_platform_mempool_ops(DPAA_MEMPOOL_OPS_NAME);
+       rte_mbuf_set_platform_mempool_ops(DPAA_MEMPOOL_OPS_NAME);
 
        return 0;
 }
index 89af938..2dcdca0 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  *
- *   Copyright 2016 NXP
+ *   Copyright 2016,2018 NXP
  *
  */
 
@@ -227,20 +227,16 @@ static int
 rte_fslmc_parse(const char *name, void *addr)
 {
        uint16_t dev_id;
-       char *t_ptr;
-       char *sep = strchr(name, ':');
+       char *t_ptr = NULL, *dname = NULL;
 
-       if (strncmp(name, RTE_STR(FSLMC_BUS_NAME),
-               strlen(RTE_STR(FSLMC_BUS_NAME)))) {
-               return -EINVAL;
-       }
+       /* 'name' is expected to contain name of device, for example, dpio.1,
+        * dpni.2, etc.
+        */
 
-       if (!sep) {
-               DPAA2_BUS_ERR("Incorrect device name observed");
+       dname = strdup(name);
+       if (!dname)
                return -EINVAL;
-       }
-
-       t_ptr = (char *)(sep + 1);
+       t_ptr = dname;
 
        if (strncmp("dpni", t_ptr, 4) &&
            strncmp("dpseci", t_ptr, 6) &&
@@ -251,24 +247,29 @@ rte_fslmc_parse(const char *name, void *addr)
            strncmp("dpmcp", t_ptr, 5) &&
            strncmp("dpdmai", t_ptr, 6)) {
                DPAA2_BUS_ERR("Unknown or unsupported device");
-               return -EINVAL;
+               goto err_out;
        }
 
        t_ptr = strchr(name, '.');
        if (!t_ptr) {
                DPAA2_BUS_ERR("Incorrect device string observed (%s)", t_ptr);
-               return -EINVAL;
+               goto err_out;
        }
 
        t_ptr = (char *)(t_ptr + 1);
        if (sscanf(t_ptr, "%hu", &dev_id) <= 0) {
                DPAA2_BUS_ERR("Incorrect device string observed (%s)", t_ptr);
-               return -EINVAL;
+               goto err_out;
        }
+       free(dname);
 
        if (addr)
-               strcpy(addr, (char *)(sep + 1));
+               strcpy(addr, name);
+
        return 0;
+err_out:
+       free(dname);
+       return -EINVAL;
 }
 
 static int
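
A compact sketch of the name format the parser above now accepts directly (for example "dpni.2"): split on the first '.' and read the numeric id. The helper name and error handling here are illustrative only, not the fslmc bus code.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Illustrative parser for "<object>.<id>" device names such as "dpni.2". */
    static int
    parse_object_name(const char *name, char *obj, size_t obj_len, uint16_t *id)
    {
            const char *dot = strchr(name, '.');

            if (dot == NULL || (size_t)(dot - name) >= obj_len)
                    return -1;
            memcpy(obj, name, dot - name);
            obj[dot - name] = '\0';
            return (sscanf(dot + 1, "%hu", id) == 1) ? 0 : -1;
    }
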
@@ -293,8 +294,8 @@ rte_fslmc_scan(void)
                goto scan_fail;
 
        /* Scan devices on the group */
-       sprintf(fslmc_dirpath, "%s/%d/devices", VFIO_IOMMU_GROUP_PATH,
-               groupid);
+       snprintf(fslmc_dirpath, sizeof(fslmc_dirpath), "%s/%d/devices",
+                       VFIO_IOMMU_GROUP_PATH, groupid);
        dir = opendir(fslmc_dirpath);
        if (!dir) {
                DPAA2_BUS_ERR("Unable to open VFIO group directory");
index 493b6e5..ce82a99 100644 (file)
@@ -176,7 +176,7 @@ static int vfio_map_irq_region(struct fslmc_vfio_group *group)
        vaddr = (unsigned long *)mmap(NULL, 0x1000, PROT_WRITE |
                PROT_READ, MAP_SHARED, container_device_fd, 0x6030000);
        if (vaddr == MAP_FAILED) {
-               DPAA2_BUS_ERR("Unable to map region (errno = %d)", errno);
+               DPAA2_BUS_INFO("Unable to map region (errno = %d)", errno);
                return -errno;
        }
 
index ce06998..ba2e28c 100644 (file)
@@ -53,6 +53,10 @@ static uint32_t io_space_count;
 /* Variable to store DPAA2 platform type */
 uint32_t dpaa2_svr_family;
 
+/* Physical core id for lcores running on dpaa2. */
+/* DPAA2 only supports a 1 lcore to 1 physical cpu mapping */
+static unsigned int dpaa2_cpu[RTE_MAX_LCORE];
+
 /* Variable to store DPAA2 DQRR size */
 uint8_t dpaa2_dqrr_size;
 /* Variable to store DPAA2 EQCR size */
@@ -92,7 +96,8 @@ dpaa2_core_cluster_sdest(int cpu_id)
 }
 
 #ifdef RTE_LIBRTE_PMD_DPAA2_EVENTDEV
-static void dpaa2_affine_dpio_intr_to_respective_core(int32_t dpio_id)
+static void
+dpaa2_affine_dpio_intr_to_respective_core(int32_t dpio_id, int lcoreid)
 {
 #define STRING_LEN     28
 #define COMMAND_LEN    50
@@ -125,7 +130,7 @@ static void dpaa2_affine_dpio_intr_to_respective_core(int32_t dpio_id)
                return;
        }
 
-       cpu_mask = cpu_mask << rte_lcore_id();
+       cpu_mask = cpu_mask << dpaa2_cpu[lcoreid];
        snprintf(command, COMMAND_LEN, "echo %X > /proc/irq/%s/smp_affinity",
                 cpu_mask, token);
        ret = system(command);
@@ -139,7 +144,7 @@ static void dpaa2_affine_dpio_intr_to_respective_core(int32_t dpio_id)
        fclose(file);
 }
 
-static int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev)
+static int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev, int lcoreid)
 {
        struct epoll_event epoll_ev;
        int eventfd, dpio_epoll_fd, ret;
@@ -176,32 +181,36 @@ static int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev)
        }
        dpio_dev->epoll_fd = dpio_epoll_fd;
 
-       dpaa2_affine_dpio_intr_to_respective_core(dpio_dev->hw_id);
+       dpaa2_affine_dpio_intr_to_respective_core(dpio_dev->hw_id, lcoreid);
 
        return 0;
 }
 #endif
 
 static int
-dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev, int cpu_id)
+dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev, int lcoreid)
 {
        int sdest, ret;
+       int cpu_id;
 
        /* Set the Stashing Destination */
-       if (cpu_id < 0) {
-               cpu_id = rte_get_master_lcore();
-               if (cpu_id < 0) {
+       if (lcoreid < 0) {
+               lcoreid = rte_get_master_lcore();
+               if (lcoreid < 0) {
                        DPAA2_BUS_ERR("Getting CPU Index failed");
                        return -1;
                }
        }
+
+       cpu_id = dpaa2_cpu[lcoreid];
+
        /* Set the STASH Destination depending on Current CPU ID.
         * Valid values of SDEST are 4,5,6,7. Where,
         */
 
        sdest = dpaa2_core_cluster_sdest(cpu_id);
-       DPAA2_BUS_DEBUG("Portal= %d  CPU= %u SDEST= %d",
-                       dpio_dev->index, cpu_id, sdest);
+       DPAA2_BUS_DEBUG("Portal= %d  CPU= %u lcore id =%u SDEST= %d",
+                       dpio_dev->index, cpu_id, lcoreid, sdest);
 
        ret = dpio_set_stashing_destination(dpio_dev->dpio, CMD_PRI_LOW,
                                            dpio_dev->token, sdest);
@@ -211,7 +220,7 @@ dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev, int cpu_id)
        }
 
 #ifdef RTE_LIBRTE_PMD_DPAA2_EVENTDEV
-       if (dpaa2_dpio_intr_init(dpio_dev)) {
+       if (dpaa2_dpio_intr_init(dpio_dev, lcoreid)) {
                DPAA2_BUS_ERR("Interrupt registration failed for dpio");
                return -1;
        }
@@ -220,7 +229,7 @@ dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev, int cpu_id)
        return 0;
 }
 
-struct dpaa2_dpio_dev *dpaa2_get_qbman_swp(int cpu_id)
+struct dpaa2_dpio_dev *dpaa2_get_qbman_swp(int lcoreid)
 {
        struct dpaa2_dpio_dev *dpio_dev = NULL;
        int ret;
@@ -236,7 +245,7 @@ struct dpaa2_dpio_dev *dpaa2_get_qbman_swp(int cpu_id)
        DPAA2_BUS_DEBUG("New Portal %p (%d) affined thread - %lu",
                        dpio_dev, dpio_dev->index, syscall(SYS_gettid));
 
-       ret = dpaa2_configure_stashing(dpio_dev, cpu_id);
+       ret = dpaa2_configure_stashing(dpio_dev, lcoreid);
        if (ret)
                DPAA2_BUS_ERR("dpaa2_configure_stashing failed");
 
@@ -340,6 +349,39 @@ dpaa2_affine_qbman_ethrx_swp(void)
        }
 }
 
+/*
+ * This checks for unsupported lcore mappings and also records the physical
+ * cpu id for each lcore.
+ * One lcore can map to only one cpu, i.e. 1@10-14 is not supported.
+ * One cpu can be mapped to more than one lcore.
+ */
+static int
+dpaa2_check_lcore_cpuset(void)
+{
+       unsigned int lcore_id, i;
+       int ret = 0;
+
+       for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+               dpaa2_cpu[lcore_id] = 0xffffffff;
+
+       for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+               for (i = 0; i < RTE_MAX_LCORE; i++) {
+                       if (CPU_ISSET(i, &lcore_config[lcore_id].cpuset)) {
+                               RTE_LOG(DEBUG, EAL, "lcore id = %u cpu=%u\n",
+                                       lcore_id, i);
+                               if (dpaa2_cpu[lcore_id] != 0xffffffff) {
+                                       DPAA2_BUS_ERR(
+                                   "ERR:lcore map to multi-cpu not supported");
+                                       ret = -1;
+                               } else  {
+                                       dpaa2_cpu[lcore_id] = i;
+                               }
+                       }
+               }
+       }
+       return ret;
+}
+
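
An equivalent check can be expressed with the glibc CPU_COUNT() macro; this is a hedged sketch rather than the bus code itself, and assumes _GNU_SOURCE so the cpuset macros are available.

    #define _GNU_SOURCE
    #include <sched.h>
    #include <rte_lcore.h>

    /* Illustrative: a DPAA2 lcore must be pinned to exactly one physical CPU. */
    static int
    lcore_maps_single_cpu(unsigned int lcore_id)
    {
            return CPU_COUNT(&lcore_config[lcore_id].cpuset) == 1;
    }
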
 static int
 dpaa2_create_dpio_device(int vdev_fd,
                         struct vfio_device_info *obj_info,
@@ -349,6 +391,7 @@ dpaa2_create_dpio_device(int vdev_fd,
        struct vfio_region_info reg_info = { .argsz = sizeof(reg_info)};
        struct qbman_swp_desc p_des;
        struct dpio_attr attr;
+       static int check_lcore_cpuset;
 
        if (obj_info->num_regions < NUM_DPIO_REGIONS) {
                DPAA2_BUS_ERR("Not sufficient number of DPIO regions");
@@ -368,7 +411,16 @@ dpaa2_create_dpio_device(int vdev_fd,
        /* Using single portal  for all devices */
        dpio_dev->mc_portal = rte_mcp_ptr_list[MC_PORTAL_INDEX];
 
+       if (!check_lcore_cpuset) {
+               check_lcore_cpuset = 1;
+
+               if (dpaa2_check_lcore_cpuset() < 0)
+                       goto err;
+       }
+
        dpio_dev->dpio = malloc(sizeof(struct fsl_mc_io));
+       memset(dpio_dev->dpio, 0, sizeof(struct fsl_mc_io));
+
        if (!dpio_dev->dpio) {
                DPAA2_BUS_ERR("Memory allocation failure");
                goto err;
index 3380e54..bbea37e 100644 (file)
@@ -683,8 +683,8 @@ static int qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s,
        full_mask = s->eqcr.pi_mask;
        if (!s->eqcr.available) {
                eqcr_ci = s->eqcr.ci;
-               s->eqcr.ci = qbman_cinh_read(&s->sys,
-                               QBMAN_CENA_SWP_EQCR_CI) & full_mask;
+               s->eqcr.ci = qbman_cena_read_reg(&s->sys,
+                               QBMAN_CENA_SWP_EQCR_CI_MEMBACK) & full_mask;
                s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
                                eqcr_ci, s->eqcr.ci);
                if (!s->eqcr.available)
@@ -809,8 +809,8 @@ static int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
        full_mask = s->eqcr.pi_mask;
        if (!s->eqcr.available) {
                eqcr_ci = s->eqcr.ci;
-               s->eqcr.ci = qbman_cinh_read(&s->sys,
-                               QBMAN_CENA_SWP_EQCR_CI) & full_mask;
+               s->eqcr.ci = qbman_cena_read_reg(&s->sys,
+                               QBMAN_CENA_SWP_EQCR_CI_MEMBACK) & full_mask;
                s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
                                        eqcr_ci, s->eqcr.ci);
                if (!s->eqcr.available)
@@ -941,8 +941,8 @@ static int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
        full_mask = s->eqcr.pi_mask;
        if (!s->eqcr.available) {
                eqcr_ci = s->eqcr.ci;
-               s->eqcr.ci = qbman_cinh_read(&s->sys,
-                               QBMAN_CENA_SWP_EQCR_CI) & full_mask;
+               s->eqcr.ci = qbman_cena_read_reg(&s->sys,
+                               QBMAN_CENA_SWP_EQCR_CI_MEMBACK) & full_mask;
                s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
                                        eqcr_ci, s->eqcr.ci);
                if (!s->eqcr.available)
index d41af83..0571097 100644 (file)
@@ -55,6 +55,7 @@
 #define QBMAN_CENA_SWP_RR(vb)  (0x700 + ((uint32_t)(vb) >> 1))
 #define QBMAN_CENA_SWP_VDQCR   0x780
 #define QBMAN_CENA_SWP_EQCR_CI 0x840
+#define QBMAN_CENA_SWP_EQCR_CI_MEMBACK 0x1840
 
 /* CENA register offsets in memory-backed mode */
 #define QBMAN_CENA_SWP_DQRR_MEM(n)  (0x800 + ((uint32_t)(n) << 6))
index 5f23ed8..55d3abf 100644 (file)
@@ -125,15 +125,13 @@ ifpga_scan_one(struct rte_rawdev *rawdev,
                                     IFPGA_AFU_BTS);
                        goto end;
                }
+               afu_pr_conf.pr_enable = 1;
        } else {
-               IFPGA_BUS_ERR("arg %s is mandatory for ifpga bus",
-                         IFPGA_AFU_BTS);
-               goto end;
+               afu_pr_conf.pr_enable = 0;
        }
 
        afu_pr_conf.afu_id.uuid.uuid_low = 0;
        afu_pr_conf.afu_id.uuid.uuid_high = 0;
-       afu_pr_conf.pr_enable = path?1:0;
 
        if (ifpga_find_afu_dev(rawdev, &afu_pr_conf.afu_id))
                goto end;
@@ -308,12 +306,19 @@ ifpga_probe_all_drivers(struct rte_afu_device *afu_dev)
        }
 
        TAILQ_FOREACH(drv, &ifpga_afu_drv_list, next) {
-               if (ifpga_probe_one_driver(drv, afu_dev)) {
-                       ret = -1;
-                       break;
-               }
+               ret = ifpga_probe_one_driver(drv, afu_dev);
+               if (ret < 0)
+                       /* negative value is an error */
+                       return ret;
+               if (ret > 0)
+                       /* positive value means driver doesn't support it */
+                       continue;
+               return 0;
        }
-       return ret;
+       if ((ret > 0) && (afu_dev->driver == NULL))
+               return 0;
+       else
+               return ret;
 }
 
 /*
index d53c0f4..0bf43ba 100644 (file)
@@ -13,7 +13,7 @@
 
 #ifdef __cplusplus
 extern "C" {
-#endif
+#endif /* __cplusplus */
 
 #include <rte_bus.h>
 #include <rte_pci.h>
@@ -143,4 +143,8 @@ RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
 #define RTE_PMD_REGISTER_AFU_ALIAS(nm, alias)\
 static const char *afudrvinit_ ## nm ## _alias = RTE_STR(alias)
 
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
 #endif /* _RTE_BUS_IFPGA_H_ */
index 54a4c95..8e3d43e 100644 (file)
@@ -658,7 +658,7 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
        vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0);
        if (vfio_res == NULL) {
                RTE_LOG(ERR, EAL,
-                       "%s(): cannot store uio mmap details\n", __func__);
+                       "%s(): cannot store vfio mmap details\n", __func__);
                goto err_vfio_dev_fd;
        }
        memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr));
index 12e97e3..38df4d7 100644 (file)
@@ -357,6 +357,12 @@ int vmbus_uio_get_subchan(struct vmbus_channel *primary,
                        continue;
                }
 
+               if (!vmbus_isnew_subchannel(primary, relid))
+                       continue;       /* Already know about you */
+
+               if (!vmbus_uio_ring_present(dev, relid))
+                       continue;       /* Ring may not be ready yet */
+
                snprintf(subchan_path, sizeof(subchan_path), "%s/%lu",
                         chan_path, relid);
                err = vmbus_uio_sysfs_read(subchan_path, "subchannel_id",
@@ -370,12 +376,6 @@ int vmbus_uio_get_subchan(struct vmbus_channel *primary,
                if (subid == 0)
                        continue;       /* skip primary channel */
 
-               if (!vmbus_isnew_subchannel(primary, relid))
-                       continue;
-
-               if (!vmbus_uio_ring_present(dev, relid))
-                       continue;       /* Ring may not be ready yet */
-
                err = vmbus_uio_sysfs_read(subchan_path, "monitor_id",
                                           &monid, UINT8_MAX);
                if (err) {
index 79f6a01..4e66c58 100644 (file)
@@ -634,27 +634,20 @@ qat_dequeue_op_burst(void *qp, void **ops, uint16_t nb_ops)
        uint32_t head;
        uint32_t resp_counter = 0;
        uint8_t *resp_msg;
-       uint8_t hdr_flags;
 
        rx_queue = &(tmp_qp->rx_q);
        tx_queue = &(tmp_qp->tx_q);
        head = rx_queue->head;
        resp_msg = (uint8_t *)rx_queue->base_addr + rx_queue->head;
-       hdr_flags = ((struct icp_qat_fw_comn_resp_hdr *)resp_msg)->hdr_flags;
 
        while (*(uint32_t *)resp_msg != ADF_RING_EMPTY_SIG &&
                        resp_counter != nb_ops) {
 
-               if (unlikely(!ICP_QAT_FW_COMN_VALID_FLAG_GET(hdr_flags))) {
-                       /* Fatal firmware error */
-                       QAT_LOG(ERR, "QAT Firmware returned invalid response");
-                       return 0;
-               }
-
                if (tmp_qp->service_type == QAT_SERVICE_SYMMETRIC)
                        qat_sym_process_response(ops, resp_msg);
                else if (tmp_qp->service_type == QAT_SERVICE_COMPRESSION)
-                       qat_comp_process_response(ops, resp_msg);
+                       qat_comp_process_response(ops, resp_msg,
+                                       &tmp_qp->stats.dequeue_err_count);
 
                head = adf_modulo(head + rx_queue->msg_size,
                                  rx_queue->modulo_mask);
@@ -682,7 +675,8 @@ qat_dequeue_op_burst(void *qp, void **ops, uint16_t nb_ops)
 }
 
 __rte_weak int
-qat_comp_process_response(void **op __rte_unused, uint8_t *resp __rte_unused)
+qat_comp_process_response(void **op __rte_unused, uint8_t *resp __rte_unused,
+                         uint64_t *dequeue_err_count __rte_unused)
 {
        return  0;
 }
index 6f1525e..9833bcb 100644 (file)
@@ -107,6 +107,7 @@ qat_qps_per_service(const struct qat_qp_hw_data *qp_hw_data,
 
 /* Needed for weak function*/
 int
-qat_comp_process_response(void **op __rte_unused, uint8_t *resp __rte_unused);
+qat_comp_process_response(void **op __rte_unused, uint8_t *resp __rte_unused,
+                         uint64_t *dequeue_err_count);
 
 #endif /* _QAT_QP_H_ */
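
The prototype change above pairs with the __rte_weak stub in the queue-pair code; a minimal sketch of that pattern (placeholder names, not the QAT functions) looks like this: the common code links against the weak default, and a strong definition provided by the compression PMD overrides it when that PMD is built in.

    #include <stdint.h>
    #include <rte_common.h>

    /* Weak default: used only when no strong definition is linked in. */
    __rte_weak int
    example_process_response(void **op __rte_unused, uint8_t *resp __rte_unused,
                             uint64_t *dequeue_err_count __rte_unused)
    {
            return 0;
    }
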
index 2754742..450cfd6 100644 (file)
@@ -36,6 +36,7 @@ qat_comp_build_request(void *in_op, uint8_t *out_msg,
                QAT_DP_LOG(ERR, "QAT PMD only supports stateless compression "
                                "operation requests, op (%p) is not a "
                                "stateless operation.", op);
+               op->status = RTE_COMP_OP_STATUS_INVALID_ARGS;
                return -EINVAL;
        }
 
@@ -61,6 +62,7 @@ qat_comp_build_request(void *in_op, uint8_t *out_msg,
                                RTE_PMD_QAT_COMP_SGL_MAX_SEGMENTS);
                if (ret) {
                        QAT_DP_LOG(ERR, "QAT PMD Cannot fill source sgl array");
+                       op->status = RTE_COMP_OP_STATUS_INVALID_ARGS;
                        return ret;
                }
 
@@ -71,6 +73,7 @@ qat_comp_build_request(void *in_op, uint8_t *out_msg,
                                RTE_PMD_QAT_COMP_SGL_MAX_SEGMENTS);
                if (ret) {
                        QAT_DP_LOG(ERR, "QAT PMD Cannot fill dest. sgl array");
+                       op->status = RTE_COMP_OP_STATUS_INVALID_ARGS;
                        return ret;
                }
 
@@ -106,7 +109,7 @@ qat_comp_build_request(void *in_op, uint8_t *out_msg,
 }
 
 int
-qat_comp_process_response(void **op, uint8_t *resp)
+qat_comp_process_response(void **op, uint8_t *resp, uint64_t *dequeue_err_count)
 {
        struct icp_qat_fw_comp_resp *resp_msg =
                        (struct icp_qat_fw_comp_resp *)resp;
@@ -114,6 +117,9 @@ qat_comp_process_response(void **op, uint8_t *resp)
                        (resp_msg->opaque_data);
        struct qat_comp_xform *qat_xform = (struct qat_comp_xform *)
                                (rx_op->private_xform);
+       int err = resp_msg->comn_resp.comn_status &
+                       ((1 << QAT_COMN_RESP_CMP_STATUS_BITPOS) |
+                        (1 << QAT_COMN_RESP_XLAT_STATUS_BITPOS));
 
 #if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG
        QAT_DP_LOG(DEBUG, "Direction: %s",
@@ -132,24 +138,36 @@ qat_comp_process_response(void **op, uint8_t *resp)
                        rx_op->debug_status = ERR_CODE_QAT_COMP_WRONG_FW;
                        *op = (void *)rx_op;
                        QAT_DP_LOG(ERR, "QAT has wrong firmware");
+                       ++(*dequeue_err_count);
                        return 0;
                }
        }
 
-       if ((ICP_QAT_FW_COMN_RESP_CMP_STAT_GET(resp_msg->comn_resp.comn_status)
-               | ICP_QAT_FW_COMN_RESP_XLAT_STAT_GET(
-                               resp_msg->comn_resp.comn_status)) !=
-                               ICP_QAT_FW_COMN_STATUS_FLAG_OK) {
-
-               if (unlikely((ICP_QAT_FW_COMN_RESP_XLAT_STAT_GET(
-                               resp_msg->comn_resp.comn_status) !=
-                               ICP_QAT_FW_COMN_STATUS_FLAG_OK) &&
-                               (qat_xform->qat_comp_request_type
-                               == QAT_COMP_REQUEST_DYNAMIC_COMP_STATELESS)))
+       if (err) {
+               if (unlikely((err & (1 << QAT_COMN_RESP_XLAT_STATUS_BITPOS))
+                            && (qat_xform->qat_comp_request_type
+                                == QAT_COMP_REQUEST_DYNAMIC_COMP_STATELESS))) {
                        QAT_DP_LOG(ERR, "QAT intermediate buffer may be too "
                            "small for output, try configuring a larger size");
+               }
+
+               int8_t cmp_err_code =
+                       (int8_t)resp_msg->comn_resp.comn_error.cmp_err_code;
+               int8_t xlat_err_code =
+                       (int8_t)resp_msg->comn_resp.comn_error.xlat_err_code;
+
+               if ((cmp_err_code == ERR_CODE_OVERFLOW_ERROR && !xlat_err_code)
+                               ||
+                   (!cmp_err_code && xlat_err_code == ERR_CODE_OVERFLOW_ERROR)
+                               ||
+                   (cmp_err_code == ERR_CODE_OVERFLOW_ERROR &&
+                    xlat_err_code == ERR_CODE_OVERFLOW_ERROR))
+                       rx_op->status =
+                               RTE_COMP_OP_STATUS_OUT_OF_SPACE_TERMINATED;
+               else
+                       rx_op->status = RTE_COMP_OP_STATUS_ERROR;
 
-               rx_op->status = RTE_COMP_OP_STATUS_ERROR;
+               ++(*dequeue_err_count);
                rx_op->debug_status =
                        *((uint16_t *)(&resp_msg->comn_resp.comn_error));
        } else {
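
With the change above, buffer-overflow conditions surface to the application as RTE_COMP_OP_STATUS_OUT_OF_SPACE_TERMINATED rather than a generic error. A hedged sketch of how an application might branch on the dequeued status (the function name is illustrative):

    #include <rte_comp.h>

    /* Illustrative status handling after rte_compressdev_dequeue_burst(). */
    static void
    handle_comp_status(struct rte_comp_op *op)
    {
            switch (op->status) {
            case RTE_COMP_OP_STATUS_SUCCESS:
                    break;
            case RTE_COMP_OP_STATUS_OUT_OF_SPACE_TERMINATED:
                    /* Destination too small: resubmit with a larger buffer. */
                    break;
            default:
                    /* RTE_COMP_OP_STATUS_ERROR and others: drop or report. */
                    break;
            }
    }
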
index 99a4462..19f48df 100644 (file)
@@ -60,8 +60,8 @@ qat_comp_build_request(void *in_op, uint8_t *out_msg, void *op_cookie,
                       enum qat_device_gen qat_dev_gen __rte_unused);
 
 int
-qat_comp_process_response(void **op, uint8_t *resp);
-
+qat_comp_process_response(void **op, uint8_t *resp,
+                         uint64_t *dequeue_err_count);
 
 int
 qat_comp_private_xform_create(struct rte_compressdev *dev,
index c343a39..cd15245 100644 (file)
@@ -153,6 +153,11 @@ static int
 aesni_gcm_pmd_qp_release(struct rte_cryptodev *dev, uint16_t qp_id)
 {
        if (dev->data->queue_pairs[qp_id] != NULL) {
+               struct aesni_gcm_qp *qp = dev->data->queue_pairs[qp_id];
+
+               if (qp->processed_pkts)
+                       rte_ring_free(qp->processed_pkts);
+
                rte_free(dev->data->queue_pairs[qp_id]);
                dev->data->queue_pairs[qp_id] = NULL;
        }
index f505adf..190053e 100644 (file)
@@ -2012,7 +2012,7 @@ caam_jr_dev_configure(struct rte_cryptodev *dev,
        PMD_INIT_FUNC_TRACE();
 
        internals = dev->data->dev_private;
-       sprintf(str, "ctx_pool_%d", dev->data->dev_id);
+       snprintf(str, sizeof(str), "ctx_pool_%d", dev->data->dev_id);
        if (!internals->ctx_pool) {
                internals->ctx_pool = rte_mempool_create((const char *)str,
                                                CTX_POOL_NUM_BUFS,
index d94101c..bf872a2 100644 (file)
@@ -284,11 +284,11 @@ uio_map_registers(int uio_device_fd, int uio_device_id,
        memset(uio_map_size_str, 0, sizeof(uio_map_size_str));
 
        /* Compose string: /sys/class/uio/uioX */
-       sprintf(uio_sys_root, "%s/%s%d", SEC_UIO_DEVICE_SYS_ATTR_PATH,
-               "uio", uio_device_id);
+       snprintf(uio_sys_root, sizeof(uio_sys_root), "%s/%s%d",
+                       SEC_UIO_DEVICE_SYS_ATTR_PATH, "uio", uio_device_id);
        /* Compose string: maps/mapY */
-       sprintf(uio_sys_map_subdir, "%s%d", SEC_UIO_DEVICE_SYS_MAP_ATTR,
-               uio_map_id);
+       snprintf(uio_sys_map_subdir, sizeof(uio_sys_map_subdir), "%s%d",
+                       SEC_UIO_DEVICE_SYS_MAP_ATTR, uio_map_id);
 
        /* Read first (and only) line from file
         * /sys/class/uio/uioX/maps/mapY/size
@@ -389,9 +389,8 @@ uio_job_ring *config_job_ring(void)
 
        /* Find UIO device created by SEC kernel driver for this job ring. */
        memset(uio_device_file_name, 0, sizeof(uio_device_file_name));
-
-       sprintf(uio_device_file_name, "%s%d", SEC_UIO_DEVICE_FILE_NAME,
-               job_ring->uio_minor_number);
+       snprintf(uio_device_file_name, sizeof(uio_device_file_name), "%s%d",
+                       SEC_UIO_DEVICE_FILE_NAME, job_ring->uio_minor_number);
 
        /* Open device file */
        job_ring->uio_fd = open(uio_device_file_name, O_RDWR);
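
The sprintf-to-snprintf conversions above bound every write to the destination size. A minimal sketch of the safer pattern with truncation detection follows; the path format and helper name are illustrative, not the SEC driver's constants.

    #include <stdio.h>

    /* Illustrative: format a device path into a fixed buffer and fail on
     * truncation instead of overflowing it. */
    static int
    format_uio_path(char *buf, size_t len, int uio_minor)
    {
            int n = snprintf(buf, len, "/dev/uio%d", uio_minor);

            return (n < 0 || (size_t)n >= len) ? -1 : 0;
    }
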
index 6095c60..34c14f7 100644 (file)
@@ -108,7 +108,7 @@ build_proto_compound_fd(dpaa2_sec_session *sess,
        /* Configure FD as a FRAME LIST */
        DPAA2_SET_FD_ADDR(fd, DPAA2_VADDR_TO_IOVA(op_fle));
        DPAA2_SET_FD_COMPOUND_FMT(fd);
-       DPAA2_SET_FD_FLC(fd, (ptrdiff_t)flc);
+       DPAA2_SET_FD_FLC(fd, DPAA2_VADDR_TO_IOVA(flc));
 
        /* Configure Output FLE with dst mbuf data  */
        DPAA2_SET_FLE_ADDR(op_fle, DPAA2_MBUF_VADDR_TO_IOVA(dst_mbuf));
@@ -160,7 +160,7 @@ build_proto_fd(dpaa2_sec_session *sess,
        DPAA2_SET_FD_ADDR(fd, DPAA2_MBUF_VADDR_TO_IOVA(sym_op->m_src));
        DPAA2_SET_FD_OFFSET(fd, sym_op->m_src->data_off);
        DPAA2_SET_FD_LEN(fd, sym_op->m_src->pkt_len);
-       DPAA2_SET_FD_FLC(fd, (ptrdiff_t)flc);
+       DPAA2_SET_FD_FLC(fd, DPAA2_VADDR_TO_IOVA(flc));
 
        /* save physical address of mbuf */
        op->sym->aead.digest.phys_addr = mbuf->buf_iova;
@@ -3372,14 +3372,15 @@ dpaa2_sec_dev_init(struct rte_cryptodev *cryptodev)
                             retcode);
                goto init_error;
        }
-       sprintf(cryptodev->data->name, "dpsec-%u", hw_id);
+       snprintf(cryptodev->data->name, sizeof(cryptodev->data->name),
+                       "dpsec-%u", hw_id);
 
        internals->max_nb_queue_pairs = attr.num_tx_queues;
        cryptodev->data->nb_queue_pairs = internals->max_nb_queue_pairs;
        internals->hw = dpseci;
        internals->token = token;
 
-       sprintf(str, "fle_pool_%d", cryptodev->data->dev_id);
+       snprintf(str, sizeof(str), "fle_pool_%d", cryptodev->data->dev_id);
        internals->fle_pool = rte_mempool_create((const char *)str,
                        FLE_POOL_NUM_BUFS,
                        FLE_POOL_BUF_SIZE,
@@ -3410,7 +3411,8 @@ cryptodev_dpaa2_sec_probe(struct rte_dpaa2_driver *dpaa2_drv __rte_unused,
 
        int retval;
 
-       sprintf(cryptodev_name, "dpsec-%d", dpaa2_dev->object_id);
+       snprintf(cryptodev_name, sizeof(cryptodev_name), "dpsec-%d",
+                       dpaa2_dev->object_id);
 
        cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, rte_socket_id());
        if (cryptodev == NULL)
index d83e745..8958fd0 100644 (file)
@@ -2185,7 +2185,7 @@ dpaa_sec_dev_configure(struct rte_cryptodev *dev,
        PMD_INIT_FUNC_TRACE();
 
        internals = dev->data->dev_private;
-       sprintf(str, "ctx_pool_%d", dev->data->dev_id);
+       snprintf(str, sizeof(str), "ctx_pool_%d", dev->data->dev_id);
        if (!internals->ctx_pool) {
                internals->ctx_pool = rte_mempool_create((const char *)str,
                                                        CTX_POOL_NUM_BUFS,
@@ -2391,7 +2391,8 @@ cryptodev_dpaa_sec_probe(struct rte_dpaa_driver *dpaa_drv __rte_unused,
 
        int retval;
 
-       sprintf(cryptodev_name, "dpaa_sec-%d", dpaa_dev->id.dev_id);
+       snprintf(cryptodev_name, sizeof(cryptodev_name), "dpaa_sec-%d",
+                       dpaa_dev->id.dev_id);
 
        cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, rte_socket_id());
        if (cryptodev == NULL)
index 2bdcd01..319ca34 100644 (file)
@@ -133,6 +133,11 @@ static int
 null_crypto_pmd_qp_release(struct rte_cryptodev *dev, uint16_t qp_id)
 {
        if (dev->data->queue_pairs[qp_id] != NULL) {
+               struct null_crypto_qp *qp = dev->data->queue_pairs[qp_id];
+
+               if (qp->processed_pkts)
+                       rte_ring_free(qp->processed_pkts);
+
                rte_free(dev->data->queue_pairs[qp_id]);
                dev->data->queue_pairs[qp_id] = NULL;
        }
index c2b029e..a65f9e5 100644 (file)
@@ -657,6 +657,11 @@ static int
 openssl_pmd_qp_release(struct rte_cryptodev *dev, uint16_t qp_id)
 {
        if (dev->data->queue_pairs[qp_id] != NULL) {
+               struct openssl_qp *qp = dev->data->queue_pairs[qp_id];
+
+               if (qp->processed_ops)
+                       rte_ring_free(qp->processed_ops);
+
                rte_free(dev->data->queue_pairs[qp_id]);
                dev->data->queue_pairs[qp_id] = NULL;
        }
index 10cdf2e..8801ca5 100644 (file)
@@ -157,6 +157,7 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg,
        uint32_t min_ofs = 0;
        uint64_t src_buf_start = 0, dst_buf_start = 0;
        uint8_t do_sgl = 0;
+       uint8_t wireless_auth = 0, in_place = 1;
        struct rte_crypto_op *op = (struct rte_crypto_op *)in_op;
        struct qat_sym_op_cookie *cookie =
                                (struct qat_sym_op_cookie *)op_cookie;
@@ -269,6 +270,7 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg,
                        }
                        auth_ofs = op->sym->auth.data.offset >> 3;
                        auth_len = op->sym->auth.data.length >> 3;
+                       wireless_auth = 1;
 
                        auth_param->u1.aad_adr =
                                        rte_crypto_op_ctophys_offset(op,
@@ -438,6 +440,7 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg,
                 * Don't align DMA start. DMA the minimum data-set
                 * so as not to overwrite data in dest buffer
                 */
+               in_place = 0;
                src_buf_start =
                        rte_pktmbuf_iova_offset(op->sym->m_src, min_ofs);
                dst_buf_start =
@@ -530,6 +533,18 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg,
        } else {
                qat_req->comn_mid.src_data_addr = src_buf_start;
                qat_req->comn_mid.dest_data_addr = dst_buf_start;
+               /* handle case of auth-gen-then-cipher with digest encrypted */
+               if (wireless_auth && in_place &&
+                   (op->sym->auth.digest.phys_addr ==
+                               src_buf_start + auth_ofs + auth_len) &&
+                   (auth_ofs + auth_len + ctx->digest_length <=
+                               cipher_ofs + cipher_len)) {
+                       struct icp_qat_fw_comn_req_hdr *header =
+                                               &qat_req->comn_hdr;
+                       ICP_QAT_FW_LA_DIGEST_IN_BUFFER_SET(
+                               header->serv_specif_flags,
+                               ICP_QAT_FW_LA_DIGEST_IN_BUFFER);
+               }
        }
 
 #if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG
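
The length half of the condition added above reads as: the digest is covered by the cipher region when it starts immediately after the authenticated data and the cipher region extends at least that far. A hedged sketch of that check (the real code also compares the digest's physical address against the source buffer):

    #include <stdbool.h>
    #include <stdint.h>

    /* Illustrative: does the cipher region [cipher_ofs, cipher_ofs+cipher_len)
     * cover a digest appended directly after the auth region? */
    static bool
    digest_covered_by_cipher(uint32_t auth_ofs, uint32_t auth_len,
                             uint32_t digest_len,
                             uint32_t cipher_ofs, uint32_t cipher_len)
    {
            return auth_ofs + auth_len + digest_len <= cipher_ofs + cipher_len;
    }
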
index 8196e23..4d7ec01 100644 (file)
@@ -1143,8 +1143,8 @@ static int qat_sym_do_precomputes(enum icp_qat_hw_auth_algo hash_alg,
        }
 
        block_size = qat_hash_get_block_size(hash_alg);
-       if (block_size <= 0)
-               return -EFAULT;
+       if (block_size < 0)
+               return block_size;
        /* init ipad and opad from key and xor with fixed values */
        memset(ipad, 0, block_size);
        memset(opad, 0, block_size);
@@ -1488,11 +1488,17 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc,
                || cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_KASUMI_F9
                || cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_ZUC_3G_128_EIA3
                || cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_AES_XCBC_MAC
+               || cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_AES_CBC_MAC
+               || cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_NULL
                        )
                hash->auth_counter.counter = 0;
-       else
-               hash->auth_counter.counter = rte_bswap32(
-                               qat_hash_get_block_size(cdesc->qat_hash_alg));
+       else {
+               int block_size = qat_hash_get_block_size(cdesc->qat_hash_alg);
+
+               if (block_size < 0)
+                       return block_size;
+               hash->auth_counter.counter = rte_bswap32(block_size);
+       }
 
        cdesc->cd_cur_ptr += sizeof(struct icp_qat_hw_auth_setup);
 
index cfbc952..a367ee9 100644 (file)
@@ -142,6 +142,11 @@ static int
 snow3g_pmd_qp_release(struct rte_cryptodev *dev, uint16_t qp_id)
 {
        if (dev->data->queue_pairs[qp_id] != NULL) {
+               struct snow3g_qp *qp = dev->data->queue_pairs[qp_id];
+
+               if (qp->processed_ops)
+                       rte_ring_free(qp->processed_ops);
+
                rte_free(dev->data->queue_pairs[qp_id]);
                dev->data->queue_pairs[qp_id] = NULL;
        }
index 6da3965..04d45e4 100644 (file)
@@ -142,6 +142,11 @@ static int
 zuc_pmd_qp_release(struct rte_cryptodev *dev, uint16_t qp_id)
 {
        if (dev->data->queue_pairs[qp_id] != NULL) {
+               struct zuc_qp *qp = dev->data->queue_pairs[qp_id];
+
+               if (qp->processed_ops)
+                       rte_ring_free(qp->processed_ops);
+
                rte_free(dev->data->queue_pairs[qp_id]);
                dev->data->queue_pairs[qp_id] = NULL;
        }
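
The same queue-pair release fix is applied to the aesni_gcm, null, openssl, snow3g and zuc PMDs above: free the completion ring before freeing the queue-pair structure. A generic sketch of the pattern, with placeholder struct and function names:

    #include <rte_malloc.h>
    #include <rte_ring.h>

    struct example_qp {
            struct rte_ring *processed_ops; /* completion ring */
    };

    /* Illustrative release: rte_ring_free() tolerates NULL, so the extra
     * check simply mirrors the defensive style used in the PMDs. */
    static void
    example_qp_release(struct example_qp **qps, uint16_t qp_id)
    {
            struct example_qp *qp = qps[qp_id];

            if (qp != NULL) {
                    if (qp->processed_ops)
                            rte_ring_free(qp->processed_ops);
                    rte_free(qp);
                    qps[qp_id] = NULL;
            }
    }
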
index 95a98c6..264cfc0 100644 (file)
@@ -433,8 +433,7 @@ eth_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
        int ret;
        int s;
        unsigned int data_size = internals->req.tp_frame_size -
-                                TPACKET2_HDRLEN -
-                                sizeof(struct sockaddr_ll);
+                                TPACKET2_HDRLEN;
 
        if (mtu > data_size)
                return -EINVAL;
index fd90947..40c9137 100644 (file)
@@ -21,6 +21,8 @@
 
 #define ATL_TX_OFFLOAD_MASK (                           \
        PKT_TX_VLAN |                                    \
+       PKT_TX_IPV6 |                                    \
+       PKT_TX_IPV4 |                                    \
        PKT_TX_IP_CKSUM |                                \
        PKT_TX_L4_MASK |                                 \
        PKT_TX_TCP_SEG)
index 1709f31..7954911 100644 (file)
@@ -1435,8 +1435,7 @@ struct avf_aqc_add_remove_cloud_filters_element_data {
 };
 
 /* avf_aqc_add_rm_cloud_filt_elem_ext is used when
- * AVF_AQC_ADD_REM_CLOUD_CMD_BIG_BUFFER flag is set. refer to
- * DCR288
+ * AVF_AQC_ADD_REM_CLOUD_CMD_BIG_BUFFER flag is set.
  */
 struct avf_aqc_add_rm_cloud_filt_elem_ext {
        struct avf_aqc_add_remove_cloud_filters_element_data element;
index 86c79c2..26b3828 100644 (file)
@@ -4201,6 +4201,9 @@ static uint16_t bnx2x_update_dsb_idx(struct bnx2x_softc *sc)
        struct host_sp_status_block *def_sb = sc->def_sb;
        uint16_t rc = 0;
 
+       if (!def_sb)
+               return 0;
+
        mb();                   /* status block is written to by the chip */
 
        if (sc->def_att_idx != def_sb->atten_status_block.attn_bits_index) {
@@ -4525,6 +4528,10 @@ static void bnx2x_handle_fp_tq(struct bnx2x_fastpath *fp, int scan_fp)
        struct bnx2x_softc *sc = fp->sc;
        uint8_t more_rx = FALSE;
 
+       /* Make sure FP is initialized */
+       if (!fp->sb_running_index)
+               return;
+
        PMD_DEBUG_PERIODIC_LOG(DEBUG, sc,
                               "---> FP TASK QUEUE (%d) <--", fp->index);
 
@@ -5809,7 +5816,7 @@ static int bnx2x_set_power_state(struct bnx2x_softc *sc, uint8_t state)
 
        /* If there is no power capability, silently succeed */
        if (!(sc->devinfo.pcie_cap_flags & BNX2X_PM_CAPABLE_FLAG)) {
-               PMD_DRV_LOG(WARNING, sc, "No power capability");
+               PMD_DRV_LOG(INFO, sc, "No power capability");
                return 0;
        }
 
@@ -6918,19 +6925,19 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
                return;
        }
 
-       PMD_DRV_LOG(INFO, sc, "Change in link status : cur_data = %lx, last_reported_link = %lx\n",
-                   cur_data.link_report_flags,
-                   sc->last_reported_link.link_report_flags);
+       ELINK_DEBUG_P2(sc, "Change in link status : cur_data = %lx, last_reported_link = %lx",
+                      cur_data.link_report_flags,
+                      sc->last_reported_link.link_report_flags);
 
        sc->link_cnt++;
 
-       PMD_DRV_LOG(INFO, sc, "link status change count = %x\n", sc->link_cnt);
+       ELINK_DEBUG_P1(sc, "link status change count = %x", sc->link_cnt);
        /* report new link params and remember the state for the next time */
        rte_memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data));
 
        if (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
                         &cur_data.link_report_flags)) {
-               PMD_DRV_LOG(INFO, sc, "NIC Link is Down");
+               ELINK_DEBUG_P0(sc, "NIC Link is Down");
        } else {
                __rte_unused const char *duplex;
                __rte_unused const char *flow;
@@ -6938,8 +6945,10 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
                if (bnx2x_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX,
                                           &cur_data.link_report_flags)) {
                        duplex = "full";
+                               ELINK_DEBUG_P0(sc, "link set to full duplex");
                } else {
                        duplex = "half";
+                               ELINK_DEBUG_P0(sc, "link set to half duplex");
                }
 
 /*
@@ -7123,7 +7132,7 @@ void bnx2x_periodic_callout(struct bnx2x_softc *sc)
 {
        if ((sc->state != BNX2X_STATE_OPEN) ||
            (atomic_load_acq_long(&sc->periodic_flags) == PERIODIC_STOP)) {
-               PMD_DRV_LOG(INFO, sc, "periodic callout exit (state=0x%x)",
+               PMD_DRV_LOG(DEBUG, sc, "periodic callout exit (state=0x%x)",
                            sc->state);
                return;
        }
@@ -8317,7 +8326,7 @@ static int bnx2x_get_device_info(struct bnx2x_softc *sc)
                 ((sc->devinfo.bc_ver >> 24) & 0xff),
                 ((sc->devinfo.bc_ver >> 16) & 0xff),
                 ((sc->devinfo.bc_ver >> 8) & 0xff));
-       PMD_DRV_LOG(INFO, sc, "Bootcode version: %s", sc->devinfo.bc_ver_str);
+       PMD_DRV_LOG(DEBUG, sc, "Bootcode version: %s", sc->devinfo.bc_ver_str);
 
        /* get the bootcode shmem address */
        sc->devinfo.mf_cfg_base = bnx2x_get_shmem_mf_cfg_base(sc);
@@ -11743,42 +11752,36 @@ static const char *get_bnx2x_flags(uint32_t flags)
        return flag_str;
 }
 
-/*
- * Prints useful adapter info.
- */
+/* Prints useful adapter info. */
 void bnx2x_print_adapter_info(struct bnx2x_softc *sc)
 {
        int i = 0;
-       __rte_unused uint32_t ext_phy_type;
 
-       PMD_INIT_FUNC_TRACE(sc);
-       if (sc->link_vars.phy_flags & PHY_XGXS_FLAG)
-               ext_phy_type = ELINK_XGXS_EXT_PHY_TYPE(REG_RD(sc,
-                                                             sc->
-                                                             devinfo.shmem_base
-                                                             + offsetof(struct
-                                                                        shmem_region,
-                                                                        dev_info.port_hw_config
-                                                                        [0].external_phy_config)));
-       else
-               ext_phy_type = ELINK_SERDES_EXT_PHY_TYPE(REG_RD(sc,
-                                                               sc->
-                                                               devinfo.shmem_base
-                                                               +
-                                                               offsetof(struct
-                                                                        shmem_region,
-                                                                        dev_info.port_hw_config
-                                                                        [0].external_phy_config)));
-
-       PMD_DRV_LOG(INFO, sc, "\n\n===================================\n");
+       PMD_DRV_LOG(INFO, sc, "========================================");
+       /* DPDK and Driver versions */
+       PMD_DRV_LOG(INFO, sc, "%12s : %s", "DPDK",
+                       rte_version());
+       PMD_DRV_LOG(INFO, sc, "%12s : %s", "Driver",
+                       bnx2x_pmd_version());
+       /* Firmware versions. */
+       PMD_DRV_LOG(INFO, sc, "%12s : %d.%d.%d",
+                    "Firmware",
+                    BNX2X_5710_FW_MAJOR_VERSION,
+                    BNX2X_5710_FW_MINOR_VERSION,
+                    BNX2X_5710_FW_REVISION_VERSION);
+       PMD_DRV_LOG(INFO, sc, "%12s : %s",
+                    "Bootcode", sc->devinfo.bc_ver_str);
        /* Hardware chip info. */
        PMD_DRV_LOG(INFO, sc, "%12s : %#08x", "ASIC", sc->devinfo.chip_id);
        PMD_DRV_LOG(INFO, sc, "%12s : %c%d", "Rev", (CHIP_REV(sc) >> 12) + 'A',
                     (CHIP_METAL(sc) >> 4));
-
-       /* Bus info. */
-       PMD_DRV_LOG(INFO, sc,
-                   "%12s : %d, ", "Bus PCIe", sc->devinfo.pcie_link_width);
+       /* Bus PCIe info. */
+       PMD_DRV_LOG(INFO, sc, "%12s : 0x%x", "Vendor Id",
+                   sc->devinfo.vendor_id);
+       PMD_DRV_LOG(INFO, sc, "%12s : 0x%x", "Device Id",
+                   sc->devinfo.device_id);
+       PMD_DRV_LOG(INFO, sc, "%12s : width x%d, ", "Bus PCIe",
+                   sc->devinfo.pcie_link_width);
        switch (sc->devinfo.pcie_link_speed) {
        case 1:
                PMD_DRV_LOG(INFO, sc, "%23s", "2.5 Gbps");
@@ -11792,62 +11795,45 @@ void bnx2x_print_adapter_info(struct bnx2x_softc *sc)
        default:
                PMD_DRV_LOG(INFO, sc, "%33s", "Unknown link speed");
        }
-
        /* Device features. */
        PMD_DRV_LOG(INFO, sc, "%12s : ", "Flags");
-
        /* Miscellaneous flags. */
        if (sc->devinfo.pcie_cap_flags & BNX2X_MSI_CAPABLE_FLAG) {
                PMD_DRV_LOG(INFO, sc, "%18s", "MSI");
                i++;
        }
-
        if (sc->devinfo.pcie_cap_flags & BNX2X_MSIX_CAPABLE_FLAG) {
                if (i > 0)
                        PMD_DRV_LOG(INFO, sc, "|");
                PMD_DRV_LOG(INFO, sc, "%20s", "MSI-X");
                i++;
        }
+       PMD_DRV_LOG(INFO, sc, "%12s : %s", "OVLAN", (OVLAN(sc) ? "YES" : "NO"));
+       PMD_DRV_LOG(INFO, sc, "%12s : %s", "MF", (IS_MF(sc) ? "YES" : "NO"));
+       PMD_DRV_LOG(INFO, sc, "========================================");
+}
 
-       if (IS_PF(sc)) {
-               PMD_DRV_LOG(INFO, sc, "%12s : ", "Queues");
-               switch (sc->sp->rss_rdata.rss_mode) {
-               case ETH_RSS_MODE_DISABLED:
-                       PMD_DRV_LOG(INFO, sc, "%19s", "None");
-                       break;
-               case ETH_RSS_MODE_REGULAR:
-                       PMD_DRV_LOG(INFO, sc,
-                                   "%18s : %d", "RSS", sc->num_queues);
-                       break;
-               default:
-                       PMD_DRV_LOG(INFO, sc, "%22s", "Unknown");
-                       break;
-               }
-       }
-
-       /* RTE and Driver versions */
-       PMD_DRV_LOG(INFO, sc, "%12s : %s", "DPDK",
-                       rte_version());
-       PMD_DRV_LOG(INFO, sc, "%12s : %s", "Driver",
-                       bnx2x_pmd_version());
+/* Prints useful device info. */
+void bnx2x_print_device_info(struct bnx2x_softc *sc)
+{
+       __rte_unused uint32_t ext_phy_type;
+       uint32_t offset, reg_val;
 
-       /* Firmware versions and device features. */
-       PMD_DRV_LOG(INFO, sc, "%12s : %d.%d.%d",
-                    "Firmware",
-                    BNX2X_5710_FW_MAJOR_VERSION,
-                    BNX2X_5710_FW_MINOR_VERSION,
-                    BNX2X_5710_FW_REVISION_VERSION);
-       PMD_DRV_LOG(INFO, sc, "%12s : %s",
-                    "Bootcode", sc->devinfo.bc_ver_str);
+       PMD_INIT_FUNC_TRACE(sc);
+       offset = offsetof(struct shmem_region,
+                         dev_info.port_hw_config[0].external_phy_config);
+       reg_val = REG_RD(sc, sc->devinfo.shmem_base + offset);
+       if (sc->link_vars.phy_flags & PHY_XGXS_FLAG)
+               ext_phy_type = ELINK_XGXS_EXT_PHY_TYPE(reg_val);
+       else
+               ext_phy_type = ELINK_SERDES_EXT_PHY_TYPE(reg_val);
 
-       PMD_DRV_LOG(INFO, sc, "\n\n===================================\n");
+       /* Device features. */
        PMD_DRV_LOG(INFO, sc, "%12s : %u", "Bnx2x Func", sc->pcie_func);
        PMD_DRV_LOG(INFO, sc,
                    "%12s : %s", "Bnx2x Flags", get_bnx2x_flags(sc->flags));
        PMD_DRV_LOG(INFO, sc, "%12s : %s", "DMAE Is",
                     (sc->dmae_ready ? "Ready" : "Not Ready"));
-       PMD_DRV_LOG(INFO, sc, "%12s : %s", "OVLAN", (OVLAN(sc) ? "YES" : "NO"));
-       PMD_DRV_LOG(INFO, sc, "%12s : %s", "MF", (IS_MF(sc) ? "YES" : "NO"));
        PMD_DRV_LOG(INFO, sc, "%12s : %u", "MTU", sc->mtu);
        PMD_DRV_LOG(INFO, sc,
                    "%12s : %s", "PHY Type", get_ext_phy_type(ext_phy_type));
@@ -11863,9 +11849,30 @@ void bnx2x_print_adapter_info(struct bnx2x_softc *sc)
        if (sc->recovery_state)
                PMD_DRV_LOG(INFO, sc, "%12s : %s", "Recovery",
                             get_recovery_state(sc->recovery_state));
+       /* Queue info. */
+       if (IS_PF(sc)) {
+               switch (sc->sp->rss_rdata.rss_mode) {
+               case ETH_RSS_MODE_DISABLED:
+                       PMD_DRV_LOG(INFO, sc, "%12s : %s", "Queues", "RSS mode - None");
+                       break;
+               case ETH_RSS_MODE_REGULAR:
+                       PMD_DRV_LOG(INFO, sc, "%12s : %s,", "Queues", "RSS mode - Regular");
+                       PMD_DRV_LOG(INFO, sc, "%16d", sc->num_queues);
+                       break;
+               default:
+                       PMD_DRV_LOG(INFO, sc, "%12s : %s", "Queues", "RSS mode - Unknown");
+                       break;
+               }
+       }
        PMD_DRV_LOG(INFO, sc, "%12s : CQ = %lx,  EQ = %lx", "SPQ Left",
                     sc->cq_spq_left, sc->eq_spq_left);
+
        PMD_DRV_LOG(INFO, sc,
                    "%12s : %x", "Switch", sc->link_params.switch_cfg);
-       PMD_DRV_LOG(INFO, sc, "\n\n===================================\n");
+       PMD_DRV_LOG(INFO, sc, "pcie_bus=%d, pcie_device=%d",
+                       sc->pcie_bus, sc->pcie_device);
+       PMD_DRV_LOG(INFO, sc, "bar0.addr=%p, bar1.addr=%p",
+                       sc->bar[BAR0].base_addr, sc->bar[BAR1].base_addr);
+       PMD_DRV_LOG(INFO, sc, "port=%d, path=%d, vnic=%d, func=%d",
+                       PORT_ID(sc), PATH_ID(sc), VNIC_ID(sc), FUNC_ID(sc));
 }
index 7478072..32a1229 100644 (file)
@@ -1937,6 +1937,7 @@ void bnx2x_dump_tx_chain(struct bnx2x_fastpath * fp, int bd_prod, int count);
 int bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf *m0);
 uint8_t bnx2x_txeof(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp);
 void bnx2x_print_adapter_info(struct bnx2x_softc *sc);
+void bnx2x_print_device_info(struct bnx2x_softc *sc);
 int bnx2x_intr_legacy(struct bnx2x_softc *sc, int scan_fp);
 void bnx2x_link_status_update(struct bnx2x_softc *sc);
 int bnx2x_complete_sp(struct bnx2x_softc *sc);
@@ -1984,7 +1985,7 @@ bnx2x_set_rx_mode(struct bnx2x_softc *sc)
                        bnx2x_vf_set_rx_mode(sc);
                }
        } else {
-               PMD_DRV_LOG(NOTICE, sc, "Card is not ready to change mode");
+               PMD_DRV_LOG(INFO, sc, "Card is not ready to change mode");
        }
 }
 
index 0057843..cc7816d 100644 (file)
@@ -245,8 +245,7 @@ bnx2x_dev_start(struct rte_eth_dev *dev)
                return -3;
        }
 
-       /* Print important adapter info for the user. */
-       bnx2x_print_adapter_info(sc);
+       bnx2x_print_device_info(sc);
 
        return ret;
 }
@@ -574,6 +573,7 @@ bnx2x_common_dev_init(struct rte_eth_dev *eth_dev, int is_vf)
        struct rte_pci_device *pci_dev;
        struct rte_pci_addr pci_addr;
        struct bnx2x_softc *sc;
+       static bool adapter_info = true;
 
        /* Extract key data structures */
        sc = eth_dev->data->dev_private;
@@ -632,8 +632,15 @@ bnx2x_common_dev_init(struct rte_eth_dev *eth_dev, int is_vf)
                return ret;
        }
 
+       /* Print important adapter info for the user. */
+       if (adapter_info) {
+               bnx2x_print_adapter_info(sc);
+               adapter_info = false;
+       }
+
        /* schedule periodic poll for slowpath link events */
        if (IS_PF(sc)) {
+               PMD_DRV_LOG(DEBUG, sc, "Scheduling periodic poll for slowpath link events");
                ret = rte_eal_alarm_set(BNX2X_SP_TIMER_PERIOD,
                                        bnx2x_periodic_start, (void *)eth_dev);
                if (ret) {
@@ -645,15 +652,6 @@ bnx2x_common_dev_init(struct rte_eth_dev *eth_dev, int is_vf)
 
        eth_dev->data->mac_addrs = (struct ether_addr *)sc->link_params.mac_addr;
 
-       PMD_DRV_LOG(INFO, sc, "pcie_bus=%d, pcie_device=%d",
-                       sc->pcie_bus, sc->pcie_device);
-       PMD_DRV_LOG(INFO, sc, "bar0.addr=%p, bar1.addr=%p",
-                       sc->bar[BAR0].base_addr, sc->bar[BAR1].base_addr);
-       PMD_DRV_LOG(INFO, sc, "port=%d, path=%d, vnic=%d, func=%d",
-                       PORT_ID(sc), PATH_ID(sc), VNIC_ID(sc), FUNC_ID(sc));
-       PMD_DRV_LOG(INFO, sc, "portID=%d vendorID=0x%x deviceID=0x%x",
-                       eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id);
-
        if (IS_VF(sc)) {
                rte_spinlock_init(&sc->vf2pf_lock);
 
index 807ba17..45958db 100644 (file)
@@ -47,6 +47,7 @@
 #define FALSE               0
 #define TRUE                1
 
+typedef int bool;
 #define false               0
 #define true                1
 #define min(a,b)        RTE_MIN(a,b)
index ab730ab..6d2bb81 100644 (file)
@@ -530,17 +530,15 @@ static void __ecore_vlan_mac_h_read_unlock(struct bnx2x_softc *sc,
 #endif
        } else {
                o->head_reader--;
-               PMD_DRV_LOG(INFO, sc,
-                           "vlan_mac_lock - decreased readers to %d",
-                           o->head_reader);
+               ECORE_MSG(sc, "vlan_mac_lock - decreased readers to %d",
+                         o->head_reader);
        }
 
        /* It's possible a new pending execution was added, and that this reader
         * was last - if so we need to execute the command.
         */
        if (!o->head_reader && o->head_exe_request) {
-               PMD_DRV_LOG(INFO, sc,
-                           "vlan_mac_lock - reader release encountered a pending request");
+               ECORE_MSG(sc, "vlan_mac_lock - reader release encountered a pending request");
 
                /* Writer release will do the trick */
                __ecore_vlan_mac_h_write_unlock(sc, o);
index 21bcd50..ac084c4 100644 (file)
@@ -19,7 +19,10 @@ int
 check_for_bonded_ethdev(const struct rte_eth_dev *eth_dev)
 {
        /* Check valid pointer */
-       if (eth_dev->device->driver->name == NULL)
+       if (eth_dev == NULL ||
+               eth_dev->device == NULL ||
+               eth_dev->device->driver == NULL ||
+               eth_dev->device->driver->name == NULL)
                return -1;
 
        /* return 0 if driver name matches */
@@ -126,6 +129,12 @@ deactivate_slave(struct rte_eth_dev *eth_dev, uint16_t port_id)
        RTE_ASSERT(active_count < RTE_DIM(internals->active_slaves));
        internals->active_slave_count = active_count;
 
+       /* Reset active_slave when it reaches the maximum
+        * number of slaves in the active list.
+        */
+       if (internals->active_slave >= active_count)
+               internals->active_slave = 0;
+
        if (eth_dev->data->dev_started) {
                if (internals->mode == BONDING_MODE_8023AD) {
                        bond_mode_8023ad_start(eth_dev);
index 44deaf1..7ed69b3 100644 (file)
@@ -84,7 +84,7 @@ bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                        active_slave = 0;
        }
 
-       if (++internals->active_slave == slave_count)
+       if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;
        return num_rx_total;
 }
@@ -288,7 +288,7 @@ bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                        active_slave = 0;
        }
 
-       if (++internals->active_slave == slave_count)
+       if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;
 
        return num_rx_total;
@@ -474,7 +474,7 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                        idx = 0;
        }
 
-       if (++internals->active_slave == slave_count)
+       if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;
 
        return num_rx_total;
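
The '==' to '>=' change above makes the round-robin index self-correcting when the active slave count shrinks between polls. A small sketch of the wrap-around (helper name is illustrative):

    #include <stdint.h>

    /* Illustrative: advance a round-robin index, wrapping when it reaches or
     * exceeds the (possibly reduced) slave count. */
    static inline uint16_t
    next_active_slave(uint16_t active_slave, uint16_t slave_count)
    {
            if (slave_count == 0)
                    return 0;
            return (active_slave + 1 >= slave_count) ? 0 : active_slave + 1;
    }
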
index 4deaff8..7b87bdf 100644 (file)
@@ -732,6 +732,10 @@ cxgbe_rtef_parse_items(struct rte_flow *flow,
                                                "parse items cannot be repeated (except void)");
                        repeat[i->type] = 1;
 
+                       /* No spec found for this pattern item. Skip it */
+                       if (!i->spec)
+                               break;
+
                        /* validate the item */
                        ret = cxgbe_validate_item(i, e);
                        if (ret)
index ec080e5..5fa6cdd 100644 (file)
@@ -122,6 +122,7 @@ int setup_sge_ctrl_txq(struct adapter *adapter)
        int err = 0, i = 0;
 
        for_each_port(adapter, i) {
+               struct port_info *pi = adap2pinfo(adapter, i);
                char name[RTE_ETH_NAME_MAX_LEN];
                struct sge_ctrl_txq *q = &s->ctrlq[i];
 
@@ -135,16 +136,19 @@ int setup_sge_ctrl_txq(struct adapter *adapter)
                                err);
                        goto out;
                }
-               snprintf(name, sizeof(name), "cxgbe_ctrl_pool_%d", i);
+               snprintf(name, sizeof(name), "%s_ctrl_pool_%d",
+                        pi->eth_dev->device->driver->name,
+                        pi->eth_dev->data->port_id);
                q->mb_pool = rte_pktmbuf_pool_create(name, s->ctrlq[i].q.size,
                                                     RTE_CACHE_LINE_SIZE,
                                                     RTE_MBUF_PRIV_ALIGN,
                                                     RTE_MBUF_DEFAULT_BUF_SIZE,
                                                     SOCKET_ID_ANY);
                if (!q->mb_pool) {
-                       dev_err(adapter, "Can't create ctrl pool for port: %d",
-                               i);
-                       err = -ENOMEM;
+                       err = -rte_errno;
+                       dev_err(adapter,
+                               "Can't create ctrl pool for port %d. Err: %d\n",
+                               pi->eth_dev->data->port_id, err);
                        goto out;
                }
        }
@@ -411,7 +415,7 @@ static int tid_init(struct tid_info *t)
                return -ENOMEM;
 
        t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids];
-       t->ftid_tab = (struct filter_entry *)&t->tid_tab[t->natids];
+       t->ftid_tab = (struct filter_entry *)&t->atid_tab[t->natids];
        t->ftid_bmap_array = t4_os_alloc(ftid_bmap_size);
        if (!t->ftid_bmap_array) {
                tid_free(t);
index d0572b3..5448a2c 100644 (file)
@@ -1223,8 +1223,12 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev)
        PMD_INIT_FUNC_TRACE();
 
        /* For secondary processes, the primary has done all the work */
-       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+               eth_dev->dev_ops = &dpaa_devops;
+               /* Plugging of UCODE burst API is not supported in secondary processes */
+               eth_dev->rx_pkt_burst = dpaa_eth_queue_rx;
                return 0;
+       }
 
        dpaa_device = DEV_TO_DPAA_DEVICE(eth_dev->device);
        dev_id = dpaa_device->id.dev_id;
index fa71807..39f85ae 100644 (file)
@@ -311,8 +311,7 @@ dpaa2_free_rx_tx_queues(struct rte_eth_dev *dev)
                /* cleanup tx queue cscn */
                for (i = 0; i < priv->nb_tx_queues; i++) {
                        dpaa2_q = (struct dpaa2_queue *)priv->tx_vq[i];
-                       if (!dpaa2_q->cscn)
-                               rte_free(dpaa2_q->cscn);
+                       rte_free(dpaa2_q->cscn);
                }
                /*free memory for all queues (RX+TX) */
                rte_free(priv->rx_vq[0]);
@@ -1919,8 +1918,15 @@ dpaa2_dev_init(struct rte_eth_dev *eth_dev)
        PMD_INIT_FUNC_TRACE();
 
        /* For secondary processes, the primary has done all the work */
-       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+               /* For secondary processes, only the burst and ops APIs
+                * need to be plugged.
+                */
+               eth_dev->dev_ops = &dpaa2_ethdev_ops;
+               eth_dev->rx_pkt_burst = dpaa2_dev_prefetch_rx;
+               eth_dev->tx_pkt_burst = dpaa2_dev_tx;
                return 0;
+       }
 
        dpaa2_dev = container_of(dev, struct rte_dpaa2_device, device);
 
index a9cd765..005e1ea 100644 (file)
@@ -50,6 +50,8 @@
 #define E1000_RXDCTL_GRAN      0x01000000 /* RXDCTL Granularity */
 
 #define E1000_TX_OFFLOAD_MASK ( \
+               PKT_TX_IPV6 |           \
+               PKT_TX_IPV4 |           \
                PKT_TX_IP_CKSUM |       \
                PKT_TX_L4_MASK |        \
                PKT_TX_VLAN_PKT)
index d9d29d2..87c9aed 100644 (file)
@@ -68,6 +68,9 @@
 #define E1000_VET_VET_EXT            0xFFFF0000
 #define E1000_VET_VET_EXT_SHIFT      16
 
+/* MSI-X other interrupt vector */
+#define IGB_MSIX_OTHER_INTR_VEC      0
+
 static int  eth_igb_configure(struct rte_eth_dev *dev);
 static int  eth_igb_start(struct rte_eth_dev *dev);
 static void eth_igb_stop(struct rte_eth_dev *dev);
@@ -138,7 +141,7 @@ static void igb_vlan_hw_extend_disable(struct rte_eth_dev *dev);
 static int eth_igb_led_on(struct rte_eth_dev *dev);
 static int eth_igb_led_off(struct rte_eth_dev *dev);
 
-static void igb_intr_disable(struct e1000_hw *hw);
+static void igb_intr_disable(struct rte_eth_dev *dev);
 static int  igb_get_rx_buffer_size(struct e1000_hw *hw);
 static int eth_igb_rar_set(struct rte_eth_dev *dev,
                           struct ether_addr *mac_addr,
@@ -538,14 +541,31 @@ igb_intr_enable(struct rte_eth_dev *dev)
                E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
        struct e1000_hw *hw =
                E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
+
+       if (rte_intr_allow_others(intr_handle) &&
+               dev->data->dev_conf.intr_conf.lsc != 0) {
+               E1000_WRITE_REG(hw, E1000_EIMS, 1 << IGB_MSIX_OTHER_INTR_VEC);
+       }
 
        E1000_WRITE_REG(hw, E1000_IMS, intr->mask);
        E1000_WRITE_FLUSH(hw);
 }
 
 static void
-igb_intr_disable(struct e1000_hw *hw)
+igb_intr_disable(struct rte_eth_dev *dev)
 {
+       struct e1000_hw *hw =
+               E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
+
+       if (rte_intr_allow_others(intr_handle) &&
+               dev->data->dev_conf.intr_conf.lsc != 0) {
+               E1000_WRITE_REG(hw, E1000_EIMC, 1 << IGB_MSIX_OTHER_INTR_VEC);
+       }
+
        E1000_WRITE_REG(hw, E1000_IMC, ~0);
        E1000_WRITE_FLUSH(hw);
 }
@@ -1486,7 +1506,7 @@ eth_igb_stop(struct rte_eth_dev *dev)
 
        eth_igb_rxtx_control(dev, false);
 
-       igb_intr_disable(hw);
+       igb_intr_disable(dev);
 
        /* disable intr eventfd mapping */
        rte_intr_disable(intr_handle);
@@ -2768,12 +2788,15 @@ static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev)
        uint32_t mask, regval;
        struct e1000_hw *hw =
                E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
+       int misc_shift = rte_intr_allow_others(intr_handle) ? 1 : 0;
        struct rte_eth_dev_info dev_info;
 
        memset(&dev_info, 0, sizeof(dev_info));
        eth_igb_infos_get(dev, &dev_info);
 
-       mask = 0xFFFFFFFF >> (32 - dev_info.max_rx_queues);
+       mask = (0xFFFFFFFF >> (32 - dev_info.max_rx_queues)) << misc_shift;
        regval = E1000_READ_REG(hw, E1000_EIMS);
        E1000_WRITE_REG(hw, E1000_EIMS, regval | mask);
 
@@ -2800,7 +2823,7 @@ eth_igb_interrupt_get_status(struct rte_eth_dev *dev)
        struct e1000_interrupt *intr =
                E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
 
-       igb_intr_disable(hw);
+       igb_intr_disable(dev);
 
        /* read-on-clear nic registers here */
        icr = E1000_READ_REG(hw, E1000_ICR);
@@ -5583,13 +5606,17 @@ eth_igb_configure_msix_intr(struct rte_eth_dev *dev)
                                        E1000_GPIE_NSICR);
                intr_mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) <<
                        misc_shift;
+
+               if (dev->data->dev_conf.intr_conf.lsc != 0)
+                       intr_mask |= (1 << IGB_MSIX_OTHER_INTR_VEC);
+
                regval = E1000_READ_REG(hw, E1000_EIAC);
                E1000_WRITE_REG(hw, E1000_EIAC, regval | intr_mask);
 
                /* enable msix_other interrupt */
                regval = E1000_READ_REG(hw, E1000_EIMS);
                E1000_WRITE_REG(hw, E1000_EIMS, regval | intr_mask);
-               tmpval = (dev->data->nb_rx_queues | E1000_IVAR_VALID) << 8;
+               tmpval = (IGB_MSIX_OTHER_INTR_VEC | E1000_IVAR_VALID) << 8;
                E1000_WRITE_REG(hw, E1000_IVAR_MISC, tmpval);
        }
 
@@ -5598,6 +5625,10 @@ eth_igb_configure_msix_intr(struct rte_eth_dev *dev)
         */
        intr_mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) <<
                misc_shift;
+
+       if (dev->data->dev_conf.intr_conf.lsc != 0)
+               intr_mask |= (1 << IGB_MSIX_OTHER_INTR_VEC);
+
        regval = E1000_READ_REG(hw, E1000_EIAM);
        E1000_WRITE_REG(hw, E1000_EIAM, regval | intr_mask);
 
index a07bd2b..6f42433 100644 (file)
@@ -189,6 +189,8 @@ static const struct ena_stats ena_stats_ena_com_strings[] = {
 
 #define        ENA_TX_OFFLOAD_MASK     (\
        PKT_TX_L4_MASK |         \
+       PKT_TX_IPV6 |            \
+       PKT_TX_IPV4 |            \
        PKT_TX_IP_CKSUM |        \
        PKT_TX_TCP_SEG)
 
@@ -240,10 +242,12 @@ static void ena_tx_queue_release_bufs(struct ena_ring *ring);
 static int ena_link_update(struct rte_eth_dev *dev,
                           int wait_to_complete);
 static int ena_create_io_queue(struct ena_ring *ring);
-static void ena_free_io_queues_all(struct ena_adapter *adapter);
-static int ena_queue_restart(struct ena_ring *ring);
-static int ena_queue_restart_all(struct rte_eth_dev *dev,
-                                enum ena_ring_type ring_type);
+static void ena_queue_stop(struct ena_ring *ring);
+static void ena_queue_stop_all(struct rte_eth_dev *dev,
+                             enum ena_ring_type ring_type);
+static int ena_queue_start(struct ena_ring *ring);
+static int ena_queue_start_all(struct rte_eth_dev *dev,
+                              enum ena_ring_type ring_type);
 static void ena_stats_restart(struct rte_eth_dev *dev);
 static void ena_infos_get(struct rte_eth_dev *dev,
                          struct rte_eth_dev_info *dev_info);
@@ -256,6 +260,8 @@ static int ena_rss_reta_query(struct rte_eth_dev *dev,
 static int ena_get_sset_count(struct rte_eth_dev *dev, int sset);
 static void ena_interrupt_handler_rte(void *cb_arg);
 static void ena_timer_wd_callback(struct rte_timer *timer, void *arg);
+static void ena_destroy_device(struct rte_eth_dev *eth_dev);
+static int eth_ena_dev_init(struct rte_eth_dev *eth_dev);
 
 static const struct eth_dev_ops ena_dev_ops = {
        .dev_configure        = ena_dev_configure,
@@ -539,64 +545,14 @@ static void ena_close(struct rte_eth_dev *dev)
 static int
 ena_dev_reset(struct rte_eth_dev *dev)
 {
-       struct rte_mempool *mb_pool_rx[ENA_MAX_NUM_QUEUES];
-       struct rte_eth_dev *eth_dev;
-       struct rte_pci_device *pci_dev;
-       struct rte_intr_handle *intr_handle;
-       struct ena_com_dev *ena_dev;
-       struct ena_com_dev_get_features_ctx get_feat_ctx;
-       struct ena_adapter *adapter;
-       int nb_queues;
-       int rc, i;
-       bool wd_state;
-
-       adapter = (struct ena_adapter *)(dev->data->dev_private);
-       ena_dev = &adapter->ena_dev;
-       eth_dev = adapter->rte_dev;
-       pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
-       intr_handle = &pci_dev->intr_handle;
-       nb_queues = eth_dev->data->nb_rx_queues;
-
-       ena_com_set_admin_running_state(ena_dev, false);
+       int rc = 0;
 
-       rc = ena_com_dev_reset(ena_dev, adapter->reset_reason);
+       ena_destroy_device(dev);
+       rc = eth_ena_dev_init(dev);
        if (rc)
-               RTE_LOG(ERR, PMD, "Device reset failed\n");
-
-       for (i = 0; i < nb_queues; i++)
-               mb_pool_rx[i] = adapter->rx_ring[i].mb_pool;
-
-       ena_rx_queue_release_all(eth_dev);
-       ena_tx_queue_release_all(eth_dev);
-
-       rte_intr_disable(intr_handle);
-
-       ena_com_abort_admin_commands(ena_dev);
-       ena_com_wait_for_abort_completion(ena_dev);
-       ena_com_admin_destroy(ena_dev);
-       ena_com_mmio_reg_read_request_destroy(ena_dev);
-
-       rc = ena_device_init(ena_dev, &get_feat_ctx, &wd_state);
-       if (rc) {
                PMD_INIT_LOG(CRIT, "Cannot initialize device\n");
-               return rc;
-       }
-       adapter->wd_state = wd_state;
-
-       rte_intr_enable(intr_handle);
-       ena_com_set_admin_polling_mode(ena_dev, false);
-       ena_com_admin_aenq_enable(ena_dev);
-
-       for (i = 0; i < nb_queues; ++i)
-               ena_rx_queue_setup(eth_dev, i, adapter->rx_ring_size, 0, NULL,
-                       mb_pool_rx[i]);
-
-       for (i = 0; i < nb_queues; ++i)
-               ena_tx_queue_setup(eth_dev, i, adapter->tx_ring_size, 0, NULL);
 
-       adapter->trigger_reset = false;
-
-       return 0;
+       return rc;
 }
 
 static int ena_rss_reta_update(struct rte_eth_dev *dev,
@@ -766,11 +722,6 @@ static void ena_rx_queue_release(void *queue)
 {
        struct ena_ring *ring = (struct ena_ring *)queue;
 
-       ena_assert_msg(ring->configured,
-                      "API violation - releasing not configured queue");
-       ena_assert_msg(ring->adapter->state != ENA_ADAPTER_STATE_RUNNING,
-                      "API violation");
-
        /* Free ring resources */
        if (ring->rx_buffer_info)
                rte_free(ring->rx_buffer_info);
@@ -794,14 +745,6 @@ static void ena_tx_queue_release(void *queue)
 {
        struct ena_ring *ring = (struct ena_ring *)queue;
 
-       ena_assert_msg(ring->configured,
-                      "API violation. Releasing not configured queue");
-       ena_assert_msg(ring->adapter->state != ENA_ADAPTER_STATE_RUNNING,
-                      "API violation");
-
-       /* Free all bufs */
-       ena_tx_queue_release_bufs(ring);
-
        /* Free ring resources */
        if (ring->tx_buffer_info)
                rte_free(ring->tx_buffer_info);
@@ -820,17 +763,13 @@ static void ena_tx_queue_release(void *queue)
 
 static void ena_rx_queue_release_bufs(struct ena_ring *ring)
 {
-       unsigned int ring_mask = ring->ring_size - 1;
-
-       while (ring->next_to_clean != ring->next_to_use) {
-               struct rte_mbuf *m =
-                       ring->rx_buffer_info[ring->next_to_clean & ring_mask];
-
-               if (m)
-                       rte_mbuf_raw_free(m);
+       unsigned int i;
 
-               ring->next_to_clean++;
-       }
+       for (i = 0; i < ring->ring_size; ++i)
+               if (ring->rx_buffer_info[i]) {
+                       rte_mbuf_raw_free(ring->rx_buffer_info[i]);
+                       ring->rx_buffer_info[i] = NULL;
+               }
 }
 
 static void ena_tx_queue_release_bufs(struct ena_ring *ring)
@@ -842,8 +781,6 @@ static void ena_tx_queue_release_bufs(struct ena_ring *ring)
 
                if (tx_buf->mbuf)
                        rte_pktmbuf_free(tx_buf->mbuf);
-
-               ring->next_to_clean++;
        }
 }
 
@@ -862,8 +799,8 @@ static int ena_link_update(struct rte_eth_dev *dev,
        return 0;
 }
 
-static int ena_queue_restart_all(struct rte_eth_dev *dev,
-                                enum ena_ring_type ring_type)
+static int ena_queue_start_all(struct rte_eth_dev *dev,
+                              enum ena_ring_type ring_type)
 {
        struct ena_adapter *adapter =
                (struct ena_adapter *)(dev->data->dev_private);
@@ -891,18 +828,25 @@ static int ena_queue_restart_all(struct rte_eth_dev *dev,
                                        "Inconsistent state of tx queues\n");
                        }
 
-                       rc = ena_queue_restart(&queues[i]);
+                       rc = ena_queue_start(&queues[i]);
 
                        if (rc) {
                                PMD_INIT_LOG(ERR,
-                                            "failed to restart queue %d type(%d)",
+                                            "failed to start queue %d type(%d)",
                                             i, ring_type);
-                               return rc;
+                               goto err;
                        }
                }
        }
 
        return 0;
+
+err:
+       while (i--)
+               if (queues[i].configured)
+                       ena_queue_stop(&queues[i]);
+
+       return rc;
 }
 
 static uint32_t ena_get_mtu_conf(struct ena_adapter *adapter)
@@ -1051,19 +995,19 @@ static int ena_start(struct rte_eth_dev *dev)
        if (rc)
                return rc;
 
-       rc = ena_queue_restart_all(dev, ENA_RING_TYPE_RX);
+       rc = ena_queue_start_all(dev, ENA_RING_TYPE_RX);
        if (rc)
                return rc;
 
-       rc = ena_queue_restart_all(dev, ENA_RING_TYPE_TX);
+       rc = ena_queue_start_all(dev, ENA_RING_TYPE_TX);
        if (rc)
-               return rc;
+               goto err_start_tx;
 
        if (adapter->rte_dev->data->dev_conf.rxmode.mq_mode &
            ETH_MQ_RX_RSS_FLAG && adapter->rte_dev->data->nb_rx_queues > 0) {
                rc = ena_rss_init_default(adapter);
                if (rc)
-                       return rc;
+                       goto err_rss_init;
        }
 
        ena_stats_restart(dev);
@@ -1078,15 +1022,30 @@ static int ena_start(struct rte_eth_dev *dev)
        adapter->state = ENA_ADAPTER_STATE_RUNNING;
 
        return 0;
+
+err_rss_init:
+       ena_queue_stop_all(dev, ENA_RING_TYPE_TX);
+err_start_tx:
+       ena_queue_stop_all(dev, ENA_RING_TYPE_RX);
+       return rc;
 }
 
 static void ena_stop(struct rte_eth_dev *dev)
 {
        struct ena_adapter *adapter =
                (struct ena_adapter *)(dev->data->dev_private);
+       struct ena_com_dev *ena_dev = &adapter->ena_dev;
+       int rc;
 
        rte_timer_stop_sync(&adapter->timer_wd);
-       ena_free_io_queues_all(adapter);
+       ena_queue_stop_all(dev, ENA_RING_TYPE_TX);
+       ena_queue_stop_all(dev, ENA_RING_TYPE_RX);
+
+       if (adapter->trigger_reset) {
+               rc = ena_com_dev_reset(ena_dev, adapter->reset_reason);
+               if (rc)
+                       RTE_LOG(ERR, PMD, "Device reset failed rc=%d\n", rc);
+       }
 
        adapter->state = ENA_ADAPTER_STATE_STOPPED;
 }
@@ -1149,36 +1108,46 @@ static int ena_create_io_queue(struct ena_ring *ring)
        return 0;
 }
 
-static void ena_free_io_queues_all(struct ena_adapter *adapter)
+static void ena_queue_stop(struct ena_ring *ring)
 {
-       struct rte_eth_dev *eth_dev = adapter->rte_dev;
-       struct ena_com_dev *ena_dev = &adapter->ena_dev;
-       int i;
-       uint16_t ena_qid;
-       uint16_t nb_rxq = eth_dev->data->nb_rx_queues;
-       uint16_t nb_txq = eth_dev->data->nb_tx_queues;
+       struct ena_com_dev *ena_dev = &ring->adapter->ena_dev;
 
-       for (i = 0; i < nb_txq; ++i) {
-               ena_qid = ENA_IO_TXQ_IDX(i);
-               ena_com_destroy_io_queue(ena_dev, ena_qid);
-
-               ena_tx_queue_release_bufs(&adapter->tx_ring[i]);
+       if (ring->type == ENA_RING_TYPE_RX) {
+               ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(ring->id));
+               ena_rx_queue_release_bufs(ring);
+       } else {
+               ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(ring->id));
+               ena_tx_queue_release_bufs(ring);
        }
+}
 
-       for (i = 0; i < nb_rxq; ++i) {
-               ena_qid = ENA_IO_RXQ_IDX(i);
-               ena_com_destroy_io_queue(ena_dev, ena_qid);
+static void ena_queue_stop_all(struct rte_eth_dev *dev,
+                             enum ena_ring_type ring_type)
+{
+       struct ena_adapter *adapter =
+               (struct ena_adapter *)(dev->data->dev_private);
+       struct ena_ring *queues = NULL;
+       uint16_t nb_queues, i;
 
-               ena_rx_queue_release_bufs(&adapter->rx_ring[i]);
+       if (ring_type == ENA_RING_TYPE_RX) {
+               queues = adapter->rx_ring;
+               nb_queues = dev->data->nb_rx_queues;
+       } else {
+               queues = adapter->tx_ring;
+               nb_queues = dev->data->nb_tx_queues;
        }
+
+       for (i = 0; i < nb_queues; ++i)
+               if (queues[i].configured)
+                       ena_queue_stop(&queues[i]);
 }
 
-static int ena_queue_restart(struct ena_ring *ring)
+static int ena_queue_start(struct ena_ring *ring)
 {
        int rc, bufs_num;
 
        ena_assert_msg(ring->configured == 1,
-                      "Trying to restart unconfigured queue\n");
+                      "Trying to start unconfigured queue\n");
 
        rc = ena_create_io_queue(ring);
        if (rc) {
@@ -1195,6 +1164,8 @@ static int ena_queue_restart(struct ena_ring *ring)
        bufs_num = ring->ring_size - 1;
        rc = ena_populate_rx_queue(ring, bufs_num);
        if (rc != bufs_num) {
+               ena_com_destroy_io_queue(&ring->adapter->ena_dev,
+                                        ENA_IO_RXQ_IDX(ring->id));
                PMD_INIT_LOG(ERR, "Failed to populate rx ring !");
                return ENA_COM_FAULT;
        }
@@ -1346,7 +1317,7 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
        }
 
        for (i = 0; i < nb_desc; i++)
-               rxq->empty_tx_reqs[i] = i;
+               rxq->empty_rx_reqs[i] = i;
 
        /* Store pointer to this queue in upper layer */
        rxq->configured = 1;
@@ -1603,19 +1574,20 @@ static int eth_ena_dev_init(struct rte_eth_dev *eth_dev)
        static int adapters_found;
        bool wd_state;
 
-       memset(adapter, 0, sizeof(struct ena_adapter));
-       ena_dev = &adapter->ena_dev;
-
        eth_dev->dev_ops = &ena_dev_ops;
        eth_dev->rx_pkt_burst = &eth_ena_recv_pkts;
        eth_dev->tx_pkt_burst = &eth_ena_xmit_pkts;
        eth_dev->tx_pkt_prepare = &eth_ena_prep_pkts;
-       adapter->rte_eth_dev_data = eth_dev->data;
-       adapter->rte_dev = eth_dev;
 
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;
 
+       memset(adapter, 0, sizeof(struct ena_adapter));
+       ena_dev = &adapter->ena_dev;
+
+       adapter->rte_eth_dev_data = eth_dev->data;
+       adapter->rte_dev = eth_dev;
+
        pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
        adapter->pdev = pci_dev;
 
@@ -1726,24 +1698,43 @@ err:
        return rc;
 }
 
-static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev)
+static void ena_destroy_device(struct rte_eth_dev *eth_dev)
 {
        struct ena_adapter *adapter =
                (struct ena_adapter *)(eth_dev->data->dev_private);
+       struct ena_com_dev *ena_dev = &adapter->ena_dev;
 
-       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
-               return 0;
+       if (adapter->state == ENA_ADAPTER_STATE_FREE)
+               return;
+
+       ena_com_set_admin_running_state(ena_dev, false);
 
        if (adapter->state != ENA_ADAPTER_STATE_CLOSED)
                ena_close(eth_dev);
 
+       ena_com_delete_debug_area(ena_dev);
+       ena_com_delete_host_info(ena_dev);
+
+       ena_com_abort_admin_commands(ena_dev);
+       ena_com_wait_for_abort_completion(ena_dev);
+       ena_com_admin_destroy(ena_dev);
+       ena_com_mmio_reg_read_request_destroy(ena_dev);
+
+       adapter->state = ENA_ADAPTER_STATE_FREE;
+}
+
+static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev)
+{
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return 0;
+
+       ena_destroy_device(eth_dev);
+
        eth_dev->dev_ops = NULL;
        eth_dev->rx_pkt_burst = NULL;
        eth_dev->tx_pkt_burst = NULL;
        eth_dev->tx_pkt_prepare = NULL;
 
-       adapter->state = ENA_ADAPTER_STATE_FREE;
-
        return 0;
 }
 
@@ -1843,6 +1834,9 @@ static void ena_infos_get(struct rte_eth_dev *dev,
        dev_info->tx_offload_capa = tx_feat;
        dev_info->tx_queue_offload_capa = tx_feat;
 
+       dev_info->flow_type_rss_offloads = ETH_RSS_IP | ETH_RSS_TCP |
+                                          ETH_RSS_UDP;
+
        dev_info->min_rx_bufsize = ENA_MIN_FRAME_LEN;
        dev_info->max_rx_pktlen  = adapter->max_mtu;
        dev_info->max_mac_addrs = 1;
@@ -1907,6 +1901,8 @@ static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                                    &ena_rx_ctx);
                if (unlikely(rc)) {
                        RTE_LOG(ERR, PMD, "ena_com_rx_pkt error %d\n", rc);
+                       rx_ring->adapter->reset_reason =
+                               ENA_REGS_RESET_TOO_MANY_RX_DESCS;
                        rx_ring->adapter->trigger_reset = true;
                        return 0;
                }
@@ -1917,10 +1913,14 @@ static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                while (segments < ena_rx_ctx.descs) {
                        req_id = ena_rx_ctx.ena_bufs[segments].req_id;
                        rc = validate_rx_req_id(rx_ring, req_id);
-                       if (unlikely(rc))
+                       if (unlikely(rc)) {
+                               if (segments != 0)
+                                       rte_mbuf_raw_free(mbuf_head);
                                break;
+                       }
 
                        mbuf = rx_buff_info[req_id];
+                       rx_buff_info[req_id] = NULL;
                        mbuf->data_len = ena_rx_ctx.ena_bufs[segments].len;
                        mbuf->data_off = RTE_PKTMBUF_HEADROOM;
                        mbuf->refcnt = 1;
@@ -1942,6 +1942,8 @@ static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                        segments++;
                        next_to_clean++;
                }
+               if (unlikely(rc))
+                       break;
 
                /* fill mbuf attributes if any */
                ena_rx_mbuf_prepare(mbuf_head, &ena_rx_ctx);
@@ -1956,8 +1958,10 @@ static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 
        desc_in_use = desc_in_use - completed + 1;
        /* Burst refill to save doorbells, memory barriers, const interval */
-       if (ring_size - desc_in_use > ENA_RING_DESCS_RATIO(ring_size))
+       if (ring_size - desc_in_use > ENA_RING_DESCS_RATIO(ring_size)) {
+               ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
                ena_populate_rx_queue(rx_ring, ring_size - desc_in_use);
+       }
 
        return recv_idx;
 }
@@ -2004,14 +2008,14 @@ eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
                                (ol_flags & PKT_TX_L4_MASK) ==
                                PKT_TX_SCTP_CKSUM) {
-                       rte_errno = -ENOTSUP;
+                       rte_errno = ENOTSUP;
                        return i;
                }
 
 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
                ret = rte_validate_tx_offload(m);
                if (ret != 0) {
-                       rte_errno = ret;
+                       rte_errno = -ret;
                        return i;
                }
 #endif
@@ -2024,7 +2028,7 @@ eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                ret = rte_net_intel_cksum_flags_prepare(m,
                        ol_flags & ~PKT_TX_TCP_SEG);
                if (ret != 0) {
-                       rte_errno = ret;
+                       rte_errno = -ret;
                        return i;
                }
        }
@@ -2207,8 +2211,9 @@ static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
        if (total_tx_descs > 0) {
                /* acknowledge completion of sent packets */
-               ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs);
                tx_ring->next_to_clean = next_to_clean;
+               ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs);
+               ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
        }
 
        return sent_idx;
index 9d95201..9e9e548 100644 (file)
@@ -3,8 +3,6 @@
  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
  */
 
-#include <libgen.h>
-
 #include <rte_ethdev_driver.h>
 #include <rte_malloc.h>
 #include <rte_hash.h>
index c3869de..406f92a 100644 (file)
@@ -8,7 +8,6 @@
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <fcntl.h>
-#include <libgen.h>
 
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
index 85fb6c5..caf4d1b 100644 (file)
@@ -3003,6 +3003,7 @@ fm10k_params_init(struct rte_eth_dev *dev)
        hw->bus.payload = fm10k_bus_payload_256;
 
        info->rx_vec_allowed = true;
+       info->sm_down = false;
 }
 
 static int
index 4a5b46e..1d0f09d 100644 (file)
@@ -39,6 +39,8 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd)
 
 #define FM10K_TX_OFFLOAD_MASK (  \
                PKT_TX_VLAN_PKT |        \
+               PKT_TX_IPV6 |            \
+               PKT_TX_IPV4 |            \
                PKT_TX_IP_CKSUM |        \
                PKT_TX_L4_MASK |         \
                PKT_TX_TCP_SEG)
index 77562f2..7ba62cc 100644 (file)
@@ -670,6 +670,7 @@ struct i40e_hw {
        u8 revision_id;
        u8 port;
        bool adapter_stopped;
+       bool adapter_closed;
 
        /* capabilities for entire device and PCI func */
        struct i40e_hw_capabilities dev_caps;
index 7030eb1..dca61f0 100644 (file)
@@ -1273,7 +1273,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct i40e_vsi *vsi;
        int ret;
-       uint32_t len;
+       uint32_t len, val;
        uint8_t aq_fail = 0;
 
        PMD_INIT_FUNC_TRACE();
@@ -1316,6 +1316,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
        hw->bus.device = pci_dev->addr.devid;
        hw->bus.func = pci_dev->addr.function;
        hw->adapter_stopped = 0;
+       hw->adapter_closed = 0;
 
        /*
         * Switch Tag value should not be identical to either the First Tag
@@ -1324,6 +1325,15 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
         */
        hw->switch_tag = 0xffff;
 
+       val = I40E_READ_REG(hw, I40E_GL_FWSTS);
+       if (val & I40E_GL_FWSTS_FWS1B_MASK) {
+               PMD_INIT_LOG(ERR, "\nERROR: "
+                       "Firmware recovery mode detected. Limiting functionality.\n"
+                       "Refer to the Intel(R) Ethernet Adapters and Devices "
+                       "User Guide for details on firmware recovery mode.");
+               return -EIO;
+       }
+
        /* Check if need to support multi-driver */
        i40e_support_multi_driver(dev);
        /* Check if users want the latest supported vec path */
@@ -1483,9 +1493,6 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
                goto err_setup_pf_switch;
        }
 
-       /* reset all stats of the device, including pf and main vsi */
-       i40e_dev_stats_reset(dev);
-
        vsi = pf->main_vsi;
 
        /* Disable double vlan by default */
@@ -1580,6 +1587,9 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
        memset(&pf->rss_info, 0,
                sizeof(struct i40e_rte_flow_rss_conf));
 
+       /* reset all stats of the device, including pf and main vsi */
+       i40e_dev_stats_reset(dev);
+
        return 0;
 
 err_init_fdir_filter_list:
@@ -1704,7 +1714,7 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev)
        if (ret)
                PMD_INIT_LOG(WARNING, "failed to free switch domain: %d", ret);
 
-       if (hw->adapter_stopped == 0)
+       if (hw->adapter_closed == 0)
                i40e_dev_close(dev);
 
        dev->dev_ops = NULL;
@@ -2444,6 +2454,8 @@ i40e_dev_stop(struct rte_eth_dev *dev)
        pf->tm_conf.committed = false;
 
        hw->adapter_stopped = 1;
+
+       pf->adapter->rss_reta_updated = 0;
 }
 
 static void
@@ -2523,6 +2535,8 @@ i40e_dev_close(struct rte_eth_dev *dev)
        I40E_WRITE_REG(hw, I40E_PFGEN_CTRL,
                        (reg | I40E_PFGEN_CTRL_PFSWR_MASK));
        I40E_WRITE_FLUSH(hw);
+
+       hw->adapter_closed = 1;
 }
 
 /*
@@ -3160,20 +3174,20 @@ i40e_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct i40e_hw_port_stats *ns = &pf->stats; /* new stats */
+       struct i40e_vsi *vsi;
        unsigned i;
 
        /* call read registers - updates values, now write them to struct */
        i40e_read_stats_registers(pf, hw);
 
-       stats->ipackets = ns->eth.rx_unicast +
-                       ns->eth.rx_multicast +
-                       ns->eth.rx_broadcast -
-                       ns->eth.rx_discards -
+       stats->ipackets = pf->main_vsi->eth_stats.rx_unicast +
+                       pf->main_vsi->eth_stats.rx_multicast +
+                       pf->main_vsi->eth_stats.rx_broadcast -
                        pf->main_vsi->eth_stats.rx_discards;
        stats->opackets = ns->eth.tx_unicast +
                        ns->eth.tx_multicast +
                        ns->eth.tx_broadcast;
-       stats->ibytes   = ns->eth.rx_bytes;
+       stats->ibytes   = pf->main_vsi->eth_stats.rx_bytes;
        stats->obytes   = ns->eth.tx_bytes;
        stats->oerrors  = ns->eth.tx_errors +
                        pf->main_vsi->eth_stats.tx_errors;
@@ -3185,6 +3199,21 @@ i40e_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
                        ns->rx_length_errors + ns->rx_undersize +
                        ns->rx_oversize + ns->rx_fragments + ns->rx_jabber;
 
+       if (pf->vfs) {
+               for (i = 0; i < pf->vf_num; i++) {
+                       vsi = pf->vfs[i].vsi;
+                       i40e_update_vsi_stats(vsi);
+
+                       stats->ipackets += (vsi->eth_stats.rx_unicast +
+                                       vsi->eth_stats.rx_multicast +
+                                       vsi->eth_stats.rx_broadcast -
+                                       vsi->eth_stats.rx_discards);
+                       stats->ibytes   += vsi->eth_stats.rx_bytes;
+                       stats->oerrors  += vsi->eth_stats.tx_errors;
+                       stats->imissed  += vsi->eth_stats.rx_discards;
+               }
+       }
+
        PMD_DRV_LOG(DEBUG, "***************** PF stats start *******************");
        PMD_DRV_LOG(DEBUG, "rx_bytes:            %"PRIu64"", ns->eth.rx_bytes);
        PMD_DRV_LOG(DEBUG, "rx_unicast:          %"PRIu64"", ns->eth.rx_unicast);
@@ -3431,6 +3460,31 @@ i40e_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size)
                return 0;
 }
 
+/*
+ * When using NVM 6.01 (for X710/XL710/XXV710) or 3.33 (for X722) or later,
+ * the Rx data path does not hang if the FW LLDP is stopped.
+ * Return true if LLDP needs to be stopped.
+ * Return false if LLDP cannot be disabled without blocking the Rx data path.
+ */
+static bool
+i40e_need_stop_lldp(struct rte_eth_dev *dev)
+{
+       double nvm_ver;
+       char ver_str[64] = {0};
+       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       i40e_fw_version_get(dev, ver_str, 64);
+       nvm_ver = atof(ver_str);
+       if ((hw->mac.type == I40E_MAC_X722 ||
+            hw->mac.type == I40E_MAC_X722_VF) &&
+            ((uint32_t)(nvm_ver * 1000) >= (uint32_t)(3.33 * 1000)))
+               return true;
+       else if ((uint32_t)(nvm_ver * 1000) >= (uint32_t)(6.01 * 1000))
+               return true;
+
+       return false;
+}
+
 static void
 i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
@@ -4154,7 +4208,8 @@ i40e_get_rss_lut(struct i40e_vsi *vsi, uint8_t *lut, uint16_t lut_size)
                return -EINVAL;
 
        if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) {
-               ret = i40e_aq_get_rss_lut(hw, vsi->vsi_id, TRUE,
+               ret = i40e_aq_get_rss_lut(hw, vsi->vsi_id,
+                                         vsi->type != I40E_VSI_SRIOV,
                                          lut, lut_size);
                if (ret) {
                        PMD_DRV_LOG(ERR, "Failed to get RSS lookup table");
@@ -4193,7 +4248,8 @@ i40e_set_rss_lut(struct i40e_vsi *vsi, uint8_t *lut, uint16_t lut_size)
        hw = I40E_VSI_TO_HW(vsi);
 
        if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) {
-               ret = i40e_aq_set_rss_lut(hw, vsi->vsi_id, TRUE,
+               ret = i40e_aq_set_rss_lut(hw, vsi->vsi_id,
+                                         vsi->type != I40E_VSI_SRIOV,
                                          lut, lut_size);
                if (ret) {
                        PMD_DRV_LOG(ERR, "Failed to set RSS lookup table");
@@ -4255,6 +4311,8 @@ i40e_dev_rss_reta_update(struct rte_eth_dev *dev,
        }
        ret = i40e_set_rss_lut(pf->main_vsi, lut, reta_size);
 
+       pf->adapter->rss_reta_updated = 1;
+
 out:
        rte_free(lut);
 
@@ -7376,7 +7434,7 @@ i40e_get_rss_key(struct i40e_vsi *vsi, uint8_t *key, uint8_t *key_len)
        int ret;
 
        if (!key || !key_len)
-               return -EINVAL;
+               return 0;
 
        if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) {
                ret = i40e_aq_get_rss_key(hw, vsi->vsi_id,
@@ -7459,9 +7517,15 @@ i40e_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
        struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint64_t hena;
+       int ret;
+
+       if (!rss_conf)
+               return -EINVAL;
 
-       i40e_get_rss_key(pf->main_vsi, rss_conf->rss_key,
+       ret = i40e_get_rss_key(pf->main_vsi, rss_conf->rss_key,
                         &rss_conf->rss_key_len);
+       if (ret)
+               return ret;
 
        hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0));
        hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32;
@@ -8489,13 +8553,16 @@ i40e_pf_config_rss(struct i40e_pf *pf)
                return -ENOTSUP;
        }
 
-       for (i = 0, j = 0; i < hw->func_caps.rss_table_size; i++, j++) {
-               if (j == num)
-                       j = 0;
-               lut = (lut << 8) | (j & ((0x1 <<
-                       hw->func_caps.rss_table_entry_width) - 1));
-               if ((i & 3) == 3)
-                       I40E_WRITE_REG(hw, I40E_PFQF_HLUT(i >> 2), lut);
+       if (pf->adapter->rss_reta_updated == 0) {
+               for (i = 0, j = 0; i < hw->func_caps.rss_table_size; i++, j++) {
+                       if (j == num)
+                               j = 0;
+                       lut = (lut << 8) | (j & ((0x1 <<
+                               hw->func_caps.rss_table_entry_width) - 1));
+                       if ((i & 3) == 3)
+                               I40E_WRITE_REG(hw, I40E_PFQF_HLUT(i >> 2),
+                                              rte_bswap32(lut));
+               }
        }
 
        rss_conf = pf->dev_data->dev_conf.rx_adv_conf.rss_conf;
@@ -11385,11 +11452,7 @@ i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb)
         * LLDP MIB change event.
         */
        if (sw_dcb == TRUE) {
-               /* When using NVM 6.01 or later, the RX data path does
-                * not hang if the FW LLDP is stopped.
-                */
-               if (((hw->nvm.version >> 12) & 0xf) >= 6 &&
-                   ((hw->nvm.version >> 4) & 0xff) >= 1) {
+               if (i40e_need_stop_lldp(dev)) {
                        ret = i40e_aq_stop_lldp(hw, TRUE, NULL);
                        if (ret != I40E_SUCCESS)
                                PMD_INIT_LOG(DEBUG, "Failed to stop lldp");
index 11ecfc3..930eb9a 100644 (file)
@@ -1081,6 +1081,9 @@ struct i40e_adapter {
 
        /* For devargs */
        uint8_t use_latest_vec;
+
+       /* For RSS reta table update */
+       uint8_t rss_reta_updated;
 };
 
 /**
index ae55b9b..100e71c 100644 (file)
@@ -1080,9 +1080,11 @@ i40evf_enable_irq0(struct i40e_hw *hw)
 }
 
 static int
-i40evf_check_vf_reset_done(struct i40e_hw *hw)
+i40evf_check_vf_reset_done(struct rte_eth_dev *dev)
 {
        int i, reset;
+       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 
        for (i = 0; i < MAX_RESET_WAIT_CNT; i++) {
                reset = I40E_READ_REG(hw, I40E_VFGEN_RSTAT) &
@@ -1097,12 +1099,16 @@ i40evf_check_vf_reset_done(struct i40e_hw *hw)
        if (i >= MAX_RESET_WAIT_CNT)
                return -1;
 
+       vf->vf_reset = false;
+       vf->pend_msg &= ~PFMSG_RESET_IMPENDING;
+
        return 0;
 }
 static int
-i40evf_reset_vf(struct i40e_hw *hw)
+i40evf_reset_vf(struct rte_eth_dev *dev)
 {
        int ret;
+       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
        if (i40e_vf_reset(hw) != I40E_SUCCESS) {
                PMD_INIT_LOG(ERR, "Reset VF NIC failed");
@@ -1119,7 +1125,7 @@ i40evf_reset_vf(struct i40e_hw *hw)
          */
        rte_delay_ms(200);
 
-       ret = i40evf_check_vf_reset_done(hw);
+       ret = i40evf_check_vf_reset_done(dev);
        if (ret) {
                PMD_INIT_LOG(ERR, "VF is still resetting");
                return ret;
@@ -1145,7 +1151,7 @@ i40evf_init_vf(struct rte_eth_dev *dev)
                goto err;
        }
 
-       err = i40evf_check_vf_reset_done(hw);
+       err = i40evf_check_vf_reset_done(dev);
        if (err)
                goto err;
 
@@ -1157,7 +1163,7 @@ i40evf_init_vf(struct rte_eth_dev *dev)
        }
 
        /* Reset VF and wait until it's complete */
-       if (i40evf_reset_vf(hw)) {
+       if (i40evf_reset_vf(dev)) {
                PMD_INIT_LOG(ERR, "reset NIC failed");
                goto err_aq;
        }
@@ -1256,7 +1262,7 @@ i40evf_uninit_vf(struct rte_eth_dev *dev)
 
        PMD_INIT_FUNC_TRACE();
 
-       if (hw->adapter_stopped == 0)
+       if (hw->adapter_closed == 0)
                i40evf_dev_close(dev);
        rte_free(vf->vf_res);
        vf->vf_res = NULL;
@@ -1438,6 +1444,7 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev)
        hw->bus.func = pci_dev->addr.function;
        hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
        hw->adapter_stopped = 0;
+       hw->adapter_closed = 0;
 
        if(i40evf_init_vf(eth_dev) != 0) {
                PMD_INIT_LOG(ERR, "Init vf failed");
@@ -2256,10 +2263,11 @@ i40evf_dev_close(struct rte_eth_dev *dev)
        i40evf_dev_promiscuous_disable(dev);
        i40evf_dev_allmulticast_disable(dev);
 
-       i40evf_reset_vf(hw);
+       i40evf_reset_vf(dev);
        i40e_shutdown_adminq(hw);
        i40evf_disable_irq0(hw);
        rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev);
+       hw->adapter_closed = 1;
 }
 
 /*
index dd3962d..1e2d174 100644 (file)
@@ -1232,6 +1232,7 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev,
        uint16_t vf_id = abs_vf_id - hw->func_caps.vf_base_id;
        struct rte_pmd_i40e_mb_event_param ret_param;
        bool b_op = TRUE;
+       int ret;
 
        if (vf_id > pf->vf_num - 1 || !pf->vfs) {
                PMD_DRV_LOG(ERR, "invalid argument");
@@ -1246,6 +1247,30 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev,
                return;
        }
 
+       /* perform basic checks on the msg */
+       ret = virtchnl_vc_validate_vf_msg(&vf->version, opcode, msg, msglen);
+
+       /* perform additional checks specific to this driver */
+       if (opcode == VIRTCHNL_OP_CONFIG_RSS_KEY) {
+               struct virtchnl_rss_key *vrk = (struct virtchnl_rss_key *)msg;
+
+               if (vrk->key_len != ((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4))
+                       ret = VIRTCHNL_ERR_PARAM;
+       } else if (opcode == VIRTCHNL_OP_CONFIG_RSS_LUT) {
+               struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg;
+
+               if (vrl->lut_entries != ((I40E_VFQF_HLUT1_MAX_INDEX + 1) * 4))
+                       ret = VIRTCHNL_ERR_PARAM;
+       }
+
+       if (ret) {
+               PMD_DRV_LOG(ERR, "Invalid message from VF %u, opcode %u, len %u",
+                           vf_id, opcode, msglen);
+               i40e_pf_host_send_msg_to_vf(vf, opcode,
+                                           I40E_ERR_PARAM, NULL, 0);
+               return;
+       }
+
        /**
         * initialise structure to send to user application
         * will return response from user in retval field
index e1152ff..8f727fa 100644 (file)
@@ -69,7 +69,7 @@
                I40E_TX_IEEE1588_TMST)
 
 #define I40E_TX_OFFLOAD_NOTSUP_MASK \
-               ~(PKT_TX_OFFLOAD_MASK & I40E_TX_OFFLOAD_MASK)
+               (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK)
 
 static inline void
 i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp)
@@ -2753,7 +2753,6 @@ i40e_dev_free_queues(struct rte_eth_dev *dev)
                i40e_dev_rx_queue_release(dev->data->rx_queues[i]);
                dev->data->rx_queues[i] = NULL;
        }
-       dev->data->nb_rx_queues = 0;
 
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                if (!dev->data->tx_queues[i])
@@ -2761,7 +2760,6 @@ i40e_dev_free_queues(struct rte_eth_dev *dev)
                i40e_dev_tx_queue_release(dev->data->tx_queues[i]);
                dev->data->tx_queues[i] = NULL;
        }
-       dev->data->nb_tx_queues = 0;
 }
 
 #define I40E_FDIR_NUM_TX_DESC  I40E_MIN_RING_DESC
@@ -3184,7 +3182,7 @@ i40e_set_default_pctype_table(struct rte_eth_dev *dev)
        }
 }
 
-/* Stubs needed for linkage when CONFIG_RTE_I40E_INC_VECTOR is set to 'n' */
+/* Stubs needed for linkage when CONFIG_RTE_LIBRTE_I40E_INC_VECTOR is set to 'n' */
 __rte_weak int
 i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
 {
index 7ce5d02..c49c872 100644 (file)
@@ -2818,13 +2818,23 @@ i40e_queue_region_dcb_configure(struct i40e_hw *hw,
        struct i40e_dcbx_config *old_cfg = &hw->local_dcbx_config;
        int32_t ret = -EINVAL;
        uint16_t i, j, prio_index, region_index;
-       uint8_t tc_map, tc_bw, bw_lf;
+       uint8_t tc_map, tc_bw, bw_lf, dcb_flag = 0;
 
        if (!info->queue_region_number) {
                PMD_DRV_LOG(ERR, "No queue region been set before");
                return ret;
        }
 
+       for (i = 0; i < info->queue_region_number; i++) {
+               if (info->region[i].user_priority_num) {
+                       dcb_flag = 1;
+                       break;
+               }
+       }
+
+       if (dcb_flag == 0)
+               return 0;
+
        dcb_cfg = &dcb_cfg_local;
        memset(dcb_cfg, 0, sizeof(struct i40e_dcbx_config));
 
index f026c70..c15c691 100644 (file)
@@ -121,7 +121,7 @@ struct ifcvf_hw {
        u8     notify_region;
        u32    notify_off_multiplier;
        struct ifcvf_pci_common_cfg *common_cfg;
-       struct ifcvf_net_device_config *dev_cfg;
+       struct ifcvf_net_config *dev_cfg;
        u8     *isr;
        u16    *notify_base;
        u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
index 97a57f1..698d14f 100644 (file)
@@ -773,15 +773,15 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
        internal->dev_addr.type = PCI_ADDR;
        list->internal = internal;
 
-       pthread_mutex_lock(&internal_list_lock);
-       TAILQ_INSERT_TAIL(&internal_list, list, next);
-       pthread_mutex_unlock(&internal_list_lock);
-
        internal->did = rte_vdpa_register_device(&internal->dev_addr,
                                &ifcvf_ops);
        if (internal->did < 0)
                goto error;
 
+       pthread_mutex_lock(&internal_list_lock);
+       TAILQ_INSERT_TAIL(&internal_list, list, next);
+       pthread_mutex_unlock(&internal_list_lock);
+
        rte_atomic32_set(&internal->started, 1);
        update_datapath(internal);
 
index 7de753f..96bdde6 100644 (file)
@@ -392,6 +392,8 @@ s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw,
        /* Check if 1G SFP module. */
        if (hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
            hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
+           hw->phy.sfp_type == ixgbe_sfp_type_1g_lha_core0 ||
+           hw->phy.sfp_type == ixgbe_sfp_type_1g_lha_core1 ||
            hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
            hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 ||
            hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
index 6cdd8fb..dd118f9 100644 (file)
@@ -1402,6 +1402,13 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw)
                                else
                                        hw->phy.sfp_type =
                                                ixgbe_sfp_type_1g_lx_core1;
+                       } else if (comp_codes_1g & IXGBE_SFF_1GBASELHA_CAPABLE) {
+                               if (hw->bus.lan_id == 0)
+                                       hw->phy.sfp_type =
+                                               ixgbe_sfp_type_1g_lha_core0;
+                               else
+                                       hw->phy.sfp_type =
+                                               ixgbe_sfp_type_1g_lha_core1;
                        } else {
                                hw->phy.sfp_type = ixgbe_sfp_type_unknown;
                        }
@@ -1489,6 +1496,8 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw)
                if (comp_codes_10g == 0 &&
                    !(hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
                      hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
+                     hw->phy.sfp_type == ixgbe_sfp_type_1g_lha_core0 ||
+                     hw->phy.sfp_type == ixgbe_sfp_type_1g_lha_core1 ||
                      hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
                      hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 ||
                      hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
@@ -1508,6 +1517,8 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw)
                if (!(enforce_sfp & IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP) &&
                    !(hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
                      hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
+                     hw->phy.sfp_type == ixgbe_sfp_type_1g_lha_core0 ||
+                     hw->phy.sfp_type == ixgbe_sfp_type_1g_lha_core1 ||
                      hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
                      hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 ||
                      hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
@@ -1835,11 +1846,13 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
         */
        if (sfp_type == ixgbe_sfp_type_da_act_lmt_core0 ||
            sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
+           sfp_type == ixgbe_sfp_type_1g_lha_core0 ||
            sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
            sfp_type == ixgbe_sfp_type_1g_sx_core0)
                sfp_type = ixgbe_sfp_type_srlr_core0;
        else if (sfp_type == ixgbe_sfp_type_da_act_lmt_core1 ||
                 sfp_type == ixgbe_sfp_type_1g_lx_core1 ||
+                sfp_type == ixgbe_sfp_type_1g_lha_core1 ||
                 sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
                 sfp_type == ixgbe_sfp_type_1g_sx_core1)
                sfp_type = ixgbe_sfp_type_srlr_core1;
index 132fa54..f1605f2 100644 (file)
@@ -41,6 +41,7 @@
 #define IXGBE_SFF_1GBASESX_CAPABLE     0x1
 #define IXGBE_SFF_1GBASELX_CAPABLE     0x2
 #define IXGBE_SFF_1GBASET_CAPABLE      0x8
+#define IXGBE_SFF_1GBASELHA_CAPABLE    0x10
 #define IXGBE_SFF_10GBASESR_CAPABLE    0x10
 #define IXGBE_SFF_10GBASELR_CAPABLE    0x20
 #define IXGBE_SFF_SOFT_RS_SELECT_MASK  0x8
index cee6ba2..077b8f0 100644 (file)
@@ -3724,6 +3724,8 @@ enum ixgbe_sfp_type {
        ixgbe_sfp_type_1g_sx_core1 = 12,
        ixgbe_sfp_type_1g_lx_core0 = 13,
        ixgbe_sfp_type_1g_lx_core1 = 14,
+       ixgbe_sfp_type_1g_lha_core0 = 15,
+       ixgbe_sfp_type_1g_lha_core1 = 16,
        ixgbe_sfp_type_not_present = 0xFFFE,
        ixgbe_sfp_type_unknown = 0xFFFF
 };
index f7b98af..a920a14 100644 (file)
@@ -1534,6 +1534,8 @@ STATIC s32 ixgbe_supported_sfp_modules_X550em(struct ixgbe_hw *hw, bool *linear)
        case ixgbe_sfp_type_1g_sx_core1:
        case ixgbe_sfp_type_1g_lx_core0:
        case ixgbe_sfp_type_1g_lx_core1:
+       case ixgbe_sfp_type_1g_lha_core0:
+       case ixgbe_sfp_type_1g_lha_core1:
                *linear = false;
                break;
        case ixgbe_sfp_type_unknown:
@@ -1874,6 +1876,8 @@ s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw,
                /* Check if 1G SFP module. */
                if (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
                    hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1
+                   || hw->phy.sfp_type == ixgbe_sfp_type_1g_lha_core0 ||
+                   hw->phy.sfp_type == ixgbe_sfp_type_1g_lha_core1
                    || hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
                    hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1) {
                        *speed = IXGBE_LINK_SPEED_1GB_FULL;
index 91ba620..e9533e5 100644 (file)
@@ -1336,6 +1336,9 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev)
                rte_delay_ms(100);
        } while (retries++ < (10 + IXGBE_LINK_UP_TIME));
 
+       /* cancel the delayed interrupt handler before removing the device */
+       rte_eal_alarm_cancel(ixgbe_dev_interrupt_delayed_handler, eth_dev);
+
        /* uninitialize PF if max_vfs not zero */
        ixgbe_pf_host_uninit(eth_dev);
 
@@ -2790,6 +2793,8 @@ static void
 ixgbe_dev_stop(struct rte_eth_dev *dev)
 {
        struct rte_eth_link link;
+       struct ixgbe_adapter *adapter =
+               (struct ixgbe_adapter *)dev->data->dev_private;
        struct ixgbe_hw *hw =
                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct ixgbe_vf_info *vfinfo =
@@ -2850,6 +2855,8 @@ ixgbe_dev_stop(struct rte_eth_dev *dev)
 
        /* reset hierarchy commit */
        tm_conf->committed = false;
+
+       adapter->rss_reta_updated = 0;
 }
 
 /*
@@ -4779,6 +4786,8 @@ ixgbe_dev_rss_reta_update(struct rte_eth_dev *dev,
        uint8_t j, mask;
        uint32_t reta, r;
        uint16_t idx, shift;
+       struct ixgbe_adapter *adapter =
+               (struct ixgbe_adapter *)dev->data->dev_private;
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t reta_reg;
 
@@ -4820,6 +4829,7 @@ ixgbe_dev_rss_reta_update(struct rte_eth_dev *dev,
                }
                IXGBE_WRITE_REG(hw, reta_reg, reta);
        }
+       adapter->rss_reta_updated = 1;
 
        return 0;
 }
@@ -5143,6 +5153,8 @@ static void
 ixgbevf_dev_stop(struct rte_eth_dev *dev)
 {
        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct ixgbe_adapter *adapter =
+               (struct ixgbe_adapter *)dev->data->dev_private;
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
@@ -5172,6 +5184,8 @@ ixgbevf_dev_stop(struct rte_eth_dev *dev)
                rte_free(intr_handle->intr_vec);
                intr_handle->intr_vec = NULL;
        }
+
+       adapter->rss_reta_updated = 0;
 }
 
 static void
index d0b9396..565c69c 100644 (file)
@@ -490,6 +490,9 @@ struct ixgbe_adapter {
        struct rte_timecounter      rx_tstamp_tc;
        struct rte_timecounter      tx_tstamp_tc;
        struct ixgbe_tm_conf        tm_conf;
+
+       /* For RSS reta table update */
+       uint8_t rss_reta_updated;
 };
 
 struct ixgbe_vf_representor {
index 4b833ff..be0c076 100644 (file)
@@ -351,7 +351,7 @@ ixgbe_vf_reset_event(struct rte_eth_dev *dev, uint16_t vf)
        int rar_entry = hw->mac.num_rar_entries - (vf + 1);
        uint32_t vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
 
-       vmolr |= (IXGBE_VMOLR_ROPE | IXGBE_VMOLR_ROMPE |
+       vmolr |= (IXGBE_VMOLR_ROPE |
                        IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE);
        IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
 
@@ -503,6 +503,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
        const uint32_t IXGBE_MTA_BIT_MASK = (0x1 << IXGBE_MTA_BIT_SHIFT) - 1;
        uint32_t reg_val;
        int i;
+       u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
 
        /* Disable multicast promiscuous first */
        ixgbe_disable_vf_mc_promisc(dev, vf);
@@ -516,6 +517,12 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
                vfinfo->vf_mc_hashes[i] = hash_list[i];
        }
 
+       if (nb_entries == 0) {
+               vmolr &= ~IXGBE_VMOLR_ROMPE;
+               IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
+               return 0;
+       }
+
        for (i = 0; i < vfinfo->num_vf_mc_hashes; i++) {
                mta_idx = (vfinfo->vf_mc_hashes[i] >> IXGBE_MTA_BIT_SHIFT)
                                & IXGBE_MTA_INDEX_MASK;
@@ -525,6 +532,9 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf)
                IXGBE_WRITE_REG(hw, IXGBE_MTA(mta_idx), reg_val);
        }
 
+       vmolr |= IXGBE_VMOLR_ROMPE;
+       IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
+
        return 0;
 }
 
index ddc7efa..9a79d18 100644 (file)
@@ -2867,7 +2867,8 @@ ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
         * mode.
         */
        if ((hw->mac.type == ixgbe_mac_82599EB ||
-            hw->mac.type == ixgbe_mac_X540) &&
+            hw->mac.type == ixgbe_mac_X540 ||
+            hw->mac.type == ixgbe_mac_X550) &&
            !RTE_ETH_DEV_SRIOV(dev).active)
                offloads |= DEV_RX_OFFLOAD_TCP_LRO;
 
@@ -3417,6 +3418,7 @@ static void
 ixgbe_rss_configure(struct rte_eth_dev *dev)
 {
        struct rte_eth_rss_conf rss_conf;
+       struct ixgbe_adapter *adapter;
        struct ixgbe_hw *hw;
        uint32_t reta;
        uint16_t i;
@@ -3425,6 +3427,7 @@ ixgbe_rss_configure(struct rte_eth_dev *dev)
        uint32_t reta_reg;
 
        PMD_INIT_FUNC_TRACE();
+       adapter = (struct ixgbe_adapter *)dev->data->dev_private;
        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
        sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
@@ -3434,16 +3437,18 @@ ixgbe_rss_configure(struct rte_eth_dev *dev)
         * The byte-swap is needed because NIC registers are in
         * little-endian order.
         */
-       reta = 0;
-       for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
-               reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
-
-               if (j == dev->data->nb_rx_queues)
-                       j = 0;
-               reta = (reta << 8) | j;
-               if ((i & 3) == 3)
-                       IXGBE_WRITE_REG(hw, reta_reg,
-                                       rte_bswap32(reta));
+       if (adapter->rss_reta_updated == 0) {
+               reta = 0;
+               for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
+                       reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
+
+                       if (j == dev->data->nb_rx_queues)
+                               j = 0;
+                       reta = (reta << 8) | j;
+                       if ((i & 3) == 3)
+                               IXGBE_WRITE_REG(hw, reta_reg,
+                                               rte_bswap32(reta));
+               }
        }
 
        /*
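The rss_reta_updated flag introduced above makes the PMD skip its default RETA programming in ixgbe_rss_configure() once the application has installed its own table, so a user-supplied RETA survives a port restart. A minimal sketch of such an update, assuming the port is already configured:

    #include <stdint.h>
    #include <string.h>
    #include <rte_ethdev.h>

    /* Sketch: steer every RETA entry to queue 0; reta_size is taken from
     * dev_info, so it fits both 128- and 512-entry ixgbe devices. */
    static int pin_reta_to_queue0(uint16_t port_id)
    {
        struct rte_eth_rss_reta_entry64
            reta_conf[ETH_RSS_RETA_SIZE_512 / RTE_RETA_GROUP_SIZE];
        struct rte_eth_dev_info info;
        uint16_t i;

        rte_eth_dev_info_get(port_id, &info);
        memset(reta_conf, 0, sizeof(reta_conf));
        for (i = 0; i < info.reta_size; i++) {
            reta_conf[i / RTE_RETA_GROUP_SIZE].mask = UINT64_MAX;
            reta_conf[i / RTE_RETA_GROUP_SIZE].reta[i % RTE_RETA_GROUP_SIZE] = 0;
        }
        /* With the hunks above this also sets adapter->rss_reta_updated = 1. */
        return rte_eth_dev_rss_reta_update(port_id, reta_conf, info.reta_size);
    }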
index 895cdfe..63dfa45 100644 (file)
@@ -117,6 +117,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
                infiniband/verbs.h \
                enum IBV_FLOW_SPEC_MPLS \
                $(AUTOCONF_OUTPUT)
+       $Q sh -- '$<' '$@' \
+               HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING \
+               infiniband/verbs.h \
+               enum IBV_WQ_FLAGS_PCI_WRITE_END_PADDING \
+               $(AUTOCONF_OUTPUT)
        $Q sh -- '$<' '$@' \
                HAVE_IBV_WQ_FLAG_RX_END_PADDING \
                infiniband/verbs.h \
index 28938db..de0c32b 100644 (file)
@@ -102,6 +102,8 @@ if build
                'mlx5dv_create_flow_action_packet_reformat' ],
                [ 'HAVE_IBV_DEVICE_MPLS_SUPPORT', 'infiniband/verbs.h',
                'IBV_FLOW_SPEC_MPLS' ],
+               [ 'HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING', 'infiniband/verbs.h',
+               'IBV_WQ_FLAGS_PCI_WRITE_END_PADDING' ],
                [ 'HAVE_IBV_WQ_FLAG_RX_END_PADDING', 'infiniband/verbs.h',
                'IBV_WQ_FLAG_RX_END_PADDING' ],
                [ 'HAVE_SUPPORTED_40000baseKR4_Full', 'linux/ethtool.h',
index 9e5cab1..e7668bd 100644 (file)
@@ -54,6 +54,9 @@
 /* Device parameter to enable RX completion entry padding to 128B. */
 #define MLX5_RXQ_CQE_PAD_EN "rxq_cqe_pad_en"
 
+/* Device parameter to enable padding Rx packet to cacheline size. */
+#define MLX5_RXQ_PKT_PAD_EN "rxq_pkt_pad_en"
+
 /* Device parameter to enable Multi-Packet Rx queue. */
 #define MLX5_RX_MPRQ_EN "mprq_en"
 
@@ -486,6 +489,8 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
                config->cqe_comp = !!tmp;
        } else if (strcmp(MLX5_RXQ_CQE_PAD_EN, key) == 0) {
                config->cqe_pad = !!tmp;
+       } else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) {
+               config->hw_padding = !!tmp;
        } else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) {
                config->mprq.enabled = !!tmp;
        } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_NUM, key) == 0) {
@@ -541,6 +546,7 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
        const char **params = (const char *[]){
                MLX5_RXQ_CQE_COMP_EN,
                MLX5_RXQ_CQE_PAD_EN,
+               MLX5_RXQ_PKT_PAD_EN,
                MLX5_RX_MPRQ_EN,
                MLX5_RX_MPRQ_LOG_STRIDE_NUM,
                MLX5_RX_MPRQ_MAX_MEMCPY_LEN,
@@ -735,6 +741,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
        struct rte_eth_dev *eth_dev = NULL;
        struct priv *priv = NULL;
        int err = 0;
+       unsigned int hw_padding = 0;
        unsigned int mps;
        unsigned int cqe_comp;
        unsigned int cqe_pad = 0;
@@ -1053,11 +1060,18 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
                                 IBV_RAW_PACKET_CAP_SCATTER_FCS);
        DRV_LOG(DEBUG, "FCS stripping configuration is %ssupported",
                (config.hw_fcs_strip ? "" : "not "));
-#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
-       config.hw_padding = !!attr.rx_pad_end_addr_align;
+#if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)
+       hw_padding = !!attr.rx_pad_end_addr_align;
+#elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)
+       hw_padding = !!(attr.device_cap_flags_ex &
+                       IBV_DEVICE_PCI_WRITE_END_PADDING);
 #endif
-       DRV_LOG(DEBUG, "hardware Rx end alignment padding is %ssupported",
-               (config.hw_padding ? "" : "not "));
+       if (config.hw_padding && !hw_padding) {
+               DRV_LOG(DEBUG, "Rx end alignment padding isn't supported");
+               config.hw_padding = 0;
+       } else if (config.hw_padding) {
+               DRV_LOG(DEBUG, "Rx end alignment padding is enabled");
+       }
        config.tso = (attr.tso_caps.max_tso > 0 &&
                      (attr.tso_caps.supported_qpts &
                       (1 << IBV_QPT_RAW_PACKET)));
@@ -1434,6 +1448,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                qsort(list, n, sizeof(*list), mlx5_dev_spawn_data_cmp);
        /* Default configuration. */
        dev_config = (struct mlx5_dev_config){
+               .hw_padding = 0,
                .mps = MLX5_ARG_UNSET,
                .tx_vec_en = 1,
                .rx_vec_en = 1,
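The mlx5.c hunks above add the rxq_pkt_pad_en device parameter and only keep hw_padding enabled when the Verbs provider actually reports an end-padding capability. A minimal sketch of requesting the new parameter through the EAL device whitelist (the PCI address is an example):

    #include <rte_eal.h>

    int main(void)
    {
        char app[] = "app";
        char wl_opt[] = "-w";
        /* PCI address is illustrative; rxq_pkt_pad_en=1 requests Rx end padding. */
        char wl_arg[] = "0000:82:00.0,rxq_pkt_pad_en=1";
        char *argv[] = { app, wl_opt, wl_arg, NULL };

        return rte_eal_init(3, argv) < 0 ? -1 : 0;
    }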
index 97dc3e1..ee129b9 100644 (file)
@@ -2314,7 +2314,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
        struct rte_flow_error error;
        unsigned int i;
 
-       if (!priv->reta_idx_n) {
+       if (!priv->reta_idx_n || !priv->rxqs_n) {
                rte_errno = EINVAL;
                return -rte_errno;
        }
index fb284c3..96b9dd7 100644 (file)
@@ -28,6 +28,7 @@
 #include <rte_flow.h>
 #include <rte_malloc.h>
 #include <rte_common.h>
+#include <rte_cycles.h>
 
 #include "mlx5.h"
 #include "mlx5_flow.h"
@@ -320,6 +321,11 @@ struct tc_tunnel_key {
 #define MLX5_VXLAN_PORT_MIN 30000
 #define MLX5_VXLAN_PORT_MAX 60000
 #define MLX5_VXLAN_DEVICE_PFX "vmlx_"
+/**
+ * Timeout in milliseconds to wait for the VXLAN UDP offloaded port
+ * registration to complete within the mlx5 driver.
+ */
+#define MLX5_VXLAN_WAIT_PORT_REG_MS 250
 
 /** Tunnel action type, used for @p type in header structure. */
 enum flow_tcf_tunact_type {
@@ -403,7 +409,8 @@ struct tcf_vtep {
        unsigned int ifindex; /**< Own interface index. */
        unsigned int ifouter; /**< Index of device attached to. */
        uint16_t port;
-       uint8_t created;
+       uint32_t created:1; /**< Actually created by PMD. */
+       uint32_t waitreg:1; /**< Wait for VXLAN UDP port registration. */
 };
 
 /** Tunnel descriptor header, common for all tunnel types. */
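The new waitreg bit and MLX5_VXLAN_WAIT_PORT_REG_MS constant above let the PMD wait up to 250 ms for the VXLAN UDP port offload registration to appear. A rough sketch of such a bounded wait using the rte_cycles.h helpers pulled in by this patch; vxlan_port_registered() is a hypothetical stand-in for the driver's real check:

    #include <stdbool.h>
    #include <stdint.h>
    #include <rte_cycles.h>

    /* Hypothetical predicate standing in for the driver's actual check. */
    extern bool vxlan_port_registered(uint16_t udp_port);

    /* Poll until the offloaded UDP port shows up or the timeout expires. */
    static bool
    wait_vxlan_port(uint16_t udp_port, unsigned int timeout_ms)
    {
        uint64_t end = rte_get_timer_cycles() +
                       (rte_get_timer_hz() * timeout_ms) / MS_PER_S;

        while (!vxlan_port_registered(udp_port)) {
            if (rte_get_timer_cycles() > end)
                return false;
            rte_delay_ms(1);
        }
        return true;
    }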
@@ -436,7 +443,7 @@ struct flow_tcf_vxlan_encap {
                        uint8_t src[IPV6_ADDR_LEN];
                } ipv6;
        };
-struct {
+       struct {
                rte_be16_t src;
                rte_be16_t dst;
        } udp;
@@ -463,7 +470,9 @@ static const union {
        struct rte_flow_item_tcp tcp;
        struct rte_flow_item_udp udp;
        struct rte_flow_item_vxlan vxlan;
-} flow_tcf_mask_empty;
+} flow_tcf_mask_empty = {
+       {0},
+};
 
 /** Supported masks for known item types. */
 static const struct {
@@ -1279,7 +1288,7 @@ flow_tcf_validate_vxlan_encap_ipv4(const struct rte_flow_item *item,
  *   Pointer to the error structure.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  **/
 static int
 flow_tcf_validate_vxlan_encap_ipv6(const struct rte_flow_item *item,
@@ -1365,7 +1374,7 @@ flow_tcf_validate_vxlan_encap_ipv6(const struct rte_flow_item *item,
  *   Pointer to the error structure.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  **/
 static int
 flow_tcf_validate_vxlan_encap_udp(const struct rte_flow_item *item,
@@ -1433,7 +1442,7 @@ flow_tcf_validate_vxlan_encap_udp(const struct rte_flow_item *item,
  *   Pointer to the error structure.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  **/
 static int
 flow_tcf_validate_vxlan_encap_vni(const struct rte_flow_item *item,
@@ -1481,7 +1490,7 @@ flow_tcf_validate_vxlan_encap_vni(const struct rte_flow_item *item,
  *   Pointer to the error structure.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  **/
 static int
 flow_tcf_validate_vxlan_encap(const struct rte_flow_action *action,
@@ -1584,141 +1593,8 @@ flow_tcf_validate_vxlan_encap(const struct rte_flow_action *action,
 }
 
 /**
- * Validate RTE_FLOW_ITEM_TYPE_IPV4 item if VXLAN_DECAP action
- * is present in actions list.
- *
- * @param[in] ipv4
- *   Outer IPv4 address item (if any, NULL otherwise).
- * @param[out] error
- *   Pointer to the error structure.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_ernno is set.
- **/
-static int
-flow_tcf_validate_vxlan_decap_ipv4(const struct rte_flow_item *ipv4,
-                                  struct rte_flow_error *error)
-{
-       const struct rte_flow_item_ipv4 *spec = ipv4->spec;
-       const struct rte_flow_item_ipv4 *mask = ipv4->mask;
-
-       if (!spec) {
-               /*
-                * Specification for IP addresses cannot be empty
-                * because it is required as decap parameter.
-                */
-               return rte_flow_error_set(error, EINVAL,
-                                         RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
-                                         "NULL outer ipv4 address"
-                                         " specification for vxlan"
-                                         " for vxlan decapsulation");
-       }
-       if (!mask)
-               mask = &rte_flow_item_ipv4_mask;
-       if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) {
-               if (mask->hdr.dst_addr != RTE_BE32(0xffffffff))
-                       return rte_flow_error_set
-                                       (error, ENOTSUP,
-                                        RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
-                                        "no support for partial mask on"
-                                        " \"ipv4.hdr.dst_addr\" field");
-               /* More IP address validations can be put here. */
-       } else {
-               /*
-                * Kernel uses the destination IP address
-                * to determine the ingress network interface
-                * for traffic being decapsulated.
-                */
-               return rte_flow_error_set(error, EINVAL,
-                                         RTE_FLOW_ERROR_TYPE_ITEM, ipv4,
-                                         "outer ipv4 destination address"
-                                         " must be specified for"
-                                         " vxlan decapsulation");
-       }
-       /* Source IP address is optional for decap. */
-       if (mask->hdr.src_addr != RTE_BE32(0x00000000) &&
-           mask->hdr.src_addr != RTE_BE32(0xffffffff))
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
-                                         "no support for partial mask on"
-                                         " \"ipv4.hdr.src_addr\" field");
-       return 0;
-}
-
-/**
- * Validate RTE_FLOW_ITEM_TYPE_IPV6 item if VXLAN_DECAP action
- * is present in actions list.
- *
- * @param[in] ipv6
- *   Outer IPv6 address item (if any, NULL otherwise).
- * @param[out] error
- *   Pointer to the error structure.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_ernno is set.
- **/
-static int
-flow_tcf_validate_vxlan_decap_ipv6(const struct rte_flow_item *ipv6,
-                                  struct rte_flow_error *error)
-{
-       const struct rte_flow_item_ipv6 *spec = ipv6->spec;
-       const struct rte_flow_item_ipv6 *mask = ipv6->mask;
-
-       if (!spec) {
-               /*
-                * Specification for IP addresses cannot be empty
-                * because it is required as decap parameter.
-                */
-               return rte_flow_error_set(error, EINVAL,
-                                         RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
-                                         "NULL outer ipv6 address"
-                                         " specification for vxlan"
-                                         " decapsulation");
-       }
-       if (!mask)
-               mask = &rte_flow_item_ipv6_mask;
-       if (memcmp(&mask->hdr.dst_addr,
-                  &flow_tcf_mask_empty.ipv6.hdr.dst_addr,
-                  IPV6_ADDR_LEN)) {
-               if (memcmp(&mask->hdr.dst_addr,
-                       &rte_flow_item_ipv6_mask.hdr.dst_addr,
-                       IPV6_ADDR_LEN))
-                       return rte_flow_error_set
-                                       (error, ENOTSUP,
-                                        RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
-                                        "no support for partial mask on"
-                                        " \"ipv6.hdr.dst_addr\" field");
-               /* More IP address validations can be put here. */
-       } else {
-               /*
-                * Kernel uses the destination IP address
-                * to determine the ingress network interface
-                * for traffic being decapsulated.
-                */
-               return rte_flow_error_set(error, EINVAL,
-                                         RTE_FLOW_ERROR_TYPE_ITEM, ipv6,
-                                         "outer ipv6 destination address must be "
-                                         "specified for vxlan decapsulation");
-       }
-       /* Source IP address is optional for decap. */
-       if (memcmp(&mask->hdr.src_addr,
-                  &flow_tcf_mask_empty.ipv6.hdr.src_addr,
-                  IPV6_ADDR_LEN)) {
-               if (memcmp(&mask->hdr.src_addr,
-                          &rte_flow_item_ipv6_mask.hdr.src_addr,
-                          IPV6_ADDR_LEN))
-                       return rte_flow_error_set
-                                       (error, ENOTSUP,
-                                        RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
-                                        "no support for partial mask on"
-                                        " \"ipv6.hdr.src_addr\" field");
-       }
-       return 0;
-}
-
-/**
- * Validate RTE_FLOW_ITEM_TYPE_UDP item if VXLAN_DECAP action
- * is present in actions list.
+ * Validate outer RTE_FLOW_ITEM_TYPE_UDP item if tunnel item
+ * RTE_FLOW_ITEM_TYPE_VXLAN is present in item list.
  *
  * @param[in] udp
  *   Outer UDP layer item (if any, NULL otherwise).
@@ -1726,7 +1602,7 @@ flow_tcf_validate_vxlan_decap_ipv6(const struct rte_flow_item *ipv6,
  *   Pointer to the error structure.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  **/
 static int
 flow_tcf_validate_vxlan_decap_udp(const struct rte_flow_item *udp,
@@ -1794,7 +1670,7 @@ flow_tcf_validate_vxlan_decap_udp(const struct rte_flow_item *udp,
  *   Pointer to the error structure.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
 flow_tcf_validate(struct rte_eth_dev *dev,
@@ -1825,9 +1701,13 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                const struct rte_flow_action_set_ipv4 *set_ipv4;
                const struct rte_flow_action_set_ipv6 *set_ipv6;
        } conf;
+       const struct rte_flow_item *outer_udp = NULL;
+       rte_be16_t inner_etype = RTE_BE16(ETH_P_ALL);
+       rte_be16_t outer_etype = RTE_BE16(ETH_P_ALL);
+       rte_be16_t vlan_etype = RTE_BE16(ETH_P_ALL);
        uint64_t item_flags = 0;
        uint64_t action_flags = 0;
-       uint8_t next_protocol = -1;
+       uint8_t next_protocol = 0xff;
        unsigned int tcm_ifindex = 0;
        uint8_t pedit_validated = 0;
        struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
@@ -2011,17 +1891,16 @@ flow_tcf_validate(struct rte_eth_dev *dev,
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
                unsigned int i;
 
-               if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
-                   items->type != RTE_FLOW_ITEM_TYPE_ETH)
-                       return rte_flow_error_set(error, ENOTSUP,
-                                                 RTE_FLOW_ERROR_TYPE_ITEM,
-                                                 items,
-                                                 "only L2 inner item"
-                                                 " is supported");
                switch (items->type) {
                case RTE_FLOW_ITEM_TYPE_VOID:
                        break;
                case RTE_FLOW_ITEM_TYPE_PORT_ID:
+                       if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
+                               return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM, items,
+                                        "inner tunnel port id"
+                                        " item is not supported");
                        mask.port_id = flow_tcf_item_mask
                                (items, &rte_flow_item_port_id_mask,
                                 &flow_tcf_mask_supported.port_id,
@@ -2072,8 +1951,8 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                        if (ret < 0)
                                return ret;
                        item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
-                                       MLX5_FLOW_LAYER_INNER_L2 :
-                                       MLX5_FLOW_LAYER_OUTER_L2;
+                                     MLX5_FLOW_LAYER_INNER_L2 :
+                                     MLX5_FLOW_LAYER_OUTER_L2;
                        /* TODO:
                         * Redundant check due to different supported mask.
                         * Same for the rest of items.
@@ -2094,8 +1973,40 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                                         mask.eth,
                                         "no support for partial mask on"
                                         " \"type\" field");
+                       assert(items->spec);
+                       spec.eth = items->spec;
+                       if (mask.eth->type &&
+                           (item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
+                           inner_etype != RTE_BE16(ETH_P_ALL) &&
+                           inner_etype != spec.eth->type)
+                               return rte_flow_error_set
+                                       (error, EINVAL,
+                                        RTE_FLOW_ERROR_TYPE_ITEM,
+                                        items,
+                                        "inner eth_type conflict");
+                       if (mask.eth->type &&
+                           !(item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
+                           outer_etype != RTE_BE16(ETH_P_ALL) &&
+                           outer_etype != spec.eth->type)
+                               return rte_flow_error_set
+                                       (error, EINVAL,
+                                        RTE_FLOW_ERROR_TYPE_ITEM,
+                                        items,
+                                        "outer eth_type conflict");
+                       if (mask.eth->type) {
+                               if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
+                                       inner_etype = spec.eth->type;
+                               else
+                                       outer_etype = spec.eth->type;
+                       }
                        break;
                case RTE_FLOW_ITEM_TYPE_VLAN:
+                       if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
+                               return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM, items,
+                                        "inner tunnel VLAN"
+                                        " is not supported");
                        ret = mlx5_flow_validate_item_vlan(items, item_flags,
                                                           error);
                        if (ret < 0)
@@ -2124,13 +2035,36 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                                         "no support for partial masks on"
                                         " \"tci\" (PCP and VID parts) and"
                                         " \"inner_type\" fields");
+                       if (outer_etype != RTE_BE16(ETH_P_ALL) &&
+                           outer_etype != RTE_BE16(ETH_P_8021Q))
+                               return rte_flow_error_set
+                                       (error, EINVAL,
+                                        RTE_FLOW_ERROR_TYPE_ITEM,
+                                        items,
+                                        "outer eth_type conflict,"
+                                        " must be 802.1Q");
+                       outer_etype = RTE_BE16(ETH_P_8021Q);
+                       assert(items->spec);
+                       spec.vlan = items->spec;
+                       if (mask.vlan->inner_type &&
+                           vlan_etype != RTE_BE16(ETH_P_ALL) &&
+                           vlan_etype != spec.vlan->inner_type)
+                               return rte_flow_error_set
+                                       (error, EINVAL,
+                                        RTE_FLOW_ERROR_TYPE_ITEM,
+                                        items,
+                                        "vlan eth_type conflict");
+                       if (mask.vlan->inner_type)
+                               vlan_etype = spec.vlan->inner_type;
                        break;
                case RTE_FLOW_ITEM_TYPE_IPV4:
                        ret = mlx5_flow_validate_item_ipv4(items, item_flags,
                                                           error);
                        if (ret < 0)
                                return ret;
-                       item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+                       item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
+                                     MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+                                     MLX5_FLOW_LAYER_OUTER_L3_IPV4;
                        mask.ipv4 = flow_tcf_item_mask
                                (items, &rte_flow_item_ipv4_mask,
                                 &flow_tcf_mask_supported.ipv4,
@@ -2151,11 +2085,36 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                                next_protocol =
                                        ((const struct rte_flow_item_ipv4 *)
                                         (items->spec))->hdr.next_proto_id;
-                       if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
-                               ret = flow_tcf_validate_vxlan_decap_ipv4
-                                                               (items, error);
-                               if (ret < 0)
-                                       return ret;
+                       if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
+                               if (inner_etype != RTE_BE16(ETH_P_ALL) &&
+                                   inner_etype != RTE_BE16(ETH_P_IP))
+                                       return rte_flow_error_set
+                                               (error, EINVAL,
+                                                RTE_FLOW_ERROR_TYPE_ITEM,
+                                                items,
+                                                "inner eth_type conflict,"
+                                                " IPv4 is required");
+                               inner_etype = RTE_BE16(ETH_P_IP);
+                       } else if (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN) {
+                               if (vlan_etype != RTE_BE16(ETH_P_ALL) &&
+                                   vlan_etype != RTE_BE16(ETH_P_IP))
+                                       return rte_flow_error_set
+                                               (error, EINVAL,
+                                                RTE_FLOW_ERROR_TYPE_ITEM,
+                                                items,
+                                                "vlan eth_type conflict,"
+                                                " IPv4 is required");
+                               vlan_etype = RTE_BE16(ETH_P_IP);
+                       } else {
+                               if (outer_etype != RTE_BE16(ETH_P_ALL) &&
+                                   outer_etype != RTE_BE16(ETH_P_IP))
+                                       return rte_flow_error_set
+                                               (error, EINVAL,
+                                                RTE_FLOW_ERROR_TYPE_ITEM,
+                                                items,
+                                                "eth_type conflict,"
+                                                " IPv4 is required");
+                               outer_etype = RTE_BE16(ETH_P_IP);
                        }
                        break;
                case RTE_FLOW_ITEM_TYPE_IPV6:
@@ -2163,7 +2122,9 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                                                           error);
                        if (ret < 0)
                                return ret;
-                       item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+                       item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
+                                     MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+                                     MLX5_FLOW_LAYER_OUTER_L3_IPV6;
                        mask.ipv6 = flow_tcf_item_mask
                                (items, &rte_flow_item_ipv6_mask,
                                 &flow_tcf_mask_supported.ipv6,
@@ -2184,11 +2145,36 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                                next_protocol =
                                        ((const struct rte_flow_item_ipv6 *)
                                         (items->spec))->hdr.proto;
-                       if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
-                               ret = flow_tcf_validate_vxlan_decap_ipv6
-                                                               (items, error);
-                               if (ret < 0)
-                                       return ret;
+                       if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
+                               if (inner_etype != RTE_BE16(ETH_P_ALL) &&
+                                   inner_etype != RTE_BE16(ETH_P_IPV6))
+                                       return rte_flow_error_set
+                                               (error, EINVAL,
+                                                RTE_FLOW_ERROR_TYPE_ITEM,
+                                                items,
+                                                "inner eth_type conflict,"
+                                                " IPv6 is required");
+                               inner_etype = RTE_BE16(ETH_P_IPV6);
+                       } else if (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN) {
+                               if (vlan_etype != RTE_BE16(ETH_P_ALL) &&
+                                   vlan_etype != RTE_BE16(ETH_P_IPV6))
+                                       return rte_flow_error_set
+                                               (error, EINVAL,
+                                                RTE_FLOW_ERROR_TYPE_ITEM,
+                                                items,
+                                                "vlan eth_type conflict,"
+                                                " IPv6 is required");
+                               vlan_etype = RTE_BE16(ETH_P_IPV6);
+                       } else {
+                               if (outer_etype != RTE_BE16(ETH_P_ALL) &&
+                                   outer_etype != RTE_BE16(ETH_P_IPV6))
+                                       return rte_flow_error_set
+                                               (error, EINVAL,
+                                                RTE_FLOW_ERROR_TYPE_ITEM,
+                                                items,
+                                                "eth_type conflict,"
+                                                " IPv6 is required");
+                               outer_etype = RTE_BE16(ETH_P_IPV6);
                        }
                        break;
                case RTE_FLOW_ITEM_TYPE_UDP:
@@ -2196,7 +2182,9 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                                                          next_protocol, error);
                        if (ret < 0)
                                return ret;
-                       item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
+                       item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
+                                     MLX5_FLOW_LAYER_INNER_L4_UDP :
+                                     MLX5_FLOW_LAYER_OUTER_L4_UDP;
                        mask.udp = flow_tcf_item_mask
                                (items, &rte_flow_item_udp_mask,
                                 &flow_tcf_mask_supported.udp,
@@ -2205,12 +2193,12 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                                 error);
                        if (!mask.udp)
                                return -rte_errno;
-                       if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
-                               ret = flow_tcf_validate_vxlan_decap_udp
-                                                               (items, error);
-                               if (ret < 0)
-                                       return ret;
-                       }
+                       /*
+                        * Save the presumed outer UDP item for an extra check
+                        * in case a tunnel item is found later in the list.
+                        */
+                       if (!(item_flags & MLX5_FLOW_LAYER_TUNNEL))
+                               outer_udp = items;
                        break;
                case RTE_FLOW_ITEM_TYPE_TCP:
                        ret = mlx5_flow_validate_item_tcp
@@ -2220,7 +2208,9 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                                              error);
                        if (ret < 0)
                                return ret;
-                       item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
+                       item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
+                                     MLX5_FLOW_LAYER_INNER_L4_TCP :
+                                     MLX5_FLOW_LAYER_OUTER_L4_TCP;
                        mask.tcp = flow_tcf_item_mask
                                (items, &rte_flow_item_tcp_mask,
                                 &flow_tcf_mask_supported.tcp,
@@ -2231,13 +2221,12 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                                return -rte_errno;
                        break;
                case RTE_FLOW_ITEM_TYPE_VXLAN:
-                       if (!(action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP))
+                       if (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)
                                return rte_flow_error_set
                                        (error, ENOTSUP,
-                                        RTE_FLOW_ERROR_TYPE_ITEM,
-                                        items,
-                                        "vni pattern should be followed by"
-                                        " vxlan decapsulation action");
+                                        RTE_FLOW_ERROR_TYPE_ITEM, items,
+                                        "vxlan tunnel over vlan"
+                                        " is not supported");
                        ret = mlx5_flow_validate_item_vxlan(items,
                                                            item_flags, error);
                        if (ret < 0)
@@ -2259,6 +2248,45 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                                         mask.vxlan,
                                         "no support for partial or "
                                         "empty mask on \"vxlan.vni\" field");
+                       /*
+                        * The VNI item assumes a VXLAN tunnel; at least the
+                        * outer destination UDP port must be specified without
+                        * wildcards to allow the kernel to select the virtual
+                        * VXLAN device by port. An outer IPv4 or IPv6 item must
+                        * also be specified (wildcards or even a zero mask are
+                        * allowed) to let the driver know the tunnel IP version
+                        * and process UDP traffic correctly.
+                        */
+                       if (!(item_flags &
+                            (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
+                             MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
+                               return rte_flow_error_set
+                                                (error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 NULL,
+                                                 "no outer IP pattern found"
+                                                 " for vxlan tunnel");
+                       if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
+                               return rte_flow_error_set
+                                                (error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 NULL,
+                                                 "no outer UDP pattern found"
+                                                 " for vxlan tunnel");
+                       /*
+                        * All items preceding the tunnel item become outer
+                        * ones and we should do extra validation for them
+                        * due to tc limitations for tunnel outer parameters.
+                        * Currently only the outer UDP item requires an extra
+                        * check; use the saved pointer instead of rescanning
+                        * the item list.
+                        */
+                       assert(outer_udp);
+                       ret = flow_tcf_validate_vxlan_decap_udp
+                                               (outer_udp, error);
+                       if (ret < 0)
+                               return ret;
+                       /* Reset L4 protocol for inner parameters. */
+                       next_protocol = 0xff;
                        break;
                default:
                        return rte_flow_error_set(error, ENOTSUP,
@@ -2361,28 +2389,20 @@ flow_tcf_validate(struct rte_eth_dev *dev,
                                                  "no ethernet found in"
                                                  " pattern");
        }
-       if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) {
-               if (!(item_flags &
-                    (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
-                     MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
-                       return rte_flow_error_set(error, EINVAL,
-                                                 RTE_FLOW_ERROR_TYPE_ACTION,
-                                                 NULL,
-                                                 "no outer IP pattern found"
-                                                 " for vxlan decap action");
-               if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
-                       return rte_flow_error_set(error, EINVAL,
-                                                 RTE_FLOW_ERROR_TYPE_ACTION,
-                                                 NULL,
-                                                 "no outer UDP pattern found"
-                                                 " for vxlan decap action");
-               if (!(item_flags & MLX5_FLOW_LAYER_VXLAN))
-                       return rte_flow_error_set(error, EINVAL,
-                                                 RTE_FLOW_ERROR_TYPE_ACTION,
-                                                 NULL,
-                                                 "no VNI pattern found"
-                                                 " for vxlan decap action");
-       }
+       if ((action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) &&
+           !(item_flags & MLX5_FLOW_LAYER_VXLAN))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION,
+                                         NULL,
+                                         "no VNI pattern found"
+                                         " for vxlan decap action");
+       if ((action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) &&
+           (item_flags & MLX5_FLOW_LAYER_TUNNEL))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION,
+                                         NULL,
+                                         "vxlan encap not supported"
+                                         " for tunneled traffic");
        return 0;
 }
 
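After the validation rework above, a VXLAN tunnel rule must carry an outer IPv4 or IPv6 item and an outer UDP item with a fully specified destination port before the VXLAN/VNI item, while the decap action itself may be omitted. A hedged sketch of a pattern that satisfies these checks (attributes, port ids, UDP port and VNI values are illustrative):

    #include <rte_flow.h>
    #include <rte_byteorder.h>

    static int
    create_vxlan_decap_rule(uint16_t port_id, struct rte_flow_error *err)
    {
        struct rte_flow_attr attr = { .ingress = 1, .transfer = 1 };
        /* Outer UDP destination port must be exact-matched. */
        struct rte_flow_item_udp udp_spec = {
            .hdr.dst_port = RTE_BE16(4789),
        };
        struct rte_flow_item_udp udp_mask = {
            .hdr.dst_port = RTE_BE16(0xffff),
        };
        struct rte_flow_item_vxlan vxlan_spec = { .vni = { 0, 0, 1 } };
        struct rte_flow_item pattern[] = {
            { .type = RTE_FLOW_ITEM_TYPE_ETH },
            { .type = RTE_FLOW_ITEM_TYPE_IPV4 }, /* empty mask is allowed */
            { .type = RTE_FLOW_ITEM_TYPE_UDP,
              .spec = &udp_spec, .mask = &udp_mask },
            { .type = RTE_FLOW_ITEM_TYPE_VXLAN,
              .spec = &vxlan_spec, .mask = &rte_flow_item_vxlan_mask },
            { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        struct rte_flow_action_port_id peer = { .id = 1 }; /* example peer */
        struct rte_flow_action actions[] = {
            { .type = RTE_FLOW_ACTION_TYPE_VXLAN_DECAP },
            { .type = RTE_FLOW_ACTION_TYPE_PORT_ID, .conf = &peer },
            { .type = RTE_FLOW_ACTION_TYPE_END },
        };

        return rte_flow_create(port_id, &attr, pattern, actions, err) ? 0 : -1;
    }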
@@ -2393,17 +2413,21 @@ flow_tcf_validate(struct rte_eth_dev *dev,
  *   Pointer to the flow attributes.
  * @param[in] items
  *   Pointer to the list of items.
+ * @param[out] action_flags
+ *   Pointer to the detected actions.
  *
  * @return
  *   Maximum size of memory for items.
  */
 static int
 flow_tcf_get_items_size(const struct rte_flow_attr *attr,
-                       const struct rte_flow_item items[])
+                       const struct rte_flow_item items[],
+                       uint64_t *action_flags)
 {
        int size = 0;
 
        size += SZ_NLATTR_STRZ_OF("flower") +
+               SZ_NLATTR_TYPE_OF(uint16_t) + /* Outer ether type. */
                SZ_NLATTR_NEST + /* TCA_OPTIONS. */
                SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
        if (attr->group > 0)
@@ -2415,26 +2439,22 @@ flow_tcf_get_items_size(const struct rte_flow_attr *attr,
                case RTE_FLOW_ITEM_TYPE_PORT_ID:
                        break;
                case RTE_FLOW_ITEM_TYPE_ETH:
-                       size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
-                               SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
+                       size += SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
                                /* dst/src MAC addr and mask. */
                        break;
                case RTE_FLOW_ITEM_TYPE_VLAN:
-                       size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
-                               SZ_NLATTR_TYPE_OF(uint16_t) +
+                       size += SZ_NLATTR_TYPE_OF(uint16_t) +
                                /* VLAN Ether type. */
                                SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
                                SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
                        break;
                case RTE_FLOW_ITEM_TYPE_IPV4:
-                       size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
-                               SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+                       size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
                                SZ_NLATTR_TYPE_OF(uint32_t) * 4;
                                /* dst/src IP addr and mask. */
                        break;
                case RTE_FLOW_ITEM_TYPE_IPV6:
-                       size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
-                               SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+                       size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
                                SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 4;
                                /* dst/src IP addr and mask. */
                        break;
@@ -2450,6 +2470,16 @@ flow_tcf_get_items_size(const struct rte_flow_attr *attr,
                        break;
                case RTE_FLOW_ITEM_TYPE_VXLAN:
                        size += SZ_NLATTR_TYPE_OF(uint32_t);
+                       /*
+                        * There might be no VXLAN decap action in the action
+                        * list; nonetheless a VXLAN tunnel flow requires the
+                        * decap structure to be correctly applied to the VXLAN
+                        * device, so set the flag to create the structure.
+                        * The translation routine will not put the decap action
+                        * in the Netlink message if there is no actual action
+                        * in the list.
+                        */
+                       *action_flags |= MLX5_FLOW_ACTION_VXLAN_DECAP;
                        break;
                default:
                        DRV_LOG(WARNING,
@@ -2542,7 +2572,7 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
                              uint64_t *action_flags)
 {
        int size = 0;
-       uint64_t flags = 0;
+       uint64_t flags = *action_flags;
 
        size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
@@ -2642,27 +2672,6 @@ action_of_vlan:
        return size;
 }
 
-/**
- * Brand rtnetlink buffer with unique handle.
- *
- * This handle should be unique for a given network interface to avoid
- * collisions.
- *
- * @param nlh
- *   Pointer to Netlink message.
- * @param handle
- *   Unique 32-bit handle to use.
- */
-static void
-flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
-{
-       struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
-
-       tcm->tcm_handle = handle;
-       DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
-               (void *)nlh, handle);
-}
-
 /**
  * Prepare a flow object for Linux TC flower. It calculates the maximum size of
  * memory required, allocates the memory, initializes Netlink message headers
@@ -2679,7 +2688,7 @@ flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
  *
  * @return
  *   Pointer to mlx5_flow object on success,
- *   otherwise NULL and rte_ernno is set.
+ *   otherwise NULL and rte_errno is set.
  */
 static struct mlx5_flow *
 flow_tcf_prepare(const struct rte_flow_attr *attr,
@@ -2698,7 +2707,7 @@ flow_tcf_prepare(const struct rte_flow_attr *attr,
        struct tcmsg *tcm;
        uint8_t *sp, *tun = NULL;
 
-       size += flow_tcf_get_items_size(attr, items);
+       size += flow_tcf_get_items_size(attr, items, &action_flags);
        size += flow_tcf_get_actions_and_size(actions, &action_flags);
        dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
        if (!dev_flow) {
@@ -2753,20 +2762,6 @@ flow_tcf_prepare(const struct rte_flow_attr *attr,
                dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_DECAP;
        else if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP)
                dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_ENCAP;
-       /*
-        * Generate a reasonably unique handle based on the address of the
-        * target buffer.
-        *
-        * This is straightforward on 32-bit systems where the flow pointer can
-        * be used directly. Otherwise, its least significant part is taken
-        * after shifting it by the previous power of two of the pointed buffer
-        * size.
-        */
-       if (sizeof(dev_flow) <= 4)
-               flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
-       else
-               flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
-                                      rte_log2_u32(rte_align32prevpow2(size)));
        return dev_flow;
 }
 
@@ -3054,7 +3049,7 @@ flow_tcf_vxlan_encap_parse(const struct rte_flow_action *action,
  *   Pointer to the error structure.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
@@ -3098,10 +3093,11 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
        struct nlmsghdr *nlh = dev_flow->tcf.nlh;
        struct tcmsg *tcm = dev_flow->tcf.tcm;
        uint32_t na_act_index_cur;
-       bool eth_type_set = 0;
-       bool vlan_present = 0;
-       bool vlan_eth_type_set = 0;
+       rte_be16_t inner_etype = RTE_BE16(ETH_P_ALL);
+       rte_be16_t outer_etype = RTE_BE16(ETH_P_ALL);
+       rte_be16_t vlan_etype = RTE_BE16(ETH_P_ALL);
        bool ip_proto_set = 0;
+       bool tunnel_outer = 0;
        struct nlattr *na_flower;
        struct nlattr *na_flower_act;
        struct nlattr *na_vlan_id = NULL;
@@ -3115,6 +3111,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                switch (dev_flow->tcf.tunnel->type) {
                case FLOW_TCF_TUNACT_VXLAN_DECAP:
                        decap.vxlan = dev_flow->tcf.vxlan_decap;
+                       tunnel_outer = 1;
                        break;
                case FLOW_TCF_TUNACT_VXLAN_ENCAP:
                        encap.vxlan = dev_flow->tcf.vxlan_encap;
@@ -3136,8 +3133,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
         * Priority cannot be zero to prevent the kernel from picking one
         * automatically.
         */
-       tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
-                                 RTE_BE16(ETH_P_ALL));
+       tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16, outer_etype);
        if (attr->group > 0)
                mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
        mnl_attr_put_strz(nlh, TCA_KIND, "flower");
@@ -3169,7 +3165,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                        tcm->tcm_ifindex = ptoi[i].ifindex;
                        break;
                case RTE_FLOW_ITEM_TYPE_ETH:
-                       item_flags |= (item_flags & MLX5_FLOW_LAYER_VXLAN) ?
+                       item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
                                      MLX5_FLOW_LAYER_INNER_L2 :
                                      MLX5_FLOW_LAYER_OUTER_L2;
                        mask.eth = flow_tcf_item_mask
@@ -3182,19 +3178,19 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                        if (mask.eth == &flow_tcf_mask_empty.eth)
                                break;
                        spec.eth = items->spec;
-                       if (decap.vxlan &&
-                           !(item_flags & MLX5_FLOW_LAYER_VXLAN)) {
+                       if (mask.eth->type) {
+                               if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
+                                       inner_etype = spec.eth->type;
+                               else
+                                       outer_etype = spec.eth->type;
+                       }
+                       if (tunnel_outer) {
                                DRV_LOG(WARNING,
-                                       "outer L2 addresses cannot be forced"
-                                       " for vxlan decapsulation, parameter"
-                                       " ignored");
+                                       "outer L2 addresses cannot be"
+                                       " forced as outer ones for tunnel,"
+                                       " parameter is ignored");
                                break;
                        }
-                       if (mask.eth->type) {
-                               mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
-                                                spec.eth->type);
-                               eth_type_set = 1;
-                       }
                        if (!is_zero_ether_addr(&mask.eth->dst)) {
                                mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
                                             ETHER_ADDR_LEN,
@@ -3216,6 +3212,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                case RTE_FLOW_ITEM_TYPE_VLAN:
                        assert(!encap.hdr);
                        assert(!decap.hdr);
+                       assert(!tunnel_outer);
                        item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
                        mask.vlan = flow_tcf_item_mask
                                (items, &rte_flow_item_vlan_mask,
@@ -3224,20 +3221,14 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                                 sizeof(flow_tcf_mask_supported.vlan),
                                 error);
                        assert(mask.vlan);
-                       if (!eth_type_set)
-                               mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
-                                                RTE_BE16(ETH_P_8021Q));
-                       eth_type_set = 1;
-                       vlan_present = 1;
                        if (mask.vlan == &flow_tcf_mask_empty.vlan)
                                break;
                        spec.vlan = items->spec;
-                       if (mask.vlan->inner_type) {
-                               mnl_attr_put_u16(nlh,
-                                                TCA_FLOWER_KEY_VLAN_ETH_TYPE,
-                                                spec.vlan->inner_type);
-                               vlan_eth_type_set = 1;
-                       }
+                       assert(outer_etype == RTE_BE16(ETH_P_ALL) ||
+                              outer_etype == RTE_BE16(ETH_P_8021Q));
+                       outer_etype = RTE_BE16(ETH_P_8021Q);
+                       if (mask.vlan->inner_type)
+                               vlan_etype = spec.vlan->inner_type;
                        if (mask.vlan->tci & RTE_BE16(0xe000))
                                mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
                                                (rte_be_to_cpu_16
@@ -3250,7 +3241,9 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                        assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
                        break;
                case RTE_FLOW_ITEM_TYPE_IPV4:
-                       item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+                       item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
+                                     MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+                                     MLX5_FLOW_LAYER_OUTER_L3_IPV4;
                        mask.ipv4 = flow_tcf_item_mask
                                (items, &rte_flow_item_ipv4_mask,
                                 &flow_tcf_mask_supported.ipv4,
@@ -3258,57 +3251,83 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                                 sizeof(flow_tcf_mask_supported.ipv4),
                                 error);
                        assert(mask.ipv4);
+                       if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
+                               assert(inner_etype == RTE_BE16(ETH_P_ALL) ||
+                                      inner_etype == RTE_BE16(ETH_P_IP));
+                               inner_etype = RTE_BE16(ETH_P_IP);
+                       } else if (outer_etype == RTE_BE16(ETH_P_8021Q)) {
+                               assert(vlan_etype == RTE_BE16(ETH_P_ALL) ||
+                                      vlan_etype == RTE_BE16(ETH_P_IP));
+                               vlan_etype = RTE_BE16(ETH_P_IP);
+                       } else {
+                               assert(outer_etype == RTE_BE16(ETH_P_ALL) ||
+                                      outer_etype == RTE_BE16(ETH_P_IP));
+                               outer_etype = RTE_BE16(ETH_P_IP);
+                       }
                        spec.ipv4 = items->spec;
-                       if (!decap.vxlan) {
-                               if (!eth_type_set ||
-                                   (!vlan_eth_type_set && vlan_present))
-                                       mnl_attr_put_u16
-                                               (nlh,
-                                                vlan_present ?
-                                                TCA_FLOWER_KEY_VLAN_ETH_TYPE :
-                                                TCA_FLOWER_KEY_ETH_TYPE,
-                                                RTE_BE16(ETH_P_IP));
-                               eth_type_set = 1;
-                               vlan_eth_type_set = 1;
-                               if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
+                       if (!tunnel_outer && mask.ipv4->hdr.next_proto_id) {
+                               /*
+                                * No way to set IP protocol for outer tunnel
+                                * layers. Usually it is fixed, for example,
+                                * to UDP for VXLAN/GPE.
+                                */
+                               assert(spec.ipv4); /* Mask is not empty. */
+                               mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+                                               spec.ipv4->hdr.next_proto_id);
+                               ip_proto_set = 1;
+                       }
+                       if (mask.ipv4 == &flow_tcf_mask_empty.ipv4 ||
+                            (!mask.ipv4->hdr.src_addr &&
+                             !mask.ipv4->hdr.dst_addr)) {
+                               if (!tunnel_outer)
                                        break;
-                               if (mask.ipv4->hdr.next_proto_id) {
-                                       mnl_attr_put_u8
-                                               (nlh, TCA_FLOWER_KEY_IP_PROTO,
-                                                spec.ipv4->hdr.next_proto_id);
-                                       ip_proto_set = 1;
-                               }
-                       } else {
-                               assert(mask.ipv4 != &flow_tcf_mask_empty.ipv4);
+                               /*
+                                * For the tunnel outer layer we must set the
+                                * outer IP key anyway, even if the
+                                * specification/mask is empty. There is no
+                                * other way to tell the kernel about the
+                                * outer layer protocol.
+                                */
+                               mnl_attr_put_u32
+                                       (nlh, TCA_FLOWER_KEY_ENC_IPV4_SRC,
+                                        mask.ipv4->hdr.src_addr);
+                               mnl_attr_put_u32
+                                       (nlh, TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
+                                        mask.ipv4->hdr.src_addr);
+                               assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
+                               break;
                        }
                        if (mask.ipv4->hdr.src_addr) {
                                mnl_attr_put_u32
-                                       (nlh, decap.vxlan ?
+                                       (nlh, tunnel_outer ?
                                         TCA_FLOWER_KEY_ENC_IPV4_SRC :
                                         TCA_FLOWER_KEY_IPV4_SRC,
                                         spec.ipv4->hdr.src_addr);
                                mnl_attr_put_u32
-                                       (nlh, decap.vxlan ?
+                                       (nlh, tunnel_outer ?
                                         TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK :
                                         TCA_FLOWER_KEY_IPV4_SRC_MASK,
                                         mask.ipv4->hdr.src_addr);
                        }
                        if (mask.ipv4->hdr.dst_addr) {
                                mnl_attr_put_u32
-                                       (nlh, decap.vxlan ?
+                                       (nlh, tunnel_outer ?
                                         TCA_FLOWER_KEY_ENC_IPV4_DST :
                                         TCA_FLOWER_KEY_IPV4_DST,
                                         spec.ipv4->hdr.dst_addr);
                                mnl_attr_put_u32
-                                       (nlh, decap.vxlan ?
+                                       (nlh, tunnel_outer ?
                                         TCA_FLOWER_KEY_ENC_IPV4_DST_MASK :
                                         TCA_FLOWER_KEY_IPV4_DST_MASK,
                                         mask.ipv4->hdr.dst_addr);
                        }
                        assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
                        break;
-               case RTE_FLOW_ITEM_TYPE_IPV6:
-                       item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+               case RTE_FLOW_ITEM_TYPE_IPV6: {
+                       bool ipv6_src, ipv6_dst;
+
+                       item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
+                                     MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+                                     MLX5_FLOW_LAYER_OUTER_L3_IPV6;
                        mask.ipv6 = flow_tcf_item_mask
                                (items, &rte_flow_item_ipv6_mask,
                                 &flow_tcf_mask_supported.ipv6,
@@ -3316,48 +3335,75 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                                 sizeof(flow_tcf_mask_supported.ipv6),
                                 error);
                        assert(mask.ipv6);
+                       if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
+                               assert(inner_etype == RTE_BE16(ETH_P_ALL) ||
+                                      inner_etype == RTE_BE16(ETH_P_IPV6));
+                               inner_etype = RTE_BE16(ETH_P_IPV6);
+                       } else if (outer_etype == RTE_BE16(ETH_P_8021Q)) {
+                               assert(vlan_etype == RTE_BE16(ETH_P_ALL) ||
+                                      vlan_etype == RTE_BE16(ETH_P_IPV6));
+                               vlan_etype = RTE_BE16(ETH_P_IPV6);
+                       } else {
+                               assert(outer_etype == RTE_BE16(ETH_P_ALL) ||
+                                      outer_etype == RTE_BE16(ETH_P_IPV6));
+                               outer_etype = RTE_BE16(ETH_P_IPV6);
+                       }
                        spec.ipv6 = items->spec;
-                       if (!decap.vxlan) {
-                               if (!eth_type_set ||
-                                   (!vlan_eth_type_set && vlan_present))
-                                       mnl_attr_put_u16
-                                               (nlh,
-                                                vlan_present ?
-                                                TCA_FLOWER_KEY_VLAN_ETH_TYPE :
-                                                TCA_FLOWER_KEY_ETH_TYPE,
-                                                RTE_BE16(ETH_P_IPV6));
-                               eth_type_set = 1;
-                               vlan_eth_type_set = 1;
-                               if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
+                       if (!tunnel_outer && mask.ipv6->hdr.proto) {
+                               /*
+                                * No way to set IP protocol for outer tunnel
+                                * layers. Usually it is fixed, for example,
+                                * to UDP for VXLAN/GPE.
+                                */
+                               assert(spec.ipv6); /* Mask is not empty. */
+                               mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+                                               spec.ipv6->hdr.proto);
+                               ip_proto_set = 1;
+                       }
+                       ipv6_dst = !IN6_IS_ADDR_UNSPECIFIED
+                                               (mask.ipv6->hdr.dst_addr);
+                       ipv6_src = !IN6_IS_ADDR_UNSPECIFIED
+                                               (mask.ipv6->hdr.src_addr);
+                       if (mask.ipv6 == &flow_tcf_mask_empty.ipv6 ||
+                            (!ipv6_dst && !ipv6_src)) {
+                               if (!tunnel_outer)
                                        break;
-                               if (mask.ipv6->hdr.proto) {
-                                       mnl_attr_put_u8
-                                               (nlh, TCA_FLOWER_KEY_IP_PROTO,
-                                                spec.ipv6->hdr.proto);
-                                       ip_proto_set = 1;
-                               }
-                       } else {
-                               assert(mask.ipv6 != &flow_tcf_mask_empty.ipv6);
+                               /*
+                                * For the tunnel outer part we must set the
+                                * outer IP key anyway, even if the
+                                * specification/mask is empty. There is no
+                                * other way to tell the kernel about the
+                                * outer layer protocol.
+                                */
+                               mnl_attr_put(nlh,
+                                            TCA_FLOWER_KEY_ENC_IPV6_SRC,
+                                            IPV6_ADDR_LEN,
+                                            mask.ipv6->hdr.src_addr);
+                               mnl_attr_put(nlh,
+                                            TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
+                                            IPV6_ADDR_LEN,
+                                            mask.ipv6->hdr.src_addr);
+                               assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
+                               break;
                        }
-                       if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
-                               mnl_attr_put(nlh, decap.vxlan ?
+                       if (ipv6_src) {
+                               mnl_attr_put(nlh, tunnel_outer ?
                                             TCA_FLOWER_KEY_ENC_IPV6_SRC :
                                             TCA_FLOWER_KEY_IPV6_SRC,
                                             IPV6_ADDR_LEN,
                                             spec.ipv6->hdr.src_addr);
-                               mnl_attr_put(nlh, decap.vxlan ?
+                               mnl_attr_put(nlh, tunnel_outer ?
                                             TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK :
                                             TCA_FLOWER_KEY_IPV6_SRC_MASK,
                                             IPV6_ADDR_LEN,
                                             mask.ipv6->hdr.src_addr);
                        }
-                       if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
-                               mnl_attr_put(nlh, decap.vxlan ?
+                       if (ipv6_dst) {
+                               mnl_attr_put(nlh, tunnel_outer ?
                                             TCA_FLOWER_KEY_ENC_IPV6_DST :
                                             TCA_FLOWER_KEY_IPV6_DST,
                                             IPV6_ADDR_LEN,
                                             spec.ipv6->hdr.dst_addr);
-                               mnl_attr_put(nlh, decap.vxlan ?
+                               mnl_attr_put(nlh, tunnel_outer ?
                                             TCA_FLOWER_KEY_ENC_IPV6_DST_MASK :
                                             TCA_FLOWER_KEY_IPV6_DST_MASK,
                                             IPV6_ADDR_LEN,
@@ -3365,8 +3411,11 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                        }
                        assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
                        break;
+               }
                case RTE_FLOW_ITEM_TYPE_UDP:
-                       item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
+                       item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
+                                     MLX5_FLOW_LAYER_INNER_L4_UDP :
+                                     MLX5_FLOW_LAYER_OUTER_L4_UDP;
                        mask.udp = flow_tcf_item_mask
                                (items, &rte_flow_item_udp_mask,
                                 &flow_tcf_mask_supported.udp,
@@ -3375,7 +3424,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                                 error);
                        assert(mask.udp);
                        spec.udp = items->spec;
-                       if (!decap.vxlan) {
+                       if (!tunnel_outer) {
                                if (!ip_proto_set)
                                        mnl_attr_put_u8
                                                (nlh, TCA_FLOWER_KEY_IP_PROTO,
@@ -3390,24 +3439,24 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                        }
                        if (mask.udp->hdr.src_port) {
                                mnl_attr_put_u16
-                                       (nlh, decap.vxlan ?
+                                       (nlh, tunnel_outer ?
                                         TCA_FLOWER_KEY_ENC_UDP_SRC_PORT :
                                         TCA_FLOWER_KEY_UDP_SRC,
                                         spec.udp->hdr.src_port);
                                mnl_attr_put_u16
-                                       (nlh, decap.vxlan ?
+                                       (nlh, tunnel_outer ?
                                         TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK :
                                         TCA_FLOWER_KEY_UDP_SRC_MASK,
                                         mask.udp->hdr.src_port);
                        }
                        if (mask.udp->hdr.dst_port) {
                                mnl_attr_put_u16
-                                       (nlh, decap.vxlan ?
+                                       (nlh, tunnel_outer ?
                                         TCA_FLOWER_KEY_ENC_UDP_DST_PORT :
                                         TCA_FLOWER_KEY_UDP_DST,
                                         spec.udp->hdr.dst_port);
                                mnl_attr_put_u16
-                                       (nlh, decap.vxlan ?
+                                       (nlh, tunnel_outer ?
                                         TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK :
                                         TCA_FLOWER_KEY_UDP_DST_MASK,
                                         mask.udp->hdr.dst_port);
@@ -3415,7 +3464,9 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                        assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
                        break;
                case RTE_FLOW_ITEM_TYPE_TCP:
-                       item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
+                       item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ?
+                                     MLX5_FLOW_LAYER_INNER_L4_TCP :
+                                     MLX5_FLOW_LAYER_OUTER_L4_TCP;
                        mask.tcp = flow_tcf_item_mask
                                (items, &rte_flow_item_tcp_mask,
                                 &flow_tcf_mask_supported.tcp,
@@ -3459,6 +3510,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                        break;
                case RTE_FLOW_ITEM_TYPE_VXLAN:
                        assert(decap.vxlan);
+                       tunnel_outer = 0;
                        item_flags |= MLX5_FLOW_LAYER_VXLAN;
                        spec.vxlan = items->spec;
                        mnl_attr_put_u32(nlh,
@@ -3472,6 +3524,34 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                                                  NULL, "item not supported");
                }
        }
+       /*
+        * Set the ether_type flower key and tc rule protocol:
+        * - if there is neither a VLAN nor a VXLAN item, the key is taken
+        *   from the eth item directly or deduced from the L3 items.
+        * - if there is a vlan item, the key is fixed to 802.1q.
+        * - if there is a vxlan item, the key is set to the inner tunnel type.
+        * - simultaneous vlan and vxlan items are prohibited.
+        */
+       if (outer_etype != RTE_BE16(ETH_P_ALL)) {
+               tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
+                                          outer_etype);
+               if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
+                       if (inner_etype != RTE_BE16(ETH_P_ALL))
+                               mnl_attr_put_u16(nlh,
+                                                TCA_FLOWER_KEY_ETH_TYPE,
+                                                inner_etype);
+               } else {
+                       mnl_attr_put_u16(nlh,
+                                        TCA_FLOWER_KEY_ETH_TYPE,
+                                        outer_etype);
+                       if (outer_etype == RTE_BE16(ETH_P_8021Q) &&
+                           vlan_etype != RTE_BE16(ETH_P_ALL))
+                               mnl_attr_put_u16(nlh,
+                                                TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+                                                vlan_etype);
+               }
+               assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len);
+       }
        na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
        na_act_index_cur = 1;
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
@@ -3505,6 +3585,10 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
                                        mnl_attr_get_payload
                                        (mnl_nlmsg_get_payload_tail
                                                (nlh)))->ifindex;
+                       } else if (decap.hdr) {
+                               assert(dev_flow->tcf.tunnel);
+                               dev_flow->tcf.tunnel->ifindex_ptr =
+                                       (unsigned int *)&tcm->tcm_ifindex;
                        }
                        mnl_attr_put(nlh, TCA_MIRRED_PARMS,
                                     sizeof(struct tc_mirred),
@@ -4266,8 +4350,8 @@ flow_tcf_collect_vxlan_cb(const struct nlmsghdr *nlh, void *arg)
 
 /**
  * Cleanup the outer interface. Removes all found vxlan devices
- * attached to specified index, flushes the meigh and local IP
- * datavase.
+ * attached to specified index, flushes the neigh and local IP
+ * database.
  *
  * @param[in] tcf
  *   Context object initialized by mlx5_flow_tcf_context_create().
@@ -4815,6 +4899,7 @@ flow_tcf_vtep_create(struct mlx5_flow_tcf_context *tcf,
                 * when we do not need it anymore.
                 */
                vtep->created = 1;
+               vtep->waitreg = 1;
        }
        /* Try to get ifindex of created of pre-existing device. */
        ret = if_nametoindex(name);
@@ -5240,6 +5325,7 @@ flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
        struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
        struct mlx5_flow *dev_flow;
        struct nlmsghdr *nlh;
+       struct tcmsg *tcm;
 
        if (!flow)
                return;
@@ -5260,10 +5346,53 @@ flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
                                dev_flow);
                        dev_flow->tcf.tunnel->vtep = NULL;
                }
+               /* Cleanup the rule handle value. */
+               tcm = mnl_nlmsg_get_payload(nlh);
+               tcm->tcm_handle = 0;
                dev_flow->tcf.applied = 0;
        }
 }
 
+/**
+ * Fetch the applied rule handle. This is a callback routine called by
+ * libmnl mnl_cb_run() in a loop for every message in the received packet.
+ * When the NLM_F_ECHO flag is specified, the kernel sends the created
+ * rule descriptor back to the application and we can retrieve the
+ * actual rule handle from the updated descriptor.
+ *
+ * @param[in] nlh
+ *   Pointer to reply header.
+ * @param[in, out] arg
+ *   Context pointer for this callback.
+ *
+ * @return
+ *   A positive, nonzero value on success (required by libmnl
+ *   to continue message processing).
+ */
+static int
+flow_tcf_collect_apply_cb(const struct nlmsghdr *nlh, void *arg)
+{
+       struct nlmsghdr *nlhrq = arg;
+       struct tcmsg *tcmrq = mnl_nlmsg_get_payload(nlhrq);
+       struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
+       struct nlattr *na;
+
+       if (nlh->nlmsg_type != RTM_NEWTFILTER ||
+           nlh->nlmsg_seq != nlhrq->nlmsg_seq)
+               return 1;
+       mnl_attr_for_each(na, nlh, sizeof(*tcm)) {
+               switch (mnl_attr_get_type(na)) {
+               case TCA_KIND:
+                       if (strcmp(mnl_attr_get_payload(na), "flower")) {
+                               /* Not flower filter, drop entire message. */
+                               return 1;
+                       }
+                       tcmrq->tcm_handle = tcm->tcm_handle;
+                       return 1;
+               }
+       }
+       return 1;
+}
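
A minimal sketch of how the echoed rule descriptor reaches this collector
through libmnl (the helper name send_and_collect and the socket handling are
illustrative assumptions for this example only; the driver's real send and
receive path is flow_tcf_nl_ack()):

#include <errno.h>
#include <libmnl/libmnl.h>

static int
send_and_collect(struct mnl_socket *nl, struct nlmsghdr *nlhrq)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	unsigned int portid = mnl_socket_get_portid(nl);
	ssize_t len;

	if (mnl_socket_sendto(nl, nlhrq, nlhrq->nlmsg_len) < 0)
		return -errno;
	/* One datagram may carry several netlink messages; mnl_cb_run()
	 * walks them and invokes the callback once per message. */
	len = mnl_socket_recvfrom(nl, buf, sizeof(buf));
	if (len < 0)
		return -errno;
	return mnl_cb_run(buf, len, nlhrq->nlmsg_seq, portid,
			  flow_tcf_collect_apply_cb, nlhrq);
}
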
 /**
  * Apply flow to E-Switch by sending Netlink message.
  *
@@ -5275,7 +5404,7 @@ flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
  *   Pointer to the error structure.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_ernno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
@@ -5285,6 +5414,10 @@ flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
        struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
        struct mlx5_flow *dev_flow;
        struct nlmsghdr *nlh;
+       struct tcmsg *tcm;
+       uint64_t start = 0;
+       uint64_t twait = 0;
+       int ret;
 
        dev_flow = LIST_FIRST(&flow->dev_flows);
        /* E-Switch flow can't be expanded. */
@@ -5293,7 +5426,11 @@ flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
                return 0;
        nlh = dev_flow->tcf.nlh;
        nlh->nlmsg_type = RTM_NEWTFILTER;
-       nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+       nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
+                          NLM_F_EXCL | NLM_F_ECHO;
+       tcm = mnl_nlmsg_get_payload(nlh);
+       /* Allow kernel to assign handle on its own. */
+       tcm->tcm_handle = 0;
        if (dev_flow->tcf.tunnel) {
                /*
                 * Replace the interface index, target for
@@ -5313,8 +5450,52 @@ flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
                                dev_flow->tcf.tunnel->ifindex_org);
                *dev_flow->tcf.tunnel->ifindex_ptr =
                        dev_flow->tcf.tunnel->vtep->ifindex;
+               if (dev_flow->tcf.tunnel->vtep->waitreg) {
+                       /* Clear wait flag for VXLAN port registration. */
+                       dev_flow->tcf.tunnel->vtep->waitreg = 0;
+                       twait = rte_get_timer_hz();
+                       assert(twait > MS_PER_S);
+                       twait = twait * MLX5_VXLAN_WAIT_PORT_REG_MS;
+                       twait = twait / MS_PER_S;
+                       start = rte_get_timer_cycles();
+               }
        }
-       if (!flow_tcf_nl_ack(ctx, nlh, NULL, NULL)) {
+       /*
+        * Kernel creates the VXLAN devices and registers UDP ports to
+        * be hardware offloaded within the NIC kernel drivers. The
+        * registration process is performed in the context of a working
+        * kernel thread, so race conditions might happen. The VXLAN
+        * device is created and success is returned to the calling
+        * application, but the UDP port registration process may not be
+        * completed yet. The next applied rule may then be rejected by
+        * the driver with an ENOTSUP code. We are going to wait a bit,
+        * allowing the registration process to complete. The waiting is
+        * performed once after the device has been created.
+        */
+       do {
+               struct timespec onems;
+
+               ret = flow_tcf_nl_ack(ctx, nlh,
+                                     flow_tcf_collect_apply_cb, nlh);
+               if (!ret || ret != -ENOTSUP || !twait)
+                       break;
+               /* Wait one millisecond and try again till timeout. */
+               onems.tv_sec = 0;
+               onems.tv_nsec = NS_PER_S / MS_PER_S;
+               nanosleep(&onems, 0);
+               if ((rte_get_timer_cycles() - start) > twait) {
+                       /* Timeout elapsed, try once more and exit. */
+                       twait = 0;
+               }
+       } while (true);
+       if (!ret) {
+               if (!tcm->tcm_handle) {
+                       flow_tcf_remove(dev, flow);
+                       return rte_flow_error_set
+                               (error, ENOENT,
+                                RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                                "netlink: rule zero handle returned");
+               }
                dev_flow->tcf.applied = 1;
                if (*dev_flow->tcf.ptc_flags & TCA_CLS_FLAGS_SKIP_SW)
                        return 0;
index 81ec59d..409e1cd 100644 (file)
@@ -121,13 +121,13 @@ flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
        struct mlx5_flow_counter *cnt;
        int ret;
 
-       LIST_FOREACH(cnt, &priv->flow_counters, next) {
-               if (!cnt->shared || cnt->shared != shared)
-                       continue;
-               if (cnt->id != id)
-                       continue;
-               cnt->ref_cnt++;
-               return cnt;
+       if (shared) {
+               LIST_FOREACH(cnt, &priv->flow_counters, next) {
+                       if (cnt->shared && cnt->id == id) {
+                               cnt->ref_cnt++;
+                               return cnt;
+                       }
+               }
        }
        cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
        if (!cnt) {
index 183da0e..10b6ce0 100644 (file)
@@ -881,12 +881,15 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
                attr.wq.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
                attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
        }
-#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
        if (config->hw_padding) {
+#if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)
                attr.wq.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
                attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
-       }
+#elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)
+               attr.wq.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING;
+               attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
 #endif
+       }
 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
        attr.wq.mlx5 = (struct mlx5dv_wq_init_attr){
                .comp_mask = 0,
@@ -1179,6 +1182,7 @@ mlx5_mprq_free_mp(struct rte_eth_dev *dev)
                        continue;
                rxq->mprq_mp = NULL;
        }
+       priv->mprq_mp = NULL;
        return 0;
 }
 
index f47d327..75194a3 100644 (file)
@@ -733,10 +733,6 @@ mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
  *   Pointer to the Tx queue.
  * @param buf
  *   Pointer to the mbuf.
- * @param tso
- *   TSO offloads enabled.
- * @param vlan
- *   VLAN offloads enabled
  * @param offsets
  *   Pointer to the SWP header offsets.
  * @param swp_types
index b330bf3..da76b0d 100644 (file)
@@ -732,6 +732,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
        hv->chim_res  = &vmbus->resource[HV_SEND_BUF_MAP];
        hv->port_id = eth_dev->data->port_id;
        hv->latency = HN_CHAN_LATENCY_NS;
+       hv->max_queues = 1;
 
        err = hn_parse_args(eth_dev);
        if (err)
@@ -770,6 +771,10 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
        if (err)
                goto failed;
 
+       /* Multi-queue requires later versions of Windows Server */
+       if (hv->nvs_ver < NVS_VERSION_5)
+               return 0;
+
        max_chan = rte_vmbus_max_channels(vmbus);
        PMD_INIT_LOG(DEBUG, "VMBus max channels %d", max_chan);
        if (max_chan <= 0)
@@ -786,7 +791,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
 
                err = hn_vf_add(eth_dev, hv);
                if (err)
-                       goto failed;
+                       hv->vf_present = 0;
        }
 
        return 0;
@@ -794,6 +799,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
 failed:
        PMD_INIT_LOG(NOTICE, "device init failed");
 
+       hn_tx_pool_uninit(eth_dev);
        hn_detach(hv);
        return err;
 }
@@ -816,6 +822,7 @@ eth_hn_dev_uninit(struct rte_eth_dev *eth_dev)
        eth_dev->rx_pkt_burst = NULL;
 
        hn_detach(hv);
+       hn_tx_pool_uninit(eth_dev);
        rte_vmbus_chan_close(hv->primary->chan);
        rte_free(hv->primary);
        rte_eth_dev_owner_delete(hv->owner.id);
index 9690c5f..d58770e 100644 (file)
@@ -326,9 +326,9 @@ hn_nvs_conf_ndis(struct hn_data *hv, unsigned int mtu)
        conf.mtu = mtu + ETHER_HDR_LEN;
        conf.caps = NVS_NDIS_CONF_VLAN;
 
-       /* TODO enable SRIOV */
-       //if (hv->nvs_ver >= NVS_VERSION_5)
-       //      conf.caps |= NVS_NDIS_CONF_SRIOV;
+       /* enable SRIOV */
+       if (hv->nvs_ver >= NVS_VERSION_5)
+               conf.caps |= NVS_NDIS_CONF_SRIOV;
 
        /* NOTE: No response. */
        error = hn_nvs_req_send(hv, &conf, sizeof(conf));
index f4a3664..487f764 100644 (file)
@@ -199,6 +199,17 @@ hn_tx_pool_init(struct rte_eth_dev *dev)
        return 0;
 }
 
+void
+hn_tx_pool_uninit(struct rte_eth_dev *dev)
+{
+       struct hn_data *hv = dev->data->dev_private;
+
+       if (hv->tx_pool) {
+               rte_mempool_free(hv->tx_pool);
+               hv->tx_pool = NULL;
+       }
+}
+
 static void hn_reset_txagg(struct hn_tx_queue *txq)
 {
        txq->agg_szleft = txq->agg_szmax;
index e1072c7..a6516c1 100644 (file)
@@ -149,6 +149,7 @@ uint16_t hn_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                      uint16_t nb_pkts);
 
 int    hn_tx_pool_init(struct rte_eth_dev *dev);
+void   hn_tx_pool_uninit(struct rte_eth_dev *dev);
 int    hn_dev_link_update(struct rte_eth_dev *dev, int wait);
 int    hn_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
                              uint16_t nb_desc, unsigned int socket_id,
index 0e33be1..eda19b2 100644 (file)
@@ -35,6 +35,52 @@ static inline int qede_alloc_rx_buffer(struct qede_rx_queue *rxq)
        return 0;
 }
 
+#define QEDE_MAX_BULK_ALLOC_COUNT 512
+
+static inline int qede_alloc_rx_bulk_mbufs(struct qede_rx_queue *rxq, int count)
+{
+       void *obj_p[QEDE_MAX_BULK_ALLOC_COUNT] __rte_cache_aligned;
+       struct rte_mbuf *mbuf = NULL;
+       struct eth_rx_bd *rx_bd;
+       dma_addr_t mapping;
+       int i, ret = 0;
+       uint16_t idx;
+
+       idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq);
+
+       if (count > QEDE_MAX_BULK_ALLOC_COUNT)
+               count = QEDE_MAX_BULK_ALLOC_COUNT;
+
+       ret = rte_mempool_get_bulk(rxq->mb_pool, obj_p, count);
+       if (unlikely(ret)) {
+               PMD_RX_LOG(ERR, rxq,
+                          "Failed to allocate %d rx buffers "
+                           "sw_rx_prod %u sw_rx_cons %u mp entries %u free %u",
+                           count, idx, rxq->sw_rx_cons & NUM_RX_BDS(rxq),
+                           rte_mempool_avail_count(rxq->mb_pool),
+                           rte_mempool_in_use_count(rxq->mb_pool));
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < count; i++) {
+               mbuf = obj_p[i];
+               if (likely(i < count - 1))
+                       rte_prefetch0(obj_p[i + 1]);
+
+               idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq);
+               rxq->sw_rx_ring[idx].mbuf = mbuf;
+               rxq->sw_rx_ring[idx].page_offset = 0;
+               mapping = rte_mbuf_data_iova_default(mbuf);
+               rx_bd = (struct eth_rx_bd *)
+                       ecore_chain_produce(&rxq->rx_bd_ring);
+               rx_bd->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
+               rx_bd->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
+               rxq->sw_rx_prod++;
+       }
+
+       return 0;
+}
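
The refill above relies on rte_mempool_get_bulk() being all-or-nothing: on
failure no mbuf is dequeued, so the Rx path can simply retry the whole refill
on a later poll without leaking buffers. A minimal standalone sketch of that
contract (the pool name and sizes are illustrative assumptions, not taken
from the driver):

#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

int
main(int argc, char **argv)
{
	void *objs[32];
	struct rte_mempool *mp;

	if (rte_eal_init(argc, argv) < 0)
		return 1;
	mp = rte_pktmbuf_pool_create("sketch_pool", 1024, 0, 0,
				     RTE_MBUF_DEFAULT_BUF_SIZE,
				     rte_socket_id());
	if (mp == NULL)
		return 1;
	/* All-or-nothing: on failure no object is dequeued, so an Rx
	 * path can retry the whole refill on a later burst. */
	if (rte_mempool_get_bulk(mp, objs, 32) == 0)
		rte_mempool_put_bulk(mp, objs, 32);
	return 0;
}
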
+
 /* Criterias for calculating Rx buffer size -
  * 1) rx_buf_size should not exceed the size of mbuf
  * 2) In scattered_rx mode - minimum rx_buf_size should be
@@ -1131,7 +1177,7 @@ qede_reuse_page(__rte_unused struct qede_dev *qdev,
                struct qede_rx_queue *rxq, struct qede_rx_entry *curr_cons)
 {
        struct eth_rx_bd *rx_bd_prod = ecore_chain_produce(&rxq->rx_bd_ring);
-       uint16_t idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
+       uint16_t idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq);
        struct qede_rx_entry *curr_prod;
        dma_addr_t new_mapping;
 
@@ -1364,7 +1410,6 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
        uint8_t bitfield_val;
 #endif
        uint8_t tunn_parse_flag;
-       uint8_t j;
        struct eth_fast_path_rx_tpa_start_cqe *cqe_start_tpa;
        uint64_t ol_flags;
        uint32_t packet_type;
@@ -1373,6 +1418,7 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
        uint8_t offset, tpa_agg_idx, flags;
        struct qede_agg_info *tpa_info = NULL;
        uint32_t rss_hash;
+       int rx_alloc_count = 0;
 
        hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr);
        sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
@@ -1382,6 +1428,25 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
        if (hw_comp_cons == sw_comp_cons)
                return 0;
 
+       /* Allocate buffers that were consumed in the previous loop */
+       if (rxq->rx_alloc_count) {
+               if (unlikely(qede_alloc_rx_bulk_mbufs(rxq,
+                            rxq->rx_alloc_count))) {
+                       struct rte_eth_dev *dev;
+
+                       PMD_RX_LOG(ERR, rxq,
+                                  "New buffer allocation failed, "
+                                  "dropping incoming packet\n");
+                       dev = &rte_eth_devices[rxq->port_id];
+                       dev->data->rx_mbuf_alloc_failed +=
+                                                       rxq->rx_alloc_count;
+                       rxq->rx_alloc_errors += rxq->rx_alloc_count;
+                       return 0;
+               }
+               qede_update_rx_prod(qdev, rxq);
+               rxq->rx_alloc_count = 0;
+       }
+
        while (sw_comp_cons != hw_comp_cons) {
                ol_flags = 0;
                packet_type = RTE_PTYPE_UNKNOWN;
@@ -1553,16 +1618,7 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                        rx_mb->hash.rss = rss_hash;
                }
 
-               if (unlikely(qede_alloc_rx_buffer(rxq) != 0)) {
-                       PMD_RX_LOG(ERR, rxq,
-                                  "New buffer allocation failed,"
-                                  "dropping incoming packet\n");
-                       qede_recycle_rx_bd_ring(rxq, qdev, fp_cqe->bd_num);
-                       rte_eth_devices[rxq->port_id].
-                           data->rx_mbuf_alloc_failed++;
-                       rxq->rx_alloc_errors++;
-                       break;
-               }
+               rx_alloc_count++;
                qede_rx_bd_ring_consume(rxq);
 
                if (!tpa_start_flg && fp_cqe->bd_num > 1) {
@@ -1574,17 +1630,9 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                        if (qede_process_sg_pkts(p_rxq, seg1, num_segs,
                                                 pkt_len - len))
                                goto next_cqe;
-                       for (j = 0; j < num_segs; j++) {
-                               if (qede_alloc_rx_buffer(rxq)) {
-                                       PMD_RX_LOG(ERR, rxq,
-                                               "Buffer allocation failed");
-                                       rte_eth_devices[rxq->port_id].
-                                               data->rx_mbuf_alloc_failed++;
-                                       rxq->rx_alloc_errors++;
-                                       break;
-                               }
-                               rxq->rx_segs++;
-                       }
+
+                       rx_alloc_count += num_segs;
+                       rxq->rx_segs += num_segs;
                }
                rxq->rx_segs++; /* for the first segment */
 
@@ -1626,7 +1674,8 @@ next_cqe:
                }
        }
 
-       qede_update_rx_prod(qdev, rxq);
+       /* Request the number of buffers to be allocated in the next loop */
+       rxq->rx_alloc_count = rx_alloc_count;
 
        rxq->rcv_pkts += rx_pkt;
 
@@ -2132,7 +2181,6 @@ qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 
                txq->nb_tx_avail -= bd1->data.nbds;
                txq->sw_tx_prod++;
-               rte_prefetch0(txq->sw_tx_ring[TX_PROD(txq)].mbuf);
                bd_prod =
                    rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
index 454daa0..5b249cb 100644 (file)
@@ -192,6 +192,8 @@ struct qede_rx_queue {
        uint16_t queue_id;
        uint16_t port_id;
        uint16_t rx_buf_size;
+       uint16_t rx_alloc_count;
+       uint16_t unused;
        uint64_t rcv_pkts;
        uint64_t rx_segs;
        uint64_t rx_hw_errors;
index bf1180a..9fa9e2e 100644 (file)
@@ -926,7 +926,8 @@ siena_tx_qcreate(
        EFX_STATIC_ASSERT(ISP2(EFX_TXQ_MINNDESCS));
 
        if (!ISP2(ndescs) ||
-           (ndescs < EFX_TXQ_MINNDESCS) || (ndescs > EFX_EVQ_MAXNEVS)) {
+           (ndescs < EFX_TXQ_MINNDESCS) ||
+           (ndescs > encp->enc_txq_max_ndescs)) {
                rc = EINVAL;
                goto fail1;
        }
index 0b4795d..f7bcc74 100644 (file)
@@ -84,7 +84,7 @@ typedef bool boolean_t;
 #define P2ALIGN(_x, _a)                ((_x) & -(_a))
 #endif
 
-#ifndef IS2P
+#ifndef ISP2
 #define ISP2(x)                        rte_is_power_of_2(x)
 #endif
 
index 51be440..a94ca8e 100644 (file)
@@ -149,6 +149,8 @@ struct sfc_port {
        uint64_t                        mac_stats_last_request_timestamp;
 
        uint32_t                mac_stats_mask[EFX_MAC_STATS_MASK_NPAGES];
+
+       uint64_t                        ipackets;
 };
 
 struct sfc_rss_hf_rte_to_efx {
@@ -251,7 +253,7 @@ struct sfc_adapter {
 
        /*
         * Shared memory copy of the Tx datapath name to be used by
-        * the secondary process to find Rx datapath to be used.
+        * the secondary process to find Tx datapath to be used.
         */
        char                            *dp_tx_name;
        const struct sfc_dp_tx          *dp_tx;
index bcd3153..ff6d5b4 100644 (file)
@@ -381,6 +381,9 @@ sfc_ef10_xmit_tso_pkt(struct sfc_ef10_txq * const txq, struct rte_mbuf *m_seg,
                hdr_addr = rte_pktmbuf_mtod(m_seg, uint8_t *);
                hdr_iova = rte_mbuf_data_iova(m_seg);
                if (rte_pktmbuf_data_len(m_seg) == header_len) {
+                       /* Cannot send a packet that consists only of header */
+                       if (unlikely(m_seg->next == NULL))
+                               return EMSGSIZE;
                        /*
                         * Associate header mbuf with header descriptor
                         * which is located after TSO descriptors.
@@ -409,6 +412,10 @@ sfc_ef10_xmit_tso_pkt(struct sfc_ef10_txq * const txq, struct rte_mbuf *m_seg,
                copied_segs = sfc_tso_prepare_header(hdr_addr, header_len,
                                                     &m_seg, &in_off);
 
+               /* Cannot send a packet that consists only of header */
+               if (unlikely(m_seg == NULL))
+                       return EMSGSIZE;
+
                m_seg_to_free_up_to = m_seg;
                /*
                 * Reduce the number of needed descriptors by the number of
index 3886daf..a7322a1 100644 (file)
@@ -503,6 +503,29 @@ sfc_tx_queue_release(void *queue)
        sfc_adapter_unlock(sa);
 }
 
+/*
+ * Some statistics are computed as A - B where A and B each increase
+ * monotonically with some hardware counter(s) and the counters are read
+ * asynchronously.
+ *
+ * If packet X is counted in A but not yet counted in B, the computed
+ * value is greater than the real one.
+ *
+ * If packet X is not counted in A at the moment its counter is read,
+ * but is already counted in B when that counter is read, the computed
+ * value is less than the real one.
+ *
+ * However, a counter which grows backward is a worse evil than a
+ * slightly wrong value, so let's try to guarantee that it never happens,
+ * except possibly when the MAC stats are zeroed as a result of a NIC reset.
+ */
+static void
+sfc_update_diff_stat(uint64_t *stat, uint64_t newval)
+{
+       if ((int64_t)(newval - *stat) > 0 || newval == 0)
+               *stat = newval;
+}
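
A small standalone illustration of the guard above (the values are made up):
the signed-difference test is wrap-safe for 64-bit counters, refuses to move
the statistic backward when the asynchronously read counters lag, and still
allows the explicit drop to zero after a MAC stats reset.

#include <stdint.h>
#include <stdio.h>

static void
update_diff_stat(uint64_t *stat, uint64_t newval)
{
	if ((int64_t)(newval - *stat) > 0 || newval == 0)
		*stat = newval;
}

int
main(void)
{
	uint64_t ipackets = 1000;

	update_diff_stat(&ipackets, 990);  /* counters lag: stays at 1000 */
	update_diff_stat(&ipackets, 1100); /* grows forward as usual      */
	update_diff_stat(&ipackets, 0);    /* NIC reset: allowed to drop  */
	printf("%llu\n", (unsigned long long)ipackets);
	return 0;
}
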
+
 static int
 sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
@@ -537,11 +560,9 @@ sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
                        mac_stats[EFX_MAC_VADAPTER_TX_UNICAST_BYTES] +
                        mac_stats[EFX_MAC_VADAPTER_TX_MULTICAST_BYTES] +
                        mac_stats[EFX_MAC_VADAPTER_TX_BROADCAST_BYTES];
-               stats->imissed = mac_stats[EFX_MAC_VADAPTER_RX_OVERFLOW];
-               stats->ierrors = mac_stats[EFX_MAC_VADAPTER_RX_BAD_PACKETS];
+               stats->imissed = mac_stats[EFX_MAC_VADAPTER_RX_BAD_PACKETS];
                stats->oerrors = mac_stats[EFX_MAC_VADAPTER_TX_BAD_PACKETS];
        } else {
-               stats->ipackets = mac_stats[EFX_MAC_RX_PKTS];
                stats->opackets = mac_stats[EFX_MAC_TX_PKTS];
                stats->ibytes = mac_stats[EFX_MAC_RX_OCTETS];
                stats->obytes = mac_stats[EFX_MAC_TX_OCTETS];
@@ -567,6 +588,13 @@ sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
                        mac_stats[EFX_MAC_RX_ALIGN_ERRORS] +
                        mac_stats[EFX_MAC_RX_JABBER_PKTS];
                /* no oerrors counters supported on EF10 */
+
+               /* Exclude missed, errors and pauses from Rx packets */
+               sfc_update_diff_stat(&port->ipackets,
+                       mac_stats[EFX_MAC_RX_PKTS] -
+                       mac_stats[EFX_MAC_RX_PAUSE_PKTS] -
+                       stats->imissed - stats->ierrors);
+               stats->ipackets = port->ipackets;
        }
 
 unlock:
@@ -1863,13 +1891,13 @@ sfc_eth_dev_secondary_set_ops(struct rte_eth_dev *dev)
 
        dp_rx = sfc_dp_find_rx_by_name(&sfc_dp_head, sa->dp_rx_name);
        if (dp_rx == NULL) {
-               sfc_err(sa, "cannot find %s Rx datapath", sa->dp_tx_name);
+               sfc_err(sa, "cannot find %s Rx datapath", sa->dp_rx_name);
                rc = ENOENT;
                goto fail_dp_rx;
        }
        if (~dp_rx->features & SFC_DP_RX_FEAT_MULTI_PROCESS) {
                sfc_err(sa, "%s Rx datapath does not support multi-process",
-                       sa->dp_tx_name);
+                       sa->dp_rx_name);
                rc = EINVAL;
                goto fail_dp_rx_multi_process;
        }
index d6f3435..b11fbfb 100644 (file)
@@ -37,7 +37,8 @@ extern uint32_t sfc_logtype_driver;
                const struct sfc_adapter *__sa = (sa);                  \
                                                                        \
                rte_log(level, type,                                    \
-                       RTE_FMT("PMD: sfc_efx " PCI_PRI_FMT " #%" PRIu8 \
+                       RTE_FMT("PMD: sfc_efx "                         \
+                               PCI_PRI_FMT " #%" PRIu16                \
                                ": " RTE_FMT_HEAD(__VA_ARGS__ ,) "\n",  \
                                __sa->pci_addr.domain,                  \
                                __sa->pci_addr.bus,                     \
index 5384dbb..5eb4b3a 100644 (file)
@@ -87,6 +87,18 @@ sfc_port_update_mac_stats(struct sfc_adapter *sa)
        return 0;
 }
 
+static void
+sfc_port_reset_sw_stats(struct sfc_adapter *sa)
+{
+       struct sfc_port *port = &sa->port;
+
+       /*
+        * Reset the diff stats explicitly, since the check which does not
+        * allow the statistics to grow backward could otherwise deny it.
+        */
+       port->ipackets = 0;
+}
+
 int
 sfc_port_reset_mac_stats(struct sfc_adapter *sa)
 {
@@ -95,6 +107,8 @@ sfc_port_reset_mac_stats(struct sfc_adapter *sa)
 
        rte_spinlock_lock(&port->mac_stats_lock);
        rc = efx_mac_stats_clear(sa->nic);
+       if (rc == 0)
+               sfc_port_reset_sw_stats(sa);
        rte_spinlock_unlock(&port->mac_stats_lock);
 
        return rc;
index 3d2faf5..f89aef0 100644 (file)
@@ -7,6 +7,13 @@
  * for Solarflare) and Solarflare Communications, Inc.
  */
 
+#ifndef _SFC_TSO_H
+#define _SFC_TSO_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /** Standard TSO header length */
 #define SFC_TSOH_STD_LEN       256
 
@@ -21,3 +28,9 @@
 
 unsigned int sfc_tso_prepare_header(uint8_t *tsoh, size_t header_len,
                                    struct rte_mbuf **in_seg, size_t *in_off);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* _SFC_TSO_H */
index 147f933..aa73d26 100644 (file)
@@ -451,7 +451,7 @@ sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
        if (txq->offloads & DEV_TX_OFFLOAD_TCP_TSO)
                flags |= EFX_TXQ_FATSOV2;
 
-       rc = efx_tx_qcreate(sa->nic, sw_index, 0, &txq->mem,
+       rc = efx_tx_qcreate(sa->nic, txq->hw_index, 0, &txq->mem,
                            txq_info->entries, 0 /* not used on EF10 */,
                            flags, evq->common,
                            &txq->common, &desc_index);
@@ -712,6 +712,7 @@ sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
        for (pkts_sent = 0, pktp = &tx_pkts[0];
             (pkts_sent < nb_pkts) && (fill_level <= soft_max_fill);
             pkts_sent++, pktp++) {
+               uint16_t                hw_vlan_tci_prev = txq->hw_vlan_tci;
                struct rte_mbuf         *m_seg = *pktp;
                size_t                  pkt_len = m_seg->pkt_len;
                unsigned int            pkt_descs = 0;
@@ -750,6 +751,7 @@ sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                                 * mbuf shouldn't be orphaned
                                 */
                                pend -= pkt_descs;
+                               txq->hw_vlan_tci = hw_vlan_tci_prev;
 
                                rte_pktmbuf_free(*pktp);
 
@@ -819,10 +821,12 @@ sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                                fill_level = added - txq->completed;
                                if (fill_level > hard_max_fill) {
                                        pend -= pkt_descs;
+                                       txq->hw_vlan_tci = hw_vlan_tci_prev;
                                        break;
                                }
                        } else {
                                pend -= pkt_descs;
+                               txq->hw_vlan_tci = hw_vlan_tci_prev;
                                break;
                        }
                }
index 49afd38..a934299 100644 (file)
@@ -78,9 +78,6 @@ static const char *valid_arguments[] = {
        NULL
 };
 
-static unsigned int tap_unit;
-static unsigned int tun_unit;
-
 static char tuntap_name[8];
 
 static volatile uint32_t tap_trigger;  /* Rx trigger */
@@ -150,8 +147,6 @@ tun_alloc(struct pmd_internals *pmd, int is_keepalive)
                IFF_TAP : IFF_TUN | IFF_POINTOPOINT;
        snprintf(ifr.ifr_name, IFNAMSIZ, "%s", pmd->name);
 
-       TAP_LOG(DEBUG, "ifr_name '%s'", ifr.ifr_name);
-
        fd = open(TUN_TAP_DEV_PATH, O_RDWR);
        if (fd < 0) {
                TAP_LOG(ERR, "Unable to create %s interface", tuntap_name);
@@ -185,6 +180,13 @@ tun_alloc(struct pmd_internals *pmd, int is_keepalive)
                goto error;
        }
 
+       /*
+        * The name passed to the kernel might be a wildcard like dtun%d,
+        * so we need to find the resulting device.
+        */
+       TAP_LOG(DEBUG, "Device name is '%s'", ifr.ifr_name);
+       strlcpy(pmd->name, ifr.ifr_name, RTE_ETH_NAME_MAX_LEN);
+
        if (is_keepalive) {
                /*
                 * Detach the TUN/TAP keep-alive queue
@@ -281,13 +283,27 @@ tap_verify_csum(struct rte_mbuf *mbuf)
                l3_len = 4 * (iph->version_ihl & 0xf);
                if (unlikely(l2_len + l3_len > rte_pktmbuf_data_len(mbuf)))
                        return;
+               /* check that the total length reported by the header is not
+                * greater than the total received size
+                */
+               if (l2_len + rte_be_to_cpu_16(iph->total_length) >
+                               rte_pktmbuf_data_len(mbuf))
+                       return;
 
                cksum = ~rte_raw_cksum(iph, l3_len);
                mbuf->ol_flags |= cksum ?
                        PKT_RX_IP_CKSUM_BAD :
                        PKT_RX_IP_CKSUM_GOOD;
        } else if (l3 == RTE_PTYPE_L3_IPV6) {
+               struct ipv6_hdr *iph = l3_hdr;
+
                l3_len = sizeof(struct ipv6_hdr);
+               /* check that the total length reported by the header is not
+                * greater than the total received size
+                */
+               if (l2_len + l3_len + rte_be_to_cpu_16(iph->payload_len) >
+                               rte_pktmbuf_data_len(mbuf))
+                       return;
        } else {
                /* IPv6 extensions are not supported */
                return;
@@ -1741,6 +1757,7 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name,
                TAP_LOG(ERR, "Unable to create %s interface", tuntap_name);
                goto error_exit;
        }
+       TAP_LOG(DEBUG, "allocated %s", pmd->name);
 
        ifr.ifr_mtu = dev->data->mtu;
        if (tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE) < 0)
@@ -1878,10 +1895,10 @@ set_interface_name(const char *key __rte_unused,
        char *name = (char *)extra_args;
 
        if (value)
-               strlcpy(name, value, RTE_ETH_NAME_MAX_LEN - 1);
+               strlcpy(name, value, RTE_ETH_NAME_MAX_LEN);
        else
-               snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s%d",
-                        DEFAULT_TAP_NAME, (tap_unit - 1));
+               /* use tap%d so the kernel picks the next available name */
+               strlcpy(name, DEFAULT_TAP_NAME "%d", RTE_ETH_NAME_MAX_LEN);
 
        return 0;
 }
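
The wildcard is resolved by the kernel itself: TUNSETIFF rewrites
ifr.ifr_name with the unit it actually allocated, which is why tun_alloc()
copies the name back into pmd->name above. A minimal standalone sketch of
that behaviour outside DPDK, using an illustrative "tap%d" template
(requires CAP_NET_ADMIN to run):

#include <fcntl.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/if_tun.h>

int
main(void)
{
	struct ifreq ifr;
	int fd = open("/dev/net/tun", O_RDWR);

	if (fd < 0)
		return 1;
	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
	/* Wildcard template, not a literal name. */
	snprintf(ifr.ifr_name, IFNAMSIZ, "%s", "tap%d");
	if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
		close(fd);
		return 1;
	}
	printf("kernel chose: %s\n", ifr.ifr_name); /* e.g. tap0 */
	close(fd);
	return 0;
}
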
@@ -1988,8 +2005,8 @@ rte_pmd_tun_probe(struct rte_vdev_device *dev)
                return 0;
        }
 
-       snprintf(tun_name, sizeof(tun_name), "%s%u",
-                DEFAULT_TUN_NAME, tun_unit++);
+       /* use tun%d so the kernel picks the next available name */
+       strlcpy(tun_name, DEFAULT_TUN_NAME "%d", RTE_ETH_NAME_MAX_LEN);
 
        if (params && (params[0] != '\0')) {
                TAP_LOG(DEBUG, "parameters (%s)", params);
@@ -2009,17 +2026,15 @@ rte_pmd_tun_probe(struct rte_vdev_device *dev)
        }
        pmd_link.link_speed = ETH_SPEED_NUM_10G;
 
-       TAP_LOG(NOTICE, "Initializing pmd_tun for %s as %s",
-               name, tun_name);
+       TAP_LOG(NOTICE, "Initializing pmd_tun for %s", name);
 
        ret = eth_dev_tap_create(dev, tun_name, remote_iface, 0,
-               ETH_TUNTAP_TYPE_TUN);
+                                ETH_TUNTAP_TYPE_TUN);
 
 leave:
        if (ret == -1) {
                TAP_LOG(ERR, "Failed to create pmd for %s as %s",
                        name, tun_name);
-               tun_unit--; /* Restore the unit number */
        }
        rte_kvargs_free(kvlist);
 
@@ -2175,8 +2190,9 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
        }
 
        speed = ETH_SPEED_NUM_10G;
-       snprintf(tap_name, sizeof(tap_name), "%s%u",
-                DEFAULT_TAP_NAME, tap_unit++);
+
+       /* use tap%d so the kernel picks the next available name */
+       strlcpy(tap_name, DEFAULT_TAP_NAME "%d", RTE_ETH_NAME_MAX_LEN);
        memset(remote_iface, 0, RTE_ETH_NAME_MAX_LEN);
 
        if (params && (params[0] != '\0')) {
@@ -2240,7 +2256,6 @@ leave:
                                rte_mp_action_unregister(TAP_MP_KEY);
                        tap_devices_count--;
                }
-               tap_unit--;             /* Restore the unit number */
        }
        rte_kvargs_free(kvlist);
 
index 3c9d036..b478b59 100644 (file)
@@ -116,7 +116,7 @@ error:
 int
 qdisc_add_multiq(int nlsk_fd, uint16_t ifindex)
 {
-       struct tc_multiq_qopt opt;
+       struct tc_multiq_qopt opt = {0};
        struct nlmsg msg;
 
        tc_init_msg(&msg, ifindex, RTM_NEWQDISC,
index 42bdfcb..b2cda04 100644 (file)
@@ -1000,7 +1000,6 @@ eth_dev_close(struct rte_eth_dev *dev)
                for (i = 0; i < dev->data->nb_tx_queues; i++)
                        rte_free(dev->data->tx_queues[i]);
 
-       rte_free(dev->data->mac_addrs);
        free(internal->dev_name);
        free(internal->iface_name);
        rte_free(internal);
index 20816c9..9c8bcd2 100644 (file)
@@ -624,7 +624,7 @@ virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx)
 
                /* Update used ring */
                uep = &vring->used->ring[avail_idx];
-               uep->id = avail_idx;
+               uep->id = desc_idx;
                uep->len = n_descs;
 
                vring->used->idx++;
index 26518ed..2e2abf1 100644 (file)
@@ -336,6 +336,11 @@ vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx)
 static inline int
 virtqueue_kick_prepare(struct virtqueue *vq)
 {
+       /*
+        * Ensure updated avail->idx is visible to vhost before reading
+        * the used->flags.
+        */
+       virtio_mb();
        return !(vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY);
 }
 
index cf85f3d..d30914a 100644 (file)
@@ -50,6 +50,8 @@
 
 #define        VMXNET3_TX_OFFLOAD_MASK ( \
                PKT_TX_VLAN_PKT | \
+               PKT_TX_IPV6 |     \
+               PKT_TX_IPV4 |     \
                PKT_TX_L4_MASK |  \
                PKT_TX_TCP_SEG)
 
index f474442..60621eb 100644 (file)
@@ -313,7 +313,7 @@ rte_qdma_vq_create(uint32_t lcore_id, uint32_t flags)
                qdma_vqs[i].exclusive_hw_queue = 1;
        } else {
                /* Allocate a Ring for Virutal Queue in VQ mode */
-               sprintf(ring_name, "status ring %d", i);
+               snprintf(ring_name, sizeof(ring_name), "status ring %d", i);
                qdma_vqs[i].status_ring = rte_ring_create(ring_name,
                        qdma_dev.fle_pool_count, rte_socket_id(), 0);
                if (!qdma_vqs[i].status_ring) {
index a533dfe..1541b67 100644 (file)
@@ -303,25 +303,25 @@ static struct opae_adapter_ops *match_ops(struct opae_adapter *adapter)
 }
 
 /**
- * opae_adapter_data_alloc - alloc opae_adapter_data data structure
+ * opae_adapter_init - init opae_adapter data structure
+ * @adapter: pointer of opae_adapter data structure
  * @name: adapter name.
  * @data: private data of this adapter.
  *
- * Return: opae_adapter on success, otherwise NULL.
+ * Return: 0 on success.
  */
-struct opae_adapter *opae_adapter_alloc(const char *name, void *data)
+int opae_adapter_init(struct opae_adapter *adapter,
+               const char *name, void *data)
 {
-       struct opae_adapter *adapter = opae_zmalloc(sizeof(*adapter));
-
        if (!adapter)
-               return NULL;
+               return -ENOMEM;
 
        TAILQ_INIT(&adapter->acc_list);
        adapter->data = data;
        adapter->name = name;
        adapter->ops = match_ops(adapter);
 
-       return adapter;
+       return 0;
 }
 
 /**
index 4bbc9df..332e0f3 100644 (file)
@@ -225,7 +225,8 @@ struct opae_adapter {
 void *opae_adapter_data_alloc(enum opae_adapter_type type);
 #define opae_adapter_data_free(data) opae_free(data)
 
-struct opae_adapter *opae_adapter_alloc(const char *name, void *data);
+int opae_adapter_init(struct opae_adapter *adapter,
+               const char *name, void *data);
 #define opae_adapter_free(adapter) opae_free(adapter)
 
 int opae_adapter_enumerate(struct opae_adapter *adapter);
index 32e318f..da772d0 100644 (file)
@@ -409,9 +409,10 @@ ifpga_rawdev_create(struct rte_pci_device *pci_dev,
        data->device_id = pci_dev->id.device_id;
        data->vendor_id = pci_dev->id.vendor_id;
 
+       adapter = rawdev->dev_private;
        /* create a opae_adapter based on above device data */
-       adapter = opae_adapter_alloc(pci_dev->device.name, data);
-       if (!adapter) {
+       ret = opae_adapter_init(adapter, pci_dev->device.name, data);
+       if (ret) {
                ret = -ENOMEM;
                goto free_adapter_data;
        }
@@ -420,12 +421,10 @@ ifpga_rawdev_create(struct rte_pci_device *pci_dev,
        rawdev->device = &pci_dev->device;
        rawdev->driver_name = pci_dev->device.driver->name;
 
-       rawdev->dev_private = adapter;
-
        /* must enumerate the adapter before use it */
        ret = opae_adapter_enumerate(adapter);
        if (ret)
-               goto free_adapter;
+               goto free_adapter_data;
 
        /* get opae_manager to rawdev */
        mgr = opae_adapter_get_mgr(adapter);
@@ -436,9 +435,6 @@ ifpga_rawdev_create(struct rte_pci_device *pci_dev,
 
        return ret;
 
-free_adapter:
-       if (adapter)
-               opae_adapter_free(adapter);
 free_adapter_data:
        if (data)
                opae_adapter_data_free(data);
index b282e68..ef86194 100644 (file)
@@ -220,6 +220,7 @@ bond_port_init(struct rte_mempool *mbuf_pool)
        struct rte_eth_rxconf rxq_conf;
        struct rte_eth_txconf txq_conf;
        struct rte_eth_conf local_port_conf = port_conf;
+       uint16_t wait_counter = 20;
 
        retval = rte_eth_bond_create("net_bonding0", BONDING_MODE_ALB,
                        0 /*SOCKET_ID_ANY*/);
@@ -243,6 +244,13 @@ bond_port_init(struct rte_mempool *mbuf_pool)
                rte_exit(EXIT_FAILURE, "port %u: rte_eth_dev_adjust_nb_rx_tx_desc "
                                "failed (res=%d)\n", BOND_PORT, retval);
 
+       for (i = 0; i < slaves_count; i++) {
+               if (rte_eth_bond_slave_add(BOND_PORT, slaves[i]) == -1)
+                       rte_exit(-1, "Oooops! adding slave (%u) to bond (%u) failed!\n",
+                                       slaves[i], BOND_PORT);
+
+       }
+
        /* RX setup */
        rxq_conf = dev_info.default_rxconf;
        rxq_conf.offloads = local_port_conf.rxmode.offloads;
@@ -263,17 +271,24 @@ bond_port_init(struct rte_mempool *mbuf_pool)
                rte_exit(retval, "port %u: TX queue 0 setup failed (res=%d)",
                                BOND_PORT, retval);
 
-       for (i = 0; i < slaves_count; i++) {
-               if (rte_eth_bond_slave_add(BOND_PORT, slaves[i]) == -1)
-                       rte_exit(-1, "Oooops! adding slave (%u) to bond (%u) failed!\n",
-                                       slaves[i], BOND_PORT);
-
-       }
-
        retval  = rte_eth_dev_start(BOND_PORT);
        if (retval < 0)
                rte_exit(retval, "Start port %d failed (res=%d)", BOND_PORT, retval);
 
+       printf("Waiting for slaves to become active...");
+       while (wait_counter) {
+               uint16_t act_slaves[16] = {0};
+               if (rte_eth_bond_active_slaves_get(BOND_PORT, act_slaves, 16) ==
+                               slaves_count) {
+                       printf("\n");
+                       break;
+               }
+               sleep(1);
+               printf("...");
+               if (--wait_counter == 0)
+                       rte_exit(-1, "\nFailed to activate slaves\n");
+       }
+
        rte_eth_promiscuous_enable(BOND_PORT);
 
        struct ether_addr addr;
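
The net effect of this hunk is that the slave ports are added to the bond before the bonded port is started, and the application then polls until every slave reports active. A condensed sketch of the resulting initialization order (illustrative only; nb_rxd, nb_txd and act_slaves are placeholder names, and queue setup and error handling are elided):

	rte_eth_dev_configure(BOND_PORT, 1, 1, &local_port_conf);
	rte_eth_dev_adjust_nb_rx_tx_desc(BOND_PORT, &nb_rxd, &nb_txd);
	for (i = 0; i < slaves_count; i++)
		rte_eth_bond_slave_add(BOND_PORT, slaves[i]);	/* before start */
	/* RX/TX queue setup as in the sample ... */
	rte_eth_dev_start(BOND_PORT);
	/* poll until all slaves report active (the sample bounds this wait) */
	while (rte_eth_bond_active_slaves_get(BOND_PORT, act_slaves,
			RTE_DIM(act_slaves)) != slaves_count)
		sleep(1);
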
index bae7116..1edf6f9 100644 (file)
@@ -2,7 +2,8 @@
  * Copyright 2017 Mellanox Technologies, Ltd
  */
 
-#define MAX_PATTERN_NUM                4
+#define MAX_PATTERN_NUM                3
+#define MAX_ACTION_NUM         2
 
 struct rte_flow *
 generate_ipv4_flow(uint16_t port_id, uint16_t rx_q,
@@ -41,11 +42,9 @@ generate_ipv4_flow(uint16_t port_id, uint16_t rx_q,
 {
        struct rte_flow_attr attr;
        struct rte_flow_item pattern[MAX_PATTERN_NUM];
-       struct rte_flow_action action[MAX_PATTERN_NUM];
+       struct rte_flow_action action[MAX_ACTION_NUM];
        struct rte_flow *flow = NULL;
        struct rte_flow_action_queue queue = { .index = rx_q };
-       struct rte_flow_item_eth eth_spec;
-       struct rte_flow_item_eth eth_mask;
        struct rte_flow_item_ipv4 ip_spec;
        struct rte_flow_item_ipv4 ip_mask;
        int res;
@@ -64,26 +63,19 @@ generate_ipv4_flow(uint16_t port_id, uint16_t rx_q,
         * create the action sequence.
         * one action only,  move packet to queue
         */
-
        action[0].type = RTE_FLOW_ACTION_TYPE_QUEUE;
        action[0].conf = &queue;
        action[1].type = RTE_FLOW_ACTION_TYPE_END;
 
        /*
-        * set the first level of the pattern (eth).
+        * set the first level of the pattern (ETH).
         * since in this example we just want to get the
         * ipv4 we set this level to allow all.
         */
-       memset(&eth_spec, 0, sizeof(struct rte_flow_item_eth));
-       memset(&eth_mask, 0, sizeof(struct rte_flow_item_eth));
-       eth_spec.type = 0;
-       eth_mask.type = 0;
        pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH;
-       pattern[0].spec = &eth_spec;
-       pattern[0].mask = &eth_mask;
 
        /*
-        * setting the third level of the pattern (ip).
+        * setting the second level of the pattern (IP).
         * in this example this is the level we care about
         * so we set it according to the parameters.
         */
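
With the unused ETH spec/mask removed, the pattern reduces to ETH (match all) / IPV4 / END and the action list to QUEUE / END. A minimal sketch of how the pieces fit together, reusing the sample's existing attr, ip_spec, ip_mask and error variables:

	struct rte_flow_item pattern[MAX_PATTERN_NUM];
	struct rte_flow_action action[MAX_ACTION_NUM];
	struct rte_flow_action_queue queue = { .index = rx_q };
	struct rte_flow *flow = NULL;

	memset(pattern, 0, sizeof(pattern));
	memset(action, 0, sizeof(action));

	action[0].type = RTE_FLOW_ACTION_TYPE_QUEUE;
	action[0].conf = &queue;
	action[1].type = RTE_FLOW_ACTION_TYPE_END;

	pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH;	/* no spec/mask: match all */
	pattern[1].type = RTE_FLOW_ITEM_TYPE_IPV4;
	pattern[1].spec = &ip_spec;
	pattern[1].mask = &ip_mask;
	pattern[2].type = RTE_FLOW_ITEM_TYPE_END;

	if (rte_flow_validate(port_id, &attr, pattern, action, error) == 0)
		flow = rte_flow_create(port_id, &attr, pattern, action, error);
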
index 1bc0b5b..f88fdb4 100644 (file)
@@ -451,38 +451,55 @@ inbound_sp_sa(struct sp_ctx *sp, struct sa_ctx *sa, struct traffic_type *ip,
        ip->num = j;
 }
 
-static inline void
-process_pkts_inbound(struct ipsec_ctx *ipsec_ctx,
-               struct ipsec_traffic *traffic)
+static void
+split46_traffic(struct ipsec_traffic *trf, struct rte_mbuf *mb[], uint32_t num)
 {
+       uint32_t i, n4, n6;
+       struct ip *ip;
        struct rte_mbuf *m;
-       uint16_t idx, nb_pkts_in, i, n_ip4, n_ip6;
 
-       nb_pkts_in = ipsec_inbound(ipsec_ctx, traffic->ipsec.pkts,
-                       traffic->ipsec.num, MAX_PKT_BURST);
+       n4 = trf->ip4.num;
+       n6 = trf->ip6.num;
 
-       n_ip4 = traffic->ip4.num;
-       n_ip6 = traffic->ip6.num;
+       for (i = 0; i < num; i++) {
+
+               m = mb[i];
+               ip = rte_pktmbuf_mtod(m, struct ip *);
 
-       /* SP/ACL Inbound check ipsec and ip4 */
-       for (i = 0; i < nb_pkts_in; i++) {
-               m = traffic->ipsec.pkts[i];
-               struct ip *ip = rte_pktmbuf_mtod(m, struct ip *);
                if (ip->ip_v == IPVERSION) {
-                       idx = traffic->ip4.num++;
-                       traffic->ip4.pkts[idx] = m;
-                       traffic->ip4.data[idx] = rte_pktmbuf_mtod_offset(m,
+                       trf->ip4.pkts[n4] = m;
+                       trf->ip4.data[n4] = rte_pktmbuf_mtod_offset(m,
                                        uint8_t *, offsetof(struct ip, ip_p));
+                       n4++;
                } else if (ip->ip_v == IP6_VERSION) {
-                       idx = traffic->ip6.num++;
-                       traffic->ip6.pkts[idx] = m;
-                       traffic->ip6.data[idx] = rte_pktmbuf_mtod_offset(m,
+                       trf->ip6.pkts[n6] = m;
+                       trf->ip6.data[n6] = rte_pktmbuf_mtod_offset(m,
                                        uint8_t *,
                                        offsetof(struct ip6_hdr, ip6_nxt));
+                       n6++;
                } else
                        rte_pktmbuf_free(m);
        }
 
+       trf->ip4.num = n4;
+       trf->ip6.num = n6;
+}
+
+
+static inline void
+process_pkts_inbound(struct ipsec_ctx *ipsec_ctx,
+               struct ipsec_traffic *traffic)
+{
+       uint16_t nb_pkts_in, n_ip4, n_ip6;
+
+       n_ip4 = traffic->ip4.num;
+       n_ip6 = traffic->ip6.num;
+
+       nb_pkts_in = ipsec_inbound(ipsec_ctx, traffic->ipsec.pkts,
+                       traffic->ipsec.num, MAX_PKT_BURST);
+
+       split46_traffic(traffic, traffic->ipsec.pkts, nb_pkts_in);
+
        inbound_sp_sa(ipsec_ctx->sp4_ctx, ipsec_ctx->sa_ctx, &traffic->ip4,
                        n_ip4);
 
@@ -594,32 +611,45 @@ process_pkts_outbound_nosp(struct ipsec_ctx *ipsec_ctx,
                struct ipsec_traffic *traffic)
 {
        struct rte_mbuf *m;
-       uint32_t nb_pkts_out, i;
+       uint32_t nb_pkts_out, i, n;
        struct ip *ip;
 
        /* Drop any IPsec traffic from protected ports */
        for (i = 0; i < traffic->ipsec.num; i++)
                rte_pktmbuf_free(traffic->ipsec.pkts[i]);
 
-       traffic->ipsec.num = 0;
+       n = 0;
 
-       for (i = 0; i < traffic->ip4.num; i++)
-               traffic->ip4.res[i] = single_sa_idx;
+       for (i = 0; i < traffic->ip4.num; i++) {
+               traffic->ipsec.pkts[n] = traffic->ip4.pkts[i];
+               traffic->ipsec.res[n++] = single_sa_idx;
+       }
 
-       for (i = 0; i < traffic->ip6.num; i++)
-               traffic->ip6.res[i] = single_sa_idx;
+       for (i = 0; i < traffic->ip6.num; i++) {
+               traffic->ipsec.pkts[n] = traffic->ip6.pkts[i];
+               traffic->ipsec.res[n++] = single_sa_idx;
+       }
+
+       traffic->ip4.num = 0;
+       traffic->ip6.num = 0;
+       traffic->ipsec.num = n;
 
-       nb_pkts_out = ipsec_outbound(ipsec_ctx, traffic->ip4.pkts,
-                       traffic->ip4.res, traffic->ip4.num,
+       nb_pkts_out = ipsec_outbound(ipsec_ctx, traffic->ipsec.pkts,
+                       traffic->ipsec.res, traffic->ipsec.num,
                        MAX_PKT_BURST);
 
        /* They all use the same SA (ip4 or ip6 tunnel) */
        m = traffic->ipsec.pkts[i];
        ip = rte_pktmbuf_mtod(m, struct ip *);
-       if (ip->ip_v == IPVERSION)
+       if (ip->ip_v == IPVERSION) {
                traffic->ip4.num = nb_pkts_out;
-       else
+               for (i = 0; i < nb_pkts_out; i++)
+                       traffic->ip4.pkts[i] = traffic->ipsec.pkts[i];
+       } else {
                traffic->ip6.num = nb_pkts_out;
+               for (i = 0; i < nb_pkts_out; i++)
+                       traffic->ip6.pkts[i] = traffic->ipsec.pkts[i];
+       }
 }
 
 static inline int32_t
@@ -777,7 +807,7 @@ process_pkts(struct lcore_conf *qconf, struct rte_mbuf **pkts,
 }
 
 static inline void
-drain_buffers(struct lcore_conf *qconf)
+drain_tx_buffers(struct lcore_conf *qconf)
 {
        struct buffer *buf;
        uint32_t portid;
@@ -791,6 +821,81 @@ drain_buffers(struct lcore_conf *qconf)
        }
 }
 
+static inline void
+drain_crypto_buffers(struct lcore_conf *qconf)
+{
+       uint32_t i;
+       struct ipsec_ctx *ctx;
+
+       /* drain inbound buffers */
+       ctx = &qconf->inbound;
+       for (i = 0; i != ctx->nb_qps; i++) {
+               if (ctx->tbl[i].len != 0)
+                       enqueue_cop_burst(ctx->tbl  + i);
+       }
+
+       /* drain outbound buffers */
+       ctx = &qconf->outbound;
+       for (i = 0; i != ctx->nb_qps; i++) {
+               if (ctx->tbl[i].len != 0)
+                       enqueue_cop_burst(ctx->tbl  + i);
+       }
+}
+
+static void
+drain_inbound_crypto_queues(const struct lcore_conf *qconf,
+               struct ipsec_ctx *ctx)
+{
+       uint32_t n;
+       struct ipsec_traffic trf;
+
+       /* dequeue packets from crypto-queue */
+       n = ipsec_inbound_cqp_dequeue(ctx, trf.ipsec.pkts,
+                       RTE_DIM(trf.ipsec.pkts));
+       if (n == 0)
+               return;
+
+       trf.ip4.num = 0;
+       trf.ip6.num = 0;
+
+       /* split traffic by ipv4-ipv6 */
+       split46_traffic(&trf, trf.ipsec.pkts, n);
+
+       /* process ipv4 packets */
+       inbound_sp_sa(ctx->sp4_ctx, ctx->sa_ctx, &trf.ip4, 0);
+       route4_pkts(qconf->rt4_ctx, trf.ip4.pkts, trf.ip4.num);
+
+       /* process ipv6 packets */
+       inbound_sp_sa(ctx->sp6_ctx, ctx->sa_ctx, &trf.ip6, 0);
+       route6_pkts(qconf->rt6_ctx, trf.ip6.pkts, trf.ip6.num);
+}
+
+static void
+drain_outbound_crypto_queues(const struct lcore_conf *qconf,
+               struct ipsec_ctx *ctx)
+{
+       uint32_t n;
+       struct ipsec_traffic trf;
+
+       /* dequeue packets from crypto-queue */
+       n = ipsec_outbound_cqp_dequeue(ctx, trf.ipsec.pkts,
+                       RTE_DIM(trf.ipsec.pkts));
+       if (n == 0)
+               return;
+
+       trf.ip4.num = 0;
+       trf.ip6.num = 0;
+
+       /* split traffic by ipv4-ipv6 */
+       split46_traffic(&trf, trf.ipsec.pkts, n);
+
+       /* process ipv4 packets */
+       route4_pkts(qconf->rt4_ctx, trf.ip4.pkts, trf.ip4.num);
+
+       /* process ipv6 packets */
+       route6_pkts(qconf->rt6_ctx, trf.ip6.pkts, trf.ip6.num);
+}
+
 /* main processing loop */
 static int32_t
 main_loop(__attribute__((unused)) void *dummy)
@@ -848,12 +953,14 @@ main_loop(__attribute__((unused)) void *dummy)
                diff_tsc = cur_tsc - prev_tsc;
 
                if (unlikely(diff_tsc > drain_tsc)) {
-                       drain_buffers(qconf);
+                       drain_tx_buffers(qconf);
+                       drain_crypto_buffers(qconf);
                        prev_tsc = cur_tsc;
                }
 
-               /* Read packet from RX queues */
                for (i = 0; i < qconf->nb_rx_queue; ++i) {
+
+                       /* Read packets from RX queues */
                        portid = rxql[i].port_id;
                        queueid = rxql[i].queue_id;
                        nb_rx = rte_eth_rx_burst(portid, queueid,
@@ -861,6 +968,14 @@ main_loop(__attribute__((unused)) void *dummy)
 
                        if (nb_rx > 0)
                                process_pkts(qconf, pkts, nb_rx, portid);
+
+                       /* dequeue and process completed crypto-ops */
+                       if (UNPROTECTED_PORT(portid))
+                               drain_inbound_crypto_queues(qconf,
+                                       &qconf->inbound);
+                       else
+                               drain_outbound_crypto_queues(qconf,
+                                       &qconf->outbound);
                }
        }
 }
index 3d415f1..72a29bc 100644 (file)
@@ -333,33 +333,35 @@ flow_create_failure:
        return 0;
 }
 
+/*
+ * queue crypto-ops into PMD queue.
+ */
+void
+enqueue_cop_burst(struct cdev_qp *cqp)
+{
+       uint32_t i, len, ret;
+
+       len = cqp->len;
+       ret = rte_cryptodev_enqueue_burst(cqp->id, cqp->qp, cqp->buf, len);
+       if (ret < len) {
+               RTE_LOG_DP(DEBUG, IPSEC, "Cryptodev %u queue %u:"
+                       " enqueued %u crypto ops out of %u\n",
+                       cqp->id, cqp->qp, ret, len);
+                       /* drop packets that we fail to enqueue */
+                       for (i = ret; i < len; i++)
+                               rte_pktmbuf_free(cqp->buf[i]->sym->m_src);
+       }
+       cqp->in_flight += ret;
+       cqp->len = 0;
+}
+
 static inline void
 enqueue_cop(struct cdev_qp *cqp, struct rte_crypto_op *cop)
 {
-       int32_t ret = 0, i;
-
        cqp->buf[cqp->len++] = cop;
 
-       if (cqp->len == MAX_PKT_BURST) {
-               int enq_size = cqp->len;
-               if ((cqp->in_flight + enq_size) > MAX_INFLIGHT)
-                       enq_size -=
-                           (int)((cqp->in_flight + enq_size) - MAX_INFLIGHT);
-
-               if (enq_size > 0)
-                       ret = rte_cryptodev_enqueue_burst(cqp->id, cqp->qp,
-                                       cqp->buf, enq_size);
-               if (ret < cqp->len) {
-                       RTE_LOG_DP(DEBUG, IPSEC, "Cryptodev %u queue %u:"
-                                       " enqueued %u crypto ops out of %u\n",
-                                        cqp->id, cqp->qp,
-                                        ret, cqp->len);
-                       for (i = ret; i < cqp->len; i++)
-                               rte_pktmbuf_free(cqp->buf[i]->sym->m_src);
-               }
-               cqp->in_flight += ret;
-               cqp->len = 0;
-       }
+       if (cqp->len == MAX_PKT_BURST)
+               enqueue_cop_burst(cqp);
 }
 
 static inline void
@@ -473,6 +475,32 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx *ipsec_ctx,
        }
 }
 
+static inline int32_t
+ipsec_inline_dequeue(ipsec_xform_fn xform_func, struct ipsec_ctx *ipsec_ctx,
+             struct rte_mbuf *pkts[], uint16_t max_pkts)
+{
+       int32_t nb_pkts, ret;
+       struct ipsec_mbuf_metadata *priv;
+       struct ipsec_sa *sa;
+       struct rte_mbuf *pkt;
+
+       nb_pkts = 0;
+       while (ipsec_ctx->ol_pkts_cnt > 0 && nb_pkts < max_pkts) {
+               pkt = ipsec_ctx->ol_pkts[--ipsec_ctx->ol_pkts_cnt];
+               rte_prefetch0(pkt);
+               priv = get_priv(pkt);
+               sa = priv->sa;
+               ret = xform_func(pkt, sa, &priv->cop);
+               if (unlikely(ret)) {
+                       rte_pktmbuf_free(pkt);
+                       continue;
+               }
+               pkts[nb_pkts++] = pkt;
+       }
+
+       return nb_pkts;
+}
+
 static inline int
 ipsec_dequeue(ipsec_xform_fn xform_func, struct ipsec_ctx *ipsec_ctx,
              struct rte_mbuf *pkts[], uint16_t max_pkts)
@@ -490,19 +518,6 @@ ipsec_dequeue(ipsec_xform_fn xform_func, struct ipsec_ctx *ipsec_ctx,
                if (ipsec_ctx->last_qp == ipsec_ctx->nb_qps)
                        ipsec_ctx->last_qp %= ipsec_ctx->nb_qps;
 
-               while (ipsec_ctx->ol_pkts_cnt > 0 && nb_pkts < max_pkts) {
-                       pkt = ipsec_ctx->ol_pkts[--ipsec_ctx->ol_pkts_cnt];
-                       rte_prefetch0(pkt);
-                       priv = get_priv(pkt);
-                       sa = priv->sa;
-                       ret = xform_func(pkt, sa, &priv->cop);
-                       if (unlikely(ret)) {
-                               rte_pktmbuf_free(pkt);
-                               continue;
-                       }
-                       pkts[nb_pkts++] = pkt;
-               }
-
                if (cqp->in_flight == 0)
                        continue;
 
@@ -545,6 +560,13 @@ ipsec_inbound(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
 
        ipsec_enqueue(esp_inbound, ctx, pkts, sas, nb_pkts);
 
+       return ipsec_inline_dequeue(esp_inbound_post, ctx, pkts, len);
+}
+
+uint16_t
+ipsec_inbound_cqp_dequeue(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
+               uint16_t len)
+{
        return ipsec_dequeue(esp_inbound_post, ctx, pkts, len);
 }
 
@@ -558,5 +580,12 @@ ipsec_outbound(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
 
        ipsec_enqueue(esp_outbound, ctx, pkts, sas, nb_pkts);
 
+       return ipsec_inline_dequeue(esp_outbound_post, ctx, pkts, len);
+}
+
+uint16_t
+ipsec_outbound_cqp_dequeue(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
+               uint16_t len)
+{
        return ipsec_dequeue(esp_outbound_post, ctx, pkts, len);
 }
index c998c80..508d87a 100644 (file)
@@ -182,6 +182,14 @@ uint16_t
 ipsec_outbound(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
                uint32_t sa_idx[], uint16_t nb_pkts, uint16_t len);
 
+uint16_t
+ipsec_inbound_cqp_dequeue(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
+               uint16_t len);
+
+uint16_t
+ipsec_outbound_cqp_dequeue(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
+               uint16_t len);
+
 static inline uint16_t
 ipsec_metadata_size(void)
 {
@@ -239,4 +247,7 @@ sa_init(struct socket_ctx *ctx, int32_t socket_id);
 void
 rt_init(struct socket_ctx *ctx, int32_t socket_id);
 
+void
+enqueue_cop_burst(struct cdev_qp *cqp);
+
 #endif /* __IPSEC_H__ */
index d2d3550..640f1d7 100644 (file)
@@ -947,10 +947,15 @@ int
 inbound_sa_check(struct sa_ctx *sa_ctx, struct rte_mbuf *m, uint32_t sa_idx)
 {
        struct ipsec_mbuf_metadata *priv;
+       struct ipsec_sa *sa;
 
        priv = get_priv(m);
+       sa = priv->sa;
+       if (sa != NULL)
+               return (sa_ctx->sa[sa_idx].spi == sa->spi);
 
-       return (sa_ctx->sa[sa_idx].spi == priv->sa->spi);
+       RTE_LOG(ERR, IPSEC, "SA not saved in private data\n");
+       return 0;
 }
 
 static inline void
index 8d3d3d8..6b05daa 100644 (file)
@@ -44,7 +44,7 @@ enum {
        RTE_ACL_IPV4_NUM
 };
 
-struct rte_acl_field_def ip4_defs[NUM_FIELDS_IPV4] = {
+static struct rte_acl_field_def ip4_defs[NUM_FIELDS_IPV4] = {
        {
        .type = RTE_ACL_FIELD_TYPE_BITMASK,
        .size = sizeof(uint8_t),
@@ -85,11 +85,11 @@ struct rte_acl_field_def ip4_defs[NUM_FIELDS_IPV4] = {
 
 RTE_ACL_RULE_DEF(acl4_rules, RTE_DIM(ip4_defs));
 
-struct acl4_rules acl4_rules_out[MAX_ACL_RULE_NUM];
-uint32_t nb_acl4_rules_out;
+static struct acl4_rules acl4_rules_out[MAX_ACL_RULE_NUM];
+static uint32_t nb_acl4_rules_out;
 
-struct acl4_rules acl4_rules_in[MAX_ACL_RULE_NUM];
-uint32_t nb_acl4_rules_in;
+static struct acl4_rules acl4_rules_in[MAX_ACL_RULE_NUM];
+static uint32_t nb_acl4_rules_in;
 
 void
 parse_sp4_tokens(char **tokens, uint32_t n_tokens,
index 6002afe..dc5b94c 100644 (file)
@@ -34,7 +34,7 @@ enum {
 
 #define IP6_ADDR_SIZE 16
 
-struct rte_acl_field_def ip6_defs[IP6_NUM] = {
+static struct rte_acl_field_def ip6_defs[IP6_NUM] = {
        {
        .type = RTE_ACL_FIELD_TYPE_BITMASK,
        .size = sizeof(uint8_t),
@@ -116,11 +116,11 @@ struct rte_acl_field_def ip6_defs[IP6_NUM] = {
 
 RTE_ACL_RULE_DEF(acl6_rules, RTE_DIM(ip6_defs));
 
-struct acl6_rules acl6_rules_out[MAX_ACL_RULE_NUM];
-uint32_t nb_acl6_rules_out;
+static struct acl6_rules acl6_rules_out[MAX_ACL_RULE_NUM];
+static uint32_t nb_acl6_rules_out;
 
-struct acl6_rules acl6_rules_in[MAX_ACL_RULE_NUM];
-uint32_t nb_acl6_rules_in;
+static struct acl6_rules acl6_rules_in[MAX_ACL_RULE_NUM];
+static uint32_t nb_acl6_rules_in;
 
 void
 parse_sp6_tokens(char **tokens, uint32_t n_tokens,
index e37b1ad..a58774a 100644 (file)
@@ -132,6 +132,7 @@ static int kni_config_network_interface(uint16_t port_id, uint8_t if_up);
 static int kni_config_mac_address(uint16_t port_id, uint8_t mac_addr[]);
 
 static rte_atomic32_t kni_stop = RTE_ATOMIC32_INIT(0);
+static rte_atomic32_t kni_pause = RTE_ATOMIC32_INIT(0);
 
 /* Print out statistics on packets handled */
 static void
@@ -276,6 +277,7 @@ main_loop(__rte_unused void *arg)
 {
        uint16_t i;
        int32_t f_stop;
+       int32_t f_pause;
        const unsigned lcore_id = rte_lcore_id();
        enum lcore_rxtx {
                LCORE_NONE,
@@ -304,8 +306,11 @@ main_loop(__rte_unused void *arg)
                                        kni_port_params_array[i]->port_id);
                while (1) {
                        f_stop = rte_atomic32_read(&kni_stop);
+                       f_pause = rte_atomic32_read(&kni_pause);
                        if (f_stop)
                                break;
+                       if (f_pause)
+                               continue;
                        kni_ingress(kni_port_params_array[i]);
                }
        } else if (flag == LCORE_TX) {
@@ -314,8 +319,11 @@ main_loop(__rte_unused void *arg)
                                        kni_port_params_array[i]->port_id);
                while (1) {
                        f_stop = rte_atomic32_read(&kni_stop);
+                       f_pause = rte_atomic32_read(&kni_pause);
                        if (f_stop)
                                break;
+                       if (f_pause)
+                               continue;
                        kni_egress(kni_port_params_array[i]);
                }
        } else
@@ -807,12 +815,16 @@ kni_config_network_interface(uint16_t port_id, uint8_t if_up)
        RTE_LOG(INFO, APP, "Configure network interface of %d %s\n",
                                        port_id, if_up ? "up" : "down");
 
+       rte_atomic32_inc(&kni_pause);
+
        if (if_up != 0) { /* Configure network interface up */
                rte_eth_dev_stop(port_id);
                ret = rte_eth_dev_start(port_id);
        } else /* Configure network interface down */
                rte_eth_dev_stop(port_id);
 
+       rte_atomic32_dec(&kni_pause);
+
        if (ret < 0)
                RTE_LOG(ERR, APP, "Failed to start port %d\n", port_id);
 
index 7795d08..d6379e3 100644 (file)
@@ -71,9 +71,6 @@
 
 #define INVALID_PORT_ID 0xFFFF
 
-/* Size of buffers used for snprintfs. */
-#define MAX_PRINT_BUFF 6072
-
 /* Maximum character device basename size. */
 #define MAX_BASENAME_SZ 20
 
index dc9ea10..2261f08 100644 (file)
@@ -375,11 +375,19 @@ port_init(uint16_t port)
 static int
 us_vhost_parse_socket_path(const char *q_arg)
 {
+       char *old;
+
        /* parse number string */
        if (strnlen(q_arg, PATH_MAX) == PATH_MAX)
                return -1;
 
+       old = socket_files;
        socket_files = realloc(socket_files, PATH_MAX * (nb_sockets + 1));
+       if (socket_files == NULL) {
+               free(old);
+               return -1;
+       }
+
        snprintf(socket_files + nb_sockets * PATH_MAX, PATH_MAX, "%s", q_arg);
        nb_sockets++;
 
@@ -1220,7 +1228,7 @@ destroy_device(int vid)
 
 /*
  * A new device is added to a data core. First the device is added to the main linked list
- * and the allocated to a specific data core.
+ * and then allocated to a specific data core.
  */
 static int
 new_device(int vid)
index f08babd..3deb526 100644 (file)
@@ -503,11 +503,12 @@ main(int argc, char *argv[])
                        if (strstr(dev_info.driver_name,
                                RTE_STR(VHOST_CRYPTO_CDEV_NAME_AESNI_MB_PMD)) ||
                                strstr(dev_info.driver_name,
-                               RTE_STR(VHOST_CRYPTO_CDEV_NAME_AESNI_GCM_PMD)))
-                       RTE_LOG(ERR, USER1, "Cannot enable zero-copy in %s\n",
+                               RTE_STR(VHOST_CRYPTO_CDEV_NAME_AESNI_GCM_PMD))) {
+                               RTE_LOG(ERR, USER1, "Cannot enable zero-copy in %s\n",
                                        dev_info.driver_name);
-                       ret = -EPERM;
-                       goto error_exit;
+                               ret = -EPERM;
+                               goto error_exit;
+                       }
                }
 
                if (dev_info.max_nb_queue_pairs < info->qid + 1) {
index 5aadebb..3c575c7 100644 (file)
 #undef NET_NAME_UNKNOWN
 #endif
 
+/*
+ * RHEL has two different versions with different kernel versions:
+ * 3.10 is for AMD, Intel, IBM POWER7 and POWER8;
+ * 4.14 is for ARM and IBM POWER9
+ */
 #if (defined(RHEL_RELEASE_CODE) && \
-       (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5)))
+       (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5)) && \
+       (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(8, 0)) && \
+       (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)))
 #define ndo_change_mtu ndo_change_mtu_rh74
 #endif
 
index af378d2..0b4faea 100644 (file)
@@ -2207,7 +2207,12 @@ static int igb_ndo_fdb_dump(struct sk_buff *skb,
 #ifdef HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS
 static int igb_ndo_bridge_setlink(struct net_device *dev,
                                  struct nlmsghdr *nlh,
+#ifdef HAVE_NDO_BRIDGE_SETLINK_EXTACK
+                                 u16 flags, struct netlink_ext_ack *extack)
+#else
                                  u16 flags)
+#endif
+
 #else
 static int igb_ndo_bridge_setlink(struct net_device *dev,
                                  struct nlmsghdr *nlh)
index ae1b530..11b15f3 100644 (file)
@@ -3930,7 +3930,9 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
 #endif
 
 #if (defined(RHEL_RELEASE_CODE) && \
-       (RHEL_RELEASE_VERSION(7, 5) <= RHEL_RELEASE_CODE))
+       (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5)) && \
+       (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(8, 0)) && \
+       (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)))
 #define ndo_change_mtu ndo_change_mtu_rh74
 #endif
 
@@ -3938,6 +3940,11 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
 #define HAVE_PCI_ENABLE_MSIX
 #endif
 
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(5,0,0) )
+#define dev_open(x) dev_open(x, NULL)
+#define HAVE_NDO_BRIDGE_SETLINK_EXTACK
+#endif /* >= 5.0.0 */
+
 #if defined(timer_setup) && defined(from_timer)
 #define HAVE_TIMER_SETUP
 #endif
index 7c7d6c3..419fd1f 100644 (file)
@@ -3125,6 +3125,10 @@ static inline int __kc_pci_vfs_assigned(struct pci_dev *dev)
 #define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops))
 #endif /* >= 3.16.0 */
 
+#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(5,0,0) )
+#define dev_open(x) dev_open(x, NULL)
+#endif /* >= 5.0.0 */
+
 /*
  * vlan_tx_tag_* macros renamed to skb_vlan_tag_* (Linux commit: df8a39defad4)
  * For older kernels backported this commit, need to use renamed functions.
index 395ce29..ea306d5 100644 (file)
@@ -310,7 +310,7 @@ struct rte_comp_op {
        struct rte_mbuf *m_src;
        /**< source mbuf
         * The total size of the input buffer(s) can be retrieved using
-        * rte_pktmbuf_data_len(m_src). The max data size which can fit in a
+        * rte_pktmbuf_pkt_len(m_src). The max data size which can fit in a
         * single mbuf is limited by the uint16_t rte_mbuf.data_len to 64k-1.
         * If the input data is bigger than this it can be passed to the PMD in
         * a chain of mbufs if the PMD's capabilities indicate it supports this.
@@ -318,7 +318,7 @@ struct rte_comp_op {
        struct rte_mbuf *m_dst;
        /**< destination mbuf
         * The total size of the output buffer(s) can be retrieved using
-        * rte_pktmbuf_data_len(m_dst). The max data size which can fit in a
+        * rte_pktmbuf_pkt_len(m_dst). The max data size which can fit in a
         * single mbuf is limited by the uint16_t rte_mbuf.data_len to 64k-1.
         * If the output data is expected to be bigger than this a chain of
         * mbufs can be passed to the PMD if the PMD's capabilities indicate
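
The corrected comments refer to rte_pktmbuf_pkt_len() because an operation's source or destination may be a chain of mbufs whose combined length exceeds the 64k-1 bytes a single segment's data_len can describe. A hedged sketch of handing a chained source buffer to a compression op (mbuf_pool and op are assumed to be allocated elsewhere):

	struct rte_mbuf *head = rte_pktmbuf_alloc(mbuf_pool);
	struct rte_mbuf *tail = rte_pktmbuf_alloc(mbuf_pool);

	/* fill both segments with input data, then link them */
	if (head == NULL || tail == NULL || rte_pktmbuf_chain(head, tail) != 0)
		rte_exit(EXIT_FAILURE, "cannot build chained mbuf\n");

	op->m_src = head;
	op->src.offset = 0;
	op->src.length = rte_pktmbuf_pkt_len(head);	/* total length of the chain */
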
index b8152a7..f01495e 100644 (file)
@@ -115,7 +115,7 @@ eal_create_runtime_dir(void)
 
        /* create prefix-specific subdirectory under DPDK runtime dir */
        ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s",
-                       tmp, internal_config.hugefile_prefix);
+                       tmp, eal_get_hugefile_prefix());
        if (ret < 0 || ret == sizeof(runtime_dir)) {
                RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n");
                return -1;
@@ -141,6 +141,16 @@ eal_create_runtime_dir(void)
        return 0;
 }
 
+int
+eal_clean_runtime_dir(void)
+{
+       /* FreeBSD doesn't need this implemented for now, because, unlike Linux,
+        * FreeBSD doesn't create per-process files, so no need to clean up.
+        */
+       return 0;
+}
+
+
 const char *
 rte_eal_get_runtime_dir(void)
 {
@@ -447,9 +457,21 @@ eal_parse_args(int argc, char **argv)
 
                switch (opt) {
                case OPT_MBUF_POOL_OPS_NAME_NUM:
-                       internal_config.user_mbuf_pool_ops_name =
-                           strdup(optarg);
+               {
+                       char *ops_name = strdup(optarg);
+                       if (ops_name == NULL)
+                               RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n");
+                       else {
+                               /* free old ops name */
+                               if (internal_config.user_mbuf_pool_ops_name !=
+                                               NULL)
+                                       free(internal_config.user_mbuf_pool_ops_name);
+
+                               internal_config.user_mbuf_pool_ops_name =
+                                               ops_name;
+                       }
                        break;
+               }
                case 'h':
                        eal_usage(prgname);
                        exit(EXIT_SUCCESS);
@@ -807,6 +829,18 @@ rte_eal_init(int argc, char **argv)
                return -1;
        }
 
+       /*
+        * Clean up unused files in runtime directory. We do this at the end of
+        * init and not at the beginning because we want to clean stuff up
+        * whether we are primary or secondary process, but we cannot remove
+        * primary process' files because secondary should be able to run even
+        * if primary process is dead.
+        */
+       if (eal_clean_runtime_dir() < 0) {
+               rte_eal_init_alert("Cannot clear runtime directory\n");
+               return -1;
+       }
+
        rte_eal_mcfg_complete();
 
        /* Call each registered callback, if enabled */
@@ -819,6 +853,8 @@ int __rte_experimental
 rte_eal_cleanup(void)
 {
        rte_service_finalize();
+       rte_mp_channel_cleanup();
+       eal_cleanup_config(&internal_config);
        return 0;
 }
 
index d47ea49..999ba24 100644 (file)
@@ -704,6 +704,12 @@ rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms)
                return -1;
        }
 
+       /* segment fd API is not supported for external segments */
+       if (msl->external) {
+               rte_errno = ENOTSUP;
+               return -1;
+       }
+
        ret = eal_memalloc_get_seg_fd(msl_idx, seg_idx);
        if (ret < 0) {
                rte_errno = -ret;
@@ -754,6 +760,12 @@ rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
                return -1;
        }
 
+       /* segment fd API is not supported for external segments */
+       if (msl->external) {
+               rte_errno = ENOTSUP;
+               return -1;
+       }
+
        ret = eal_memalloc_get_seg_fd_offset(msl_idx, seg_idx, offset);
        if (ret < 0) {
                rte_errno = -ret;
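
With these checks, callers can tell externally allocated memory apart from other failures when requesting a segment fd. An illustrative sketch using the public wrappers around the functions patched here (addr is assumed to point into DPDK-managed memory):

	struct rte_memseg *ms = rte_mem_virt2memseg(addr, NULL);
	int fd = (ms != NULL) ? rte_memseg_get_fd(ms) : -1;

	if (fd < 0 && rte_errno == ENOTSUP)
		printf("segment fds are not available for external memory\n");
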
index b7081af..664df5b 100644 (file)
@@ -365,6 +365,7 @@ int
 rte_eal_memzone_init(void)
 {
        struct rte_mem_config *mcfg;
+       int ret = 0;
 
        /* get pointer to global configuration */
        mcfg = rte_eal_get_configuration()->mem_config;
@@ -375,17 +376,16 @@ rte_eal_memzone_init(void)
                        rte_fbarray_init(&mcfg->memzones, "memzone",
                        RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
                RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
-               return -1;
+               ret = -1;
        } else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
                        rte_fbarray_attach(&mcfg->memzones)) {
                RTE_LOG(ERR, EAL, "Cannot attach to memzone list\n");
-               rte_rwlock_write_unlock(&mcfg->mlock);
-               return -1;
+               ret = -1;
        }
 
        rte_rwlock_write_unlock(&mcfg->mlock);
 
-       return 0;
+       return ret;
 }
 
 /* Walk all reserved memory zones */
index e31eca5..f6dfbc7 100644 (file)
@@ -168,6 +168,14 @@ eal_option_device_parse(void)
        return ret;
 }
 
+const char *
+eal_get_hugefile_prefix(void)
+{
+       if (internal_config.hugefile_prefix != NULL)
+               return internal_config.hugefile_prefix;
+       return HUGEFILE_PREFIX_DEFAULT;
+}
+
 void
 eal_reset_internal_config(struct internal_config *internal_cfg)
 {
@@ -176,7 +184,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
        internal_cfg->memory = 0;
        internal_cfg->force_nrank = 0;
        internal_cfg->force_nchannel = 0;
-       internal_cfg->hugefile_prefix = HUGEFILE_PREFIX_DEFAULT;
+       internal_cfg->hugefile_prefix = NULL;
        internal_cfg->hugepage_dir = NULL;
        internal_cfg->force_sockets = 0;
        /* zero out the NUMA config */
@@ -591,7 +599,9 @@ eal_parse_corelist(const char *corelist)
                if (*corelist == '\0')
                        return -1;
                errno = 0;
-               idx = strtoul(corelist, &end, 10);
+               idx = strtol(corelist, &end, 10);
+               if (idx < 0 || idx >= (int)cfg->lcore_count)
+                       return -1;
                if (errno || end == NULL)
                        return -1;
                while (isblank(*end))
@@ -1102,6 +1112,7 @@ eal_parse_common_option(int opt, const char *optarg,
 {
        static int b_used;
        static int w_used;
+       struct rte_config *cfg = rte_eal_get_configuration();
 
        switch (opt) {
        /* blacklist */
@@ -1144,7 +1155,9 @@ eal_parse_common_option(int opt, const char *optarg,
        /* corelist */
        case 'l':
                if (eal_parse_corelist(optarg) < 0) {
-                       RTE_LOG(ERR, EAL, "invalid core list\n");
+                       RTE_LOG(ERR, EAL,
+                               "invalid core list, please check core numbers are in [0, %u] range\n",
+                                       cfg->lcore_count-1);
                        return -1;
                }
 
@@ -1346,6 +1359,19 @@ eal_auto_detect_cores(struct rte_config *cfg)
        cfg->lcore_count -= removed;
 }
 
+int
+eal_cleanup_config(struct internal_config *internal_cfg)
+{
+       if (internal_cfg->hugefile_prefix != NULL)
+               free(internal_cfg->hugefile_prefix);
+       if (internal_cfg->hugepage_dir != NULL)
+               free(internal_cfg->hugepage_dir);
+       if (internal_cfg->user_mbuf_pool_ops_name != NULL)
+               free(internal_cfg->user_mbuf_pool_ops_name);
+
+       return 0;
+}
+
 int
 eal_adjust_config(struct internal_config *internal_cfg)
 {
@@ -1361,6 +1387,8 @@ eal_adjust_config(struct internal_config *internal_cfg)
        /* default master lcore is the first one */
        if (!master_lcore_parsed) {
                cfg->master_lcore = rte_get_next_lcore(-1, 0, 0);
+               if (cfg->master_lcore >= RTE_MAX_LCORE)
+                       return -1;
                lcore_config[cfg->master_lcore].core_role = ROLE_RTE;
        }
 
@@ -1386,7 +1414,22 @@ eal_check_common_options(struct internal_config *internal_cfg)
                RTE_LOG(ERR, EAL, "Invalid process type specified\n");
                return -1;
        }
-       if (index(internal_cfg->hugefile_prefix, '%') != NULL) {
+       if (internal_cfg->hugefile_prefix != NULL &&
+                       strlen(internal_cfg->hugefile_prefix) < 1) {
+               RTE_LOG(ERR, EAL, "Invalid length of --" OPT_FILE_PREFIX " option\n");
+               return -1;
+       }
+       if (internal_cfg->hugepage_dir != NULL &&
+                       strlen(internal_cfg->hugepage_dir) < 1) {
+               RTE_LOG(ERR, EAL, "Invalid length of --" OPT_HUGE_DIR" option\n");
+               return -1;
+       }
+       if (internal_cfg->user_mbuf_pool_ops_name != NULL &&
+                       strlen(internal_cfg->user_mbuf_pool_ops_name) < 1) {
+               RTE_LOG(ERR, EAL, "Invalid length of --" OPT_MBUF_POOL_OPS_NAME" option\n");
+               return -1;
+       }
+       if (index(eal_get_hugefile_prefix(), '%') != NULL) {
                RTE_LOG(ERR, EAL, "Invalid char, '%%', in --"OPT_FILE_PREFIX" "
                        "option\n");
                return -1;
index 1c3f09a..b46d644 100644 (file)
@@ -37,6 +37,7 @@ static int mp_fd = -1;
 static char mp_filter[PATH_MAX];   /* Filter for secondary process sockets */
 static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */
 static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER;
+static char peer_name[PATH_MAX];
 
 struct action_entry {
        TAILQ_ENTRY(action_entry) next;
@@ -511,9 +512,9 @@ async_reply_handle(void *arg)
 static int
 open_socket_fd(void)
 {
-       char peer_name[PATH_MAX] = {0};
        struct sockaddr_un un;
 
+       peer_name[0] = '\0';
        if (rte_eal_process_type() == RTE_PROC_SECONDARY)
                snprintf(peer_name, sizeof(peer_name),
                                "%d_%"PRIx64, getpid(), rte_rdtsc());
@@ -542,27 +543,17 @@ open_socket_fd(void)
        return mp_fd;
 }
 
-static int
-unlink_sockets(const char *filter)
+static void
+close_socket_fd(void)
 {
-       int dir_fd;
-       DIR *mp_dir;
-       struct dirent *ent;
-
-       mp_dir = opendir(mp_dir_path);
-       if (!mp_dir) {
-               RTE_LOG(ERR, EAL, "Unable to open directory %s\n", mp_dir_path);
-               return -1;
-       }
-       dir_fd = dirfd(mp_dir);
+       char path[PATH_MAX];
 
-       while ((ent = readdir(mp_dir))) {
-               if (fnmatch(filter, ent->d_name, 0) == 0)
-                       unlinkat(dir_fd, ent->d_name, 0);
-       }
+       if (mp_fd < 0)
+               return;
 
-       closedir(mp_dir);
-       return 0;
+       close(mp_fd);
+       create_socket_path(peer_name, path, sizeof(path));
+       unlink(path);
 }
 
 int
@@ -603,13 +594,6 @@ rte_mp_channel_init(void)
                return -1;
        }
 
-       if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
-                       unlink_sockets(mp_filter)) {
-               RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n");
-               close(dir_fd);
-               return -1;
-       }
-
        if (open_socket_fd() < 0) {
                close(dir_fd);
                return -1;
@@ -632,6 +616,12 @@ rte_mp_channel_init(void)
        return 0;
 }
 
+void
+rte_mp_channel_cleanup(void)
+{
+       close_socket_fd();
+}
+
 /**
  * Return -1, as fail to send message and it's caused by the local side.
  * Return 0, as fail to send message and it's caused by the remote side.
index 6e0331f..89a3add 100644 (file)
 int
 eal_create_runtime_dir(void);
 
+int
+eal_clean_runtime_dir(void);
+
+/** Function to return hugefile prefix that's currently set up */
+const char *
+eal_get_hugefile_prefix(void);
+
 #define RUNTIME_CONFIG_FNAME "config"
 static inline const char *
 eal_runtime_config_path(void)
@@ -86,7 +93,7 @@ static inline const char *
 eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
 {
        snprintf(buffer, buflen, HUGEFILE_FMT, hugedir,
-                       internal_config.hugefile_prefix, f_id);
+                       eal_get_hugefile_prefix(), f_id);
        buffer[buflen - 1] = '\0';
        return buffer;
 }
index 737f17e..783ce7d 100644 (file)
@@ -64,9 +64,9 @@ struct internal_config {
        volatile int syslog_facility;     /**< facility passed to openlog() */
        /** default interrupt mode for VFIO */
        volatile enum rte_intr_mode vfio_intr_mode;
-       const char *hugefile_prefix;      /**< the base filename of hugetlbfs files */
-       const char *hugepage_dir;         /**< specific hugetlbfs directory to use */
-       const char *user_mbuf_pool_ops_name;
+       char *hugefile_prefix;      /**< the base filename of hugetlbfs files */
+       char *hugepage_dir;         /**< specific hugetlbfs directory to use */
+       char *user_mbuf_pool_ops_name;
                        /**< user defined mbuf pool ops name */
        unsigned num_hugepage_sizes;      /**< how many sizes on this system */
        struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
index 5271f94..327c95e 100644 (file)
@@ -75,6 +75,7 @@ int eal_parse_common_option(int opt, const char *argv,
                            struct internal_config *conf);
 int eal_option_device_parse(void);
 int eal_adjust_config(struct internal_config *internal_cfg);
+int eal_cleanup_config(struct internal_config *internal_cfg);
 int eal_check_common_options(struct internal_config *internal_cfg);
 void eal_common_usage(void);
 enum rte_proc_type_t eal_proc_type_detect(void);
index 442c6dc..4f48383 100644 (file)
@@ -255,9 +255,13 @@ struct rte_bus *rte_bus_find_by_device_name(const char *str);
  *   0 on success;
  *   (<0) on failure.
  */
-
 int rte_mp_channel_init(void);
 
+/**
+ * Primary/secondary communication cleanup.
+ */
+void rte_mp_channel_cleanup(void);
+
 /**
  * @internal
  * Parse a device string and store its information in an
index 070e2e0..9d610a8 100644 (file)
@@ -208,6 +208,8 @@ handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
        ret = rte_eal_alarm_set(1, __handle_secondary_request, bundle);
        if (ret != 0) {
                RTE_LOG(ERR, EAL, "failed to add mp task\n");
+               free(bundle->peer);
+               free(bundle);
                return send_response_to_secondary(req, ret, peer);
        }
        return 0;
@@ -332,6 +334,8 @@ handle_primary_request(const struct rte_mp_msg *msg, const void *peer)
         */
        ret = rte_eal_alarm_set(1, __handle_primary_request, bundle);
        if (ret != 0) {
+               free(bundle->peer);
+               free(bundle);
                resp->result = ret;
                ret = rte_mp_reply(&mp_resp, peer);
                if  (ret != 0) {
index b99ba46..4afd1ac 100644 (file)
@@ -212,7 +212,7 @@ rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val);
 static inline uint16_t
 rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val)
 {
-#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG)
+#if defined(__clang__)
        return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
 #else
        return __atomic_exchange_2(dst, val, __ATOMIC_SEQ_CST);
@@ -495,7 +495,7 @@ rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val);
 static inline uint32_t
 rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val)
 {
-#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG)
+#if defined(__clang__)
        return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
 #else
        return __atomic_exchange_4(dst, val, __ATOMIC_SEQ_CST);
@@ -777,7 +777,7 @@ rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val);
 static inline uint64_t
 rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val)
 {
-#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG)
+#if defined(__clang__)
        return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
 #else
        return __atomic_exchange_8(dst, val, __ATOMIC_SEQ_CST);
index 7249e6a..54a1246 100644 (file)
@@ -251,6 +251,9 @@ rte_malloc_validate(const void *ptr, size_t *size);
 /**
  * Get heap statistics for the specified heap.
  *
+ * @note This function is not thread-safe with respect to
+ *    ``rte_malloc_heap_create()``/``rte_malloc_heap_destroy()`` functions.
+ *
  * @param socket
  *   An unsigned integer specifying the socket to get heap statistics for
  * @param socket_stats
@@ -282,9 +285,9 @@ rte_malloc_get_socket_stats(int socket,
  * @param heap_name
  *   Name of the heap to add memory chunk to
  * @param va_addr
- *   Start of virtual area to add to the heap
+ *   Start of virtual area to add to the heap. Must be aligned by ``page_sz``.
  * @param len
- *   Length of virtual area to add to the heap
+ *   Length of virtual area to add to the heap. Must be aligned by ``page_sz``.
  * @param iova_addrs
  *   Array of page IOVA addresses corresponding to each page in this memory
  *   area. Can be NULL, in which case page IOVA addresses will be set to
@@ -461,6 +464,9 @@ rte_malloc_heap_socket_is_external(int socket_id);
  * Dump for the specified type to a file. If the type argument is
  * NULL, all memory types will be dumped.
  *
+ * @note This function is not thread-safe with respect to
+ *    ``rte_malloc_heap_create()``/``rte_malloc_heap_destroy()`` functions.
+ *
  * @param f
  *   A pointer to a file for output
  * @param type
@@ -473,6 +479,9 @@ rte_malloc_dump_stats(FILE *f, const char *type);
 /**
  * Dump contents of all malloc heaps to a file.
  *
+ * @note This function is not thread-safe with respect to
+ *    ``rte_malloc_heap_create()``/``rte_malloc_heap_destroy()`` functions.
+ *
  * @param f
  *   A pointer to a file for output
  */
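
The new notes spell out what rte_malloc_heap_memory_add() already required in practice: the chunk must be page-aligned in both address and length, and an iova_addrs array (when supplied) must cover exactly len / page_sz pages. A hedged usage sketch (the heap name and sizes are arbitrary, and plain mmap() stands in for whatever provides the external memory):

	size_t page_sz = 4096;			/* declared page size, power of two */
	size_t len = 1024 * page_sz;		/* multiple of page_sz */
	void *va = mmap(NULL, len, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);	/* page-aligned */

	if (va != MAP_FAILED &&
			rte_malloc_heap_create("ext_heap") == 0 &&
			rte_malloc_heap_memory_add("ext_heap", va, len,
					NULL, 0, page_sz) == 0) {
		int socket = rte_malloc_heap_get_socket("ext_heap");
		void *obj = rte_malloc_socket(NULL, 4096, 0, socket);
		/* ... use obj, rte_free(obj), then remove the memory and destroy the heap ... */
	}
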
index f01c227..b4c6dd3 100644 (file)
@@ -37,7 +37,7 @@ extern "C" {
 /**
  * Patch level number i.e. the z in yy.mm.z
  */
-#define RTE_VER_MINOR 0
+#define RTE_VER_MINOR 1
 
 /**
  * Extra string to be appended to version number
index 9d3dcb6..052aeeb 100644 (file)
@@ -38,6 +38,10 @@ malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
        /* segment must start after header and with specified alignment */
        contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);
 
+       /* return if aligned address is already out of malloc element */
+       if (contig_seg_start > data_end)
+               return 0;
+
        /* if we're in IOVA as VA mode, or if we're in legacy mode with
         * hugepages, all elements are IOVA-contiguous. however, we can only
         * make these assumptions about internal memory - externally allocated
index 5f2d4e0..f3a1335 100644 (file)
@@ -209,6 +209,8 @@ handle_alloc_request(const struct malloc_mp_req *m,
 
        map_addr = ms[0]->addr;
 
+       eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz);
+
        /* we have succeeded in allocating memory, but we still need to sync
         * with other processes. however, since DPDK IPC is single-threaded, we
         * send an asynchronous request and exit this callback.
@@ -258,6 +260,9 @@ handle_request(const struct rte_mp_msg *msg, const void *peer __rte_unused)
        if (m->t == REQ_TYPE_ALLOC) {
                ret = handle_alloc_request(m, entry);
        } else if (m->t == REQ_TYPE_FREE) {
+               eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+                               m->free_req.addr, m->free_req.len);
+
                ret = malloc_heap_free_pages(m->free_req.addr,
                                m->free_req.len);
        } else {
@@ -436,6 +441,9 @@ handle_sync_response(const struct rte_mp_msg *request,
                memset(&rb_msg, 0, sizeof(rb_msg));
 
                /* we've failed to sync, so do a rollback */
+               eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+                               state->map_addr, state->map_len);
+
                rollback_expand_heap(state->ms, state->ms_len, state->elem,
                                state->map_addr, state->map_len);
 
index 0da5ad5..47c2bec 100644 (file)
@@ -156,20 +156,14 @@ rte_malloc_get_socket_stats(int socket,
                struct rte_malloc_socket_stats *socket_stats)
 {
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-       int heap_idx, ret = -1;
-
-       rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+       int heap_idx;
 
        heap_idx = malloc_socket_to_heap_id(socket);
        if (heap_idx < 0)
-               goto unlock;
+               return -1;
 
-       ret = malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx],
+       return malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx],
                        socket_stats);
-unlock:
-       rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
-
-       return ret;
 }
 
 /*
@@ -181,14 +175,10 @@ rte_malloc_dump_heaps(FILE *f)
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
        unsigned int idx;
 
-       rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
-
        for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
                fprintf(f, "Heap id: %u\n", idx);
                malloc_heap_dump(&mcfg->malloc_heaps[idx], f);
        }
-
-       rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
 }
 
 int
@@ -262,8 +252,6 @@ rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
        unsigned int heap_id;
        struct rte_malloc_socket_stats sock_stats;
 
-       rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
-
        /* Iterate through all initialised heaps */
        for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) {
                struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
@@ -280,7 +268,6 @@ rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
                fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count);
                fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count);
        }
-       rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
        return;
 }
 
@@ -345,6 +332,9 @@ rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len,
 
        if (heap_name == NULL || va_addr == NULL ||
                        page_sz == 0 || !rte_is_power_of_2(page_sz) ||
+                       RTE_ALIGN(len, page_sz) != len ||
+                       !rte_is_aligned(va_addr, page_sz) ||
+                       ((len / page_sz) != n_pages && iova_addrs != NULL) ||
                        strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
                        strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
                                RTE_HEAP_NAME_MAX_LEN) {
@@ -367,11 +357,6 @@ rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len,
                goto unlock;
        }
        n = len / page_sz;
-       if (n != n_pages && iova_addrs != NULL) {
-               rte_errno = EINVAL;
-               ret = -1;
-               goto unlock;
-       }
 
        rte_spinlock_lock(&heap->lock);
        ret = malloc_heap_add_external_memory(heap, va_addr, iova_addrs, n,
@@ -517,13 +502,8 @@ sync_memory(const char *heap_name, void *va_addr, size_t len, bool attach)
        if (wa.result < 0) {
                rte_errno = -wa.result;
                ret = -1;
-       } else {
-               /* notify all subscribers that a new memory area was added */
-               if (attach)
-                       eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC,
-                                       va_addr, len);
+       } else
                ret = 0;
-       }
 unlock:
        rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
        return ret;
index 02d59a8..198de6d 100644 (file)
@@ -35,10 +35,11 @@ void __rte_experimental
 rte_option_register(struct rte_option *opt)
 {
        TAILQ_FOREACH(option, &rte_option_list, next) {
-               if (strcmp(opt->opt_str, option->opt_str) == 0)
-                       RTE_LOG(INFO, EAL, "Option %s has already been registered.",
+               if (strcmp(opt->opt_str, option->opt_str) == 0) {
+                       RTE_LOG(ERR, EAL, "Option %s has already been registered.\n",
                                        opt->opt_str);
                        return;
+               }
        }
 
        TAILQ_INSERT_HEAD(&rte_option_list, opt, next);
index 361744d..30138b6 100644 (file)
@@ -13,7 +13,9 @@
 #include <syslog.h>
 #include <getopt.h>
 #include <sys/file.h>
+#include <dirent.h>
 #include <fcntl.h>
+#include <fnmatch.h>
 #include <stddef.h>
 #include <errno.h>
 #include <limits.h>
@@ -123,7 +125,7 @@ eal_create_runtime_dir(void)
 
        /* create prefix-specific subdirectory under DPDK runtime dir */
        ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s",
-                       tmp, internal_config.hugefile_prefix);
+                       tmp, eal_get_hugefile_prefix());
        if (ret < 0 || ret == sizeof(runtime_dir)) {
                RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n");
                return -1;
@@ -149,6 +151,91 @@ eal_create_runtime_dir(void)
        return 0;
 }
 
+int
+eal_clean_runtime_dir(void)
+{
+       DIR *dir;
+       struct dirent *dirent;
+       int dir_fd, fd, lck_result;
+       static const char * const filters[] = {
+               "fbarray_*",
+               "mp_socket_*"
+       };
+
+       /* open directory */
+       dir = opendir(runtime_dir);
+       if (!dir) {
+               RTE_LOG(ERR, EAL, "Unable to open runtime directory %s\n",
+                               runtime_dir);
+               goto error;
+       }
+       dir_fd = dirfd(dir);
+
+       /* lock the directory before doing anything, to avoid races */
+       if (flock(dir_fd, LOCK_EX) < 0) {
+               RTE_LOG(ERR, EAL, "Unable to lock runtime directory %s\n",
+                       runtime_dir);
+               goto error;
+       }
+
+       dirent = readdir(dir);
+       if (!dirent) {
+               RTE_LOG(ERR, EAL, "Unable to read runtime directory %s\n",
+                               runtime_dir);
+               goto error;
+       }
+
+       while (dirent != NULL) {
+               unsigned int f_idx;
+               bool skip = true;
+
+               /* skip files that don't match the patterns */
+               for (f_idx = 0; f_idx < RTE_DIM(filters); f_idx++) {
+                       const char *filter = filters[f_idx];
+
+                       if (fnmatch(filter, dirent->d_name, 0) == 0) {
+                               skip = false;
+                               break;
+                       }
+               }
+               if (skip) {
+                       dirent = readdir(dir);
+                       continue;
+               }
+
+               /* try and lock the file */
+               fd = openat(dir_fd, dirent->d_name, O_RDONLY);
+
+               /* skip to next file */
+               if (fd == -1) {
+                       dirent = readdir(dir);
+                       continue;
+               }
+
+               /* non-blocking lock */
+               lck_result = flock(fd, LOCK_EX | LOCK_NB);
+
+               /* if lock succeeds, remove the file */
+               if (lck_result != -1)
+                       unlinkat(dir_fd, dirent->d_name, 0);
+               close(fd);
+               dirent = readdir(dir);
+       }
+
+       /* closedir closes dir_fd and drops the lock */
+       closedir(dir);
+       return 0;
+
+error:
+       if (dir)
+               closedir(dir);
+
+       RTE_LOG(ERR, EAL, "Error while clearing runtime dir: %s\n",
+               strerror(errno));
+
+       return -1;
+}
+
 const char *
 rte_eal_get_runtime_dir(void)
 {
@@ -494,10 +581,6 @@ eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg)
                socket_arg[i] = val;
        }
 
-       /* check if we have a positive amount of total memory */
-       if (total_mem == 0)
-               return -1;
-
        return 0;
 }
 
@@ -639,13 +722,31 @@ eal_parse_args(int argc, char **argv)
                        exit(EXIT_SUCCESS);
 
                case OPT_HUGE_DIR_NUM:
-                       internal_config.hugepage_dir = strdup(optarg);
+               {
+                       char *hdir = strdup(optarg);
+                       if (hdir == NULL)
+                               RTE_LOG(ERR, EAL, "Could not store hugepage directory\n");
+                       else {
+                               /* free old hugepage dir */
+                               if (internal_config.hugepage_dir != NULL)
+                                       free(internal_config.hugepage_dir);
+                               internal_config.hugepage_dir = hdir;
+                       }
                        break;
-
+               }
                case OPT_FILE_PREFIX_NUM:
-                       internal_config.hugefile_prefix = strdup(optarg);
+               {
+                       char *prefix = strdup(optarg);
+                       if (prefix == NULL)
+                               RTE_LOG(ERR, EAL, "Could not store file prefix\n");
+                       else {
+                               /* free old prefix */
+                               if (internal_config.hugefile_prefix != NULL)
+                                       free(internal_config.hugefile_prefix);
+                               internal_config.hugefile_prefix = prefix;
+                       }
                        break;
-
+               }
                case OPT_SOCKET_MEM_NUM:
                        if (eal_parse_socket_arg(optarg,
                                        internal_config.socket_mem) < 0) {
@@ -695,10 +796,21 @@ eal_parse_args(int argc, char **argv)
                        break;
 
                case OPT_MBUF_POOL_OPS_NAME_NUM:
-                       internal_config.user_mbuf_pool_ops_name =
-                           strdup(optarg);
+               {
+                       char *ops_name = strdup(optarg);
+                       if (ops_name == NULL)
+                               RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n");
+                       else {
+                               /* free old ops name */
+                               if (internal_config.user_mbuf_pool_ops_name !=
+                                               NULL)
+                                       free(internal_config.user_mbuf_pool_ops_name);
+
+                               internal_config.user_mbuf_pool_ops_name =
+                                               ops_name;
+                       }
                        break;
-
+               }
                default:
                        if (opt < OPT_LONG_MIN_NUM && isprint(opt)) {
                                RTE_LOG(ERR, EAL, "Option %c is not supported "
@@ -1096,6 +1208,18 @@ rte_eal_init(int argc, char **argv)
                return -1;
        }
 
+       /*
+        * Clean up unused files in the runtime directory. We do this at the
+        * end of init rather than at the beginning because we want the cleanup
+        * to run whether we are a primary or a secondary process, but we cannot
+        * remove the primary process' files, because a secondary process should
+        * be able to run even if the primary process is dead.
+        */
+       if (eal_clean_runtime_dir() < 0) {
+               rte_eal_init_alert("Cannot clear runtime directory\n");
+               return -1;
+       }
+
        rte_eal_mcfg_complete();
 
        /* Call each registered callback, if enabled */
@@ -1130,6 +1254,8 @@ rte_eal_cleanup(void)
        if (rte_eal_process_type() == RTE_PROC_PRIMARY)
                rte_memseg_walk(mark_freeable, NULL);
        rte_service_finalize();
+       rte_mp_channel_cleanup();
+       eal_cleanup_config(&internal_config);
        return 0;
 }
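The runtime-directory cleanup added above hinges on a simple convention: every file the EAL keeps in its runtime directory stays flock()'d for as long as its owning process is alive, so a non-blocking exclusive lock succeeds only on files whose owner has exited. A minimal standalone sketch of that probe (the directory handling and names here are illustrative, not the EAL's actual layout):

/* Sketch: detect and remove a stale runtime file via a non-blocking lock.
 * Assumes the file's owner holds flock() on it while alive. */
#include <fcntl.h>
#include <sys/file.h>
#include <unistd.h>

static int
remove_if_unused(int dir_fd, const char *name)
{
        int fd = openat(dir_fd, name, O_RDONLY);

        if (fd < 0)
                return -1;
        /* the lock succeeds immediately only if nobody else holds it */
        if (flock(fd, LOCK_EX | LOCK_NB) == 0)
                unlinkat(dir_fd, name, 0);
        close(fd); /* closing also releases any lock we took */
        return 0;
}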
 
index 7849395..f63d9ca 100644 (file)
 #include <sys/time.h>
 #include <signal.h>
 #include <setjmp.h>
+#ifdef F_ADD_SEALS /* if file sealing is supported, so is memfd */
+#include <linux/memfd.h>
+#define MEMFD_SUPPORTED
+#endif
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
 #include <numa.h>
 #include <numaif.h>
@@ -53,8 +57,8 @@ const int anonymous_hugepages_supported =
 #endif
 
 /*
- * we don't actually care if memfd itself is supported - we only need to check
- * if memfd supports hugetlbfs, as that already implies memfd support.
+ * we've already checked memfd support at compile-time, but we also need to
+ * check if we can create hugepage files with memfd.
  *
  * also, this is not a constant, because while we may be *compiled* with memfd
  * hugetlbfs support, we might not be *running* on a system that supports memfd
@@ -63,10 +67,11 @@ const int anonymous_hugepages_supported =
  */
 static int memfd_create_supported =
 #ifdef MFD_HUGETLB
-#define MEMFD_SUPPORTED
                1;
+#define RTE_MFD_HUGETLB MFD_HUGETLB
 #else
                0;
+#define RTE_MFD_HUGETLB 4U
 #endif
 
 /*
@@ -171,7 +176,7 @@ prepare_numa(int *oldpolicy, struct bitmask *oldmask, int socket_id)
                RTE_LOG(ERR, EAL,
                        "Failed to get current mempolicy: %s. "
                        "Assuming MPOL_DEFAULT.\n", strerror(errno));
-               oldpolicy = MPOL_DEFAULT;
+               *oldpolicy = MPOL_DEFAULT;
        }
        RTE_LOG(DEBUG, EAL,
                "Setting policy MPOL_PREFERRED for socket %d\n",
@@ -338,12 +343,12 @@ get_seg_memfd(struct hugepage_info *hi __rte_unused,
        int fd;
        char segname[250]; /* as per manpage, limit is 249 bytes plus null */
 
+       int flags = RTE_MFD_HUGETLB | pagesz_flags(hi->hugepage_sz);
+
        if (internal_config.single_file_segments) {
                fd = fd_list[list_idx].memseg_list_fd;
 
                if (fd < 0) {
-                       int flags = MFD_HUGETLB | pagesz_flags(hi->hugepage_sz);
-
                        snprintf(segname, sizeof(segname), "seg_%i", list_idx);
                        fd = memfd_create(segname, flags);
                        if (fd < 0) {
@@ -357,8 +362,6 @@ get_seg_memfd(struct hugepage_info *hi __rte_unused,
                fd = fd_list[list_idx].fds[seg_idx];
 
                if (fd < 0) {
-                       int flags = MFD_HUGETLB | pagesz_flags(hi->hugepage_sz);
-
                        snprintf(segname, sizeof(segname), "seg_%i-%i",
                                        list_idx, seg_idx);
                        fd = memfd_create(segname, flags);
@@ -633,13 +636,13 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
        int mmap_flags;
 
        if (internal_config.in_memory && !memfd_create_supported) {
-               int pagesz_flag, flags;
+               const int in_memory_flags = MAP_HUGETLB | MAP_FIXED |
+                               MAP_PRIVATE | MAP_ANONYMOUS;
+               int pagesz_flag;
 
                pagesz_flag = pagesz_flags(alloc_sz);
-               flags = pagesz_flag | MAP_HUGETLB | MAP_FIXED |
-                               MAP_PRIVATE | MAP_ANONYMOUS;
                fd = -1;
-               mmap_flags = flags;
+               mmap_flags = in_memory_flags | pagesz_flag;
 
                /* single-file segments codepath will never be active
                 * here because in-memory mode is incompatible with the
@@ -1542,6 +1545,17 @@ int
 eal_memalloc_get_seg_fd(int list_idx, int seg_idx)
 {
        int fd;
+
+       if (internal_config.in_memory || internal_config.no_hugetlbfs) {
+#ifndef MEMFD_SUPPORTED
+               /* in in-memory or no-huge mode, we rely on memfd support */
+               return -ENOTSUP;
+#endif
+               /* memfd supported, but hugetlbfs memfd may not be */
+               if (!internal_config.no_hugetlbfs && !memfd_create_supported)
+                       return -ENOTSUP;
+       }
+
        if (internal_config.single_file_segments) {
                fd = fd_list[list_idx].memseg_list_fd;
        } else if (fd_list[list_idx].len == 0) {
@@ -1565,7 +1579,7 @@ test_memfd_create(void)
                int pagesz_flag = pagesz_flags(pagesz);
                int flags;
 
-               flags = pagesz_flag | MFD_HUGETLB;
+               flags = pagesz_flag | RTE_MFD_HUGETLB;
                int fd = memfd_create("test", flags);
                if (fd < 0) {
                        /* we failed - let memalloc know this isn't working */
@@ -1589,6 +1603,16 @@ eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset)
 {
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
 
+       if (internal_config.in_memory || internal_config.no_hugetlbfs) {
+#ifndef MEMFD_SUPPORTED
+               /* in in-memory or no-huge mode, we rely on memfd support */
+               return -ENOTSUP;
+#endif
+               /* memfd supported, but hugetlbfs memfd may not be */
+               if (!internal_config.no_hugetlbfs && !memfd_create_supported)
+                       return -ENOTSUP;
+       }
+
        /* fd_list not initialized? */
        if (fd_list[list_idx].len == 0)
                return -ENODEV;
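The RTE_MFD_HUGETLB handling above exists because MFD_HUGETLB can be missing from older kernel headers even though memfd_create() itself compiles, and because compiling with the flag still says nothing about the kernel the code eventually runs on. A hedged sketch of the same compile-time fallback plus runtime probe (the literal 4U mirrors the kernel ABI value, exactly as the hunk hard-codes it):

/* Sketch: fall back to the raw flag value at compile time, then verify at
 * run time that the kernel actually accepts hugetlb-backed memfds. */
#define _GNU_SOURCE
#include <sys/mman.h>   /* memfd_create() on glibc >= 2.27 */
#include <unistd.h>

#ifndef MFD_HUGETLB
#define MFD_HUGETLB 4U  /* same value the hunk uses for RTE_MFD_HUGETLB */
#endif

static int
hugetlb_memfd_supported(void)
{
        int fd = memfd_create("probe", MFD_HUGETLB);

        if (fd < 0)
                return 0;   /* kernel too old, or no hugetlb support */
        close(fd);
        return 1;
}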
index 32feb41..e05da74 100644 (file)
@@ -434,7 +434,7 @@ find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
        }
 
        snprintf(hugedir_str, sizeof(hugedir_str),
-                       "%s/%s", hpi->hugedir, internal_config.hugefile_prefix);
+                       "%s/%s", hpi->hugedir, eal_get_hugefile_prefix());
 
        /* parse numa map */
        while (fgets(buf, sizeof(buf), f) != NULL) {
index 0516b15..c821e83 100644 (file)
@@ -549,6 +549,65 @@ next:
        }
 }
 
+static int
+vfio_sync_default_container(void)
+{
+       struct rte_mp_msg mp_req, *mp_rep;
+       struct rte_mp_reply mp_reply;
+       struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+       struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
+       int iommu_type_id;
+       unsigned int i;
+
+       /* cannot be called from primary */
+       if (rte_eal_process_type() != RTE_PROC_SECONDARY)
+               return -1;
+
+       /* default container fd should have been opened in rte_vfio_enable() */
+       if (!default_vfio_cfg->vfio_enabled ||
+                       default_vfio_cfg->vfio_container_fd < 0) {
+               RTE_LOG(ERR, EAL, "VFIO support is not initialized\n");
+               return -1;
+       }
+
+       /* find default container's IOMMU type */
+       p->req = SOCKET_REQ_IOMMU_TYPE;
+       strcpy(mp_req.name, EAL_VFIO_MP);
+       mp_req.len_param = sizeof(*p);
+       mp_req.num_fds = 0;
+
+       iommu_type_id = -1;
+       if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+                       mp_reply.nb_received == 1) {
+               mp_rep = &mp_reply.msgs[0];
+               p = (struct vfio_mp_param *)mp_rep->param;
+               if (p->result == SOCKET_OK)
+                       iommu_type_id = p->iommu_type_id;
+               free(mp_reply.msgs);
+       }
+       if (iommu_type_id < 0) {
+               RTE_LOG(ERR, EAL, "Could not get IOMMU type for default container\n");
+               return -1;
+       }
+
+       /* we now have an fd for default container, as well as its IOMMU type.
+        * now, set up default VFIO container config to match.
+        */
+       for (i = 0; i < RTE_DIM(iommu_types); i++) {
+               const struct vfio_iommu_type *t = &iommu_types[i];
+               if (t->type_id != iommu_type_id)
+                       continue;
+
+               /* we found our IOMMU type */
+               default_vfio_cfg->vfio_iommu_type = t;
+
+               return 0;
+       }
+       RTE_LOG(ERR, EAL, "Could not find IOMMU type id (%i)\n",
+                       iommu_type_id);
+       return -1;
+}
+
 int
 rte_vfio_clear_group(int vfio_group_fd)
 {
@@ -745,6 +804,26 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
                        else
                                RTE_LOG(DEBUG, EAL, "Installed memory event callback for VFIO\n");
                }
+       } else if (rte_eal_process_type() != RTE_PROC_PRIMARY &&
+                       vfio_cfg == default_vfio_cfg &&
+                       vfio_cfg->vfio_iommu_type == NULL) {
+               /* if we're not a primary process, we do not set up the VFIO
+                * container because it has already been set up by the primary
+                * process. Instead, we simply ask the primary which VFIO IOMMU
+                * type is in use, and set up the VFIO config accordingly.
+                */
+               ret = vfio_sync_default_container();
+               if (ret < 0) {
+                       RTE_LOG(ERR, EAL, "Could not sync default VFIO container\n");
+                       close(vfio_group_fd);
+                       rte_vfio_clear_group(vfio_group_fd);
+                       return -1;
+               }
+               /* we have successfully initialized VFIO, notify user */
+               const struct vfio_iommu_type *t =
+                               default_vfio_cfg->vfio_iommu_type;
+               RTE_LOG(NOTICE, EAL, "  using IOMMU type %d (%s)\n",
+                               t->type_id, t->name);
        }
 
        /* get a file descriptor for the device */
@@ -857,7 +936,8 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
        /* if there are no active device groups, unregister the callback to
         * avoid spurious attempts to map/unmap memory from VFIO.
         */
-       if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0)
+       if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0 &&
+                       rte_eal_process_type() != RTE_PROC_SECONDARY)
                rte_mem_event_callback_unregister(VFIO_MEM_EVENT_CLB_NAME,
                                NULL);
 
@@ -977,6 +1057,15 @@ vfio_get_default_container_fd(void)
        return -1;
 }
 
+int
+vfio_get_iommu_type(void)
+{
+       if (default_vfio_cfg->vfio_iommu_type == NULL)
+               return -1;
+
+       return default_vfio_cfg->vfio_iommu_type->type_id;
+}
+
 const struct vfio_iommu_type *
 vfio_set_iommu_type(int vfio_container_fd)
 {
index 63ae115..cb2d35f 100644 (file)
@@ -5,6 +5,8 @@
 #ifndef EAL_VFIO_H_
 #define EAL_VFIO_H_
 
+#include <rte_common.h>
+
 /*
  * determine if VFIO is present on the system
  */
@@ -122,6 +124,9 @@ int vfio_get_default_container_fd(void);
 const struct vfio_iommu_type *
 vfio_set_iommu_type(int vfio_container_fd);
 
+int
+vfio_get_iommu_type(void);
+
 /* check if we have any supported extensions */
 int
 vfio_has_supported_extensions(int vfio_container_fd);
@@ -133,6 +138,7 @@ int vfio_mp_sync_setup(void);
 #define SOCKET_REQ_CONTAINER 0x100
 #define SOCKET_REQ_GROUP 0x200
 #define SOCKET_REQ_DEFAULT_CONTAINER 0x400
+#define SOCKET_REQ_IOMMU_TYPE 0x800
 #define SOCKET_OK 0x0
 #define SOCKET_NO_FD 0x1
 #define SOCKET_ERR 0xFF
@@ -140,7 +146,11 @@ int vfio_mp_sync_setup(void);
 struct vfio_mp_param {
        int req;
        int result;
-       int group_num;
+       RTE_STD_C11
+       union {
+               int group_num;
+               int iommu_type_id;
+       };
 };
 
 #endif /* VFIO_PRESENT */
index a1e8c83..2a47f29 100644 (file)
@@ -77,6 +77,22 @@ vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer)
                        reply.fds[0] = fd;
                }
                break;
+       case SOCKET_REQ_IOMMU_TYPE:
+       {
+               int iommu_type_id;
+
+               r->req = SOCKET_REQ_IOMMU_TYPE;
+
+               iommu_type_id = vfio_get_iommu_type();
+
+               if (iommu_type_id < 0)
+                       r->result = SOCKET_ERR;
+               else {
+                       r->iommu_type_id = iommu_type_id;
+                       r->result = SOCKET_OK;
+               }
+               break;
+       }
        default:
                RTE_LOG(ERR, EAL, "vfio received invalid message!\n");
                return -1;
index e6e5cfd..1a97ece 100644 (file)
@@ -740,6 +740,8 @@ void
 rte_efd_free(struct rte_efd_table *table)
 {
        uint8_t socket_id;
+       struct rte_efd_list *efd_list;
+       struct rte_tailq_entry *te, *temp;
 
        if (table == NULL)
                return;
@@ -747,6 +749,18 @@ rte_efd_free(struct rte_efd_table *table)
        for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES; socket_id++)
                rte_free(table->chunks[socket_id]);
 
+       efd_list = RTE_TAILQ_CAST(rte_efd_tailq.head, rte_efd_list);
+       rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+       TAILQ_FOREACH_SAFE(te, efd_list, next, temp) {
+               if (te->data == (void *) table) {
+                       TAILQ_REMOVE(efd_list, te, next);
+                       rte_free(te);
+                       break;
+               }
+       }
+
+       rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
        rte_ring_free(table->free_slots);
        rte_free(table->offline_chunks);
        rte_free(table->keys);
index 5f85817..9d5107d 100644 (file)
@@ -1594,7 +1594,7 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
                        nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) {
 
                RTE_ETHDEV_LOG(ERR,
-                       "Invalid value for nb_rx_desc(=%hu), should be: <= %hu, = %hu, and a product of %hu\n",
+                       "Invalid value for nb_rx_desc(=%hu), should be: <= %hu, >= %hu, and a product of %hu\n",
                        nb_rx_desc, dev_info.rx_desc_lim.nb_max,
                        dev_info.rx_desc_lim.nb_min,
                        dev_info.rx_desc_lim.nb_align);
@@ -1698,7 +1698,7 @@ rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
            nb_tx_desc < dev_info.tx_desc_lim.nb_min ||
            nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) {
                RTE_ETHDEV_LOG(ERR,
-                       "Invalid value for nb_tx_desc(=%hu), should be: <= %hu, = %hu, and a product of %hu\n",
+                       "Invalid value for nb_tx_desc(=%hu), should be: <= %hu, >= %hu, and a product of %hu\n",
                        nb_tx_desc, dev_info.tx_desc_lim.nb_max,
                        dev_info.tx_desc_lim.nb_min,
                        dev_info.tx_desc_lim.nb_align);
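For reference, the constraint the corrected log messages now state is: a descriptor ring size is accepted only if it lies between the reported minimum and maximum and is a multiple of the alignment. A trivial sketch of that check (names are illustrative, not the ethdev API):

#include <stdbool.h>
#include <stdint.h>

/* Sketch: the rule the fixed message describes,
 * min <= nb_desc <= max and nb_desc a multiple of align. */
static bool
desc_count_valid(uint16_t nb_desc, uint16_t min, uint16_t max, uint16_t align)
{
        return nb_desc >= min && nb_desc <= max && (nb_desc % align) == 0;
}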
index 1960f3a..a3c864a 100644 (file)
@@ -4159,9 +4159,6 @@ rte_eth_tx_burst(uint16_t port_id, uint16_t queue_id,
 }
 
 /**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
  * Process a burst of output packets on a transmit queue of an Ethernet device.
  *
  * The rte_eth_tx_prepare() function is invoked to prepare output packets to be
@@ -4225,7 +4222,7 @@ rte_eth_tx_prepare(uint16_t port_id, uint16_t queue_id,
 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
        if (!rte_eth_dev_is_valid_port(port_id)) {
                RTE_ETHDEV_LOG(ERR, "Invalid TX port_id=%u\n", port_id);
-               rte_errno = -EINVAL;
+               rte_errno = EINVAL;
                return 0;
        }
 #endif
@@ -4235,7 +4232,7 @@ rte_eth_tx_prepare(uint16_t port_id, uint16_t queue_id,
 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
        if (queue_id >= dev->data->nb_tx_queues) {
                RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id);
-               rte_errno = -EINVAL;
+               rte_errno = EINVAL;
                return 0;
        }
 #endif
index 8831bc3..8d178be 100644 (file)
@@ -912,7 +912,7 @@ rxa_intr_ring_enqueue(struct rte_event_eth_rx_adapter *rx_adapter,
                 */
                if (err)
                        RTE_EDEV_LOG_ERR("Failed to enqueue interrupt"
-                               " to ring: %s", strerror(err));
+                               " to ring: %s", strerror(-err));
                else
                        rte_eth_dev_rx_intr_disable(port_id, queue);
        }
index ccf8a75..67216a3 100644 (file)
@@ -59,6 +59,20 @@ do {\
                return -EINVAL; \
 } while (0)
 
+#define TXA_CHECK_TXQ(dev, queue) \
+do {\
+       if ((dev)->data->nb_tx_queues == 0) { \
+               RTE_EDEV_LOG_ERR("No tx queues configured"); \
+               return -EINVAL; \
+       } \
+       if ((queue) != -1 && \
+               (uint16_t)(queue) >= (dev)->data->nb_tx_queues) { \
+               RTE_EDEV_LOG_ERR("Invalid tx queue_id %" PRIu16, \
+                               (uint16_t)(queue)); \
+               return -EINVAL; \
+       } \
+} while (0)
+
 /* Tx retry callback structure */
 struct txa_retry {
        /* Ethernet port id */
@@ -795,20 +809,35 @@ txa_service_queue_del(uint8_t id,
        struct rte_eth_dev_tx_buffer *tb;
        uint16_t port_id;
 
+       txa = txa_service_id_to_data(id);
+       port_id = dev->data->port_id;
+
        if (tx_queue_id == -1) {
-               uint16_t i;
-               int ret = -1;
+               uint16_t i, q, nb_queues;
+               int ret = 0;
 
-               for (i = 0; i < dev->data->nb_tx_queues; i++) {
-                       ret = txa_service_queue_del(id, dev, i);
-                       if (ret != 0)
-                               break;
+               nb_queues = txa->nb_queues;
+               if (nb_queues == 0)
+                       return 0;
+
+               i = 0;
+               q = 0;
+               tqi = txa->txa_ethdev[port_id].queues;
+
+               while (i < nb_queues) {
+
+                       if (tqi[q].added) {
+                               ret = txa_service_queue_del(id, dev, q);
+                               if (ret != 0)
+                                       break;
+                       }
+                       i++;
+                       q++;
                }
                return ret;
        }
 
        txa = txa_service_id_to_data(id);
-       port_id = dev->data->port_id;
 
        tqi = txa_service_queue(txa, port_id, tx_queue_id);
        if (tqi == NULL || !tqi->added)
@@ -999,11 +1028,7 @@ rte_event_eth_tx_adapter_queue_add(uint8_t id,
        TXA_CHECK_OR_ERR_RET(id);
 
        eth_dev = &rte_eth_devices[eth_dev_id];
-       if (queue != -1 && (uint16_t)queue >= eth_dev->data->nb_tx_queues) {
-               RTE_EDEV_LOG_ERR("Invalid tx queue_id %" PRIu16,
-                               (uint16_t)queue);
-               return -EINVAL;
-       }
+       TXA_CHECK_TXQ(eth_dev, queue);
 
        caps = 0;
        if (txa_dev_caps_get(id))
@@ -1034,11 +1059,6 @@ rte_event_eth_tx_adapter_queue_del(uint8_t id,
        TXA_CHECK_OR_ERR_RET(id);
 
        eth_dev = &rte_eth_devices[eth_dev_id];
-       if (queue != -1 && (uint16_t)queue >= eth_dev->data->nb_tx_queues) {
-               RTE_EDEV_LOG_ERR("Invalid tx queue_id %" PRIu16,
-                               (uint16_t)queue);
-               return -EINVAL;
-       }
 
        caps = 0;
 
index d4ea6f1..db98dec 100644 (file)
@@ -461,61 +461,8 @@ rte_event_timer_adapter_stats_get(struct rte_event_timer_adapter *adapter,
  *   - 0: Successfully reset;
  *   - <0: Failure; error code returned.
  */
-int __rte_experimental rte_event_timer_adapter_stats_reset(
-               struct rte_event_timer_adapter *adapter);
-
-/**
- * Retrieve the service ID of the event timer adapter. If the adapter doesn't
- * use an rte_service function, this function returns -ESRCH.
- *
- * @param adapter
- *   A pointer to an event timer adapter.
- *
- * @param [out] service_id
- *   A pointer to a uint32_t, to be filled in with the service id.
- *
- * @return
- *   - 0: Success
- *   - <0: Error code on failure, if the event dev doesn't use a rte_service
- *   function, this function returns -ESRCH.
- */
-int
-rte_event_timer_adapter_service_id_get(struct rte_event_timer_adapter *adapter,
-                                      uint32_t *service_id);
-
-/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
- * Retrieve statistics for an event timer adapter instance.
- *
- * @param adapter
- *   A pointer to an event timer adapter structure.
- * @param[out] stats
- *   A pointer to a structure to fill with statistics.
- *
- * @return
- *   - 0: Successfully retrieved.
- *   - <0: Failure; error code returned.
- */
-int rte_event_timer_adapter_stats_get(struct rte_event_timer_adapter *adapter,
-                               struct rte_event_timer_adapter_stats *stats);
-
-/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
- * Reset statistics for an event timer adapter instance.
- *
- * @param adapter
- *   A pointer to an event timer adapter structure.
- *
- * @return
- *   - 0: Successfully reset;
- *   - <0: Failure; error code returned.
- */
-int rte_event_timer_adapter_stats_reset(
-                               struct rte_event_timer_adapter *adapter);
+int __rte_experimental
+rte_event_timer_adapter_stats_reset(struct rte_event_timer_adapter *adapter);
 
 /**
  * @warning
index d7eb69d..ef10a85 100644 (file)
@@ -1893,7 +1893,7 @@ rte_event_dev_xstats_names_get(uint8_t dev_id,
  * @param ids
  *   The id numbers of the stats to get. The ids can be got from the stat
  *   position in the stat list from rte_event_dev_get_xstats_names(), or
- *   by using rte_eventdev_get_xstats_by_name()
+ *   by using rte_event_dev_xstats_by_name_get().
  * @param[out] values
  *   The values for each stats request by ID.
  * @param n
@@ -1921,7 +1921,7 @@ rte_event_dev_xstats_get(uint8_t dev_id,
  *   The stat name to retrieve
  * @param[out] id
  *   If non-NULL, the numerical id of the stat will be returned, so that further
- *   requests for the stat can be got using rte_eventdev_xstats_get, which will
+ *   requests for the stat can be got using rte_event_dev_xstats_get, which will
  *   be faster as it doesn't need to scan a list of names for the stat.
  *   If the stat cannot be found, the id returned will be (unsigned)-1.
  * @return
index 2c0f35c..7d128a4 100644 (file)
@@ -198,7 +198,8 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
        struct ipv4_hdr *ipv4_hdr;
        struct tcp_hdr *tcp_hdr;
        uint32_t sent_seq;
-       uint16_t tcp_dl, ip_id, hdr_len, frag_off;
+       int32_t tcp_dl;
+       uint16_t ip_id, hdr_len, frag_off;
        uint8_t is_atomic;
 
        struct tcp4_flow_key key;
@@ -207,6 +208,13 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
        int cmp;
        uint8_t find;
 
+       /*
+        * Don't process packets whose TCP header length is greater
+        * than 60 bytes or less than 20 bytes.
+        */
+       if (unlikely(INVALID_TCP_HDRLEN(pkt->l4_len)))
+               return -1;
+
        eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
        ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
        tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
index 6bb30cd..d979248 100644 (file)
  */
 #define MAX_IPV4_PKT_LENGTH UINT16_MAX
 
+/* The maximum TCP header length */
+#define MAX_TCP_HLEN 60
+#define INVALID_TCP_HDRLEN(len) \
+       (((len) < sizeof(struct tcp_hdr)) || ((len) > MAX_TCP_HLEN))
+
 /* Header fields representing a TCP/IPv4 flow */
 struct tcp4_flow_key {
        struct ether_addr eth_saddr;
index ca86f01..acb9bc9 100644 (file)
@@ -295,7 +295,8 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
        struct udp_hdr *udp_hdr;
        struct vxlan_hdr *vxlan_hdr;
        uint32_t sent_seq;
-       uint16_t tcp_dl, frag_off, outer_ip_id, ip_id;
+       int32_t tcp_dl;
+       uint16_t frag_off, outer_ip_id, ip_id;
        uint8_t outer_is_atomic, is_atomic;
 
        struct vxlan_tcp4_flow_key key;
@@ -305,6 +306,13 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
        uint16_t hdr_len;
        uint8_t find;
 
+       /*
+        * Don't process packets whose TCP header length is greater
+        * than 60 bytes or less than 20 bytes.
+        */
+       if (unlikely(INVALID_TCP_HDRLEN(pkt->l4_len)))
+               return -1;
+
        outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
        outer_ipv4_hdr = (struct ipv4_hdr *)((char *)outer_eth_hdr +
                        pkt->outer_l2_len);
index 6cd764f..b6ff1b8 100644 (file)
                (PKT_TX_TCP_SEG | PKT_TX_IPV4))
 
 #define IS_IPV4_VXLAN_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \
-                               PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_VXLAN)) == \
+                               PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_MASK)) == \
                (PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
                 PKT_TX_TUNNEL_VXLAN))
 
 #define IS_IPV4_GRE_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \
-                               PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_GRE)) == \
+                               PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_MASK)) == \
                (PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
                 PKT_TX_TUNNEL_GRE))
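The fix above widens the mask to the full tunnel-type field, so a packet carrying some other tunnel type no longer satisfies the VXLAN or GRE check by accident. A small sketch of the mask-and-compare idiom it relies on (the flag values here are placeholders, not DPDK's mbuf flags):

#include <stdbool.h>
#include <stdint.h>

#define F_TCP_SEG      (1ULL << 0)
#define F_IPV4         (1ULL << 1)
#define F_OUTER_IPV4   (1ULL << 2)
#define F_TUNNEL_MASK  (0xFULL << 4)   /* all tunnel-type bits */
#define F_TUNNEL_VXLAN (0x1ULL << 4)
#define F_TUNNEL_GRE   (0x2ULL << 4)

/* Sketch: include the whole tunnel-type field in the mask, so flags that
 * carry a different tunnel type compare unequal and the check fails. */
static bool
is_ipv4_vxlan_tcp4(uint64_t flags)
{
        return (flags & (F_TCP_SEG | F_IPV4 | F_OUTER_IPV4 | F_TUNNEL_MASK)) ==
               (F_TCP_SEG | F_IPV4 | F_OUTER_IPV4 | F_TUNNEL_VXLAN);
}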
 
index c55a4f2..c01489b 100644 (file)
@@ -1347,6 +1347,9 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
                        n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
                                                cached_free_slots->objs,
                                                LCORE_CACHE_SIZE, NULL);
+                       ERR_IF_TRUE((n_slots == 0),
+                               "%s: could not enqueue free slots in global ring\n",
+                               __func__);
                        cached_free_slots->len -= n_slots;
                }
                /* Put index of new free slot in cache. */
@@ -1552,6 +1555,7 @@ rte_hash_free_key_with_position(const struct rte_hash *h,
                        n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
                                                cached_free_slots->objs,
                                                LCORE_CACHE_SIZE, NULL);
+                       RETURN_IF_TRUE((n_slots == 0), -EFAULT);
                        cached_free_slots->len -= n_slots;
                }
                /* Put index of new free slot in cache. */
@@ -2022,11 +2026,11 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
                        uint64_t *hit_mask, void *data[])
 {
        if (h->readwrite_concur_lf_support)
-               return __rte_hash_lookup_bulk_lf(h, keys, num_keys,
-                                               positions, hit_mask, data);
+               __rte_hash_lookup_bulk_lf(h, keys, num_keys, positions,
+                                         hit_mask, data);
        else
-               return __rte_hash_lookup_bulk_l(h, keys, num_keys,
-                                               positions, hit_mask, data);
+               __rte_hash_lookup_bulk_l(h, keys, num_keys, positions,
+                                        hit_mask, data);
 }
 
 int
index 5dfbbc4..eacdaa8 100644 (file)
 #define RETURN_IF_TRUE(cond, retval)
 #endif
 
+#if defined(RTE_LIBRTE_HASH_DEBUG)
+#define ERR_IF_TRUE(cond, fmt, args...) do { \
+       if (cond) { \
+               RTE_LOG(ERR, HASH, fmt, ##args); \
+               return; \
+       } \
+} while (0)
+#else
+#define ERR_IF_TRUE(cond, fmt, args...)
+#endif
+
 #include <rte_hash_crc.h>
 #include <rte_jhash.h>
 
index a4ccaf9..04fd9df 100644 (file)
@@ -115,6 +115,7 @@ struct rte_ip_frag_tbl {
 #define        RTE_IPV6_EHDR_MF_MASK                   1
 #define        RTE_IPV6_EHDR_FO_SHIFT                  3
 #define        RTE_IPV6_EHDR_FO_MASK                   (~((1 << RTE_IPV6_EHDR_FO_SHIFT) - 1))
+#define        RTE_IPV6_EHDR_FO_ALIGN                  (1 << RTE_IPV6_EHDR_FO_SHIFT)
 
 #define RTE_IPV6_FRAG_USED_MASK                        \
        (RTE_IPV6_EHDR_MF_MASK | RTE_IPV6_EHDR_FO_MASK)
index 62a7e4e..b9437eb 100644 (file)
@@ -77,11 +77,14 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
        uint32_t out_pkt_pos, in_seg_data_pos;
        uint32_t more_in_segs;
        uint16_t fragment_offset, frag_size;
+       uint64_t frag_bytes_remaining;
 
-       frag_size = (uint16_t)(mtu_size - sizeof(struct ipv6_hdr));
-
-       /* Fragment size should be a multiple of 8. */
-       RTE_ASSERT((frag_size & ~RTE_IPV6_EHDR_FO_MASK) == 0);
+       /*
+        * Ensure the IP payload length of all fragments (except the
+        * last fragment) is a multiple of 8 bytes per RFC2460.
+        */
+       frag_size = RTE_ALIGN_FLOOR(mtu_size - sizeof(struct ipv6_hdr),
+                                   RTE_IPV6_EHDR_FO_ALIGN);
 
        /* Check that pkts_out is big enough to hold all fragments */
        if (unlikely (frag_size * nb_pkts_out <
@@ -111,6 +114,7 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
                /* Reserve space for the IP header that will be built later */
                out_pkt->data_len = sizeof(struct ipv6_hdr) + sizeof(struct ipv6_extension_fragment);
                out_pkt->pkt_len  = sizeof(struct ipv6_hdr) + sizeof(struct ipv6_extension_fragment);
+               frag_bytes_remaining = frag_size;
 
                out_seg_prev = out_pkt;
                more_out_segs = 1;
@@ -130,7 +134,7 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
 
                        /* Prepare indirect buffer */
                        rte_pktmbuf_attach(out_seg, in_seg);
-                       len = mtu_size - out_pkt->pkt_len;
+                       len = frag_bytes_remaining;
                        if (len > (in_seg->data_len - in_seg_data_pos)) {
                                len = in_seg->data_len - in_seg_data_pos;
                        }
@@ -140,11 +144,11 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
                            out_pkt->pkt_len);
                        out_pkt->nb_segs += 1;
                        in_seg_data_pos += len;
+                       frag_bytes_remaining -= len;
 
                        /* Current output packet (i.e. fragment) done ? */
-                       if (unlikely(out_pkt->pkt_len >= mtu_size)) {
+                       if (unlikely(frag_bytes_remaining == 0))
                                more_out_segs = 0;
-                       }
 
                        /* Current input segment done ? */
                        if (unlikely(in_seg_data_pos == in_seg->data_len)) {
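The rewritten fragmentation loop above no longer asserts on the MTU; it rounds the per-fragment payload down to a multiple of 8 bytes (the fragment-offset granularity of RFC 2460) and then tracks how many of those bytes remain in the current fragment. A short sketch of that size computation (constants spelled out instead of the RTE_* macros):

#include <stdint.h>

#define IPV6_HDR_LEN  40U  /* sizeof(struct ipv6_hdr) */
#define FRAG_ALIGN     8U  /* 1 << RTE_IPV6_EHDR_FO_SHIFT */

/* Sketch: payload carried by every fragment except the last one;
 * equivalent to RTE_ALIGN_FLOOR(mtu - IPV6_HDR_LEN, FRAG_ALIGN). */
static uint16_t
ipv6_frag_payload(uint32_t mtu)
{
        uint32_t room = mtu - IPV6_HDR_LEN;

        return (uint16_t)(room - (room % FRAG_ALIGN));
}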
index f2a8904..f9b9090 100644 (file)
@@ -310,16 +310,20 @@ rte_ipv4_phdr_cksum(const struct ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
  * @param l4_hdr
  *   The pointer to the beginning of the L4 header.
  * @return
- *   The complemented checksum to set in the IP packet.
+ *   The complemented checksum to set in the IP packet
+ *   or 0 on error
  */
 static inline uint16_t
 rte_ipv4_udptcp_cksum(const struct ipv4_hdr *ipv4_hdr, const void *l4_hdr)
 {
        uint32_t cksum;
-       uint32_t l4_len;
+       uint32_t l3_len, l4_len;
+
+       l3_len = rte_be_to_cpu_16(ipv4_hdr->total_length);
+       if (l3_len < sizeof(struct ipv4_hdr))
+               return 0;
 
-       l4_len = (uint32_t)(rte_be_to_cpu_16(ipv4_hdr->total_length) -
-               sizeof(struct ipv4_hdr));
+       l4_len = l3_len - sizeof(struct ipv4_hdr);
 
        cksum = rte_raw_cksum(l4_hdr, l4_len);
        cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0);
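The added length check above guards against a malformed total_length smaller than the IPv4 header: with the previous unsigned subtraction, the L4 length would have wrapped around to a huge value and rte_raw_cksum() would have read far past the packet. A tiny worked example of that wrap (the values are illustrative):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint32_t total_length = 12;            /* bogus, below the 20-byte header */
        uint32_t l4_len = total_length - 20;   /* unsigned wrap-around */

        printf("l4_len without the guard: %u\n", l4_len); /* prints 4294967288 */
        return 0;
}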
index 587d5e6..89c3d1e 100644 (file)
@@ -667,6 +667,7 @@ rte_sched_port_config(struct rte_sched_port_params *params)
                                params->red_params[i][j].min_th,
                                params->red_params[i][j].max_th,
                                params->red_params[i][j].maxp_inv) != 0) {
+                               rte_free(port);
                                return NULL;
                        }
                }
@@ -726,6 +727,7 @@ rte_sched_port_config(struct rte_sched_port_params *params)
                                    bmp_mem_size);
        if (port->bmp == NULL) {
                RTE_LOG(ERR, SCHED, "Bitmap init error\n");
+               rte_free(port);
                return NULL;
        }
 
index 016431f..7fb247e 100644 (file)
@@ -558,7 +558,7 @@ rte_telemetry_send_ports_stats_values(uint32_t *metric_ids, int num_metric_ids,
                }
 
                ret = rte_telemetry_update_metrics_ethdev(telemetry,
-                               port_ids[i], telemetry->reg_index);
+                               port_ids[i], telemetry->reg_index[i]);
                if (ret < 0) {
                        TELEMETRY_LOG_ERR("Failed to update ethdev metrics");
                        return -1;
@@ -658,23 +658,45 @@ free_xstats:
 static int32_t
 rte_telemetry_initial_accept(struct telemetry_impl *telemetry)
 {
+       struct driver_index {
+               const void *dev_ops;
+               int reg_index;
+       } drv_idx[RTE_MAX_ETHPORTS];
+       int nb_drv_idx = 0;
        uint16_t pid;
        int ret;
        int selftest = 0;
 
        RTE_ETH_FOREACH_DEV(pid) {
-               telemetry->reg_index = rte_telemetry_reg_ethdev_to_metrics(pid);
-               break;
-       }
+               int i;
+               /* Different device types have different numbers of stats, so
+                * first check if the stats for this type of device have
+                * already been registered
+                */
+               for (i = 0; i < nb_drv_idx; i++) {
+                       if (rte_eth_devices[pid].dev_ops == drv_idx[i].dev_ops) {
+                               telemetry->reg_index[pid] = drv_idx[i].reg_index;
+                               break;
+                       }
+               }
+               if (i < nb_drv_idx)
+                       continue; /* we found a match, go to next port */
 
-       if (telemetry->reg_index < 0) {
-               TELEMETRY_LOG_ERR("Failed to register ethdev metrics");
-               return -1;
+               /* No match, register a new set of xstats for this port */
+               ret = rte_telemetry_reg_ethdev_to_metrics(pid);
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Failed to register ethdev metrics");
+                       return -1;
+               }
+               telemetry->reg_index[pid] = ret;
+               drv_idx[nb_drv_idx].dev_ops = rte_eth_devices[pid].dev_ops;
+               drv_idx[nb_drv_idx].reg_index = ret;
+               nb_drv_idx++;
        }
 
        telemetry->metrics_register_done = 1;
        if (selftest) {
-               ret = rte_telemetry_socket_messaging_testing(telemetry->reg_index,
+               ret = rte_telemetry_socket_messaging_testing(telemetry->reg_index[0],
                                telemetry->server_fd);
                if (ret < 0)
                        return -1;
@@ -1299,7 +1321,7 @@ rte_telemetry_socket_messaging_testing(int index, int socket)
        }
 
        telemetry->server_fd = socket;
-       telemetry->reg_index = index;
+       telemetry->reg_index[0] = index;
        TELEMETRY_LOG_INFO("Beginning Telemetry socket message Selftest");
        rte_telemetry_socket_test_setup(telemetry, &send_fd, &recv_fd);
        TELEMETRY_LOG_INFO("Register valid client test");
index de7afda..c298c39 100644 (file)
@@ -36,7 +36,7 @@ typedef struct telemetry_impl {
        pthread_t thread_id;
        int thread_status;
        uint32_t socket_id;
-       int reg_index;
+       int reg_index[RTE_MAX_ETHPORTS];
        int metrics_register_done;
        TAILQ_HEAD(, telemetry_client) client_list_head;
        struct telemetry_client *request_client;
index 590488c..30c7b0a 100644 (file)
@@ -241,24 +241,17 @@ timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
        }
 }
 
-/*
- * add in list, lock if needed
+/* call with lock held as necessary
+ * add in list
  * timer must be in config state
  * timer must not be in a list
  */
 static void
-timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked)
+timer_add(struct rte_timer *tim, unsigned int tim_lcore)
 {
-       unsigned lcore_id = rte_lcore_id();
        unsigned lvl;
        struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
 
-       /* if timer needs to be scheduled on another core, we need to
-        * lock the list; if it is on local core, we need to lock if
-        * we are not called from rte_timer_manage() */
-       if (tim_lcore != lcore_id || !local_is_locked)
-               rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
-
        /* find where exactly this element goes in the list of elements
         * for each depth. */
        timer_get_prev_entries(tim->expire, tim_lcore, prev);
@@ -282,9 +275,6 @@ timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked)
         * NOTE: this is not atomic on 32-bit*/
        priv_timer[tim_lcore].pending_head.expire = priv_timer[tim_lcore].\
                        pending_head.sl_next[0]->expire;
-
-       if (tim_lcore != lcore_id || !local_is_locked)
-               rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
 }
 
 /*
@@ -379,8 +369,15 @@ __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
        tim->f = fct;
        tim->arg = arg;
 
+       /* if timer needs to be scheduled on another core, we need to
+        * lock the destination list; if it is on local core, we need to lock if
+        * we are not called from rte_timer_manage()
+        */
+       if (tim_lcore != lcore_id || !local_is_locked)
+               rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
+
        __TIMER_STAT_ADD(pending, 1);
-       timer_add(tim, tim_lcore, local_is_locked);
+       timer_add(tim, tim_lcore);
 
        /* update state: as we are in CONFIG state, only us can modify
         * the state so we don't need to use cmpset() here */
@@ -389,6 +386,9 @@ __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
        status.owner = (int16_t)tim_lcore;
        tim->status.u32 = status.u32;
 
+       if (tim_lcore != lcore_id || !local_is_locked)
+               rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
+
        return 0;
 }
 
index 38347ab..55d4856 100644 (file)
@@ -129,7 +129,9 @@ fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
        pthread_mutex_lock(&pfdset->fd_mutex);
        i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
        if (i == -1) {
+               pthread_mutex_lock(&pfdset->fd_pooling_mutex);
                fdset_shrink_nolock(pfdset);
+               pthread_mutex_unlock(&pfdset->fd_pooling_mutex);
                i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
                if (i == -1) {
                        pthread_mutex_unlock(&pfdset->fd_mutex);
@@ -246,7 +248,9 @@ fdset_event_dispatch(void *arg)
                numfds = pfdset->num;
                pthread_mutex_unlock(&pfdset->fd_mutex);
 
+               pthread_mutex_lock(&pfdset->fd_pooling_mutex);
                val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
+               pthread_mutex_unlock(&pfdset->fd_pooling_mutex);
                if (val < 0)
                        continue;
 
index 3331bcd..3ab5cfd 100644 (file)
@@ -24,6 +24,7 @@ struct fdset {
        struct pollfd rwfds[MAX_FDS];
        struct fdentry fd[MAX_FDS];
        pthread_mutex_t fd_mutex;
+       pthread_mutex_t fd_pooling_mutex;
        int num;        /* current fd number of this fdset */
 
        union pipefds {
index 01b60ff..9883b04 100644 (file)
@@ -90,6 +90,7 @@ static struct vhost_user vhost_user = {
        .fdset = {
                .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} },
                .fd_mutex = PTHREAD_MUTEX_INITIALIZER,
+               .fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER,
                .num = 0
        },
        .vsocket_cnt = 0,
@@ -960,13 +961,13 @@ rte_vhost_driver_unregister(const char *path)
        int count;
        struct vhost_user_connection *conn, *next;
 
+again:
        pthread_mutex_lock(&vhost_user.mutex);
 
        for (i = 0; i < vhost_user.vsocket_cnt; i++) {
                struct vhost_user_socket *vsocket = vhost_user.vsockets[i];
 
                if (!strcmp(vsocket->path, path)) {
-again:
                        pthread_mutex_lock(&vsocket->conn_mutex);
                        for (conn = TAILQ_FIRST(&vsocket->conn_list);
                             conn != NULL;
@@ -982,6 +983,7 @@ again:
                                                  conn->connfd) == -1) {
                                        pthread_mutex_unlock(
                                                        &vsocket->conn_mutex);
+                                       pthread_mutex_unlock(&vhost_user.mutex);
                                        goto again;
                                }
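The one-line unlock above matters because the retry path now jumps back to a label that re-acquires vhost_user.mutex; it must drop that mutex first, both to avoid re-locking a mutex it already holds and to let other threads make progress while it retries. A minimal sketch of the retry-with-full-unlock pattern (plain pthread mutexes stand in for the vhost locks):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t outer = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t inner = PTHREAD_MUTEX_INITIALIZER;

/* placeholder for the per-connection teardown that may need a retry */
static bool
try_remove_one(void)
{
        return true;
}

static void
remove_all(void)
{
again:
        pthread_mutex_lock(&outer);
        pthread_mutex_lock(&inner);
        if (!try_remove_one()) {
                pthread_mutex_unlock(&inner);
                pthread_mutex_unlock(&outer); /* the unlock the fix adds */
                goto again;
        }
        pthread_mutex_unlock(&inner);
        pthread_mutex_unlock(&outer);
}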
 
index 5218f1b..552b929 100644 (file)
@@ -393,8 +393,10 @@ vq_is_packed(struct virtio_net *dev)
 static inline bool
 desc_is_avail(struct vring_packed_desc *desc, bool wrap_counter)
 {
-       return wrap_counter == !!(desc->flags & VRING_DESC_F_AVAIL) &&
-               wrap_counter != !!(desc->flags & VRING_DESC_F_USED);
+       uint16_t flags = *((volatile uint16_t *) &desc->flags);
+
+       return wrap_counter == !!(flags & VRING_DESC_F_AVAIL) &&
+               wrap_counter != !!(flags & VRING_DESC_F_USED);
 }
 
 #define VHOST_LOG_PAGE 4096
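desc_is_avail() above now loads the shared flags word exactly once through a volatile pointer, so both bit tests operate on the same snapshot even though the guest may be updating the descriptor concurrently. A small sketch of that single-snapshot idiom (the flag values follow the packed-ring layout, AVAIL at bit 7 and USED at bit 15):

#include <stdbool.h>
#include <stdint.h>

#define F_AVAIL (1u << 7)
#define F_USED  (1u << 15)

/* Sketch: read the shared word once so the two tests cannot observe two
 * different values of a field the other side is writing concurrently. */
static bool
is_avail(const volatile uint16_t *shared_flags, bool wrap_counter)
{
        uint16_t flags = *shared_flags;

        return wrap_counter == !!(flags & F_AVAIL) &&
               wrap_counter != !!(flags & F_USED);
}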
index dd01afc..0694c0a 100644 (file)
@@ -466,12 +466,17 @@ vhost_crypto_msg_post_handler(int vid, void *msg)
 }
 
 static __rte_always_inline struct vring_desc *
-find_write_desc(struct vring_desc *head, struct vring_desc *desc)
+find_write_desc(struct vring_desc *head, struct vring_desc *desc,
+               uint32_t *nb_descs, uint32_t vq_size)
 {
        if (desc->flags & VRING_DESC_F_WRITE)
                return desc;
 
        while (desc->flags & VRING_DESC_F_NEXT) {
+               if (unlikely(*nb_descs == 0 || desc->next >= vq_size))
+                       return NULL;
+               (*nb_descs)--;
+
                desc = &head[desc->next];
                if (desc->flags & VRING_DESC_F_WRITE)
                        return desc;
@@ -481,13 +486,18 @@ find_write_desc(struct vring_desc *head, struct vring_desc *desc)
 }
 
 static struct virtio_crypto_inhdr *
-reach_inhdr(struct vhost_crypto_data_req *vc_req, struct vring_desc *desc)
+reach_inhdr(struct vhost_crypto_data_req *vc_req, struct vring_desc *desc,
+               uint32_t *nb_descs, uint32_t vq_size)
 {
        uint64_t dlen;
        struct virtio_crypto_inhdr *inhdr;
 
-       while (desc->flags & VRING_DESC_F_NEXT)
+       while (desc->flags & VRING_DESC_F_NEXT) {
+               if (unlikely(*nb_descs == 0 || desc->next >= vq_size))
+                       return NULL;
+               (*nb_descs)--;
                desc = &vc_req->head[desc->next];
+       }
 
        dlen = desc->len;
        inhdr = IOVA_TO_VVA(struct virtio_crypto_inhdr *, vc_req, desc->addr,
@@ -500,15 +510,16 @@ reach_inhdr(struct vhost_crypto_data_req *vc_req, struct vring_desc *desc)
 
 static __rte_always_inline int
 move_desc(struct vring_desc *head, struct vring_desc **cur_desc,
-               uint32_t size)
+               uint32_t size, uint32_t *nb_descs, uint32_t vq_size)
 {
        struct vring_desc *desc = *cur_desc;
-       int left = size;
-
-       rte_prefetch0(&head[desc->next]);
-       left -= desc->len;
+       int left = size - desc->len;
 
        while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) {
+               (*nb_descs)--;
+               if (unlikely(*nb_descs == 0 || desc->next >= vq_size))
+                       return -1;
+
                desc = &head[desc->next];
                rte_prefetch0(&head[desc->next]);
                left -= desc->len;
@@ -517,7 +528,14 @@ move_desc(struct vring_desc *head, struct vring_desc **cur_desc,
        if (unlikely(left > 0))
                return -1;
 
-       *cur_desc = &head[desc->next];
+       if (unlikely(*nb_descs == 0))
+               *cur_desc = NULL;
+       else {
+               if (unlikely(desc->next >= vq_size))
+                       return -1;
+               *cur_desc = &head[desc->next];
+       }
+
        return 0;
 }
 
@@ -539,7 +557,8 @@ get_data_ptr(struct vhost_crypto_data_req *vc_req, struct vring_desc *cur_desc,
 
 static int
 copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req,
-               struct vring_desc **cur_desc, uint32_t size)
+               struct vring_desc **cur_desc, uint32_t size,
+               uint32_t *nb_descs, uint32_t vq_size)
 {
        struct vring_desc *desc = *cur_desc;
        uint64_t remain, addr, dlen, len;
@@ -548,7 +567,6 @@ copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req,
        uint8_t *src;
        int left = size;
 
-       rte_prefetch0(&vc_req->head[desc->next]);
        to_copy = RTE_MIN(desc->len, (uint32_t)left);
        dlen = to_copy;
        src = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen,
@@ -582,6 +600,12 @@ copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req,
        left -= to_copy;
 
        while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) {
+               if (unlikely(*nb_descs == 0 || desc->next >= vq_size)) {
+                       VC_LOG_ERR("Invalid descriptors");
+                       return -1;
+               }
+               (*nb_descs)--;
+
                desc = &vc_req->head[desc->next];
                rte_prefetch0(&vc_req->head[desc->next]);
                to_copy = RTE_MIN(desc->len, (uint32_t)left);
@@ -624,7 +648,13 @@ copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req,
                return -1;
        }
 
-       *cur_desc = &vc_req->head[desc->next];
+       if (unlikely(*nb_descs == 0))
+               *cur_desc = NULL;
+       else {
+               if (unlikely(desc->next >= vq_size))
+                       return -1;
+               *cur_desc = &vc_req->head[desc->next];
+       }
 
        return 0;
 }
@@ -635,7 +665,6 @@ write_back_data(struct vhost_crypto_data_req *vc_req)
        struct vhost_crypto_writeback_data *wb_data = vc_req->wb, *wb_last;
 
        while (wb_data) {
-               rte_prefetch0(wb_data->next);
                rte_memcpy(wb_data->dst, wb_data->src, wb_data->len);
                wb_last = wb_data;
                wb_data = wb_data->next;
@@ -684,7 +713,8 @@ prepare_write_back_data(struct vhost_crypto_data_req *vc_req,
                struct vhost_crypto_writeback_data **end_wb_data,
                uint8_t *src,
                uint32_t offset,
-               uint64_t write_back_len)
+               uint64_t write_back_len,
+               uint32_t *nb_descs, uint32_t vq_size)
 {
        struct vhost_crypto_writeback_data *wb_data, *head;
        struct vring_desc *desc = *cur_desc;
@@ -731,6 +761,12 @@ prepare_write_back_data(struct vhost_crypto_data_req *vc_req,
                offset -= desc->len;
 
        while (write_back_len) {
+               if (unlikely(*nb_descs == 0 || desc->next >= vq_size)) {
+                       VC_LOG_ERR("Invalid descriptors");
+                       goto error_exit;
+               }
+               (*nb_descs)--;
+
                desc = &vc_req->head[desc->next];
                if (unlikely(!(desc->flags & VRING_DESC_F_WRITE))) {
                        VC_LOG_ERR("incorrect descriptor");
@@ -770,7 +806,13 @@ prepare_write_back_data(struct vhost_crypto_data_req *vc_req,
                        wb_data->next = NULL;
        }
 
-       *cur_desc = &vc_req->head[desc->next];
+       if (unlikely(*nb_descs == 0))
+               *cur_desc = NULL;
+       else {
+               if (unlikely(desc->next >= vq_size))
+                       goto error_exit;
+               *cur_desc = &vc_req->head[desc->next];
+       }
 
        *end_wb_data = wb_data;
 
@@ -787,7 +829,8 @@ static uint8_t
 prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
                struct vhost_crypto_data_req *vc_req,
                struct virtio_crypto_cipher_data_req *cipher,
-               struct vring_desc *cur_desc)
+               struct vring_desc *cur_desc,
+               uint32_t *nb_descs, uint32_t vq_size)
 {
        struct vring_desc *desc = cur_desc;
        struct vhost_crypto_writeback_data *ewb = NULL;
@@ -797,8 +840,8 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 
        /* prepare */
        /* iv */
-       if (unlikely(copy_data(iv_data, vc_req, &desc,
-                       cipher->para.iv_len) < 0)) {
+       if (unlikely(copy_data(iv_data, vc_req, &desc, cipher->para.iv_len,
+                       nb_descs, vq_size) < 0)) {
                ret = VIRTIO_CRYPTO_BADMSG;
                goto error_exit;
        }
@@ -818,7 +861,8 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
                }
 
                if (unlikely(move_desc(vc_req->head, &desc,
-                               cipher->para.src_data_len) < 0)) {
+                               cipher->para.src_data_len, nb_descs,
+                               vq_size) < 0)) {
                        VC_LOG_ERR("Incorrect descriptor");
                        ret = VIRTIO_CRYPTO_ERR;
                        goto error_exit;
@@ -835,8 +879,8 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
                        goto error_exit;
                }
                if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *),
-                               vc_req, &desc, cipher->para.src_data_len)
-                               < 0)) {
+                               vc_req, &desc, cipher->para.src_data_len,
+                               nb_descs, vq_size) < 0)) {
                        ret = VIRTIO_CRYPTO_BADMSG;
                        goto error_exit;
                }
@@ -847,7 +891,7 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
        }
 
        /* dst */
-       desc = find_write_desc(vc_req->head, desc);
+       desc = find_write_desc(vc_req->head, desc, nb_descs, vq_size);
        if (unlikely(!desc)) {
                VC_LOG_ERR("Cannot find write location");
                ret = VIRTIO_CRYPTO_BADMSG;
@@ -866,7 +910,8 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
                }
 
                if (unlikely(move_desc(vc_req->head, &desc,
-                               cipher->para.dst_data_len) < 0)) {
+                               cipher->para.dst_data_len,
+                               nb_descs, vq_size) < 0)) {
                        VC_LOG_ERR("Incorrect descriptor");
                        ret = VIRTIO_CRYPTO_ERR;
                        goto error_exit;
@@ -877,7 +922,7 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
        case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE:
                vc_req->wb = prepare_write_back_data(vc_req, &desc, &ewb,
                                rte_pktmbuf_mtod(m_src, uint8_t *), 0,
-                               cipher->para.dst_data_len);
+                               cipher->para.dst_data_len, nb_descs, vq_size);
                if (unlikely(vc_req->wb == NULL)) {
                        ret = VIRTIO_CRYPTO_ERR;
                        goto error_exit;
@@ -919,7 +964,8 @@ static uint8_t
 prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
                struct vhost_crypto_data_req *vc_req,
                struct virtio_crypto_alg_chain_data_req *chain,
-               struct vring_desc *cur_desc)
+               struct vring_desc *cur_desc,
+               uint32_t *nb_descs, uint32_t vq_size)
 {
        struct vring_desc *desc = cur_desc, *digest_desc;
        struct vhost_crypto_writeback_data *ewb = NULL, *ewb2 = NULL;
@@ -932,7 +978,7 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
        /* prepare */
        /* iv */
        if (unlikely(copy_data(iv_data, vc_req, &desc,
-                       chain->para.iv_len) < 0)) {
+                       chain->para.iv_len, nb_descs, vq_size) < 0)) {
                ret = VIRTIO_CRYPTO_BADMSG;
                goto error_exit;
        }
@@ -953,7 +999,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
                }
 
                if (unlikely(move_desc(vc_req->head, &desc,
-                               chain->para.src_data_len) < 0)) {
+                               chain->para.src_data_len,
+                               nb_descs, vq_size) < 0)) {
                        VC_LOG_ERR("Incorrect descriptor");
                        ret = VIRTIO_CRYPTO_ERR;
                        goto error_exit;
@@ -969,7 +1016,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
                        goto error_exit;
                }
                if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *),
-                               vc_req, &desc, chain->para.src_data_len)) < 0) {
+                               vc_req, &desc, chain->para.src_data_len,
+                               nb_descs, vq_size)) < 0) {
                        ret = VIRTIO_CRYPTO_BADMSG;
                        goto error_exit;
                }
@@ -981,7 +1029,7 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
        }
 
        /* dst */
-       desc = find_write_desc(vc_req->head, desc);
+       desc = find_write_desc(vc_req->head, desc, nb_descs, vq_size);
        if (unlikely(!desc)) {
                VC_LOG_ERR("Cannot find write location");
                ret = VIRTIO_CRYPTO_BADMSG;
@@ -1000,7 +1048,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
                }
 
                if (unlikely(move_desc(vc_req->head, &desc,
-                               chain->para.dst_data_len) < 0)) {
+                               chain->para.dst_data_len,
+                               nb_descs, vq_size) < 0)) {
                        VC_LOG_ERR("Incorrect descriptor");
                        ret = VIRTIO_CRYPTO_ERR;
                        goto error_exit;
@@ -1017,7 +1066,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
                }
 
                if (unlikely(move_desc(vc_req->head, &desc,
-                               chain->para.hash_result_len) < 0)) {
+                               chain->para.hash_result_len,
+                               nb_descs, vq_size) < 0)) {
                        VC_LOG_ERR("Incorrect descriptor");
                        ret = VIRTIO_CRYPTO_ERR;
                        goto error_exit;
@@ -1029,7 +1079,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
                                rte_pktmbuf_mtod(m_src, uint8_t *),
                                chain->para.cipher_start_src_offset,
                                chain->para.dst_data_len -
-                               chain->para.cipher_start_src_offset);
+                               chain->para.cipher_start_src_offset,
+                               nb_descs, vq_size);
                if (unlikely(vc_req->wb == NULL)) {
                        ret = VIRTIO_CRYPTO_ERR;
                        goto error_exit;
@@ -1042,14 +1093,16 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op,
 
                /** create a wb_data for digest */
                ewb->next = prepare_write_back_data(vc_req, &desc, &ewb2,
-                               digest_addr, 0, chain->para.hash_result_len);
+                               digest_addr, 0, chain->para.hash_result_len,
+                               nb_descs, vq_size);
                if (unlikely(ewb->next == NULL)) {
                        ret = VIRTIO_CRYPTO_ERR;
                        goto error_exit;
                }
 
                if (unlikely(copy_data(digest_addr, vc_req, &digest_desc,
-                               chain->para.hash_result_len)) < 0) {
+                               chain->para.hash_result_len,
+                               nb_descs, vq_size)) < 0) {
                        ret = VIRTIO_CRYPTO_BADMSG;
                        goto error_exit;
                }
@@ -1108,6 +1161,7 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
        struct vring_desc *desc = NULL;
        uint64_t session_id;
        uint64_t dlen;
+       uint32_t nb_descs = vq->size;
        int err = 0;
 
        vc_req->desc_idx = desc_idx;
@@ -1116,6 +1170,10 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
 
        if (likely(head->flags & VRING_DESC_F_INDIRECT)) {
                dlen = head->len;
+               nb_descs = dlen / sizeof(struct vring_desc);
+               /* drop invalid descriptors */
+               if (unlikely(nb_descs > vq->size))
+                       return -1;
                desc = IOVA_TO_VVA(struct vring_desc *, vc_req, head->addr,
                                &dlen, VHOST_ACCESS_RO);
                if (unlikely(!desc || dlen != head->len))
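
The nb_descs bound introduced here, and threaded through move_desc(), copy_data(), find_write_desc(), reach_inhdr() and the prepare_sym_*_op() helpers in the surrounding hunks, gives every descriptor-chain walk a hard budget: it starts at the vring size (or the indirect table size) and is decremented on every hop, so a malformed or looping chain supplied by the guest makes the walk fail instead of spinning the backend forever. A minimal standalone sketch of the idea follows; the types and the move_desc() shape are illustrative only, not the actual vhost-crypto code.

#include <stdint.h>
#include <stdio.h>

struct desc { uint32_t len; uint16_t flags; uint16_t next; };
#define F_NEXT 0x1

/* Advance over 'size' bytes of a descriptor chain, but never take more
 * hops than the budget in *nb_left allows (it starts at the ring size). */
static int
move_desc(const struct desc *table, uint16_t *idx, uint32_t size,
          uint32_t *nb_left)
{
        uint32_t left = size;

        while (1) {
                if (left <= table[*idx].len) {
                        left = 0;
                        break;
                }
                if (!(table[*idx].flags & F_NEXT))
                        return -1;      /* chain shorter than the request */
                if (*nb_left == 0)
                        return -1;      /* budget spent: looping chain */
                (*nb_left)--;
                left -= table[*idx].len;
                *idx = table[*idx].next;
        }
        return 0;
}

int
main(void)
{
        /* Two descriptors whose 'next' fields form a cycle. */
        struct desc ring[2] = {
                { .len = 8, .flags = F_NEXT, .next = 1 },
                { .len = 8, .flags = F_NEXT, .next = 0 },
        };
        uint16_t idx = 0;
        uint32_t budget = 2;    /* ring size */

        /* 64 bytes cannot fit in this ring; the budget check returns -1
         * instead of walking the cycle forever. */
        printf("%d\n", move_desc(ring, &idx, 64, &budget));
        return 0;
}
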
@@ -1138,8 +1196,8 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
                        goto error_exit;
                case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE:
                        req = &tmp_req;
-                       if (unlikely(copy_data(req, vc_req, &desc, sizeof(*req))
-                                       < 0)) {
+                       if (unlikely(copy_data(req, vc_req, &desc, sizeof(*req),
+                                       &nb_descs, vq->size) < 0)) {
                                err = VIRTIO_CRYPTO_BADMSG;
                                VC_LOG_ERR("Invalid descriptor");
                                goto error_exit;
@@ -1152,7 +1210,7 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
                }
        } else {
                if (unlikely(move_desc(vc_req->head, &desc,
-                               sizeof(*req)) < 0)) {
+                               sizeof(*req), &nb_descs, vq->size) < 0)) {
                        VC_LOG_ERR("Incorrect descriptor");
                        goto error_exit;
                }
@@ -1193,11 +1251,13 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
                        break;
                case VIRTIO_CRYPTO_SYM_OP_CIPHER:
                        err = prepare_sym_cipher_op(vcrypto, op, vc_req,
-                                       &req->u.sym_req.u.cipher, desc);
+                                       &req->u.sym_req.u.cipher, desc,
+                                       &nb_descs, vq->size);
                        break;
                case VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING:
                        err = prepare_sym_chain_op(vcrypto, op, vc_req,
-                                       &req->u.sym_req.u.chain, desc);
+                                       &req->u.sym_req.u.chain, desc,
+                                       &nb_descs, vq->size);
                        break;
                }
                if (unlikely(err != 0)) {
@@ -1215,7 +1275,7 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
 
 error_exit:
 
-       inhdr = reach_inhdr(vc_req, desc);
+       inhdr = reach_inhdr(vc_req, desc, &nb_descs, vq->size);
        if (likely(inhdr != NULL))
                inhdr->status = (uint8_t)err;
 
index 3ea64eb..19e04c9 100644
@@ -489,6 +489,9 @@ qva_to_vva(struct virtio_net *dev, uint64_t qva, uint64_t *len)
        struct rte_vhost_mem_region *r;
        uint32_t i;
 
+       if (unlikely(!dev || !dev->mem))
+               goto out_error;
+
        /* Find the region where the address lives. */
        for (i = 0; i < dev->mem->nregions; i++) {
                r = &dev->mem->regions[i];
@@ -503,6 +506,7 @@ qva_to_vva(struct virtio_net *dev, uint64_t qva, uint64_t *len)
                               r->host_user_addr;
                }
        }
+out_error:
        *len = 0;
 
        return 0;
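
The qva_to_vva() hunks are a plain NULL guard: if the device's memory table has not been installed yet (for instance, a vring address arriving before the memory-table message), the translation now fails cleanly with *len = 0 instead of dereferencing dev->mem. A simplified sketch of the guarded lookup, with illustrative region/table types:

#include <stdint.h>
#include <stddef.h>

struct region { uint64_t gva; uint64_t size; uint64_t hva; };
struct memtab { uint32_t nregions; struct region r[8]; };

/* Return 0 and zero *len when no table is present or the address is
 * outside every region; otherwise clamp *len to the region end. */
static uint64_t
translate(const struct memtab *mem, uint64_t addr, uint64_t *len)
{
        uint32_t i;

        if (mem == NULL)
                goto out_error;

        for (i = 0; i < mem->nregions; i++) {
                const struct region *r = &mem->r[i];

                if (addr >= r->gva && addr < r->gva + r->size) {
                        if (*len > r->gva + r->size - addr)
                                *len = r->gva + r->size - addr;
                        return addr - r->gva + r->hva;
                }
        }

out_error:
        *len = 0;
        return 0;
}
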
@@ -537,7 +541,7 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index)
 {
        struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
        struct vhost_vring_addr *addr = &vq->ring_addrs;
-       uint64_t len;
+       uint64_t len, expected_len;
 
        if (vq_is_packed(dev)) {
                len = sizeof(struct vring_packed_desc) * vq->size;
@@ -603,11 +607,12 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index)
        addr = &vq->ring_addrs;
 
        len = sizeof(struct vring_avail) + sizeof(uint16_t) * vq->size;
+       if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
+               len += sizeof(uint16_t);
+       expected_len = len;
        vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev,
                        vq, addr->avail_user_addr, &len);
-       if (vq->avail == 0 ||
-                       len != sizeof(struct vring_avail) +
-                       sizeof(uint16_t) * vq->size) {
+       if (vq->avail == 0 || len != expected_len) {
                RTE_LOG(DEBUG, VHOST_CONFIG,
                        "(%d) failed to map avail ring.\n",
                        dev->vid);
@@ -616,10 +621,12 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index)
 
        len = sizeof(struct vring_used) +
                sizeof(struct vring_used_elem) * vq->size;
+       if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
+               len += sizeof(uint16_t);
+       expected_len = len;
        vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev,
                        vq, addr->used_user_addr, &len);
-       if (vq->used == 0 || len != sizeof(struct vring_used) +
-                       sizeof(struct vring_used_elem) * vq->size) {
+       if (vq->used == 0 || len != expected_len) {
                RTE_LOG(DEBUG, VHOST_CONFIG,
                        "(%d) failed to map used ring.\n",
                        dev->vid);
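
With VIRTIO_RING_F_EVENT_IDX negotiated, the split avail ring is followed by a trailing used_event field and the used ring by a trailing avail_event field, each a single uint16_t. The two hunks above fold that extra element into the length that is translated and then checked against what ring_addr_to_vva() actually mapped, so the event fields are guaranteed to be covered by the mapping as well. A self-contained sketch of the expected-length computation, using the standard split-ring layouts (feature bit value from the virtio spec):

#include <stdint.h>
#include <stdio.h>

#define VIRTIO_RING_F_EVENT_IDX 29

struct vring_avail { uint16_t flags; uint16_t idx; uint16_t ring[]; };
struct vring_used_elem { uint32_t id; uint32_t len; };
struct vring_used { uint16_t flags; uint16_t idx; struct vring_used_elem ring[]; };

/* Bytes to map for the avail ring: header + ring + optional used_event. */
static uint64_t
avail_ring_len(uint64_t features, uint32_t qsize)
{
        uint64_t len = sizeof(struct vring_avail) + sizeof(uint16_t) * qsize;

        if (features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
                len += sizeof(uint16_t);
        return len;
}

/* Bytes to map for the used ring: header + ring + optional avail_event. */
static uint64_t
used_ring_len(uint64_t features, uint32_t qsize)
{
        uint64_t len = sizeof(struct vring_used) +
                       sizeof(struct vring_used_elem) * qsize;

        if (features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
                len += sizeof(uint16_t);
        return len;
}

int
main(void)
{
        uint64_t f = 1ULL << VIRTIO_RING_F_EVENT_IDX;

        printf("avail: %u bytes, used: %u bytes\n",
               (unsigned int)avail_ring_len(f, 256),
               (unsigned int)used_ring_len(f, 256));
        return 0;
}
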
@@ -726,13 +733,16 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
                   uint64_t host_phys_addr, uint64_t size)
 {
        struct guest_page *page, *last_page;
+       struct guest_page *old_pages;
 
        if (dev->nr_guest_pages == dev->max_guest_pages) {
                dev->max_guest_pages *= 2;
+               old_pages = dev->guest_pages;
                dev->guest_pages = realloc(dev->guest_pages,
                                        dev->max_guest_pages * sizeof(*page));
                if (!dev->guest_pages) {
                        RTE_LOG(ERR, VHOST_CONFIG, "cannot realloc guest_pages\n");
+                       free(old_pages);
                        return -1;
                }
        }
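
The add_one_guest_page() change fixes the classic realloc() leak: on failure realloc() returns NULL but leaves the original block allocated, so writing the result straight back into dev->guest_pages dropped the only reference to it. The patch keeps the old pointer and frees it on the error path, which is acceptable here because the caller treats the failure as fatal. A generic sketch of the pattern, outside of any vhost types:

#include <stdlib.h>

/* Double the capacity of an array without leaking the old block when
 * realloc() fails; mirrors the guest_pages fix above, where the error
 * path releases the array because the caller aborts anyway. */
static void *
grow_or_free(void *ptr, size_t *nmemb, size_t elem_size)
{
        size_t new_nmemb = *nmemb ? *nmemb * 2 : 8;
        void *tmp = realloc(ptr, new_nmemb * elem_size);

        if (tmp == NULL) {
                free(ptr);      /* old block is still valid: release it */
                *nmemb = 0;
                return NULL;
        }
        *nmemb = new_nmemb;
        return tmp;
}
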

index 5e1a1a7..15d682c 100644
@@ -335,13 +335,22 @@ fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
        uint16_t vec_id = *vec_idx;
        uint32_t len    = 0;
        uint64_t dlen;
+       uint32_t nr_descs = vq->size;
+       uint32_t cnt    = 0;
        struct vring_desc *descs = vq->desc;
        struct vring_desc *idesc = NULL;
 
+       if (unlikely(idx >= vq->size))
+               return -1;
+
        *desc_chain_head = idx;
 
        if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {
                dlen = vq->desc[idx].len;
+               nr_descs = dlen / sizeof(struct vring_desc);
+               if (unlikely(nr_descs > vq->size))
+                       return -1;
+
                descs = (struct vring_desc *)(uintptr_t)
                        vhost_iova_to_vva(dev, vq, vq->desc[idx].addr,
                                                &dlen,
@@ -366,7 +375,7 @@ fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
        }
 
        while (1) {
-               if (unlikely(idx >= vq->size)) {
+               if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) {
                        free_ind_table(idesc);
                        return -1;
                }
@@ -520,6 +529,12 @@ fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
        if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter)))
                return -1;
 
+       /*
+        * The ordering between desc flags and desc
+        * content reads needs to be enforced.
+        */
+       rte_smp_rmb();
+
        *desc_count = 0;
        *len = 0;
 
@@ -527,6 +542,9 @@ fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
                if (unlikely(vec_id >= BUF_VECTOR_MAX))
                        return -1;
 
+               if (unlikely(*desc_count >= vq->size))
+                       return -1;
+
                *desc_count += 1;
                *buf_id = descs[avail_idx].id;
 
@@ -791,6 +809,12 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
        rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
        avail_head = *((volatile uint16_t *)&vq->avail->idx);
 
+       /*
+        * The ordering between avail index and
+        * desc reads needs to be enforced.
+        */
+       rte_smp_rmb();
+
        for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
                uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
                uint16_t nr_vec = 0;
@@ -1373,6 +1397,12 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
        if (free_entries == 0)
                return 0;
 
+       /*
+        * The ordering between avail index and
+        * desc reads needs to be enforced.
+        */
+       rte_smp_rmb();
+
        VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
 
        count = RTE_MIN(count, MAX_PKT_BURST);
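
The three rte_smp_rmb() insertions enforce the same consumer-side rule: descriptor (or avail-ring) contents must not be read before the flag or index that publishes them. On x86 the macro is only a compiler barrier, but on weakly ordered CPUs such as Arm or POWER it emits a real fence, without which stale descriptor data could be observed. Below is a minimal illustration of the rule using C11 acquire/release atomics instead of the DPDK barrier macro; the slot structure and names are invented for the example.

#include <stdatomic.h>
#include <stdint.h>

#define SLOT_AVAIL 0x1

struct slot {
        _Atomic uint16_t flags; /* written last by the producer */
        uint32_t payload;       /* written first by the producer */
};

/* Producer: publish the payload, then release the flag. */
static void
produce(struct slot *s, uint32_t v)
{
        s->payload = v;
        atomic_store_explicit(&s->flags, SLOT_AVAIL, memory_order_release);
}

/* Consumer: acquire the flag, then read the payload. The acquire load
 * plays the role of rte_smp_rmb() in the hunks above: no payload read
 * may be reordered before the flags/index read. */
static int
consume(struct slot *s, uint32_t *out)
{
        uint16_t f = atomic_load_explicit(&s->flags, memory_order_acquire);

        if (!(f & SLOT_AVAIL))
                return -1;
        *out = s->payload;
        return 0;
}

int
main(void)
{
        struct slot s = { .flags = 0, .payload = 0 };
        uint32_t v;

        produce(&s, 42);
        return (consume(&s, &v) == 0 && v == 42) ? 0 : 1;
}
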
index bb7f443..df4226c 100644
@@ -79,7 +79,7 @@ foreach l:libraries
                        foreach d:deps
                                if not is_variable('shared_rte_' + d)
                                        error('Missing dependency ' + d +
-                                               ' for library ' + lib_name)
+                                               ' for library ' + libname)
                                endif
                                shared_deps += [get_variable('shared_rte_' + d)]
                                static_deps += [get_variable('static_rte_' + d)]
index 4122be8..c48ecb8 100644
@@ -2,7 +2,7 @@
 # Copyright(c) 2017 Intel Corporation
 
 project('DPDK', 'C',
-       version: '18.11.0',
+       version: '18.11.1',
        license: 'BSD',
        default_options: ['buildtype=release', 'default_library=static'],
        meson_version: '>= 0.41'
@@ -81,9 +81,11 @@ pkg.generate(name: meson.project_name(),
        filebase: 'lib' + meson.project_name().to_lower(),
        version: meson.project_version(),
        libraries: dpdk_libraries,
-       libraries_private: dpdk_drivers + dpdk_libraries +
+       libraries_private: dpdk_drivers + dpdk_static_libraries +
                        ['-Wl,-Bdynamic'] + dpdk_extra_ldflags,
-       description: 'The Data Plane Development Kit (DPDK)',
+       description: '''The Data Plane Development Kit (DPDK).
+Note that CFLAGS might contain an -march flag higher than typical baseline.
+This is required for a number of static inline functions in the public headers.''',
        subdirs: [get_option('include_subdir_arch'), '.'],
        extra_cflags: ['-include', 'rte_config.h'] + machine_args
 )
index c3291b1..541211c 100644
@@ -69,8 +69,8 @@ ifneq ($(filter $(AUTO_CPUFLAGS),__AVX512F__),)
 ifeq ($(CONFIG_RTE_ENABLE_AVX512),y)
 CPUFLAGS += AVX512F
 else
-# disable AVX512F support of gcc as a workaround for Bug 97
-ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+# disable AVX512F support for GCC & binutils 2.30 as a workaround for Bug 97
+ifeq ($(FORCE_DISABLE_AVX512),y)
 MACHINE_CFLAGS += -mno-avx512f
 endif
 endif
index 8296e6d..2d34b4e 100644
@@ -99,8 +99,9 @@ install-runtime:
                --exclude 'app/cmdline*' --exclude app/test \
                --exclude app/testacl --exclude app/testpipeline app | \
            tar -xf -      -C $(DESTDIR)$(bindir) $(TAR_X_FLAGS)
-       $(Q)$(call rte_mkdir,      $(DESTDIR)$(datadir))
-       $(Q)cp $(CP_FLAGS) $(RTE_SDK)/usertools $(DESTDIR)$(datadir)
+       $(Q)$(call rte_mkdir,      $(DESTDIR)$(datadir)/usertools)
+       $(Q)tar -cf -      -C $(RTE_SDK) --exclude meson.build usertools | \
+           tar -xf -      -C $(DESTDIR)$(datadir)/usertools $(TAR_X_FLAGS)
        $(Q)$(call rte_mkdir,      $(DESTDIR)$(sbindir))
        $(Q)$(call rte_symlink,    $(DESTDIR)$(datadir)/usertools/dpdk-devbind.py, \
                                   $(DESTDIR)$(sbindir)/dpdk-devbind)
index 4490429..dbddc98 100644
@@ -20,6 +20,16 @@ HOST_GCC_MINOR = $(shell echo __GNUC_MINOR__ | $(HOSTCC) -E -x c - | tail -n 1)
 HOST_GCC_PATCHLEVEL = $(shell echo __GNUC_PATCHLEVEL__ | $(HOSTCC) -E -x c - | tail -n 1)
 HOST_GCC_VERSION = $(HOST_GCC_MAJOR)$(HOST_GCC_MINOR)
 
+LD_VERSION = $(shell $(LD) -v)
+# disable AVX512F support for GCC & binutils 2.30 as a workaround for Bug 97
+ifneq ($(filter 2.30%,$(LD_VERSION)),)
+FORCE_DISABLE_AVX512 := y
+# print warning only once for librte_eal
+ifneq ($(filter %librte_eal,$(CURDIR)),)
+$(warning AVX512 support disabled because of ld 2.30. See Bug 97)
+endif
+endif
+
 # if GCC is older than 4.x
 ifeq ($(shell test $(GCC_VERSION) -lt 40 && echo 1), 1)
        MACHINE_CFLAGS =
index b98bd91..36c7ee8 100644
@@ -2,7 +2,7 @@
 # Copyright 2014 6WIND S.A.
 
 Name: dpdk
-Version: 18.11
+Version: 18.11.1
 Release: 1
 Packager: packaging@6wind.com
 URL: http://dpdk.org
index 554e994..5a4816f 100644
@@ -39,6 +39,8 @@ test_sources = files('commands.c',
        'test_event_eth_tx_adapter.c',
        'test_event_timer_adapter.c',
        'test_eventdev.c',
+       'test_external_mem.c',
+       'test_fbarray.c',
        'test_func_reentrancy.c',
        'test_flow_classify.c',
        'test_hash.c',
@@ -170,6 +172,7 @@ test_names = [
        'eventdev_octeontx_autotest',
        'eventdev_sw_autotest',
        'external_mem_autotest',
+       'fbarray_autotest',
        'func_reentrancy_autotest',
        'flow_classify_autotest',
        'hash_autotest',
index 1c3f29f..4f1298e 100644
@@ -401,13 +401,14 @@ test_blockcipher_one_case(const struct blockcipher_test_case *t,
 
        /* Verify results */
        if (op->status != RTE_CRYPTO_OP_STATUS_SUCCESS) {
-               if (t->op_mask & BLOCKCIPHER_TEST_OP_AUTH_VERIFY)
+               if ((t->op_mask & BLOCKCIPHER_TEST_OP_AUTH_VERIFY) &&
+                       (op->status == RTE_CRYPTO_OP_STATUS_AUTH_FAILED))
                        snprintf(test_msg, BLOCKCIPHER_TEST_MSG_LEN, "line %u "
                                "FAILED: Digest verification failed "
                                "(0x%X)", __LINE__, op->status);
                else
                        snprintf(test_msg, BLOCKCIPHER_TEST_MSG_LEN, "line %u "
-                               "FAILED: Digest verification failed "
+                               "FAILED: Operation failed "
                                "(0x%X)", __LINE__, op->status);
                status = TEST_FAILED;
                goto error_exit;
index 2acab9d..81e345b 100644
@@ -18,6 +18,7 @@
 #include <sys/file.h>
 #include <limits.h>
 
+#include <rte_per_lcore.h>
 #include <rte_debug.h>
 #include <rte_string_fns.h>
 
@@ -477,40 +478,50 @@ test_missing_c_flag(void)
                                "-n", "3", "-l", "1," };
        const char *argv10[] = { prgname, prefix, mp_flag,
                                 "-n", "3", "-l", "1#2" };
+       /* core number is negative value */
+       const char * const argv11[] = { prgname, prefix, mp_flag,
+                               "-n", "3", "-l", "-5" };
+       const char * const argv12[] = { prgname, prefix, mp_flag,
+                               "-n", "3", "-l", "-5-7" };
+       /* core number is maximum value */
+       const char * const argv13[] = { prgname, prefix, mp_flag,
+                               "-n", "3", "-l", RTE_STR(RTE_MAX_LCORE) };
+       const char * const argv14[] = { prgname, prefix, mp_flag,
+                               "-n", "3", "-l", "1-"RTE_STR(RTE_MAX_LCORE) };
        /* sanity check test - valid corelist value */
-       const char *argv11[] = { prgname, prefix, mp_flag,
+       const char * const argv15[] = { prgname, prefix, mp_flag,
                                 "-n", "3", "-l", "1-2,3" };
 
        /* --lcores flag but no lcores value */
-       const char *argv12[] = { prgname, prefix, mp_flag,
+       const char * const argv16[] = { prgname, prefix, mp_flag,
                                 "-n", "3", "--lcores" };
-       const char *argv13[] = { prgname, prefix, mp_flag,
+       const char * const argv17[] = { prgname, prefix, mp_flag,
                                 "-n", "3", "--lcores", " " };
        /* bad lcores value */
-       const char *argv14[] = { prgname, prefix, mp_flag,
+       const char * const argv18[] = { prgname, prefix, mp_flag,
                                 "-n", "3", "--lcores", "1-3-5" };
-       const char *argv15[] = { prgname, prefix, mp_flag,
+       const char * const argv19[] = { prgname, prefix, mp_flag,
                                 "-n", "3", "--lcores", "0-1,,2" };
-       const char *argv16[] = { prgname, prefix, mp_flag,
+       const char * const argv20[] = { prgname, prefix, mp_flag,
                                 "-n", "3", "--lcores", "0-,1" };
-       const char *argv17[] = { prgname, prefix, mp_flag,
+       const char * const argv21[] = { prgname, prefix, mp_flag,
                                 "-n", "3", "--lcores", "(0-,2-4)" };
-       const char *argv18[] = { prgname, prefix, mp_flag,
+       const char * const argv22[] = { prgname, prefix, mp_flag,
                                 "-n", "3", "--lcores", "(-1,2)" };
-       const char *argv19[] = { prgname, prefix, mp_flag,
+       const char * const argv23[] = { prgname, prefix, mp_flag,
                                 "-n", "3", "--lcores", "(2-4)@(2-4-6)" };
-       const char *argv20[] = { prgname, prefix, mp_flag,
+       const char * const argv24[] = { prgname, prefix, mp_flag,
                                 "-n", "3", "--lcores", "(a,2)" };
-       const char *argv21[] = { prgname, prefix, mp_flag,
+       const char * const argv25[] = { prgname, prefix, mp_flag,
                                 "-n", "3", "--lcores", "1-3@(1,3)" };
-       const char *argv22[] = { prgname, prefix, mp_flag,
+       const char * const argv26[] = { prgname, prefix, mp_flag,
                                 "-n", "3", "--lcores", "3@((1,3)" };
-       const char *argv23[] = { prgname, prefix, mp_flag,
+       const char * const argv27[] = { prgname, prefix, mp_flag,
                                 "-n", "3", "--lcores", "(4-7)=(1,3)" };
-       const char *argv24[] = { prgname, prefix, mp_flag,
+       const char * const argv28[] = { prgname, prefix, mp_flag,
                                 "-n", "3", "--lcores", "[4-7]@(1,3)" };
        /* sanity check of tests - valid lcores value */
-       const char *argv25[] = { prgname, prefix, mp_flag,
+       const char * const argv29[] = { prgname, prefix, mp_flag,
                                 "-n", "3", "--lcores",
                                 "0-1,2@(5-7),(3-5)@(0,2),(0,6),7"};
 
@@ -538,31 +549,35 @@ test_missing_c_flag(void)
                        || launch_proc(argv7) == 0
                        || launch_proc(argv8) == 0
                        || launch_proc(argv9) == 0
-                       || launch_proc(argv10) == 0) {
+                       || launch_proc(argv10) == 0
+                       || launch_proc(argv11) == 0
+                       || launch_proc(argv12) == 0
+                       || launch_proc(argv13) == 0
+                       || launch_proc(argv14) == 0) {
                printf("Error - "
                       "process ran without error with invalid -l flag\n");
                return -1;
        }
-       if (launch_proc(argv11) != 0) {
+       if (launch_proc(argv15) != 0) {
                printf("Error - "
                       "process did not run ok with valid corelist value\n");
                return -1;
        }
 
        /* start --lcores tests */
-       if (launch_proc(argv12) == 0 || launch_proc(argv13) == 0 ||
-           launch_proc(argv14) == 0 || launch_proc(argv15) == 0 ||
-           launch_proc(argv16) == 0 || launch_proc(argv17) == 0 ||
+       if (launch_proc(argv16) == 0 || launch_proc(argv17) == 0 ||
            launch_proc(argv18) == 0 || launch_proc(argv19) == 0 ||
            launch_proc(argv20) == 0 || launch_proc(argv21) == 0 ||
-           launch_proc(argv21) == 0 || launch_proc(argv22) == 0 ||
-           launch_proc(argv23) == 0 || launch_proc(argv24) == 0) {
+           launch_proc(argv22) == 0 || launch_proc(argv23) == 0 ||
+           launch_proc(argv24) == 0 || launch_proc(argv25) == 0 ||
+           launch_proc(argv26) == 0 || launch_proc(argv27) == 0 ||
+           launch_proc(argv28) == 0) {
                printf("Error - "
                       "process ran without error with invalid --lcore flag\n");
                return -1;
        }
 
-       if (launch_proc(argv25) != 0) {
+       if (launch_proc(argv29) != 0) {
                printf("Error - "
                       "process did not run ok with valid corelist value\n");
                return -1;
@@ -1143,7 +1158,7 @@ test_memory_flags(void)
        const char *argv1[] = {prgname, "-c", "10", "-n", "2",
                        "--file-prefix=" memtest, "-m", DEFAULT_MEM_SIZE};
 
-       /* invalid (zero) --socket-mem flag */
+       /* valid (zero) --socket-mem flag */
        const char *argv2[] = {prgname, "-c", "10", "-n", "2",
                        "--file-prefix=" memtest, "--socket-mem=0,0,0,0"};
 
@@ -1241,8 +1256,8 @@ test_memory_flags(void)
                printf("Error - process failed with valid -m flag!\n");
                return -1;
        }
-       if (launch_proc(argv2) == 0) {
-               printf("Error - process run ok with invalid (zero) --socket-mem!\n");
+       if (launch_proc(argv2) != 0) {
+               printf("Error - process failed with valid (zero) --socket-mem!\n");
                return -1;
        }
 
index 6b695ce..480ae97 100644
@@ -39,7 +39,7 @@ static struct perf htm_results, non_htm_results;
 
 struct {
        uint32_t *keys;
-       uint32_t *found;
+       uint8_t *found;
        uint32_t num_insert;
        uint32_t rounded_tot_insert;
        struct rte_hash *h;
@@ -126,7 +126,7 @@ init_params(int use_ext, int use_htm, int use_jhash)
        unsigned int i;
 
        uint32_t *keys = NULL;
-       uint32_t *found = NULL;
+       uint8_t *found = NULL;
        struct rte_hash *handle;
 
        struct rte_hash_parameters hash_params = {
@@ -173,7 +173,7 @@ init_params(int use_ext, int use_htm, int use_jhash)
                goto err;
        }
 
-       found = rte_zmalloc(NULL, sizeof(uint32_t) * TOTAL_ENTRY, 0);
+       found = rte_zmalloc(NULL, sizeof(uint8_t) * TOTAL_ENTRY, 0);
        if (found == NULL) {
                printf("RTE_ZMALLOC failed\n");
                goto err;
@@ -361,7 +361,7 @@ test_hash_readwrite_perf(struct perf *perf_results, int use_htm,
 
        const void *next_key;
        void *next_data;
-       uint32_t iter = 0;
+       uint32_t iter;
        int use_jhash = 0;
 
        uint32_t duplicated_keys = 0;
@@ -536,6 +536,8 @@ test_hash_readwrite_perf(struct perf *perf_results, int use_htm,
 
                rte_eal_mp_wait_lcore();
 
+               iter = 0;
+               memset(tbl_rw_test_param.found, 0, TOTAL_ENTRY);
                while (rte_hash_iterate(tbl_rw_test_param.h,
                                &next_key, &next_data, &iter) >= 0) {
                        /* Search for the key in the list of keys added .*/
@@ -619,7 +621,7 @@ test_hash_readwrite_main(void)
        if (rte_lcore_count() <= 2) {
                printf("More than two lcores are required "
                        "to do read write test\n");
-               return 0;
+               return -1;
        }
 
        RTE_LCORE_FOREACH_SLAVE(core_id) {
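
The hash read/write test hunks above also show how rte_hash_iterate() is meant to be driven: the iterator must be reset to 0 before each full pass over the table, and per-pass bookkeeping (here the found[] array) has to be cleared, otherwise entries are skipped or double-counted on the next pass. A small sketch of the iteration idiom against an already-created table; only the counting logic is illustrative, the rte_hash_iterate() call itself is the regular librte_hash API.

#include <stdint.h>
#include <rte_hash.h>

/* Count the entries currently stored in an rte_hash table. */
static uint32_t
count_entries(const struct rte_hash *h)
{
        const void *key;
        void *data;
        uint32_t iter = 0;      /* must start at 0 for every full pass */
        uint32_t n = 0;

        while (rte_hash_iterate(h, &key, &data, &iter) >= 0)
                n++;

        return n;
}
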
index 94d54d7..3c2f36b 100644
@@ -154,7 +154,7 @@ test_metrics_update_values(void)
 
        /* Failed Test: Invalid count size */
        err = rte_metrics_update_values(RTE_METRICS_GLOBAL,
-                        KEY, &value[0], 0);
+                        KEY, &value[0], ARRAY_SIZE(value));
        TEST_ASSERT(err < 0, "%s, %d", __func__, __LINE__);
 
        /* Failed Test: Invalid port_id(lower value) and valid data */